From 473941ceff766d295795587218cdaad8b90c758b Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Fri, 22 May 2026 22:09:10 -0400
Subject: [PATCH 01/21] drive: add goal_placement=random for goals anywhere on
 the map

New [env] knob goal_placement (gigaflow): 0 = route (existing forward-route
waypoints), 1 = random. In random mode compute_goals samples each target
waypoint at a random drivable point anywhere on the map, rejecting points
within min_waypoint_spacing of the agent, so goals can land in any direction
including behind it. If no sampled point passes within 30 attempts, the goal
falls back to the agent's own position. The route/path are still built at
spawn for lane observations and progression; only the goal positions change.
Goal-reached stays a pure euclidean check, so random goals reward correctly.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini |  2 +
 pufferlib/ocean/drive/binding.c  |  1 +
 pufferlib/ocean/drive/drive.h    | 75 ++++++++++++++++++++++++++++++++
 pufferlib/ocean/drive/drive.py   |  5 +++
 4 files changed, 83 insertions(+)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index 571b2ea582..1a40e67747 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -54,6 +54,8 @@ collision_behavior = 1
 offroad_behavior = 1
 ; Traffic light behavior - options: 0 - Ignore, 1 - Stop, 2 - Remove
 traffic_light_behavior = 1
+; Goal placement (gigaflow) - options: 0 - route (forward waypoints), 1 - random (anywhere on map)
+goal_placement = 0
 ; Number of steps before reset
 scenario_length = 1280
 ; Frequency of resampling scenario (in steps), 0 to disable
diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c
index f67243a32d..81ba26d86f 100644
--- a/pufferlib/ocean/drive/binding.c
+++ b/pufferlib/ocean/drive/binding.c
@@ -1796,6 +1796,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
     env->collision_behavior = (int) unpack(kwargs, "collision_behavior");
     env->offroad_behavior = (int) unpack(kwargs, "offroad_behavior");
     env->traffic_light_behavior = (int) unpack(kwargs, "traffic_light_behavior");
+    env->goal_placement = (int) unpack(kwargs, "goal_placement");
     env->emit_completed_episodes = (int) unpack(kwargs, "emit_completed_episodes");
     env->next_episode_index = 0;
     env->completed_episodes_count = 0;
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index 329e1bdf76..a2b268a433 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -121,6 +121,10 @@
 #define TARGET_STATIC 0
 #define TARGET_DYNAMIC 1
 
+// GOAL_PLACEMENT modes (controls how gigaflow goals are sampled)
+#define GOAL_PLACEMENT_ROUTE 0  // goals lie ahead along a forward random-walk route
+#define GOAL_PLACEMENT_RANDOM 1 // goals are random drivable points anywhere on the map
+
 // Observation feature counts
 #define EGO_FEATURES_CLASSIC 8
 #define EGO_FEATURES_JERK 10
@@ -354,6 +358,7 @@ struct Drive {
     int collision_behavior;     // 0 = none, 1=stop, 2 = remove
     int offroad_behavior;       // 0 = none, 1=stop, 2 = remove
     int traffic_light_behavior; // 0 = none, 1=stop, 2 = remove
+    int goal_placement;         // 0 = route (forward waypoints), 1 = random (anywhere on map)
     // Metadata fields
     char scenario_id[128];
     char dataset_name[32];
@@ -1862,10 +1867,80 @@ static int compute_new_route(Drive *env, int agent_idx, int current_lane_idx) {
     return 1; // Success
 }
 
+// Place each target waypoint at a random drivable lane point anywhere on the map.
+// Candidates closer than min_waypoint_spacing to the agent are rejected; if none
+// passes within the attempt budget (or the map has no drivable cells) the goal
+// falls back to the agent's own position. Goals can land in any direction,
+// including behind the agent. The route and path are still built at spawn (used
+// for lane observations and progression); only the goal positions are overridden.
+static void compute_goals_random(Drive *env, int agent_idx) {
+    Agent *agent = &env->agents[agent_idx];
+    int num_target_waypoints = env->num_target_waypoints;
+    if (num_target_waypoints <= 0 || num_target_waypoints > MAX_TARGET_WAYPOINTS) {
+        num_target_waypoints = MAX_TARGET_WAYPOINTS;
+    }
+
+    float min_dist_sq = env->min_waypoint_spacing * env->min_waypoint_spacing;
+    int num_drivable = env->grid_map->num_drivable_grid_cell;
+    // rand() % 0 is undefined: with no drivable cells, skip sampling and use the fallback.
+    const int MAX_GOAL_ATTEMPTS = (num_drivable > 0) ? 30 : 0;
+
+    for (int i = 0; i < num_target_waypoints; i++) {
+        // Fall back to the agent's own position if no distant point is found.
+        float goal_x = agent->sim_x;
+        float goal_y = agent->sim_y;
+        float goal_z = agent->sim_z;
+        for (int attempt = 0; attempt < MAX_GOAL_ATTEMPTS; attempt++) {
+            int grid_idx = env->grid_map->grid_index_drivable[rand() % num_drivable];
+            int cell_count = env->grid_map->cell_entities_count[grid_idx];
+            // Gather the drivable lanes in this cell; the loop bound keeps the
+            // stack buffer from overflowing if the cell holds more entities.
+            GridMapEntity cell_candidates[MAX_ENTITIES_PER_CELL];
+            int candidate_count = 0;
+            for (int j = 0; j < cell_count && candidate_count < MAX_ENTITIES_PER_CELL; j++) {
+                GridMapEntity entity = env->grid_map->cells[grid_idx][j];
+                if (is_drivable_road_lane(env->road_elements[entity.entity_idx].type)) {
+                    cell_candidates[candidate_count++] = entity;
+                }
+            }
+            if (candidate_count == 0) {
+                continue;
+            }
+
+            GridMapEntity chosen = cell_candidates[rand() % candidate_count];
+            RoadMapElement *lane = &env->road_elements[chosen.entity_idx];
+            float cx = lane->x[chosen.geometry_idx];
+            float cy = lane->y[chosen.geometry_idx];
+            float dx = cx - agent->sim_x;
+            float dy = cy - agent->sim_y;
+            if (dx * dx + dy * dy < min_dist_sq) {
+                continue; // too close to the agent; resample
+            }
+            goal_x = cx;
+            goal_y = cy;
+            goal_z = lane->z[chosen.geometry_idx];
+            break;
+        }
+        agent->goal_positions_x[i] = goal_x;
+        agent->goal_positions_y[i] = goal_y;
+        agent->goal_positions_z[i] = goal_z;
+    }
+
+    agent->current_goal_idx = 0;
+    agent->goal_position_x = agent->goal_positions_x[0];
+    agent->goal_position_y = agent->goal_positions_y[0];
+    agent->goal_position_z = agent->goal_positions_z[0];
+}
+
 static void compute_goals(Drive *env, int agent_idx) {
     Agent *agent = &env->agents[agent_idx];
     struct Path *path = agent->path;
 
+    if (env->goal_placement == GOAL_PLACEMENT_RANDOM) {
+        compute_goals_random(env, agent_idx);
+        return;
+    }
+
     // Validate path exists
     if (path == NULL || path->num_waypoints == 0) {
         printf("[GIGAFLOW WARNING] -> Agent %d has no valid path\n", agent_idx);
diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
index d49563ebf6..1c44e453f1 100644
--- a/pufferlib/ocean/drive/drive.py
+++ b/pufferlib/ocean/drive/drive.py
@@ -47,6 +47,7 @@ def __init__(
         collision_behavior=0,
         offroad_behavior=0,
         traffic_light_behavior=0,
+        goal_placement=0,
         # emit_completed_episodes=True: env emits one summary dict per
         # completed episode via info (drained from a per-env C-side queue).
         # capture_compact_replay=True additionally records per-step agent and
@@ -146,6 +147,9 @@ def __init__(
         self.collision_behavior = collision_behavior
         self.offroad_behavior = offroad_behavior
         self.traffic_light_behavior = traffic_light_behavior
+        if goal_placement not in (0, 1):
+            raise ValueError(f"goal_placement must be 0 (route) or 1 (random). Got: {goal_placement}")
+        self.goal_placement = goal_placement
         self.capture_compact_replay = bool(capture_compact_replay)
         # capture_compact_replay implies emit_completed_episodes, since the
         # bundle rides on the per-episode summary.
@@ -381,6 +385,7 @@ def _env_init_kwargs(self, map_file, max_agents):
             "collision_behavior": self.collision_behavior,
             "offroad_behavior": self.offroad_behavior,
             "traffic_light_behavior": self.traffic_light_behavior,
+            "goal_placement": self.goal_placement,
             "emit_completed_episodes": int(self.emit_completed_episodes),
             "goal_radius": self.goal_radius,
             "min_waypoint_spacing": self.min_waypoint_spacing,

From 02d8bd274ce1bda9823f61aa2c924b4caba5df9d Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Fri, 22 May 2026 22:13:53 -0400
Subject: [PATCH 02/21] scripts: add single_agent_speed_run.sbatch launcher

Manual launcher for single-agent-per-map training: one agent per env on Town02
only, many envs, goal_placement=random (goals anywhere on the map incl. behind).
Auto-creates the Town02-only map dir, runs gpu_heartbeat alongside, wandb-tracked.
Scheduling and scale (NUM_AGENTS) are overridable without editing the file.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/single_agent_speed_run.sbatch | 69 +++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100755 scripts/single_agent_speed_run.sbatch

diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
new file mode 100755
index 0000000000..907659d140
--- /dev/null
+++ b/scripts/single_agent_speed_run.sbatch
@@ -0,0 +1,69 @@
+#!/bin/bash
+# Single-agent "speed run" training.
+#
+# One controlled agent per environment on a single CARLA map (Town02), with many
+# parallel environments, and goals sampled at random drivable points anywhere on
+# the map (goal_placement=1) -- so the goal can be far away and in any direction,
+# including behind the agent. On reaching a goal a new random one is drawn, so the
+# agent does continuous point-to-point runs across the town.
+#
+# Launch:
+#   sbatch scripts/single_agent_speed_run.sbatch
+#
+# Override scheduling / scale without editing this file:
+#   sbatch --partition=a100_tandon --account=torch_pr_924_tandon_advanced scripts/single_agent_speed_run.sbatch
+#   NUM_AGENTS=2048 sbatch scripts/single_agent_speed_run.sbatch
+#
+# Logs land in slurm-<jobid>.out in the directory you submit from (repo root).
+# The h200_tandon QOS pool is often full (QOSGrpGRES); if it won't schedule,
+# resubmit with --partition=a100_tandon or --partition=h100_tandon.
+#
+#SBATCH --job-name=single_agent_speed_run
+#SBATCH --account=torch_pr_924_tandon_advanced
+#SBATCH --partition=h200_tandon
+#SBATCH --gres=gpu:1
+#SBATCH --cpus-per-task=16
+#SBATCH --mem=128gb
+#SBATCH --time=12:00:00
+
+set -euo pipefail
+
+# Tunables (override via the environment, e.g. NUM_AGENTS=2048 sbatch ...).
+NUM_AGENTS="${NUM_AGENTS:-1024}"            # number of single-agent environments
+MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-300}"  # obs goal-vector normalization (m); cover Town02's extent
+WANDB_GROUP="${WANDB_GROUP:-single_agent_speed_runs}"
+
+OVERLAY="/scratch/$USER/images/PufferDrive/overlay-15GB-500K.ext3"
+IMAGE="/share/apps/images/cuda12.8.1-cudnn9.8.0-ubuntu24.04.2.sif"
+
+singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
+    source /scratch/'"$USER"'/venvs/pufferdrive/bin/activate
+    export TORCH_CUDA_ARCH_LIST="8.0;8.9;9.0"
+    export PYTHONNOUSERSITE=1
+    NCCL_DIR=$(compgen -G "/scratch/'"$USER"'/venvs/pufferdrive/lib/python3.12/site-packages/nvidia/nccl/lib" 2>/dev/null) \
+        && export LD_LIBRARY_PATH="$NCCL_DIR:$LD_LIBRARY_PATH"
+    cd /scratch/'"$USER"'/code/PufferDrive
+
+    # Single-map dir holding only Town02, derived from the bundled carla maps.
+    if [ ! -e pufferlib/resources/drive/binaries/carla_town02/opendrive__Town02.bin ]; then
+        mkdir -p pufferlib/resources/drive/binaries/carla_town02
+        cp pufferlib/resources/drive/binaries/carla/opendrive__Town02.bin pufferlib/resources/drive/binaries/carla_town02/
+    fi
+
+    python scripts/gpu_heartbeat.py > /tmp/gpu_heartbeat_'"$SLURM_JOB_ID"'.log 2>&1 &
+    HEARTBEAT_PID=$!
+
+    python -m pufferlib.pufferl train puffer_drive \
+        --env.map_dir pufferlib/resources/drive/binaries/carla_town02 \
+        --env.num_maps 1 \
+        --env.max_agents_per_env 1 \
+        --env.num_agents '"$NUM_AGENTS"' \
+        --env.goal_placement 1 \
+        --env.num_target_waypoints 1 \
+        --env.max_goal_position '"$MAX_GOAL_POSITION"' \
+        --wandb --wandb-project puffer_drive --wandb-group '"$WANDB_GROUP"'
+    TRAIN_EXIT=$?
+
+    kill $HEARTBEAT_PID 2>/dev/null || true
+    exit $TRAIN_EXIT
+'

From fd9ca04114cf6aaede77d864a399e4e73d99a7e2 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Fri, 22 May 2026 22:20:57 -0400
Subject: [PATCH 03/21] scripts: 3-seed array into wandb group "Nightly Test"

Launch the single-agent speed run as a 3-task SLURM array (one seed each via
--train.seed), all logged to wandb group "Nightly Test". max_goal_position=1000
and total_timesteps capped at 1B. Town02-folder create is atomic so concurrent
array tasks can't read a partial map file.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/single_agent_speed_run.sbatch | 30 +++++++++++++++++++--------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index 907659d140..7c5adbe140 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Single-agent "speed run" training.
+# Single-agent "speed run" training -- nightly 3-seed sweep.
 #
 # One controlled agent per environment on a single CARLA map (Town02), with many
 # parallel environments, and goals sampled at random drivable points anywhere on
@@ -7,20 +7,24 @@
 # including behind the agent. On reaching a goal a new random one is drawn, so the
 # agent does continuous point-to-point runs across the town.
 #
+# Launches 3 seeds as a SLURM job array, all logged to wandb group "Nightly Test".
+#
 # Launch:
 #   sbatch scripts/single_agent_speed_run.sbatch
 #
-# Override scheduling / scale without editing this file:
+# Override scheduling / scale / seeds without editing this file:
 #   sbatch --partition=a100_tandon --account=torch_pr_924_tandon_advanced scripts/single_agent_speed_run.sbatch
 #   NUM_AGENTS=2048 sbatch scripts/single_agent_speed_run.sbatch
+#   SEED_BASE=100 sbatch scripts/single_agent_speed_run.sbatch   # seeds 100,101,102
 #
-# Logs land in slurm-<jobid>.out in the directory you submit from (repo root).
+# Logs land in slurm-<jobid>_<seedidx>.out in the directory you submit from.
 # The h200_tandon QOS pool is often full (QOSGrpGRES); if it won't schedule,
 # resubmit with --partition=a100_tandon or --partition=h100_tandon.
 #
 #SBATCH --job-name=single_agent_speed_run
 #SBATCH --account=torch_pr_924_tandon_advanced
 #SBATCH --partition=h200_tandon
+#SBATCH --array=0-2
 #SBATCH --gres=gpu:1
 #SBATCH --cpus-per-task=16
 #SBATCH --mem=128gb
@@ -29,9 +33,12 @@
 set -euo pipefail
 
 # Tunables (override via the environment, e.g. NUM_AGENTS=2048 sbatch ...).
-NUM_AGENTS="${NUM_AGENTS:-1024}"            # number of single-agent environments
-MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-300}"  # obs goal-vector normalization (m); cover Town02's extent
-WANDB_GROUP="${WANDB_GROUP:-single_agent_speed_runs}"
+NUM_AGENTS="${NUM_AGENTS:-1024}"                # number of single-agent environments
+MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-1000}"  # obs goal-vector normalization (m); cover Town02's extent
+TOTAL_TIMESTEPS="${TOTAL_TIMESTEPS:-1000000000}"  # cap the run at 1B steps
+WANDB_GROUP="${WANDB_GROUP:-Nightly Test}"
+SEED_BASE="${SEED_BASE:-0}"
+SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID))      # one seed per array task: SEED_BASE .. SEED_BASE+2
 
 OVERLAY="/scratch/$USER/images/PufferDrive/overlay-15GB-500K.ext3"
 IMAGE="/share/apps/images/cuda12.8.1-cudnn9.8.0-ubuntu24.04.2.sif"
@@ -45,9 +52,12 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
     cd /scratch/'"$USER"'/code/PufferDrive
 
     # Single-map dir holding only Town02, derived from the bundled carla maps.
-    if [ ! -e pufferlib/resources/drive/binaries/carla_town02/opendrive__Town02.bin ]; then
+    # Atomic create (temp + mv) so concurrent array tasks never read a partial file.
+    DEST=pufferlib/resources/drive/binaries/carla_town02/opendrive__Town02.bin
+    if [ ! -e "$DEST" ]; then
         mkdir -p pufferlib/resources/drive/binaries/carla_town02
-        cp pufferlib/resources/drive/binaries/carla/opendrive__Town02.bin pufferlib/resources/drive/binaries/carla_town02/
+        TMP=$(mktemp pufferlib/resources/drive/binaries/carla_town02/Town02.XXXXXX)
+        cp pufferlib/resources/drive/binaries/carla/opendrive__Town02.bin "$TMP" && mv -f "$TMP" "$DEST"
     fi
 
     python scripts/gpu_heartbeat.py > /tmp/gpu_heartbeat_'"$SLURM_JOB_ID"'.log 2>&1 &
@@ -61,7 +71,9 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
         --env.goal_placement 1 \
         --env.num_target_waypoints 1 \
         --env.max_goal_position '"$MAX_GOAL_POSITION"' \
-        --wandb --wandb-project puffer_drive --wandb-group '"$WANDB_GROUP"'
+        --train.total_timesteps '"$TOTAL_TIMESTEPS"' \
+        --train.seed '"$SEED"' \
+        --wandb --wandb-project puffer_drive --wandb-group "'"$WANDB_GROUP"'" --tag seed'"$SEED"'
     TRAIN_EXIT=$?
 
     kill $HEARTBEAT_PID 2>/dev/null || true

From 490f5715980318300f19795983fa24b6b327106c Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Fri, 22 May 2026 22:54:13 -0400
Subject: [PATCH 04/21] drive: optional shared map cache (use_map_cache) for
 static geometry

Adds an opt-in [env] knob use_map_cache (default 0). When set, environments
loading the same map file share one read-only copy of the static geometry --
road_elements, the spatial grid (cells + neighbor cache), and the lane graph
(incl. its O(n^2) distance matrix) -- via a per-process, reference-counted cache
keyed by map filename. Per-env mutable state (agents, traffic-light states) is
never shared, so dynamic traffic lights stay correct.

This removes the per-env map duplication that OOMs single-map / many-env runs
(e.g. 1 map x 1024 single-agent envs): the geometry is built once and borrowed.

Lifecycle: init() builds-or-borrows; c_close() decrements the refcount and frees
the entry only on the last reference. A getpid() guard prevents a forked worker
from freeing the parent's copy-on-write geometry (the use-after-free that the
upstream WIP #346 hit). Default off keeps existing single-owner behavior; the
single_agent_speed_run launcher passes --env.use_map_cache 1.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini      |   2 +
 pufferlib/ocean/drive/binding.c       |   1 +
 pufferlib/ocean/drive/drive.h         | 180 ++++++++++++++++++++++----
 pufferlib/ocean/drive/drive.py        |   5 +
 scripts/single_agent_speed_run.sbatch |   1 +
 5 files changed, 163 insertions(+), 26 deletions(-)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index 1a40e67747..9f2f217428 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -56,6 +56,8 @@ offroad_behavior = 1
 traffic_light_behavior = 1
 ; Goal placement (gigaflow) - options: 0 - route (forward waypoints), 1 - random (anywhere on map)
 goal_placement = 0
+; Share static map geometry (roads/grid/lane-graph) across envs using the same map - 0 off, 1 on
+use_map_cache = 0
 ; Number of steps before reset
 scenario_length = 1280
 ; Frequency of resampling scenario (in steps), 0 to disable
diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c
index 81ba26d86f..9b5ce3d536 100644
--- a/pufferlib/ocean/drive/binding.c
+++ b/pufferlib/ocean/drive/binding.c
@@ -1797,6 +1797,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
     env->offroad_behavior = (int) unpack(kwargs, "offroad_behavior");
     env->traffic_light_behavior = (int) unpack(kwargs, "traffic_light_behavior");
     env->goal_placement = (int) unpack(kwargs, "goal_placement");
+    env->use_map_cache = (int) unpack(kwargs, "use_map_cache");
     env->emit_completed_episodes = (int) unpack(kwargs, "emit_completed_episodes");
     env->next_episode_index = 0;
     env->completed_episodes_count = 0;
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index a2b268a433..5fdfefe581 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -288,6 +288,30 @@ struct GridMap {
     int num_drivable_grid_cell;
 };
 
+// Static, read-only map geometry shared across environments that load the same
+// map file when use_map_cache is set. Holds only data that is never mutated after
+// load: road geometry, the spatial grid (cells + neighbor cache), and the lane
+// graph. Per-env mutable data (agents, traffic-light states) is never shared.
+// Reference-counted; freed when the last borrowing env in the owning process
+// closes. See init() / c_close().
+struct SharedMapData {
+    char *map_name;                // cache key: heap copy of the map filename, compared with strcmp
+    RoadMapElement *road_elements; // road geometry array, num_road_elements long
+    int num_road_elements;
+    GridMap *grid_map;             // spatial grid, including its neighbor cache
+    int *neighbor_offsets;
+    struct LaneGraph lane_graph;   // stored by value; borrowing envs copy the struct in init()
+    int ref_count;                 // number of envs currently holding this entry
+};
+
+// Per-process map cache. Built lazily in init(). g_map_cache_pid stamps the
+// process that built it: a forked worker inherits these pointers via copy-on-write
+// and must never free them (it would corrupt the parent's heap), so the free path
+// in c_close is guarded by getpid() == g_map_cache_pid.
+static struct SharedMapData **g_map_cache = NULL;
+static int g_map_cache_count = 0;
+static pid_t g_map_cache_pid = 0;
+
 struct Drive {
     Client *client;
     // Render mode: RENDER_WINDOW (0) for interactive viewer, RENDER_HEADLESS (1)
@@ -359,6 +383,8 @@ struct Drive {
     int offroad_behavior;       // 0 = none, 1=stop, 2 = remove
     int traffic_light_behavior; // 0 = none, 1=stop, 2 = remove
     int goal_placement;         // 0 = route (forward waypoints), 1 = random (anywhere on map)
+    int use_map_cache;          // 0 = each env owns its map copy, 1 = share static geometry across envs
+    struct SharedMapData *shared_map; // non-NULL when this env borrows cached geometry
     // Metadata fields
     char scenario_id[128];
     char dataset_name[32];
@@ -3433,18 +3459,107 @@ void remove_bad_trajectories(Drive *env) {
     env->timestep = 0;
 }
 
+static struct SharedMapData *map_cache_lookup(const char *map_name) {
+    for (int i = 0; i < g_map_cache_count; i++) { // linear scan of the per-process cache
+        if (g_map_cache[i] != NULL && strcmp(g_map_cache[i]->map_name, map_name) == 0) { // NULL = freed slot
+            return g_map_cache[i];
+        }
+    }
+    return NULL; // no live entry for this map name
+}
+
+static void map_cache_insert(struct SharedMapData *entry) {
+    g_map_cache_pid = g_map_cache_pid ? g_map_cache_pid : getpid(); // stamp the building process once
+    struct SharedMapData **grown = realloc(g_map_cache, (g_map_cache_count + 1) * sizeof(*grown));
+    if (grown != NULL) { // on failure keep the old array intact; entry is just left uncached
+        g_map_cache = grown;
+        g_map_cache[g_map_cache_count++] = entry;
+    }
+}
+
+// Free a shared geometry entry and everything it owns, and clear its cache slot.
+// Mirrors the owned-geometry branch of c_close. Only call in the building process.
+static void free_shared_map_data(struct SharedMapData *shared) {
+    if (shared == NULL) { // nothing to release
+        return;
+    }
+    for (int i = 0; i < shared->num_road_elements; i++) {
+        free_road_element(&shared->road_elements[i]);
+    }
+    free(shared->road_elements);
+    int grid_cell_count = shared->grid_map->grid_cols * shared->grid_map->grid_rows; // cells and neighbor lists
+    for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) {
+        free(shared->grid_map->cells[grid_index]);
+    }
+    free(shared->grid_map->cells);
+    free(shared->grid_map->cell_entities_count);
+    free(shared->grid_map->grid_index_drivable);
+    for (int i = 0; i < grid_cell_count; i++) {
+        free(shared->grid_map->neighbor_cache_entities[i]);
+    }
+    free(shared->grid_map->neighbor_cache_entities);
+    free(shared->grid_map->neighbor_cache_count);
+    free(shared->grid_map);
+    free(shared->neighbor_offsets);
+    free_lane_graph(&shared->lane_graph);
+    free(shared->map_name); // the strdup'd cache key
+    for (int i = 0; i < g_map_cache_count; i++) {
+        if (g_map_cache[i] == shared) {
+            g_map_cache[i] = NULL; // slot is left empty, not compacted; map_cache_lookup skips NULL slots
+            break;
+        }
+    }
+    free(shared); // the entry itself goes last
+}
+
 void init(Drive *env) {
     env->human_agent_idx = 0;
     env->timestep = 0;
-    load_map_binary(env->map_name, env);
+    env->shared_map = NULL;
+
+    struct SharedMapData *shared = env->use_map_cache ? map_cache_lookup(env->map_name) : NULL;
+    if (shared != NULL) {
+        // Cache hit: load only the per-env data (agents, traffic-control elements),
+        // then discard the freshly-loaded geometry and borrow the shared copy.
+        load_map_binary(env->map_name, env);
+        for (int i = 0; i < env->num_road_elements; i++) {
+            free_road_element(&env->road_elements[i]);
+        }
+        free(env->road_elements);
+        free_lane_graph(&env->lane_graph);
+        env->road_elements = shared->road_elements;
+        env->num_road_elements = shared->num_road_elements;
+        env->grid_map = shared->grid_map;
+        env->neighbor_offsets = shared->neighbor_offsets;
+        env->lane_graph = shared->lane_graph;
+        env->shared_map = shared;
+        shared->ref_count++;
+    } else {
+        // Cache miss (or caching off): load and build the geometry as usual.
+        load_map_binary(env->map_name, env);
+        init_grid_map(env);
+        int vision_half_range = (int) ceilf(
+            fmaxf(fmaxf(env->road_obs_front_dist, env->road_obs_behind_dist), env->road_obs_side_dist) / GRID_CELL_SIZE);
+        env->grid_map->vision_range = 2 * vision_half_range + 1;
+        init_neighbor_offsets(env);
+        cache_neighbor_offsets(env);
+        if (env->use_map_cache) {
+            // Transfer the just-built geometry into a shared, ref-counted entry that
+            // this env borrows (ref_count starts at 1).
+            struct SharedMapData *entry = (struct SharedMapData *) calloc(1, sizeof(struct SharedMapData));
+            entry->map_name = strdup(env->map_name);
+            entry->road_elements = env->road_elements;
+            entry->num_road_elements = env->num_road_elements;
+            entry->grid_map = env->grid_map;
+            entry->neighbor_offsets = env->neighbor_offsets;
+            entry->lane_graph = env->lane_graph;
+            entry->ref_count = 1;
+            map_cache_insert(entry);
+            env->shared_map = entry;
+        }
+    }
     env->road_dropout_enabled = (env->obs_lane_segment_count < env->max_lane_segment_observations)
         || (env->obs_boundary_segment_count < env->max_boundary_segment_observations);
-    init_grid_map(env);
-    int vision_half_range = (int) ceilf(
-        fmaxf(fmaxf(env->road_obs_front_dist, env->road_obs_behind_dist), env->road_obs_side_dist) / GRID_CELL_SIZE);
-    env->grid_map->vision_range = 2 * vision_half_range + 1;
-    init_neighbor_offsets(env);
-    cache_neighbor_offsets(env);
     env->logs_capacity = 0;
     set_active_agents(env);
     env->logs_capacity = env->active_agent_count;
@@ -3517,38 +3632,51 @@ void c_close(Drive *env) {
     for (int i = 0; i < env->num_total_agents; i++) {
         free_agent(&env->agents[i]);
     }
-    for (int i = 0; i < env->num_road_elements; i++) {
-        free_road_element(&env->road_elements[i]);
-    }
     for (int i = 0; i < env->num_traffic_elements; i++) {
         free_traffic_element(&env->traffic_elements[i]);
     }
     free(env->agents);
-    free(env->road_elements);
     free(env->traffic_elements);
     free(env->active_agent_indices);
     free(env->logs);
-    // GridMap cleanup
-    int grid_cell_count = env->grid_map->grid_cols * env->grid_map->grid_rows;
-    for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) {
-        free(env->grid_map->cells[grid_index]);
-    }
-    free(env->grid_map->cells);
-    free(env->grid_map->cell_entities_count);
-    free(env->grid_map->grid_index_drivable);
-    free(env->neighbor_offsets);
 
-    for (int i = 0; i < grid_cell_count; i++) {
-        free(env->grid_map->neighbor_cache_entities[i]);
+    if (env->shared_map != NULL) {
+        // Geometry is borrowed from the cache: release our reference. Free the
+        // shared entry only when it is the last reference AND we are the process
+        // that built it (a forked worker must not free the parent's copy-on-write
+        // pages — it would corrupt the parent's heap).
+        env->shared_map->ref_count--;
+        if (env->shared_map->ref_count <= 0 && g_map_cache_pid == getpid()) {
+            free_shared_map_data(env->shared_map);
+        }
+        env->shared_map = NULL;
+    } else {
+        // Geometry is owned by this env: free it.
+        for (int i = 0; i < env->num_road_elements; i++) {
+            free_road_element(&env->road_elements[i]);
+        }
+        free(env->road_elements);
+        int grid_cell_count = env->grid_map->grid_cols * env->grid_map->grid_rows;
+        for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) {
+            free(env->grid_map->cells[grid_index]);
+        }
+        free(env->grid_map->cells);
+        free(env->grid_map->cell_entities_count);
+        free(env->grid_map->grid_index_drivable);
+        free(env->neighbor_offsets);
+        for (int i = 0; i < grid_cell_count; i++) {
+            free(env->grid_map->neighbor_cache_entities[i]);
+        }
+        free(env->grid_map->neighbor_cache_entities);
+        free(env->grid_map->neighbor_cache_count);
+        free(env->grid_map);
+        free_lane_graph(&env->lane_graph);
     }
-    free(env->grid_map->neighbor_cache_entities);
-    free(env->grid_map->neighbor_cache_count);
-    free(env->grid_map);
+
     free(env->static_agent_indices);
     free(env->expert_static_agent_indices);
     free(env->objects_of_interest);
     free(env->tracks_to_predict);
-    free_lane_graph(&env->lane_graph);
     free(env->map_name);
     free(env->ini_file);
 }
diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py
index 1c44e453f1..8fd492ee39 100644
--- a/pufferlib/ocean/drive/drive.py
+++ b/pufferlib/ocean/drive/drive.py
@@ -48,6 +48,7 @@ def __init__(
         offroad_behavior=0,
         traffic_light_behavior=0,
         goal_placement=0,
+        use_map_cache=0,
         # emit_completed_episodes=True: env emits one summary dict per
         # completed episode via info (drained from a per-env C-side queue).
         # capture_compact_replay=True additionally records per-step agent and
@@ -150,6 +151,9 @@ def __init__(
         if goal_placement not in (0, 1):
             raise ValueError(f"goal_placement must be 0 (route) or 1 (random). Got: {goal_placement}")
         self.goal_placement = goal_placement
+        if use_map_cache not in (0, 1):
+            raise ValueError(f"use_map_cache must be 0 (off) or 1 (on). Got: {use_map_cache}")
+        self.use_map_cache = use_map_cache
         self.capture_compact_replay = bool(capture_compact_replay)
         # capture_compact_replay implies emit_completed_episodes, since the
         # bundle rides on the per-episode summary.
@@ -386,6 +390,7 @@ def _env_init_kwargs(self, map_file, max_agents):
             "offroad_behavior": self.offroad_behavior,
             "traffic_light_behavior": self.traffic_light_behavior,
             "goal_placement": self.goal_placement,
+            "use_map_cache": self.use_map_cache,
             "emit_completed_episodes": int(self.emit_completed_episodes),
             "goal_radius": self.goal_radius,
             "min_waypoint_spacing": self.min_waypoint_spacing,
diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index 7c5adbe140..65e0108573 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -68,6 +68,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
         --env.num_maps 1 \
         --env.max_agents_per_env 1 \
         --env.num_agents '"$NUM_AGENTS"' \
+        --env.use_map_cache 1 \
         --env.goal_placement 1 \
         --env.num_target_waypoints 1 \
         --env.max_goal_position '"$MAX_GOAL_POSITION"' \

From befb01e3099bdf7dfb3d559c3b5b6751ed784d91 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Fri, 22 May 2026 22:56:44 -0400
Subject: [PATCH 05/21] scripts: turn traffic lights off for single-agent speed
 runs

Pass --env.traffic_light_behavior 0 so the agent isn't stopped/penalized at
lights during point-to-point speed runs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/single_agent_speed_run.sbatch | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index 65e0108573..b33da2735e 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -69,6 +69,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
         --env.max_agents_per_env 1 \
         --env.num_agents '"$NUM_AGENTS"' \
         --env.use_map_cache 1 \
+        --env.traffic_light_behavior 0 \
         --env.goal_placement 1 \
         --env.num_target_waypoints 1 \
         --env.max_goal_position '"$MAX_GOAL_POSITION"' \

From e18b13d85f337ee1d692244a5ce51a0794222769 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Fri, 22 May 2026 23:00:32 -0400
Subject: [PATCH 06/21] scripts: fix dotted CLI flags to hyphens; disable
 nuPlan evals

The dynamic pufferl argparser registers dotted [env]/[train] keys with hyphens
(map_dir -> --env.map-dir), so the underscore forms were rejected as unrecognized
arguments. Replace the underscores in every dotted override with hyphens. Also
disable the nuPlan-based evaluators (validation_replay + behavior buckets) for
single-map CARLA training via --eval.<name>.enabled 0; validation_gigaflow stays
enabled.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/single_agent_speed_run.sbatch | 34 ++++++++++++++++++---------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index b33da2735e..4c6533eff3 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -40,6 +40,18 @@ WANDB_GROUP="${WANDB_GROUP:-Nightly Test}"
 SEED_BASE="${SEED_BASE:-0}"
 SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID))      # one seed per array task: SEED_BASE .. SEED_BASE+2
 
+# Disable the nuPlan-based evaluators (replay + behavior buckets); they are
+# irrelevant to single-map CARLA training and spawn their own envs. validation_gigaflow
+# (CARLA) stays on. enabled 0 -> bool(0)=False in the eval manager. Note: dotted CLI
+# flags use hyphens (the argparser maps _ -> -), so section names are hyphenated here.
+DISABLE_EVALS=""
+for ev in validation-replay behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \
+          behaviors-lane-change behaviors-merge behaviors-parked-cars behaviors-roundabout \
+          behaviors-stopped-traffic behaviors-traffic-light-green behaviors-traffic-light-stop \
+          behaviors-unprotected-left behaviors-unprotected-right; do
+    DISABLE_EVALS="$DISABLE_EVALS --eval.$ev.enabled 0"
+done
+
 OVERLAY="/scratch/$USER/images/PufferDrive/overlay-15GB-500K.ext3"
 IMAGE="/share/apps/images/cuda12.8.1-cudnn9.8.0-ubuntu24.04.2.sif"
 
@@ -64,17 +76,17 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
     HEARTBEAT_PID=$!
 
     python -m pufferlib.pufferl train puffer_drive \
-        --env.map_dir pufferlib/resources/drive/binaries/carla_town02 \
-        --env.num_maps 1 \
-        --env.max_agents_per_env 1 \
-        --env.num_agents '"$NUM_AGENTS"' \
-        --env.use_map_cache 1 \
-        --env.traffic_light_behavior 0 \
-        --env.goal_placement 1 \
-        --env.num_target_waypoints 1 \
-        --env.max_goal_position '"$MAX_GOAL_POSITION"' \
-        --train.total_timesteps '"$TOTAL_TIMESTEPS"' \
-        --train.seed '"$SEED"' \
+        --env.map-dir pufferlib/resources/drive/binaries/carla_town02 \
+        --env.num-maps 1 \
+        --env.max-agents-per-env 1 \
+        --env.num-agents '"$NUM_AGENTS"' \
+        --env.use-map-cache 1 \
+        --env.traffic-light-behavior 0 \
+        --env.goal-placement 1 \
+        --env.num-target-waypoints 1 \
+        --env.max-goal-position '"$MAX_GOAL_POSITION"' \
+        --train.total-timesteps '"$TOTAL_TIMESTEPS"' \
+        --train.seed '"$SEED"' '"$DISABLE_EVALS"' \
         --wandb --wandb-project puffer_drive --wandb-group "'"$WANDB_GROUP"'" --tag seed'"$SEED"'
     TRAIN_EXIT=$?
 

From 0b5df39d5d3b080224a62cdaa460ea94d1152b72 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Fri, 22 May 2026 23:08:57 -0400
Subject: [PATCH 07/21] drive.ini: declare enabled explicitly in
 [eval.validation_replay]

[eval.validation_replay] inherited enabled from validation_defaults, so the
argparser never registered --eval.validation-replay.enabled and CLI overrides were
rejected. Declaring it explicitly (same true default) makes it toggleable, matching
the behaviors_* sections.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index 9f2f217428..636bab9ef4 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -274,6 +274,9 @@ eval.verify_coverage = true
 
 [eval.validation_replay]
 inherits = "validation_defaults"
+; declared explicitly (same as the inherited default) so it can be toggled from
+; the CLI, e.g. --eval.validation-replay.enabled 0
+enabled = true
 type = "multi_scenario"
 render = true
 render_backend = "triage_html"

From 3336e3d8d01da932a2263e9d7339483376659d9b Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@Eugenes-MacBook-Air.local>
Date: Fri, 22 May 2026 23:43:36 -0400
Subject: [PATCH 08/21] drive.ini: add single_agent_obs evaluator for manual
 obs_html renders

Dedicated, disabled-by-default evaluator mirroring the single-agent speed-run
task (Town02, one agent, random goals, lights off) with render_backend=obs_html.
Lets us faithfully obs-render any checkpoint via
  puffer eval puffer_drive --evaluator single_agent_obs --load-model-path <ckpt>
without it firing inline during training (enabled=false; standalone-by-name still
runs it).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index 636bab9ef4..fa57807724 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -305,6 +305,28 @@ env.resample_frequency = 500
 eval.render_num_scenarios = 8
 eval.render_max_steps = 300
 
+; Manual obs_html render of the single-agent speed-run task (Town02, one agent,
+; random goals anywhere, lights off). enabled=false so it never runs inline during
+; training; invoke standalone, e.g.:
+;   puffer eval puffer_drive --evaluator single_agent_obs --load-model-path <ckpt>
+[eval.single_agent_obs]
+inherits = "validation_gigaflow"
+enabled = false
+type = "multi_scenario"
+render = true
+render_backend = "obs_html"
+env.map_dir = "pufferlib/resources/drive/binaries/carla_town02"
+env.num_maps = 1
+env.num_agents = 8
+env.min_agents_per_env = 1
+env.max_agents_per_env = 1
+env.goal_placement = 1
+env.traffic_light_behavior = 0
+env.num_target_waypoints = 1
+env.max_goal_position = 1000
+eval.num_scenarios = 8
+eval.render_num_scenarios = 8
+
 ; ---------------------------------------------------------------------------
 ; Driving-behaviour evaluation: nuPlan scenes labeled by scene type. Each
 ; behavior is one [eval.behaviors_*] section. All inherit from the template

From f105d5541d62988ef177564097850b2c68bc86e2 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 00:03:50 -0400
Subject: [PATCH 09/21] single-agent: min_waypoint_spacing 0 so goals can spawn
 near the agent

With goal_placement=random, min_waypoint_spacing is the rejection floor on goal
distance; 0 lets goals land right next to the agent (near goals, not only far).
Set in the training launcher and the single_agent_obs render config.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini      | 1 +
 scripts/single_agent_speed_run.sbatch | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index fa57807724..fd8f665441 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -323,6 +323,7 @@ env.max_agents_per_env = 1
 env.goal_placement = 1
 env.traffic_light_behavior = 0
 env.num_target_waypoints = 1
+env.min_waypoint_spacing = 0
 env.max_goal_position = 1000
 eval.num_scenarios = 8
 eval.render_num_scenarios = 8
diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index 4c6533eff3..f525bb84ef 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -84,6 +84,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
         --env.traffic-light-behavior 0 \
         --env.goal-placement 1 \
         --env.num-target-waypoints 1 \
+        --env.min-waypoint-spacing 0 \
         --env.max-goal-position '"$MAX_GOAL_POSITION"' \
         --train.total-timesteps '"$TOTAL_TIMESTEPS"' \
         --train.seed '"$SEED"' '"$DISABLE_EVALS"' \

From b3b14f69ce42b9d596d8fc5830d9602aaad6a4a5 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 00:10:06 -0400
Subject: [PATCH 10/21] Revert single-agent-specific config from drive.ini

Remove the [eval.single_agent_obs] section and the [eval.validation_replay]
enabled= line; keep drive.ini as the shared default. The general goal_placement
and use_map_cache [env] knobs stay (default off) since they're required for the
CLI flags to exist. The launcher no longer tries to disable validation_replay
(its enabled is inherited, not CLI-overridable without a drive.ini line).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini      | 26 --------------------------
 scripts/single_agent_speed_run.sbatch |  2 +-
 2 files changed, 1 insertion(+), 27 deletions(-)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index fd8f665441..9f2f217428 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -274,9 +274,6 @@ eval.verify_coverage = true
 
 [eval.validation_replay]
 inherits = "validation_defaults"
-; declared explicitly (same as the inherited default) so it can be toggled from
-; the CLI, e.g. --eval.validation-replay.enabled 0
-enabled = true
 type = "multi_scenario"
 render = true
 render_backend = "triage_html"
@@ -305,29 +302,6 @@ env.resample_frequency = 500
 eval.render_num_scenarios = 8
 eval.render_max_steps = 300
 
-; Manual obs_html render of the single-agent speed-run task (Town02, one agent,
-; random goals anywhere, lights off). enabled=false so it never runs inline during
-; training; invoke standalone, e.g.:
-;   puffer eval puffer_drive --evaluator single_agent_obs --load-model-path <ckpt>
-[eval.single_agent_obs]
-inherits = "validation_gigaflow"
-enabled = false
-type = "multi_scenario"
-render = true
-render_backend = "obs_html"
-env.map_dir = "pufferlib/resources/drive/binaries/carla_town02"
-env.num_maps = 1
-env.num_agents = 8
-env.min_agents_per_env = 1
-env.max_agents_per_env = 1
-env.goal_placement = 1
-env.traffic_light_behavior = 0
-env.num_target_waypoints = 1
-env.min_waypoint_spacing = 0
-env.max_goal_position = 1000
-eval.num_scenarios = 8
-eval.render_num_scenarios = 8
-
 ; ---------------------------------------------------------------------------
 ; Driving-behaviour evaluation: nuPlan scenes labeled by scene type. Each
 ; behavior is one [eval.behaviors_*] section. All inherit from the template
diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index f525bb84ef..b3ece8efcb 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -45,7 +45,7 @@ SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID))      # one seed per array task: SEED_B
 # (CARLA) stays on. enabled 0 -> bool(0)=False in the eval manager. Note: dotted CLI
 # flags use hyphens (the argparser maps _ -> -), so section names are hyphenated here.
 DISABLE_EVALS=""
-for ev in validation-replay behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \
+for ev in behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \
           behaviors-lane-change behaviors-merge behaviors-parked-cars behaviors-roundabout \
           behaviors-stopped-traffic behaviors-traffic-light-green behaviors-traffic-light-stop \
           behaviors-unprotected-left behaviors-unprotected-right; do

From d8a24db5770bc1d52860c4733d5dc700da62581f Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 00:11:45 -0400
Subject: [PATCH 11/21] Restore eval-enabling config (validation_replay
 enabled, single_agent_obs)

These are eval-scoped and do not alter default training: validation_replay's
enabled was already inherited-true (now explicit, CLI-toggleable); single_agent_obs
is enabled=false so it never runs inline. Restore validation-replay to the launcher's
list of disabled evaluators for single-agent runs.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini      | 26 ++++++++++++++++++++++++++
 scripts/single_agent_speed_run.sbatch |  2 +-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index 9f2f217428..fd8f665441 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -274,6 +274,9 @@ eval.verify_coverage = true
 
 [eval.validation_replay]
 inherits = "validation_defaults"
+; declared explicitly (same as the inherited default) so it can be toggled from
+; the CLI, e.g. --eval.validation-replay.enabled 0
+enabled = true
 type = "multi_scenario"
 render = true
 render_backend = "triage_html"
@@ -302,6 +305,29 @@ env.resample_frequency = 500
 eval.render_num_scenarios = 8
 eval.render_max_steps = 300
 
+; Manual obs_html render of the single-agent speed-run task (Town02, one agent,
+; random goals anywhere, lights off). enabled=false so it never runs inline during
+; training; invoke standalone, e.g.:
+;   puffer eval puffer_drive --evaluator single_agent_obs --load-model-path <ckpt>
+[eval.single_agent_obs]
+inherits = "validation_gigaflow"
+enabled = false
+type = "multi_scenario"
+render = true
+render_backend = "obs_html"
+env.map_dir = "pufferlib/resources/drive/binaries/carla_town02"
+env.num_maps = 1
+env.num_agents = 8
+env.min_agents_per_env = 1
+env.max_agents_per_env = 1
+env.goal_placement = 1
+env.traffic_light_behavior = 0
+env.num_target_waypoints = 1
+env.min_waypoint_spacing = 0
+env.max_goal_position = 1000
+eval.num_scenarios = 8
+eval.render_num_scenarios = 8
+
 ; ---------------------------------------------------------------------------
 ; Driving-behaviour evaluation: nuPlan scenes labeled by scene type. Each
 ; behavior is one [eval.behaviors_*] section. All inherit from the template
diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index b3ece8efcb..f525bb84ef 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -45,7 +45,7 @@ SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID))      # one seed per array task: SEED_B
 # (CARLA) stays on. enabled 0 -> bool(0)=False in the eval manager. Note: dotted CLI
 # flags use hyphens (the argparser maps _ -> -), so section names are hyphenated here.
 DISABLE_EVALS=""
-for ev in behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \
+for ev in validation-replay behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \
           behaviors-lane-change behaviors-merge behaviors-parked-cars behaviors-roundabout \
           behaviors-stopped-traffic behaviors-traffic-light-green behaviors-traffic-light-stop \
           behaviors-unprotected-left behaviors-unprotected-right; do

From 2a0af95d3c5d516ce9baf4ce35799835f823ba41 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 11:14:44 -0400
Subject: [PATCH 12/21] single-agent: max_traffic_control_observations 0 to
 truly disable lights

traffic_light_behavior=0 only stops the env force-halting at reds; the agent still
learns to stop because a red-light violation zeroes the reward multiplier
(no_red_light, drive.h). The red-light metric is gated on
max_traffic_control_observations, so setting it to 0 removes lights from the
observation AND keeps red_light_violation_rate at 0 (multiplier never zeroed) --
the agent can neither see nor be penalized by lights. Set in the launcher and the
single_agent_obs render config.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini      | 1 +
 scripts/single_agent_speed_run.sbatch | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index fd8f665441..50a026b0b7 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -322,6 +322,7 @@ env.min_agents_per_env = 1
 env.max_agents_per_env = 1
 env.goal_placement = 1
 env.traffic_light_behavior = 0
+env.max_traffic_control_observations = 0
 env.num_target_waypoints = 1
 env.min_waypoint_spacing = 0
 env.max_goal_position = 1000
diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index f525bb84ef..889500e3da 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -82,6 +82,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
         --env.num-agents '"$NUM_AGENTS"' \
         --env.use-map-cache 1 \
         --env.traffic-light-behavior 0 \
+        --env.max-traffic-control-observations 0 \
         --env.goal-placement 1 \
         --env.num-target-waypoints 1 \
         --env.min-waypoint-spacing 0 \

From 6a6c2e851c4f50bd476a9c5ccbe4c682b9987473 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 11:26:34 -0400
Subject: [PATCH 13/21] scripts: drop stale scheduling/logs comment from
 single-agent launcher

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/single_agent_speed_run.sbatch | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index 889500e3da..028a3e471a 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -17,10 +17,6 @@
 #   NUM_AGENTS=2048 sbatch scripts/single_agent_speed_run.sbatch
 #   SEED_BASE=100 sbatch scripts/single_agent_speed_run.sbatch   # seeds 100,101,102
 #
-# Logs land in slurm-<jobid>_<seedidx>.out in the directory you submit from.
-# The h200_tandon QOS pool is often full (QOSGrpGRES); if it won't schedule,
-# resubmit with --partition=a100_tandon or --partition=h100_tandon.
-#
 #SBATCH --job-name=single_agent_speed_run
 #SBATCH --account=torch_pr_924_tandon_advanced
 #SBATCH --partition=h200_tandon

From 2def57116b5eda03b320a297f41f6fb12bf3d64c Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 11:31:23 -0400
Subject: [PATCH 14/21] scripts: date-stamp wandb run name for single-agent
 launches

Set WANDB_NAME=<YYYY-MM-DD>_single_agent_seed<N> (WandbLogger passes no name=, so
the env var sets the run name). Date-first so runs sort chronologically per launch.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/single_agent_speed_run.sbatch | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index 028a3e471a..2ed652cd98 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -35,6 +35,8 @@ TOTAL_TIMESTEPS="${TOTAL_TIMESTEPS:-1000000000}"  # cap the run at 1B steps
 WANDB_GROUP="${WANDB_GROUP:-Nightly Test}"
 SEED_BASE="${SEED_BASE:-0}"
 SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID))      # one seed per array task: SEED_BASE .. SEED_BASE+2
+LAUNCH_DATE="$(date +%Y-%m-%d)"                 # stamp the wandb run name with the launch date
+WANDB_NAME="${LAUNCH_DATE}_single_agent_seed${SEED}"
 
 # Disable the nuPlan-based evaluators (replay + behavior buckets); they are
 # irrelevant to single-map CARLA training and spawn their own envs. validation_gigaflow
@@ -55,6 +57,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
     source /scratch/'"$USER"'/venvs/pufferdrive/bin/activate
     export TORCH_CUDA_ARCH_LIST="8.0;8.9;9.0"
     export PYTHONNOUSERSITE=1
+    export WANDB_NAME='"$WANDB_NAME"'  # wandb run name (no name= in WandbLogger -> env var wins)
     NCCL_DIR=$(compgen -G "/scratch/'"$USER"'/venvs/pufferdrive/lib/python3.12/site-packages/nvidia/nccl/lib" 2>/dev/null) \
         && export LD_LIBRARY_PATH="$NCCL_DIR:$LD_LIBRARY_PATH"
     cd /scratch/'"$USER"'/code/PufferDrive

From 0b60bfdde71ff9367d480f31fe823459ceb529bb Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 11:57:38 -0400
Subject: [PATCH 15/21] drive: random goals respect max_waypoint_spacing as an
 upper distance bound

compute_goals_random previously only enforced a min distance and sampled
uniformly over the whole map (far-skewed, no cap). Now it accepts points in
[min_waypoint_spacing, max_waypoint_spacing], with a closest-to-band fallback so
a tight max still lands a nearby goal. The random-mode contexts (single-agent
launcher + single_agent_obs render) default max_waypoint_spacing to a huge value
(= anywhere on the map), preserving prior behavior; the global [env] default
stays 60 for route mode / default training.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini      |  1 +
 pufferlib/ocean/drive/drive.h         | 39 ++++++++++++++++++---------
 scripts/single_agent_speed_run.sbatch |  2 ++
 3 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index 50a026b0b7..ef5c48cc14 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -325,6 +325,7 @@ env.traffic_light_behavior = 0
 env.max_traffic_control_observations = 0
 env.num_target_waypoints = 1
 env.min_waypoint_spacing = 0
+env.max_waypoint_spacing = 1000000
 env.max_goal_position = 1000
 eval.num_scenarios = 8
 eval.render_num_scenarios = 8
diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index 5fdfefe581..a9be119629 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -1893,11 +1893,12 @@ static int compute_new_route(Drive *env, int agent_idx, int current_lane_idx) {
     return 1; // Success
 }
 
-// Place each target waypoint at a random drivable point anywhere on the map,
-// rejecting points closer than min_waypoint_spacing so goals are never trivially
-// close. Goals can land in any direction, including behind the agent. The route
-// and path are still built at spawn (used for lane observations and progression);
-// only the goal positions are overridden here.
+// Place each target waypoint at a random drivable point whose distance from the
+// agent falls in [min_waypoint_spacing, max_waypoint_spacing]. With a very large
+// max this is effectively "anywhere on the map"; a small max keeps goals nearby.
+// Goals can land in any direction, including behind the agent. The route and path
+// are still built at spawn (used for lane observations and progression); only the
+// goal positions are overridden here.
 static void compute_goals_random(Drive *env, int agent_idx) {
     Agent *agent = &env->agents[agent_idx];
 
@@ -1907,13 +1908,15 @@ static void compute_goals_random(Drive *env, int agent_idx) {
     }
 
     float min_dist_sq = env->min_waypoint_spacing * env->min_waypoint_spacing;
+    float max_dist_sq = env->max_waypoint_spacing * env->max_waypoint_spacing;
     const int MAX_GOAL_ATTEMPTS = 30;
 
     for (int i = 0; i < num_target_waypoints; i++) {
-        // Fall back to the agent's own position if no distant point is found.
+        // Fall back to the agent's own position if no drivable point is sampled at all.
         float goal_x = agent->sim_x;
         float goal_y = agent->sim_y;
         float goal_z = agent->sim_z;
+        float best_band_miss = -1.0f; // <0: nothing sampled yet; else closest-to-band distance
 
         for (int attempt = 0; attempt < MAX_GOAL_ATTEMPTS; attempt++) {
             int list_idx = rand() % env->grid_map->num_drivable_grid_cell;
@@ -1935,16 +1938,28 @@ static void compute_goals_random(Drive *env, int agent_idx) {
             RoadMapElement *lane = &env->road_elements[chosen.entity_idx];
             float cx = lane->x[chosen.geometry_idx];
             float cy = lane->y[chosen.geometry_idx];
+            float cz = lane->z[chosen.geometry_idx];
             float dx = cx - agent->sim_x;
             float dy = cy - agent->sim_y;
-            if (dx * dx + dy * dy < min_dist_sq) {
-                continue;
+            float d2 = dx * dx + dy * dy;
+
+            if (d2 >= min_dist_sq && d2 <= max_dist_sq) {
+                // Inside the [min_waypoint_spacing, max_waypoint_spacing] band: take it.
+                goal_x = cx;
+                goal_y = cy;
+                goal_z = cz;
+                break;
             }
 
-            goal_x = cx;
-            goal_y = cy;
-            goal_z = lane->z[chosen.geometry_idx];
-            break;
+            // Out of band: keep the point closest to the band so a tight max still
+            // yields a nearby goal instead of falling back to the agent's position.
+            float miss = (d2 > max_dist_sq) ? (d2 - max_dist_sq) : (min_dist_sq - d2);
+            if (best_band_miss < 0.0f || miss < best_band_miss) {
+                best_band_miss = miss;
+                goal_x = cx;
+                goal_y = cy;
+                goal_z = cz;
+            }
         }
 
         agent->goal_positions_x[i] = goal_x;
diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
index 2ed652cd98..948a4db27c 100755
--- a/scripts/single_agent_speed_run.sbatch
+++ b/scripts/single_agent_speed_run.sbatch
@@ -31,6 +31,7 @@ set -euo pipefail
 # Tunables (override via the environment, e.g. NUM_AGENTS=2048 sbatch ...).
 NUM_AGENTS="${NUM_AGENTS:-1024}"                # number of single-agent environments
 MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-1000}"  # obs goal-vector normalization (m); cover Town02's extent
+MAX_WAYPOINT_SPACING="${MAX_WAYPOINT_SPACING:-1000000}"  # random-goal upper distance bound; huge = anywhere on map
 TOTAL_TIMESTEPS="${TOTAL_TIMESTEPS:-1000000000}"  # cap the run at 1B steps
 WANDB_GROUP="${WANDB_GROUP:-Nightly Test}"
 SEED_BASE="${SEED_BASE:-0}"
@@ -85,6 +86,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
         --env.goal-placement 1 \
         --env.num-target-waypoints 1 \
         --env.min-waypoint-spacing 0 \
+        --env.max-waypoint-spacing '"$MAX_WAYPOINT_SPACING"' \
         --env.max-goal-position '"$MAX_GOAL_POSITION"' \
         --train.total-timesteps '"$TOTAL_TIMESTEPS"' \
         --train.seed '"$SEED"' '"$DISABLE_EVALS"' \

From 0a1e545ba7e8d47c106be01d3b3cfd7a3166f0f0 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@Eugenes-MacBook-Air.local>
Date: Sat, 23 May 2026 14:57:34 -0400
Subject: [PATCH 16/21] drive: map_cache_insert reuses freed slots before
 growing

free_shared_map_data NULLs an entry's slot on refcount zero; reuse those holes on
the next insert so a resample cycle (free all, rebuild) keeps g_map_cache_count
bounded by the number of distinct maps instead of appending past NULL holes.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/ocean/drive/drive.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h
index a9be119629..8786343256 100644
--- a/pufferlib/ocean/drive/drive.h
+++ b/pufferlib/ocean/drive/drive.h
@@ -3487,6 +3487,16 @@ static void map_cache_insert(struct SharedMapData *entry) {
     if (g_map_cache_pid == 0) {
         g_map_cache_pid = getpid();
     }
+    // Reuse a slot vacated by free_shared_map_data before growing the array, so a
+    // resample cycle (vec_close frees every entry, then the rebuild re-inserts)
+    // keeps g_map_cache_count bounded by the number of distinct maps instead of
+    // appending past NULL holes on every cycle.
+    for (int i = 0; i < g_map_cache_count; i++) {
+        if (g_map_cache[i] == NULL) {
+            g_map_cache[i] = entry;
+            return;
+        }
+    }
     g_map_cache =
         (struct SharedMapData **) realloc(g_map_cache, (g_map_cache_count + 1) * sizeof(struct SharedMapData *));
     g_map_cache[g_map_cache_count++] = entry;

From 8a4ccba2e6974d11fbc1447840d3844eafd809a8 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@Eugenes-MacBook-Air.local>
Date: Sat, 23 May 2026 14:59:02 -0400
Subject: [PATCH 17/21] scripts: single-agent speed-run program_config for
 submit_cluster.py

Move the single-agent Town02 / random-goal / lights-off knobs into a
program_config YAML so the run goes through the canonical submit_cluster.py
path (code isolation + container + heartbeat) instead of a hand-rolled sbatch.
Launch 3 seeds via --args train.seed=0:1:2.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../single_agent_speed_run.yaml               | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 scripts/cluster_configs/single_agent_speed_run.yaml

diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml
new file mode 100644
index 0000000000..0d8bbc153f
--- /dev/null
+++ b/scripts/cluster_configs/single_agent_speed_run.yaml
@@ -0,0 +1,59 @@
+# Single-agent "speed run" training program config (for submit_cluster.py).
+# One controlled agent per environment on a single CARLA map (Town02), with many
+# parallel environments and goals sampled at random drivable points anywhere on
+# the map. Traffic lights are fully disabled (no observation, no metric, no reward
+# penalty). Keys here override pufferlib/config/ocean/drive.ini; submit_cluster.py
+# converts underscores in each key to dashes for the CLI.
+#
+# Launch (3 seeds via the colon sweep):
+#   source /scratch/$USER/venvs/pufferdrive/bin/activate
+#   python scripts/submit_cluster.py \
+#       --save_dir /scratch/$USER/runs --prefix $(date +%Y-%m-%d)_single_agent \
+#       --compute_config scripts/cluster_configs/nyu_greene.yaml \
+#       --program_config scripts/cluster_configs/single_agent_speed_run.yaml \
+#       --container --heartbeat --account <acct> --partition <gpu-partition> --time 720 \
+#       --args train.seed=0:1:2
+
+# Environment: single agent per env, Town02 only, shared map cache
+env.simulation_mode: gigaflow
+env.map_dir: pufferlib/resources/drive/binaries/carla_town02
+env.num_maps: 1
+env.min_agents_per_env: 1
+env.max_agents_per_env: 1
+env.num_agents: 1024
+env.use_map_cache: 1
+
+# Goals: one random goal anywhere (near or far, any direction incl. behind);
+# max_waypoint_spacing huge = no upper distance cap.
+env.goal_placement: 1
+env.num_target_waypoints: 1
+env.min_waypoint_spacing: 0
+env.max_waypoint_spacing: 1000000
+env.max_goal_position: 1000
+
+# Traffic lights fully off: not observed, not scored, no reward penalty.
+env.traffic_light_behavior: 0
+env.max_traffic_control_observations: 0
+
+# Training
+train.total_timesteps: 1_000_000_000
+
+# Disable the nuPlan-based evaluators (keep validation_gigaflow, which is CARLA).
+eval.validation_replay.enabled: 0
+eval.behaviors_full_dir.enabled: 0
+eval.behaviors_hard_stop.enabled: 0
+eval.behaviors_highway_straight.enabled: 0
+eval.behaviors_lane_change.enabled: 0
+eval.behaviors_merge.enabled: 0
+eval.behaviors_parked_cars.enabled: 0
+eval.behaviors_roundabout.enabled: 0
+eval.behaviors_stopped_traffic.enabled: 0
+eval.behaviors_traffic_light_green.enabled: 0
+eval.behaviors_traffic_light_stop.enabled: 0
+eval.behaviors_unprotected_left.enabled: 0
+eval.behaviors_unprotected_right.enabled: 0
+
+# W&B
+wandb: True
+wandb_project: puffer_drive
+wandb_group: Nightly Test

From 7d84e783993f78ed656840fae10784e094737014 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@Eugenes-MacBook-Air.local>
Date: Sat, 23 May 2026 15:03:21 -0400
Subject: [PATCH 18/21] scripts: single-agent wandb_group Nightly_Test (no
 space)

submit_cluster.py joins the inner command into a bash -c string without quoting
arg values, so a space in --wandb-group split the arg and crashed argparse.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/cluster_configs/single_agent_speed_run.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml
index 0d8bbc153f..1916431c9e 100644
--- a/scripts/cluster_configs/single_agent_speed_run.yaml
+++ b/scripts/cluster_configs/single_agent_speed_run.yaml
@@ -53,7 +53,8 @@ eval.behaviors_traffic_light_stop.enabled: 0
 eval.behaviors_unprotected_left.enabled: 0
 eval.behaviors_unprotected_right.enabled: 0
 
-# W&B
+# W&B. Group has no space: submit_cluster.py joins the inner command into a
+# bash -c string without quoting arg values, so a space would split the arg.
 wandb: True
 wandb_project: puffer_drive
-wandb_group: Nightly Test
+wandb_group: Nightly_Test

From 6a8926b4cdcaf7604805d65be73a24ff4bd31b1c Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 15:05:14 -0400
Subject: [PATCH 19/21] scripts: retire hand-rolled
 single_agent_speed_run.sbatch

Superseded by the submit_cluster.py path (scripts/cluster_configs/single_agent_speed_run.yaml),
which provides per-run code isolation, container wrapping, and the heartbeat. The
hand-rolled sbatch ran from the shared checkout with no isolation, which is what
allowed a rebuild to crash the still-running seed jobs with SIGBUS.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 scripts/single_agent_speed_run.sbatch | 98 ---------------------------
 1 file changed, 98 deletions(-)
 delete mode 100755 scripts/single_agent_speed_run.sbatch

diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch
deleted file mode 100755
index 948a4db27c..0000000000
--- a/scripts/single_agent_speed_run.sbatch
+++ /dev/null
@@ -1,98 +0,0 @@
-#!/bin/bash
-# Single-agent "speed run" training -- nightly 3-seed sweep.
-#
-# One controlled agent per environment on a single CARLA map (Town02), with many
-# parallel environments, and goals sampled at random drivable points anywhere on
-# the map (goal_placement=1) -- so the goal can be far away and in any direction,
-# including behind the agent. On reaching a goal a new random one is drawn, so the
-# agent does continuous point-to-point runs across the town.
-#
-# Launches 3 seeds as a SLURM job array, all logged to wandb group "Nightly Test".
-#
-# Launch:
-#   sbatch scripts/single_agent_speed_run.sbatch
-#
-# Override scheduling / scale / seeds without editing this file:
-#   sbatch --partition=a100_tandon --account=torch_pr_924_tandon_advanced scripts/single_agent_speed_run.sbatch
-#   NUM_AGENTS=2048 sbatch scripts/single_agent_speed_run.sbatch
-#   SEED_BASE=100 sbatch scripts/single_agent_speed_run.sbatch   # seeds 100,101,102
-#
-#SBATCH --job-name=single_agent_speed_run
-#SBATCH --account=torch_pr_924_tandon_advanced
-#SBATCH --partition=h200_tandon
-#SBATCH --array=0-2
-#SBATCH --gres=gpu:1
-#SBATCH --cpus-per-task=16
-#SBATCH --mem=128gb
-#SBATCH --time=12:00:00
-
-set -euo pipefail
-
-# Tunables (override via the environment, e.g. NUM_AGENTS=2048 sbatch ...).
-NUM_AGENTS="${NUM_AGENTS:-1024}"                # number of single-agent environments
-MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-1000}"  # obs goal-vector normalization (m); cover Town02's extent
-MAX_WAYPOINT_SPACING="${MAX_WAYPOINT_SPACING:-1000000}"  # random-goal upper distance bound; huge = anywhere on map
-TOTAL_TIMESTEPS="${TOTAL_TIMESTEPS:-1000000000}"  # cap the run at 1B steps
-WANDB_GROUP="${WANDB_GROUP:-Nightly Test}"
-SEED_BASE="${SEED_BASE:-0}"
-SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID))      # one seed per array task: SEED_BASE .. SEED_BASE+2
-LAUNCH_DATE="$(date +%Y-%m-%d)"                 # stamp the wandb run name with the launch date
-WANDB_NAME="${LAUNCH_DATE}_single_agent_seed${SEED}"
-
-# Disable the nuPlan-based evaluators (replay + behavior buckets); they are
-# irrelevant to single-map CARLA training and spawn their own envs. validation_gigaflow
-# (CARLA) stays on. enabled 0 -> bool(0)=False in the eval manager. Note: dotted CLI
-# flags use hyphens (the argparser maps _ -> -), so section names are hyphenated here.
-DISABLE_EVALS=""
-for ev in validation-replay behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \
-          behaviors-lane-change behaviors-merge behaviors-parked-cars behaviors-roundabout \
-          behaviors-stopped-traffic behaviors-traffic-light-green behaviors-traffic-light-stop \
-          behaviors-unprotected-left behaviors-unprotected-right; do
-    DISABLE_EVALS="$DISABLE_EVALS --eval.$ev.enabled 0"
-done
-
-OVERLAY="/scratch/$USER/images/PufferDrive/overlay-15GB-500K.ext3"
-IMAGE="/share/apps/images/cuda12.8.1-cudnn9.8.0-ubuntu24.04.2.sif"
-
-singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c '
-    source /scratch/'"$USER"'/venvs/pufferdrive/bin/activate
-    export TORCH_CUDA_ARCH_LIST="8.0;8.9;9.0"
-    export PYTHONNOUSERSITE=1
-    export WANDB_NAME='"$WANDB_NAME"'  # wandb run name (no name= in WandbLogger -> env var wins)
-    NCCL_DIR=$(compgen -G "/scratch/'"$USER"'/venvs/pufferdrive/lib/python3.12/site-packages/nvidia/nccl/lib" 2>/dev/null) \
-        && export LD_LIBRARY_PATH="$NCCL_DIR:$LD_LIBRARY_PATH"
-    cd /scratch/'"$USER"'/code/PufferDrive
-
-    # Single-map dir holding only Town02, derived from the bundled carla maps.
-    # Atomic create (temp + mv) so concurrent array tasks never read a partial file.
-    DEST=pufferlib/resources/drive/binaries/carla_town02/opendrive__Town02.bin
-    if [ ! -e "$DEST" ]; then
-        mkdir -p pufferlib/resources/drive/binaries/carla_town02
-        TMP=$(mktemp pufferlib/resources/drive/binaries/carla_town02/Town02.XXXXXX)
-        cp pufferlib/resources/drive/binaries/carla/opendrive__Town02.bin "$TMP" && mv -f "$TMP" "$DEST"
-    fi
-
-    python scripts/gpu_heartbeat.py > /tmp/gpu_heartbeat_'"$SLURM_JOB_ID"'.log 2>&1 &
-    HEARTBEAT_PID=$!
-
-    python -m pufferlib.pufferl train puffer_drive \
-        --env.map-dir pufferlib/resources/drive/binaries/carla_town02 \
-        --env.num-maps 1 \
-        --env.max-agents-per-env 1 \
-        --env.num-agents '"$NUM_AGENTS"' \
-        --env.use-map-cache 1 \
-        --env.traffic-light-behavior 0 \
-        --env.max-traffic-control-observations 0 \
-        --env.goal-placement 1 \
-        --env.num-target-waypoints 1 \
-        --env.min-waypoint-spacing 0 \
-        --env.max-waypoint-spacing '"$MAX_WAYPOINT_SPACING"' \
-        --env.max-goal-position '"$MAX_GOAL_POSITION"' \
-        --train.total-timesteps '"$TOTAL_TIMESTEPS"' \
-        --train.seed '"$SEED"' '"$DISABLE_EVALS"' \
-        --wandb --wandb-project puffer_drive --wandb-group "'"$WANDB_GROUP"'" --tag seed'"$SEED"'
-    TRAIN_EXIT=$?
-
-    kill $HEARTBEAT_PID 2>/dev/null || true
-    exit $TRAIN_EXIT
-'

From 1f911ca45a01554d97f5474d7c0d9b13c7bb9a97 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 17:32:43 -0400
Subject: [PATCH 20/21] scripts: single_agent_no_lane_vel program_config
 (lane+velocity reward ablation)

Self-contained variant of single_agent_speed_run.yaml with lane (lane_align,
lane_center) and velocity (vel_align, velocity, overspeed) rewards zeroed, so the
ablation is defined by a config file rather than ad-hoc --args. The seed is still passed via --args.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../single_agent_no_lane_vel.yaml             | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 scripts/cluster_configs/single_agent_no_lane_vel.yaml

diff --git a/scripts/cluster_configs/single_agent_no_lane_vel.yaml b/scripts/cluster_configs/single_agent_no_lane_vel.yaml
new file mode 100644
index 0000000000..7eacccb88a
--- /dev/null
+++ b/scripts/cluster_configs/single_agent_no_lane_vel.yaml
@@ -0,0 +1,64 @@
+# Single-agent "speed run" ablation: lane + velocity reward shaping removed.
+# Same as single_agent_speed_run.yaml (Town02, one agent, random goals, lights
+# off) but with all lane-keeping and velocity rewards zeroed, so the policy is
+# shaped only by goal-reaching + the safety penalties (collision/offroad/comfort).
+# Program configs don't inherit, so this duplicates the base; keep the two in sync.
+#
+# Launch (per-launch knobs like the seed stay on --args):
+#   source /scratch/$USER/venvs/pufferdrive/bin/activate
+#   python scripts/submit_cluster.py \
+#       --save_dir /scratch/$USER/runs --prefix $(date +%Y-%m-%d)_no_lane_vel \
+#       --compute_config scripts/cluster_configs/nyu_greene.yaml \
+#       --program_config scripts/cluster_configs/single_agent_no_lane_vel.yaml \
+#       --container --heartbeat --account <acct> --partition <gpu-partition> --time 720 \
+#       --args train.seed=0:1:2
+
+# Environment: single agent per env, Town02 only, shared map cache
+env.simulation_mode: gigaflow
+env.map_dir: pufferlib/resources/drive/binaries/carla_town02
+env.num_maps: 1
+env.min_agents_per_env: 1
+env.max_agents_per_env: 1
+env.num_agents: 1024
+env.use_map_cache: 1
+
+# Goals: one random goal anywhere, no upper distance cap.
+env.goal_placement: 1
+env.num_target_waypoints: 1
+env.min_waypoint_spacing: 0
+env.max_waypoint_spacing: 1000000
+env.max_goal_position: 1000
+
+# Traffic lights fully off: not observed, not scored, no reward penalty.
+env.traffic_light_behavior: 0
+env.max_traffic_control_observations: 0
+
+# Ablation: lane-keeping + velocity reward shaping removed (goal + safety only).
+env.reward_lane_align: 0
+env.reward_lane_center: 0
+env.reward_vel_align: 0
+env.reward_velocity: 0
+env.reward_overspeed: 0
+
+# Training
+train.total_timesteps: 1_000_000_000
+
+# Disable the nuPlan-based evaluators (keep validation_gigaflow, which is CARLA).
+eval.validation_replay.enabled: 0
+eval.behaviors_full_dir.enabled: 0
+eval.behaviors_hard_stop.enabled: 0
+eval.behaviors_highway_straight.enabled: 0
+eval.behaviors_lane_change.enabled: 0
+eval.behaviors_merge.enabled: 0
+eval.behaviors_parked_cars.enabled: 0
+eval.behaviors_roundabout.enabled: 0
+eval.behaviors_stopped_traffic.enabled: 0
+eval.behaviors_traffic_light_green.enabled: 0
+eval.behaviors_traffic_light_stop.enabled: 0
+eval.behaviors_unprotected_left.enabled: 0
+eval.behaviors_unprotected_right.enabled: 0
+
+# W&B (group has no space; see single_agent_speed_run.yaml note)
+wandb: True
+wandb_project: puffer_drive
+wandb_group: no_lane_vel

From 7d931effcc5f895cb3ddb921f2c8048020f15a83 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky <eugenevinitsky@lab1040imac08.mtc.scps.nyu.edu>
Date: Sat, 23 May 2026 17:59:24 -0400
Subject: [PATCH 21/21] drive.ini: single_agent_obs renders full 1280-step
 episodes

Override the inherited validation_gigaflow scenario_length=500 / render_max_steps=300
so the obs render matches the single-agent training episode length (1280 steps).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 pufferlib/config/ocean/drive.ini | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini
index ef5c48cc14..4c14e9baec 100644
--- a/pufferlib/config/ocean/drive.ini
+++ b/pufferlib/config/ocean/drive.ini
@@ -327,8 +327,10 @@ env.num_target_waypoints = 1
 env.min_waypoint_spacing = 0
 env.max_waypoint_spacing = 1000000
 env.max_goal_position = 1000
+env.scenario_length = 1280
 eval.num_scenarios = 8
 eval.render_num_scenarios = 8
+eval.render_max_steps = 1280
 
 ; ---------------------------------------------------------------------------
 ; Driving-behaviour evaluation: nuPlan scenes labeled by scene type. Each