From 473941ceff766d295795587218cdaad8b90c758b Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2026 22:09:10 -0400 Subject: [PATCH 01/21] drive: add goal_placement=random for goals anywhere on the map New [env] knob goal_placement (gigaflow): 0 = route (existing forward-route waypoints), 1 = random. In random mode compute_goals samples each target waypoint at a random drivable point anywhere on the map, rejecting points within min_waypoint_spacing of the agent, so goals can land in any direction including behind it. The route/path are still built at spawn for lane observations and progression; only the goal positions change. Goal-reached stays a pure euclidean check, so random goals reward correctly. Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 2 + pufferlib/ocean/drive/binding.c | 1 + pufferlib/ocean/drive/drive.h | 75 ++++++++++++++++++++++++++++++++ pufferlib/ocean/drive/drive.py | 5 +++ 4 files changed, 83 insertions(+) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 571b2ea582..1a40e67747 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -54,6 +54,8 @@ collision_behavior = 1 offroad_behavior = 1 ; Traffic light behavior - options: 0 - Ignore, 1 - Stop, 2 - Remove traffic_light_behavior = 1 +; Goal placement (gigaflow) - options: 0 - route (forward waypoints), 1 - random (anywhere on map) +goal_placement = 0 ; Number of steps before reset scenario_length = 1280 ; Frequency of resampling scenario (in steps), 0 to disable diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index f67243a32d..81ba26d86f 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -1796,6 +1796,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->collision_behavior = (int) unpack(kwargs, "collision_behavior"); env->offroad_behavior = (int) unpack(kwargs, "offroad_behavior"); 
env->traffic_light_behavior = (int) unpack(kwargs, "traffic_light_behavior"); + env->goal_placement = (int) unpack(kwargs, "goal_placement"); env->emit_completed_episodes = (int) unpack(kwargs, "emit_completed_episodes"); env->next_episode_index = 0; env->completed_episodes_count = 0; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 329e1bdf76..a2b268a433 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -121,6 +121,10 @@ #define TARGET_STATIC 0 #define TARGET_DYNAMIC 1 +// GOAL_PLACEMENT modes (controls how gigaflow goals are sampled) +#define GOAL_PLACEMENT_ROUTE 0 // goals lie ahead along a forward random-walk route +#define GOAL_PLACEMENT_RANDOM 1 // goals are random drivable points anywhere on the map + // Observation feature counts #define EGO_FEATURES_CLASSIC 8 #define EGO_FEATURES_JERK 10 @@ -354,6 +358,7 @@ struct Drive { int collision_behavior; // 0 = none, 1=stop, 2 = remove int offroad_behavior; // 0 = none, 1=stop, 2 = remove int traffic_light_behavior; // 0 = none, 1=stop, 2 = remove + int goal_placement; // 0 = route (forward waypoints), 1 = random (anywhere on map) // Metadata fields char scenario_id[128]; char dataset_name[32]; @@ -1862,10 +1867,80 @@ static int compute_new_route(Drive *env, int agent_idx, int current_lane_idx) { return 1; // Success } +// Place each target waypoint at a random drivable point anywhere on the map, +// rejecting points closer than min_waypoint_spacing so goals are never trivially +// close. Goals can land in any direction, including behind the agent. The route +// and path are still built at spawn (used for lane observations and progression); +// only the goal positions are overridden here. 
+static void compute_goals_random(Drive *env, int agent_idx) { + Agent *agent = &env->agents[agent_idx]; + + int num_target_waypoints = env->num_target_waypoints; + if (num_target_waypoints <= 0 || num_target_waypoints > MAX_TARGET_WAYPOINTS) { + num_target_waypoints = MAX_TARGET_WAYPOINTS; + } + + float min_dist_sq = env->min_waypoint_spacing * env->min_waypoint_spacing; + const int MAX_GOAL_ATTEMPTS = 30; + + for (int i = 0; i < num_target_waypoints; i++) { + // Fall back to the agent's own position if no distant point is found. + float goal_x = agent->sim_x; + float goal_y = agent->sim_y; + float goal_z = agent->sim_z; + + for (int attempt = 0; attempt < MAX_GOAL_ATTEMPTS; attempt++) { + int list_idx = rand() % env->grid_map->num_drivable_grid_cell; + int grid_idx = env->grid_map->grid_index_drivable[list_idx]; + + GridMapEntity cell_candidates[MAX_ENTITIES_PER_CELL]; + int candidate_count = 0; + for (int j = 0; j < env->grid_map->cell_entities_count[grid_idx]; j++) { + GridMapEntity entity = env->grid_map->cells[grid_idx][j]; + if (is_drivable_road_lane(env->road_elements[entity.entity_idx].type)) { + cell_candidates[candidate_count++] = entity; + } + } + if (candidate_count == 0) { + continue; + } + + GridMapEntity chosen = cell_candidates[rand() % candidate_count]; + RoadMapElement *lane = &env->road_elements[chosen.entity_idx]; + float cx = lane->x[chosen.geometry_idx]; + float cy = lane->y[chosen.geometry_idx]; + float dx = cx - agent->sim_x; + float dy = cy - agent->sim_y; + if (dx * dx + dy * dy < min_dist_sq) { + continue; + } + + goal_x = cx; + goal_y = cy; + goal_z = lane->z[chosen.geometry_idx]; + break; + } + + agent->goal_positions_x[i] = goal_x; + agent->goal_positions_y[i] = goal_y; + agent->goal_positions_z[i] = goal_z; + } + + agent->current_goal_idx = 0; + agent->goal_position_x = agent->goal_positions_x[0]; + agent->goal_position_y = agent->goal_positions_y[0]; + agent->goal_position_z = agent->goal_positions_z[0]; +} + static void 
compute_goals(Drive *env, int agent_idx) { Agent *agent = &env->agents[agent_idx]; struct Path *path = agent->path; + if (env->goal_placement == GOAL_PLACEMENT_RANDOM) { + compute_goals_random(env, agent_idx); + return; + } + // Validate path exists if (path == NULL || path->num_waypoints == 0) { printf("[GIGAFLOW WARNING] -> Agent %d has no valid path\n", agent_idx); diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index d49563ebf6..1c44e453f1 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -47,6 +47,7 @@ def __init__( collision_behavior=0, offroad_behavior=0, traffic_light_behavior=0, + goal_placement=0, # emit_completed_episodes=True: env emits one summary dict per # completed episode via info (drained from a per-env C-side queue). # capture_compact_replay=True additionally records per-step agent and @@ -146,6 +147,9 @@ def __init__( self.collision_behavior = collision_behavior self.offroad_behavior = offroad_behavior self.traffic_light_behavior = traffic_light_behavior + if goal_placement not in (0, 1): + raise ValueError(f"goal_placement must be 0 (route) or 1 (random). Got: {goal_placement}") + self.goal_placement = goal_placement self.capture_compact_replay = bool(capture_compact_replay) # capture_compact_replay implies emit_completed_episodes, since the # bundle rides on the per-episode summary. 
@@ -381,6 +385,7 @@ def _env_init_kwargs(self, map_file, max_agents): "collision_behavior": self.collision_behavior, "offroad_behavior": self.offroad_behavior, "traffic_light_behavior": self.traffic_light_behavior, + "goal_placement": self.goal_placement, "emit_completed_episodes": int(self.emit_completed_episodes), "goal_radius": self.goal_radius, "min_waypoint_spacing": self.min_waypoint_spacing, From 02d8bd274ce1bda9823f61aa2c924b4caba5df9d Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2026 22:13:53 -0400 Subject: [PATCH 02/21] scripts: add single_agent_speed_run.sbatch launcher Manual launcher for single-agent-per-map training: one agent per env on Town02 only, many envs, goal_placement=random (goals anywhere on the map incl. behind). Auto-creates the Town02-only map dir, runs gpu_heartbeat alongside, wandb-tracked. Scheduling and scale (NUM_AGENTS) are overridable without editing the file. Co-Authored-By: Claude Opus 4.7 --- scripts/single_agent_speed_run.sbatch | 69 +++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100755 scripts/single_agent_speed_run.sbatch diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch new file mode 100755 index 0000000000..907659d140 --- /dev/null +++ b/scripts/single_agent_speed_run.sbatch @@ -0,0 +1,69 @@ +#!/bin/bash +# Single-agent "speed run" training. +# +# One controlled agent per environment on a single CARLA map (Town02), with many +# parallel environments, and goals sampled at random drivable points anywhere on +# the map (goal_placement=1) -- so the goal can be far away and in any direction, +# including behind the agent. On reaching a goal a new random one is drawn, so the +# agent does continuous point-to-point runs across the town. 
+# +# Launch: +# sbatch scripts/single_agent_speed_run.sbatch +# +# Override scheduling / scale without editing this file: +# sbatch --partition=a100_tandon --account=torch_pr_924_tandon_advanced scripts/single_agent_speed_run.sbatch +# NUM_AGENTS=2048 sbatch scripts/single_agent_speed_run.sbatch +# +# Logs land in slurm-.out in the directory you submit from (repo root). +# The h200_tandon QOS pool is often full (QOSGrpGRES); if it won't schedule, +# resubmit with --partition=a100_tandon or --partition=h100_tandon. +# +#SBATCH --job-name=single_agent_speed_run +#SBATCH --account=torch_pr_924_tandon_advanced +#SBATCH --partition=h200_tandon +#SBATCH --gres=gpu:1 +#SBATCH --cpus-per-task=16 +#SBATCH --mem=128gb +#SBATCH --time=12:00:00 + +set -euo pipefail + +# Tunables (override via the environment, e.g. NUM_AGENTS=2048 sbatch ...). +NUM_AGENTS="${NUM_AGENTS:-1024}" # number of single-agent environments +MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-300}" # obs goal-vector normalization (m); cover Town02's extent +WANDB_GROUP="${WANDB_GROUP:-single_agent_speed_runs}" + +OVERLAY="/scratch/$USER/images/PufferDrive/overlay-15GB-500K.ext3" +IMAGE="/share/apps/images/cuda12.8.1-cudnn9.8.0-ubuntu24.04.2.sif" + +singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' + source /scratch/'"$USER"'/venvs/pufferdrive/bin/activate + export TORCH_CUDA_ARCH_LIST="8.0;8.9;9.0" + export PYTHONNOUSERSITE=1 + NCCL_DIR=$(compgen -G "/scratch/'"$USER"'/venvs/pufferdrive/lib/python3.12/site-packages/nvidia/nccl/lib" 2>/dev/null) \ + && export LD_LIBRARY_PATH="$NCCL_DIR:$LD_LIBRARY_PATH" + cd /scratch/'"$USER"'/code/PufferDrive + + # Single-map dir holding only Town02, derived from the bundled carla maps. + if [ ! 
-e pufferlib/resources/drive/binaries/carla_town02/opendrive__Town02.bin ]; then + mkdir -p pufferlib/resources/drive/binaries/carla_town02 + cp pufferlib/resources/drive/binaries/carla/opendrive__Town02.bin pufferlib/resources/drive/binaries/carla_town02/ + fi + + python scripts/gpu_heartbeat.py > /tmp/gpu_heartbeat_'"$SLURM_JOB_ID"'.log 2>&1 & + HEARTBEAT_PID=$! + + python -m pufferlib.pufferl train puffer_drive \ + --env.map_dir pufferlib/resources/drive/binaries/carla_town02 \ + --env.num_maps 1 \ + --env.max_agents_per_env 1 \ + --env.num_agents '"$NUM_AGENTS"' \ + --env.goal_placement 1 \ + --env.num_target_waypoints 1 \ + --env.max_goal_position '"$MAX_GOAL_POSITION"' \ + --wandb --wandb-project puffer_drive --wandb-group '"$WANDB_GROUP"' + TRAIN_EXIT=$? + + kill $HEARTBEAT_PID 2>/dev/null || true + exit $TRAIN_EXIT +' From fd9ca04114cf6aaede77d864a399e4e73d99a7e2 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2026 22:20:57 -0400 Subject: [PATCH 03/21] scripts: 3-seed array into wandb group "Nightly Test" Launch the single-agent speed run as a 3-task SLURM array (one seed each via --train.seed), all logged to wandb group "Nightly Test". max_goal_position=1000 and total_timesteps capped at 1B. Town02-folder create is atomic so concurrent array tasks can't read a partial map file. Co-Authored-By: Claude Opus 4.7 --- scripts/single_agent_speed_run.sbatch | 30 +++++++++++++++++++-------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index 907659d140..7c5adbe140 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -1,5 +1,5 @@ #!/bin/bash -# Single-agent "speed run" training. +# Single-agent "speed run" training -- nightly 3-seed sweep. 
# # One controlled agent per environment on a single CARLA map (Town02), with many # parallel environments, and goals sampled at random drivable points anywhere on @@ -7,20 +7,24 @@ # including behind the agent. On reaching a goal a new random one is drawn, so the # agent does continuous point-to-point runs across the town. # +# Launches 3 seeds as a SLURM job array, all logged to wandb group "Nightly Test". +# # Launch: # sbatch scripts/single_agent_speed_run.sbatch # -# Override scheduling / scale without editing this file: +# Override scheduling / scale / seeds without editing this file: # sbatch --partition=a100_tandon --account=torch_pr_924_tandon_advanced scripts/single_agent_speed_run.sbatch # NUM_AGENTS=2048 sbatch scripts/single_agent_speed_run.sbatch +# SEED_BASE=100 sbatch scripts/single_agent_speed_run.sbatch # seeds 100,101,102 # -# Logs land in slurm-.out in the directory you submit from (repo root). +# Logs land in slurm-_.out in the directory you submit from. # The h200_tandon QOS pool is often full (QOSGrpGRES); if it won't schedule, # resubmit with --partition=a100_tandon or --partition=h100_tandon. # #SBATCH --job-name=single_agent_speed_run #SBATCH --account=torch_pr_924_tandon_advanced #SBATCH --partition=h200_tandon +#SBATCH --array=0-2 #SBATCH --gres=gpu:1 #SBATCH --cpus-per-task=16 #SBATCH --mem=128gb @@ -29,9 +33,12 @@ set -euo pipefail # Tunables (override via the environment, e.g. NUM_AGENTS=2048 sbatch ...). 
-NUM_AGENTS="${NUM_AGENTS:-1024}" # number of single-agent environments -MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-300}" # obs goal-vector normalization (m); cover Town02's extent -WANDB_GROUP="${WANDB_GROUP:-single_agent_speed_runs}" +NUM_AGENTS="${NUM_AGENTS:-1024}" # number of single-agent environments +MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-1000}" # obs goal-vector normalization (m); cover Town02's extent +TOTAL_TIMESTEPS="${TOTAL_TIMESTEPS:-1000000000}" # cap the run at 1B steps +WANDB_GROUP="${WANDB_GROUP:-Nightly Test}" +SEED_BASE="${SEED_BASE:-0}" +SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID)) # one seed per array task: SEED_BASE .. SEED_BASE+2 OVERLAY="/scratch/$USER/images/PufferDrive/overlay-15GB-500K.ext3" IMAGE="/share/apps/images/cuda12.8.1-cudnn9.8.0-ubuntu24.04.2.sif" @@ -45,9 +52,12 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' cd /scratch/'"$USER"'/code/PufferDrive # Single-map dir holding only Town02, derived from the bundled carla maps. - if [ ! -e pufferlib/resources/drive/binaries/carla_town02/opendrive__Town02.bin ]; then + # Atomic create (temp + mv) so concurrent array tasks never read a partial file. + DEST=pufferlib/resources/drive/binaries/carla_town02/opendrive__Town02.bin + if [ ! 
-e "$DEST" ]; then mkdir -p pufferlib/resources/drive/binaries/carla_town02 - cp pufferlib/resources/drive/binaries/carla/opendrive__Town02.bin pufferlib/resources/drive/binaries/carla_town02/ + TMP=$(mktemp pufferlib/resources/drive/binaries/carla_town02/Town02.XXXXXX) + cp pufferlib/resources/drive/binaries/carla/opendrive__Town02.bin "$TMP" && mv -f "$TMP" "$DEST" fi python scripts/gpu_heartbeat.py > /tmp/gpu_heartbeat_'"$SLURM_JOB_ID"'.log 2>&1 & @@ -61,7 +71,9 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' --env.goal_placement 1 \ --env.num_target_waypoints 1 \ --env.max_goal_position '"$MAX_GOAL_POSITION"' \ - --wandb --wandb-project puffer_drive --wandb-group '"$WANDB_GROUP"' + --train.total_timesteps '"$TOTAL_TIMESTEPS"' \ + --train.seed '"$SEED"' \ + --wandb --wandb-project puffer_drive --wandb-group "'"$WANDB_GROUP"'" --tag seed'"$SEED"' TRAIN_EXIT=$? kill $HEARTBEAT_PID 2>/dev/null || true From 490f5715980318300f19795983fa24b6b327106c Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2026 22:54:13 -0400 Subject: [PATCH 04/21] drive: optional shared map cache (use_map_cache) for static geometry Adds an opt-in [env] knob use_map_cache (default 0). When set, environments loading the same map file share one read-only copy of the static geometry -- road_elements, the spatial grid (cells + neighbor cache), and the lane graph (incl. its O(n^2) distance matrix) -- via a per-process, reference-counted cache keyed by map filename. Per-env mutable state (agents, traffic-light states) is never shared, so dynamic traffic lights stay correct. This removes the per-env map duplication that OOMs single-map / many-env runs (e.g. 1 map x 1024 single-agent envs): the geometry is built once and borrowed. Lifecycle: init() builds-or-borrows; c_close() decrements the refcount and frees the entry only on the last reference. 
A getpid() guard prevents a forked worker from freeing the parent's copy-on-write geometry (the use-after-free that the upstream WIP #346 hit). Default off keeps existing single-owner behavior; the single_agent_speed_run launcher passes --env.use_map_cache 1. Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 2 + pufferlib/ocean/drive/binding.c | 1 + pufferlib/ocean/drive/drive.h | 180 ++++++++++++++++++++++---- pufferlib/ocean/drive/drive.py | 5 + scripts/single_agent_speed_run.sbatch | 1 + 5 files changed, 163 insertions(+), 26 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 1a40e67747..9f2f217428 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -56,6 +56,8 @@ offroad_behavior = 1 traffic_light_behavior = 1 ; Goal placement (gigaflow) - options: 0 - route (forward waypoints), 1 - random (anywhere on map) goal_placement = 0 +; Share static map geometry (roads/grid/lane-graph) across envs using the same map - 0 off, 1 on +use_map_cache = 0 ; Number of steps before reset scenario_length = 1280 ; Frequency of resampling scenario (in steps), 0 to disable diff --git a/pufferlib/ocean/drive/binding.c b/pufferlib/ocean/drive/binding.c index 81ba26d86f..9b5ce3d536 100644 --- a/pufferlib/ocean/drive/binding.c +++ b/pufferlib/ocean/drive/binding.c @@ -1797,6 +1797,7 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) { env->offroad_behavior = (int) unpack(kwargs, "offroad_behavior"); env->traffic_light_behavior = (int) unpack(kwargs, "traffic_light_behavior"); env->goal_placement = (int) unpack(kwargs, "goal_placement"); + env->use_map_cache = (int) unpack(kwargs, "use_map_cache"); env->emit_completed_episodes = (int) unpack(kwargs, "emit_completed_episodes"); env->next_episode_index = 0; env->completed_episodes_count = 0; diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index a2b268a433..5fdfefe581 100644 --- 
a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -288,6 +288,30 @@ struct GridMap { int num_drivable_grid_cell; }; +// Static, read-only map geometry shared across environments that load the same +// map file when use_map_cache is set. Holds only data that is never mutated after +// load: road geometry, the spatial grid (cells + neighbor cache), and the lane +// graph. Per-env mutable data (agents, traffic-light states) is never shared. +// Reference-counted; freed when the last borrowing env in the owning process +// closes. See init() / c_close(). +struct SharedMapData { + char *map_name; + RoadMapElement *road_elements; + int num_road_elements; + GridMap *grid_map; + int *neighbor_offsets; + struct LaneGraph lane_graph; + int ref_count; +}; + +// Per-process map cache. Built lazily in init(). g_map_cache_pid stamps the +// process that built it: a forked worker inherits these pointers via copy-on-write +// and must never free them (it would corrupt the parent's heap), so the free path +// in c_close is guarded by getpid() == g_map_cache_pid. 
+static struct SharedMapData **g_map_cache = NULL; +static int g_map_cache_count = 0; +static pid_t g_map_cache_pid = 0; + struct Drive { Client *client; // Render mode: RENDER_WINDOW (0) for interactive viewer, RENDER_HEADLESS (1) @@ -359,6 +383,8 @@ struct Drive { int offroad_behavior; // 0 = none, 1=stop, 2 = remove int traffic_light_behavior; // 0 = none, 1=stop, 2 = remove int goal_placement; // 0 = route (forward waypoints), 1 = random (anywhere on map) + int use_map_cache; // 0 = each env owns its map copy, 1 = share static geometry across envs + struct SharedMapData *shared_map; // non-NULL when this env borrows cached geometry // Metadata fields char scenario_id[128]; char dataset_name[32]; @@ -3433,18 +3459,107 @@ void remove_bad_trajectories(Drive *env) { env->timestep = 0; } +static struct SharedMapData *map_cache_lookup(const char *map_name) { + for (int i = 0; i < g_map_cache_count; i++) { + if (g_map_cache[i] != NULL && strcmp(g_map_cache[i]->map_name, map_name) == 0) { + return g_map_cache[i]; + } + } + return NULL; +} + +static void map_cache_insert(struct SharedMapData *entry) { + if (g_map_cache_pid == 0) { + g_map_cache_pid = getpid(); + } + g_map_cache = + (struct SharedMapData **) realloc(g_map_cache, (g_map_cache_count + 1) * sizeof(struct SharedMapData *)); + g_map_cache[g_map_cache_count++] = entry; +} + +// Free a shared geometry entry and everything it owns, and clear its cache slot. +// Mirrors the owned-geometry branch of c_close. Only call in the building process. 
+static void free_shared_map_data(struct SharedMapData *shared) { + if (shared == NULL) { + return; + } + for (int i = 0; i < shared->num_road_elements; i++) { + free_road_element(&shared->road_elements[i]); + } + free(shared->road_elements); + int grid_cell_count = shared->grid_map->grid_cols * shared->grid_map->grid_rows; + for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) { + free(shared->grid_map->cells[grid_index]); + } + free(shared->grid_map->cells); + free(shared->grid_map->cell_entities_count); + free(shared->grid_map->grid_index_drivable); + for (int i = 0; i < grid_cell_count; i++) { + free(shared->grid_map->neighbor_cache_entities[i]); + } + free(shared->grid_map->neighbor_cache_entities); + free(shared->grid_map->neighbor_cache_count); + free(shared->grid_map); + free(shared->neighbor_offsets); + free_lane_graph(&shared->lane_graph); + free(shared->map_name); + for (int i = 0; i < g_map_cache_count; i++) { + if (g_map_cache[i] == shared) { + g_map_cache[i] = NULL; + break; + } + } + free(shared); +} + void init(Drive *env) { env->human_agent_idx = 0; env->timestep = 0; - load_map_binary(env->map_name, env); + env->shared_map = NULL; + + struct SharedMapData *shared = env->use_map_cache ? map_cache_lookup(env->map_name) : NULL; + if (shared != NULL) { + // Cache hit: load only the per-env data (agents, traffic-control elements), + // then discard the freshly-loaded geometry and borrow the shared copy. 
+ load_map_binary(env->map_name, env); + for (int i = 0; i < env->num_road_elements; i++) { + free_road_element(&env->road_elements[i]); + } + free(env->road_elements); + free_lane_graph(&env->lane_graph); + env->road_elements = shared->road_elements; + env->num_road_elements = shared->num_road_elements; + env->grid_map = shared->grid_map; + env->neighbor_offsets = shared->neighbor_offsets; + env->lane_graph = shared->lane_graph; + env->shared_map = shared; + shared->ref_count++; + } else { + // Cache miss (or caching off): load and build the geometry as usual. + load_map_binary(env->map_name, env); + init_grid_map(env); + int vision_half_range = (int) ceilf( + fmaxf(fmaxf(env->road_obs_front_dist, env->road_obs_behind_dist), env->road_obs_side_dist) / GRID_CELL_SIZE); + env->grid_map->vision_range = 2 * vision_half_range + 1; + init_neighbor_offsets(env); + cache_neighbor_offsets(env); + if (env->use_map_cache) { + // Transfer the just-built geometry into a shared, ref-counted entry that + // this env borrows (ref_count starts at 1). 
+ struct SharedMapData *entry = (struct SharedMapData *) calloc(1, sizeof(struct SharedMapData)); + entry->map_name = strdup(env->map_name); + entry->road_elements = env->road_elements; + entry->num_road_elements = env->num_road_elements; + entry->grid_map = env->grid_map; + entry->neighbor_offsets = env->neighbor_offsets; + entry->lane_graph = env->lane_graph; + entry->ref_count = 1; + map_cache_insert(entry); + env->shared_map = entry; + } + } env->road_dropout_enabled = (env->obs_lane_segment_count < env->max_lane_segment_observations) || (env->obs_boundary_segment_count < env->max_boundary_segment_observations); - init_grid_map(env); - int vision_half_range = (int) ceilf( - fmaxf(fmaxf(env->road_obs_front_dist, env->road_obs_behind_dist), env->road_obs_side_dist) / GRID_CELL_SIZE); - env->grid_map->vision_range = 2 * vision_half_range + 1; - init_neighbor_offsets(env); - cache_neighbor_offsets(env); env->logs_capacity = 0; set_active_agents(env); env->logs_capacity = env->active_agent_count; @@ -3517,38 +3632,51 @@ void c_close(Drive *env) { for (int i = 0; i < env->num_total_agents; i++) { free_agent(&env->agents[i]); } - for (int i = 0; i < env->num_road_elements; i++) { - free_road_element(&env->road_elements[i]); - } for (int i = 0; i < env->num_traffic_elements; i++) { free_traffic_element(&env->traffic_elements[i]); } free(env->agents); - free(env->road_elements); free(env->traffic_elements); free(env->active_agent_indices); free(env->logs); - // GridMap cleanup - int grid_cell_count = env->grid_map->grid_cols * env->grid_map->grid_rows; - for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) { - free(env->grid_map->cells[grid_index]); - } - free(env->grid_map->cells); - free(env->grid_map->cell_entities_count); - free(env->grid_map->grid_index_drivable); - free(env->neighbor_offsets); - for (int i = 0; i < grid_cell_count; i++) { - free(env->grid_map->neighbor_cache_entities[i]); + if (env->shared_map != NULL) { + // Geometry is borrowed 
from the cache: release our reference. Free the + // shared entry only when it is the last reference AND we are the process + // that built it (a forked worker must not free the parent's copy-on-write + // pages — it would corrupt the parent's heap). + env->shared_map->ref_count--; + if (env->shared_map->ref_count <= 0 && g_map_cache_pid == getpid()) { + free_shared_map_data(env->shared_map); + } + env->shared_map = NULL; + } else { + // Geometry is owned by this env: free it. + for (int i = 0; i < env->num_road_elements; i++) { + free_road_element(&env->road_elements[i]); + } + free(env->road_elements); + int grid_cell_count = env->grid_map->grid_cols * env->grid_map->grid_rows; + for (int grid_index = 0; grid_index < grid_cell_count; grid_index++) { + free(env->grid_map->cells[grid_index]); + } + free(env->grid_map->cells); + free(env->grid_map->cell_entities_count); + free(env->grid_map->grid_index_drivable); + free(env->neighbor_offsets); + for (int i = 0; i < grid_cell_count; i++) { + free(env->grid_map->neighbor_cache_entities[i]); + } + free(env->grid_map->neighbor_cache_entities); + free(env->grid_map->neighbor_cache_count); + free(env->grid_map); + free_lane_graph(&env->lane_graph); } - free(env->grid_map->neighbor_cache_entities); - free(env->grid_map->neighbor_cache_count); - free(env->grid_map); + free(env->static_agent_indices); free(env->expert_static_agent_indices); free(env->objects_of_interest); free(env->tracks_to_predict); - free_lane_graph(&env->lane_graph); free(env->map_name); free(env->ini_file); } diff --git a/pufferlib/ocean/drive/drive.py b/pufferlib/ocean/drive/drive.py index 1c44e453f1..8fd492ee39 100644 --- a/pufferlib/ocean/drive/drive.py +++ b/pufferlib/ocean/drive/drive.py @@ -48,6 +48,7 @@ def __init__( offroad_behavior=0, traffic_light_behavior=0, goal_placement=0, + use_map_cache=0, # emit_completed_episodes=True: env emits one summary dict per # completed episode via info (drained from a per-env C-side queue). 
# capture_compact_replay=True additionally records per-step agent and @@ -150,6 +151,9 @@ def __init__( if goal_placement not in (0, 1): raise ValueError(f"goal_placement must be 0 (route) or 1 (random). Got: {goal_placement}") self.goal_placement = goal_placement + if use_map_cache not in (0, 1): + raise ValueError(f"use_map_cache must be 0 (off) or 1 (on). Got: {use_map_cache}") + self.use_map_cache = use_map_cache self.capture_compact_replay = bool(capture_compact_replay) # capture_compact_replay implies emit_completed_episodes, since the # bundle rides on the per-episode summary. @@ -386,6 +390,7 @@ def _env_init_kwargs(self, map_file, max_agents): "offroad_behavior": self.offroad_behavior, "traffic_light_behavior": self.traffic_light_behavior, "goal_placement": self.goal_placement, + "use_map_cache": self.use_map_cache, "emit_completed_episodes": int(self.emit_completed_episodes), "goal_radius": self.goal_radius, "min_waypoint_spacing": self.min_waypoint_spacing, diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index 7c5adbe140..65e0108573 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -68,6 +68,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' --env.num_maps 1 \ --env.max_agents_per_env 1 \ --env.num_agents '"$NUM_AGENTS"' \ + --env.use_map_cache 1 \ --env.goal_placement 1 \ --env.num_target_waypoints 1 \ --env.max_goal_position '"$MAX_GOAL_POSITION"' \ From befb01e3099bdf7dfb3d559c3b5b6751ed784d91 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2026 22:56:44 -0400 Subject: [PATCH 05/21] scripts: turn traffic lights off for single-agent speed runs Pass --env.traffic_light_behavior 0 so the agent isn't stopped/penalized at lights during point-to-point speed runs. 
Co-Authored-By: Claude Opus 4.7 --- scripts/single_agent_speed_run.sbatch | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index 65e0108573..b33da2735e 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -69,6 +69,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' --env.max_agents_per_env 1 \ --env.num_agents '"$NUM_AGENTS"' \ --env.use_map_cache 1 \ + --env.traffic_light_behavior 0 \ --env.goal_placement 1 \ --env.num_target_waypoints 1 \ --env.max_goal_position '"$MAX_GOAL_POSITION"' \ From e18b13d85f337ee1d692244a5ce51a0794222769 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2026 23:00:32 -0400 Subject: [PATCH 06/21] scripts: fix dotted CLI flags to hyphens; disable nuPlan evals The dynamic pufferl argparser registers dotted [env]/[train] keys with hyphens (map_dir -> --env.map-dir), so the underscore forms were rejected as unrecognized arguments. Convert all dotted overrides to hyphens. Also disable the nuPlan-based evaluators (validation_replay + behavior buckets) for single-map CARLA training via --eval.<name>.enabled 0; validation_gigaflow stays enabled. Co-Authored-By: Claude Opus 4.7 --- scripts/single_agent_speed_run.sbatch | 34 ++++++++++++++++++--------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index b33da2735e..4c6533eff3 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -40,6 +40,18 @@ WANDB_GROUP="${WANDB_GROUP:-Nightly Test}" SEED_BASE="${SEED_BASE:-0}" SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID)) # one seed per array task: SEED_BASE .. SEED_BASE+2 +# Disable the nuPlan-based evaluators (replay + behavior buckets); they are +# irrelevant to single-map CARLA training and spawn their own envs. validation_gigaflow +# (CARLA) stays on. 
enabled 0 -> bool(0)=False in the eval manager. Note: dotted CLI +# flags use hyphens (the argparser maps _ -> -), so section names are hyphenated here. +DISABLE_EVALS="" +for ev in validation-replay behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \ + behaviors-lane-change behaviors-merge behaviors-parked-cars behaviors-roundabout \ + behaviors-stopped-traffic behaviors-traffic-light-green behaviors-traffic-light-stop \ + behaviors-unprotected-left behaviors-unprotected-right; do + DISABLE_EVALS="$DISABLE_EVALS --eval.$ev.enabled 0" +done + OVERLAY="/scratch/$USER/images/PufferDrive/overlay-15GB-500K.ext3" IMAGE="/share/apps/images/cuda12.8.1-cudnn9.8.0-ubuntu24.04.2.sif" @@ -64,17 +76,17 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' HEARTBEAT_PID=$! python -m pufferlib.pufferl train puffer_drive \ - --env.map_dir pufferlib/resources/drive/binaries/carla_town02 \ - --env.num_maps 1 \ - --env.max_agents_per_env 1 \ - --env.num_agents '"$NUM_AGENTS"' \ - --env.use_map_cache 1 \ - --env.traffic_light_behavior 0 \ - --env.goal_placement 1 \ - --env.num_target_waypoints 1 \ - --env.max_goal_position '"$MAX_GOAL_POSITION"' \ - --train.total_timesteps '"$TOTAL_TIMESTEPS"' \ - --train.seed '"$SEED"' \ + --env.map-dir pufferlib/resources/drive/binaries/carla_town02 \ + --env.num-maps 1 \ + --env.max-agents-per-env 1 \ + --env.num-agents '"$NUM_AGENTS"' \ + --env.use-map-cache 1 \ + --env.traffic-light-behavior 0 \ + --env.goal-placement 1 \ + --env.num-target-waypoints 1 \ + --env.max-goal-position '"$MAX_GOAL_POSITION"' \ + --train.total-timesteps '"$TOTAL_TIMESTEPS"' \ + --train.seed '"$SEED"' '"$DISABLE_EVALS"' \ --wandb --wandb-project puffer_drive --wandb-group "'"$WANDB_GROUP"'" --tag seed'"$SEED"' TRAIN_EXIT=$? 
From 0b5df39d5d3b080224a62cdaa460ea94d1152b72 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2026 23:08:57 -0400 Subject: [PATCH 07/21] drive.ini: declare enabled explicitly in [eval.validation_replay] It inherited enabled from validation_defaults, so the argparser never registered --eval.validation-replay.enabled and CLI overrides were rejected. Declaring it explicitly (same true default) makes it toggleable, matching the behaviors_* sections. Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 9f2f217428..636bab9ef4 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -274,6 +274,9 @@ eval.verify_coverage = true [eval.validation_replay] inherits = "validation_defaults" +; declared explicitly (same as the inherited default) so it can be toggled from +; the CLI, e.g. --eval.validation-replay.enabled 0 +enabled = true type = "multi_scenario" render = true render_backend = "triage_html" From 3336e3d8d01da932a2263e9d7339483376659d9b Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2026 23:43:36 -0400 Subject: [PATCH 08/21] drive.ini: add single_agent_obs evaluator for manual obs_html renders Dedicated, disabled-by-default evaluator mirroring the single-agent speed-run task (Town02, one agent, random goals, lights off) with render_backend=obs_html. Lets us faithfully obs-render any checkpoint via puffer eval puffer_drive --evaluator single_agent_obs --load-model-path <checkpoint> without it firing inline during training (enabled=false; standalone-by-name still runs it).
Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 636bab9ef4..fa57807724 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -305,6 +305,28 @@ env.resample_frequency = 500 eval.render_num_scenarios = 8 eval.render_max_steps = 300 +; Manual obs_html render of the single-agent speed-run task (Town02, one agent, +; random goals anywhere, lights off). enabled=false so it never runs inline during +; training; invoke standalone, e.g.: +; puffer eval puffer_drive --evaluator single_agent_obs --load-model-path +[eval.single_agent_obs] +inherits = "validation_gigaflow" +enabled = false +type = "multi_scenario" +render = true +render_backend = "obs_html" +env.map_dir = "pufferlib/resources/drive/binaries/carla_town02" +env.num_maps = 1 +env.num_agents = 8 +env.min_agents_per_env = 1 +env.max_agents_per_env = 1 +env.goal_placement = 1 +env.traffic_light_behavior = 0 +env.num_target_waypoints = 1 +env.max_goal_position = 1000 +eval.num_scenarios = 8 +eval.render_num_scenarios = 8 + ; --------------------------------------------------------------------------- ; Driving-behaviour evaluation: nuPlan scenes labeled by scene type. Each ; behavior is one [eval.behaviors_*] section. All inherit from the template From f105d5541d62988ef177564097850b2c68bc86e2 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 00:03:50 -0400 Subject: [PATCH 09/21] single-agent: min_waypoint_spacing 0 so goals can spawn near the agent With goal_placement=random, min_waypoint_spacing is the rejection floor on goal distance; 0 lets goals land right next to the agent (near goals, not only far). Set in the training launcher and the single_agent_obs render config. 
Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 1 + scripts/single_agent_speed_run.sbatch | 1 + 2 files changed, 2 insertions(+) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index fa57807724..fd8f665441 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -323,6 +323,7 @@ env.max_agents_per_env = 1 env.goal_placement = 1 env.traffic_light_behavior = 0 env.num_target_waypoints = 1 +env.min_waypoint_spacing = 0 env.max_goal_position = 1000 eval.num_scenarios = 8 eval.render_num_scenarios = 8 diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index 4c6533eff3..f525bb84ef 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -84,6 +84,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' --env.traffic-light-behavior 0 \ --env.goal-placement 1 \ --env.num-target-waypoints 1 \ + --env.min-waypoint-spacing 0 \ --env.max-goal-position '"$MAX_GOAL_POSITION"' \ --train.total-timesteps '"$TOTAL_TIMESTEPS"' \ --train.seed '"$SEED"' '"$DISABLE_EVALS"' \ From b3b14f69ce42b9d596d8fc5830d9602aaad6a4a5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 00:10:06 -0400 Subject: [PATCH 10/21] Revert single-agent-specific config from drive.ini Remove the [eval.single_agent_obs] section and the [eval.validation_replay] enabled= line; keep drive.ini as the shared default. The general goal_placement and use_map_cache [env] knobs stay (default off) since they're required for the CLI flags to exist. The launcher no longer tries to disable validation_replay (its enabled is inherited, not CLI-overridable without a drive.ini line). 
Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 26 -------------------------- scripts/single_agent_speed_run.sbatch | 2 +- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index fd8f665441..9f2f217428 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -274,9 +274,6 @@ eval.verify_coverage = true [eval.validation_replay] inherits = "validation_defaults" -; declared explicitly (same as the inherited default) so it can be toggled from -; the CLI, e.g. --eval.validation-replay.enabled 0 -enabled = true type = "multi_scenario" render = true render_backend = "triage_html" @@ -305,29 +302,6 @@ env.resample_frequency = 500 eval.render_num_scenarios = 8 eval.render_max_steps = 300 -; Manual obs_html render of the single-agent speed-run task (Town02, one agent, -; random goals anywhere, lights off). enabled=false so it never runs inline during -; training; invoke standalone, e.g.: -; puffer eval puffer_drive --evaluator single_agent_obs --load-model-path -[eval.single_agent_obs] -inherits = "validation_gigaflow" -enabled = false -type = "multi_scenario" -render = true -render_backend = "obs_html" -env.map_dir = "pufferlib/resources/drive/binaries/carla_town02" -env.num_maps = 1 -env.num_agents = 8 -env.min_agents_per_env = 1 -env.max_agents_per_env = 1 -env.goal_placement = 1 -env.traffic_light_behavior = 0 -env.num_target_waypoints = 1 -env.min_waypoint_spacing = 0 -env.max_goal_position = 1000 -eval.num_scenarios = 8 -eval.render_num_scenarios = 8 - ; --------------------------------------------------------------------------- ; Driving-behaviour evaluation: nuPlan scenes labeled by scene type. Each ; behavior is one [eval.behaviors_*] section. 
All inherit from the template diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index f525bb84ef..b3ece8efcb 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -45,7 +45,7 @@ SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID)) # one seed per array task: SEED_B # (CARLA) stays on. enabled 0 -> bool(0)=False in the eval manager. Note: dotted CLI # flags use hyphens (the argparser maps _ -> -), so section names are hyphenated here. DISABLE_EVALS="" -for ev in validation-replay behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \ +for ev in behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \ behaviors-lane-change behaviors-merge behaviors-parked-cars behaviors-roundabout \ behaviors-stopped-traffic behaviors-traffic-light-green behaviors-traffic-light-stop \ behaviors-unprotected-left behaviors-unprotected-right; do From d8a24db5770bc1d52860c4733d5dc700da62581f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 00:11:45 -0400 Subject: [PATCH 11/21] Restore eval-enabling config (validation_replay enabled, single_agent_obs) These are eval-scoped and do not alter default training: validation_replay's enabled was already inherited-true (now explicit, CLI-toggleable); single_agent_obs is enabled=false so it never runs inline. Re-enable the launcher's validation_replay disable for single-agent runs. 
Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 26 ++++++++++++++++++++++++++ scripts/single_agent_speed_run.sbatch | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 9f2f217428..fd8f665441 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -274,6 +274,9 @@ eval.verify_coverage = true [eval.validation_replay] inherits = "validation_defaults" +; declared explicitly (same as the inherited default) so it can be toggled from +; the CLI, e.g. --eval.validation-replay.enabled 0 +enabled = true type = "multi_scenario" render = true render_backend = "triage_html" @@ -302,6 +305,29 @@ env.resample_frequency = 500 eval.render_num_scenarios = 8 eval.render_max_steps = 300 +; Manual obs_html render of the single-agent speed-run task (Town02, one agent, +; random goals anywhere, lights off). enabled=false so it never runs inline during +; training; invoke standalone, e.g.: +; puffer eval puffer_drive --evaluator single_agent_obs --load-model-path +[eval.single_agent_obs] +inherits = "validation_gigaflow" +enabled = false +type = "multi_scenario" +render = true +render_backend = "obs_html" +env.map_dir = "pufferlib/resources/drive/binaries/carla_town02" +env.num_maps = 1 +env.num_agents = 8 +env.min_agents_per_env = 1 +env.max_agents_per_env = 1 +env.goal_placement = 1 +env.traffic_light_behavior = 0 +env.num_target_waypoints = 1 +env.min_waypoint_spacing = 0 +env.max_goal_position = 1000 +eval.num_scenarios = 8 +eval.render_num_scenarios = 8 + ; --------------------------------------------------------------------------- ; Driving-behaviour evaluation: nuPlan scenes labeled by scene type. Each ; behavior is one [eval.behaviors_*] section. 
All inherit from the template diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index b3ece8efcb..f525bb84ef 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -45,7 +45,7 @@ SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID)) # one seed per array task: SEED_B # (CARLA) stays on. enabled 0 -> bool(0)=False in the eval manager. Note: dotted CLI # flags use hyphens (the argparser maps _ -> -), so section names are hyphenated here. DISABLE_EVALS="" -for ev in behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \ +for ev in validation-replay behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \ behaviors-lane-change behaviors-merge behaviors-parked-cars behaviors-roundabout \ behaviors-stopped-traffic behaviors-traffic-light-green behaviors-traffic-light-stop \ behaviors-unprotected-left behaviors-unprotected-right; do From 2a0af95d3c5d516ce9baf4ce35799835f823ba41 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 11:14:44 -0400 Subject: [PATCH 12/21] single-agent: max_traffic_control_observations 0 to truly disable lights traffic_light_behavior=0 only stops the env force-halting at reds; the agent still learns to stop because a red-light violation zeroes the reward multiplier (no_red_light, drive.h). The red-light metric is gated on max_traffic_control_observations, so setting it to 0 removes lights from the observation AND keeps red_light_violation_rate at 0 (multiplier never zeroed) -- the agent can neither see nor be penalized by lights. Set in the launcher and the single_agent_obs render config. 
Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 1 + scripts/single_agent_speed_run.sbatch | 1 + 2 files changed, 2 insertions(+) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index fd8f665441..50a026b0b7 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -322,6 +322,7 @@ env.min_agents_per_env = 1 env.max_agents_per_env = 1 env.goal_placement = 1 env.traffic_light_behavior = 0 +env.max_traffic_control_observations = 0 env.num_target_waypoints = 1 env.min_waypoint_spacing = 0 env.max_goal_position = 1000 diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index f525bb84ef..889500e3da 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -82,6 +82,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' --env.num-agents '"$NUM_AGENTS"' \ --env.use-map-cache 1 \ --env.traffic-light-behavior 0 \ + --env.max-traffic-control-observations 0 \ --env.goal-placement 1 \ --env.num-target-waypoints 1 \ --env.min-waypoint-spacing 0 \ From 6a6c2e851c4f50bd476a9c5ccbe4c682b9987473 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 11:26:34 -0400 Subject: [PATCH 13/21] scripts: drop stale scheduling/logs comment from single-agent launcher Co-Authored-By: Claude Opus 4.7 --- scripts/single_agent_speed_run.sbatch | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index 889500e3da..028a3e471a 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -17,10 +17,6 @@ # NUM_AGENTS=2048 sbatch scripts/single_agent_speed_run.sbatch # SEED_BASE=100 sbatch scripts/single_agent_speed_run.sbatch # seeds 100,101,102 # -# Logs land in slurm-_.out in the directory you submit from. 
-# The h200_tandon QOS pool is often full (QOSGrpGRES); if it won't schedule, -# resubmit with --partition=a100_tandon or --partition=h100_tandon. -# #SBATCH --job-name=single_agent_speed_run #SBATCH --account=torch_pr_924_tandon_advanced #SBATCH --partition=h200_tandon From 2def57116b5eda03b320a297f41f6fb12bf3d64c Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 11:31:23 -0400 Subject: [PATCH 14/21] scripts: date-stamp wandb run name for single-agent launches Set WANDB_NAME=<launch-date>_single_agent_seed<seed> (WandbLogger passes no name=, so the env var sets the run name). Date-first so runs sort chronologically per launch. Co-Authored-By: Claude Opus 4.7 --- scripts/single_agent_speed_run.sbatch | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index 028a3e471a..2ed652cd98 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -35,6 +35,8 @@ TOTAL_TIMESTEPS="${TOTAL_TIMESTEPS:-1000000000}" # cap the run at 1B steps WANDB_GROUP="${WANDB_GROUP:-Nightly Test}" SEED_BASE="${SEED_BASE:-0}" SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID)) # one seed per array task: SEED_BASE .. SEED_BASE+2 +LAUNCH_DATE="$(date +%Y-%m-%d)" # stamp the wandb run name with the launch date +WANDB_NAME="${LAUNCH_DATE}_single_agent_seed${SEED}" # Disable the nuPlan-based evaluators (replay + behavior buckets); they are # irrelevant to single-map CARLA training and spawn their own envs.
validation_gigaflow @@ -55,6 +57,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' source /scratch/'"$USER"'/venvs/pufferdrive/bin/activate export TORCH_CUDA_ARCH_LIST="8.0;8.9;9.0" export PYTHONNOUSERSITE=1 + export WANDB_NAME='"$WANDB_NAME"' # wandb run name (no name= in WandbLogger -> env var wins) NCCL_DIR=$(compgen -G "/scratch/'"$USER"'/venvs/pufferdrive/lib/python3.12/site-packages/nvidia/nccl/lib" 2>/dev/null) \ && export LD_LIBRARY_PATH="$NCCL_DIR:$LD_LIBRARY_PATH" cd /scratch/'"$USER"'/code/PufferDrive From 0b60bfdde71ff9367d480f31fe823459ceb529bb Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 11:57:38 -0400 Subject: [PATCH 15/21] drive: random goals respect max_waypoint_spacing as an upper distance bound compute_goals_random previously only enforced a min distance and sampled uniformly over the whole map (far-skewed, no cap). Now it accepts points in [min_waypoint_spacing, max_waypoint_spacing], with a closest-to-band fallback so a tight max still lands a nearby goal. The random-mode contexts (single-agent launcher + single_agent_obs render) default max_waypoint_spacing to a huge value (= anywhere on the map), preserving prior behavior; the global [env] default stays 60 for route mode / default training. 
Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 1 + pufferlib/ocean/drive/drive.h | 39 ++++++++++++++++++--------- scripts/single_agent_speed_run.sbatch | 2 ++ 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index 50a026b0b7..ef5c48cc14 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -325,6 +325,7 @@ env.traffic_light_behavior = 0 env.max_traffic_control_observations = 0 env.num_target_waypoints = 1 env.min_waypoint_spacing = 0 +env.max_waypoint_spacing = 1000000 env.max_goal_position = 1000 eval.num_scenarios = 8 eval.render_num_scenarios = 8 diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index 5fdfefe581..a9be119629 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -1893,11 +1893,12 @@ static int compute_new_route(Drive *env, int agent_idx, int current_lane_idx) { return 1; // Success } -// Place each target waypoint at a random drivable point anywhere on the map, -// rejecting points closer than min_waypoint_spacing so goals are never trivially -// close. Goals can land in any direction, including behind the agent. The route -// and path are still built at spawn (used for lane observations and progression); -// only the goal positions are overridden here. +// Place each target waypoint at a random drivable point whose distance from the +// agent falls in [min_waypoint_spacing, max_waypoint_spacing]. With a very large +// max this is effectively "anywhere on the map"; a small max keeps goals nearby. +// Goals can land in any direction, including behind the agent. The route and path +// are still built at spawn (used for lane observations and progression); only the +// goal positions are overridden here. 
static void compute_goals_random(Drive *env, int agent_idx) { Agent *agent = &env->agents[agent_idx]; @@ -1907,13 +1908,15 @@ static void compute_goals_random(Drive *env, int agent_idx) { } float min_dist_sq = env->min_waypoint_spacing * env->min_waypoint_spacing; + float max_dist_sq = env->max_waypoint_spacing * env->max_waypoint_spacing; const int MAX_GOAL_ATTEMPTS = 30; for (int i = 0; i < num_target_waypoints; i++) { - // Fall back to the agent's own position if no distant point is found. + // Fall back to the agent's own position if no drivable point is sampled at all. float goal_x = agent->sim_x; float goal_y = agent->sim_y; float goal_z = agent->sim_z; + float best_band_miss = -1.0f; // <0: nothing sampled yet; else closest-to-band distance for (int attempt = 0; attempt < MAX_GOAL_ATTEMPTS; attempt++) { int list_idx = rand() % env->grid_map->num_drivable_grid_cell; @@ -1935,16 +1938,28 @@ static void compute_goals_random(Drive *env, int agent_idx) { RoadMapElement *lane = &env->road_elements[chosen.entity_idx]; float cx = lane->x[chosen.geometry_idx]; float cy = lane->y[chosen.geometry_idx]; + float cz = lane->z[chosen.geometry_idx]; float dx = cx - agent->sim_x; float dy = cy - agent->sim_y; - if (dx * dx + dy * dy < min_dist_sq) { - continue; + float d2 = dx * dx + dy * dy; + + if (d2 >= min_dist_sq && d2 <= max_dist_sq) { + // Inside the [min_waypoint_spacing, max_waypoint_spacing] band: take it. + goal_x = cx; + goal_y = cy; + goal_z = cz; + break; } - goal_x = cx; - goal_y = cy; - goal_z = lane->z[chosen.geometry_idx]; - break; + // Out of band: keep the point closest to the band so a tight max still + // yields a nearby goal instead of falling back to the agent's position. + float miss = (d2 > max_dist_sq) ? 
(d2 - max_dist_sq) : (min_dist_sq - d2); + if (best_band_miss < 0.0f || miss < best_band_miss) { + best_band_miss = miss; + goal_x = cx; + goal_y = cy; + goal_z = cz; + } } agent->goal_positions_x[i] = goal_x; diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch index 2ed652cd98..948a4db27c 100755 --- a/scripts/single_agent_speed_run.sbatch +++ b/scripts/single_agent_speed_run.sbatch @@ -31,6 +31,7 @@ set -euo pipefail # Tunables (override via the environment, e.g. NUM_AGENTS=2048 sbatch ...). NUM_AGENTS="${NUM_AGENTS:-1024}" # number of single-agent environments MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-1000}" # obs goal-vector normalization (m); cover Town02's extent +MAX_WAYPOINT_SPACING="${MAX_WAYPOINT_SPACING:-1000000}" # random-goal upper distance bound; huge = anywhere on map TOTAL_TIMESTEPS="${TOTAL_TIMESTEPS:-1000000000}" # cap the run at 1B steps WANDB_GROUP="${WANDB_GROUP:-Nightly Test}" SEED_BASE="${SEED_BASE:-0}" @@ -85,6 +86,7 @@ singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' --env.goal-placement 1 \ --env.num-target-waypoints 1 \ --env.min-waypoint-spacing 0 \ + --env.max-waypoint-spacing '"$MAX_WAYPOINT_SPACING"' \ --env.max-goal-position '"$MAX_GOAL_POSITION"' \ --train.total-timesteps '"$TOTAL_TIMESTEPS"' \ --train.seed '"$SEED"' '"$DISABLE_EVALS"' \ From 0a1e545ba7e8d47c106be01d3b3cfd7a3166f0f0 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 14:57:34 -0400 Subject: [PATCH 16/21] drive: map_cache_insert reuses freed slots before growing free_shared_map_data NULLs an entry's slot on refcount zero; reuse those holes on the next insert so a resample cycle (free all, rebuild) keeps g_map_cache_count bounded by the number of distinct maps instead of appending past NULL holes. 
Co-Authored-By: Claude Opus 4.7 --- pufferlib/ocean/drive/drive.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pufferlib/ocean/drive/drive.h b/pufferlib/ocean/drive/drive.h index a9be119629..8786343256 100644 --- a/pufferlib/ocean/drive/drive.h +++ b/pufferlib/ocean/drive/drive.h @@ -3487,6 +3487,16 @@ static void map_cache_insert(struct SharedMapData *entry) { if (g_map_cache_pid == 0) { g_map_cache_pid = getpid(); } + // Reuse a slot vacated by free_shared_map_data before growing the array, so a + // resample cycle (vec_close frees every entry, then the rebuild re-inserts) + // keeps g_map_cache_count bounded by the number of distinct maps instead of + // appending past NULL holes on every cycle. + for (int i = 0; i < g_map_cache_count; i++) { + if (g_map_cache[i] == NULL) { + g_map_cache[i] = entry; + return; + } + } g_map_cache = (struct SharedMapData **) realloc(g_map_cache, (g_map_cache_count + 1) * sizeof(struct SharedMapData *)); g_map_cache[g_map_cache_count++] = entry; From 8a4ccba2e6974d11fbc1447840d3844eafd809a8 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 14:59:02 -0400 Subject: [PATCH 17/21] scripts: single-agent speed-run program_config for submit_cluster.py Move the single-agent Town02 / random-goal / lights-off knobs into a program_config YAML so the run goes through the canonical submit_cluster.py path (code isolation + container + heartbeat) instead of a hand-rolled sbatch. Launch 3 seeds via --args train.seed=0:1:2. 
Co-Authored-By: Claude Opus 4.7 --- .../single_agent_speed_run.yaml | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 scripts/cluster_configs/single_agent_speed_run.yaml diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml new file mode 100644 index 0000000000..0d8bbc153f --- /dev/null +++ b/scripts/cluster_configs/single_agent_speed_run.yaml @@ -0,0 +1,59 @@ +# Single-agent "speed run" training program config (for submit_cluster.py). +# One controlled agent per environment on a single CARLA map (Town02), with many +# parallel environments and goals sampled at random drivable points anywhere on +# the map. Traffic lights are fully disabled (no observation, no metric, no reward +# penalty). Keys here override pufferlib/config/ocean/drive.ini; submit_cluster.py +# converts underscores in each key to dashes for the CLI. +# +# Launch (3 seeds via the colon sweep): +# source /scratch/$USER/venvs/pufferdrive/bin/activate +# python scripts/submit_cluster.py \ +# --save_dir /scratch/$USER/runs --prefix $(date +%Y-%m-%d)_single_agent \ +# --compute_config scripts/cluster_configs/nyu_greene.yaml \ +# --program_config scripts/cluster_configs/single_agent_speed_run.yaml \ +# --container --heartbeat --account --partition --time 720 \ +# --args train.seed=0:1:2 + +# Environment: single agent per env, Town02 only, shared map cache +env.simulation_mode: gigaflow +env.map_dir: pufferlib/resources/drive/binaries/carla_town02 +env.num_maps: 1 +env.min_agents_per_env: 1 +env.max_agents_per_env: 1 +env.num_agents: 1024 +env.use_map_cache: 1 + +# Goals: one random goal anywhere (near or far, any direction incl. behind); +# max_waypoint_spacing huge = no upper distance cap. +env.goal_placement: 1 +env.num_target_waypoints: 1 +env.min_waypoint_spacing: 0 +env.max_waypoint_spacing: 1000000 +env.max_goal_position: 1000 + +# Traffic lights fully off: not observed, not scored, no reward penalty. 
+env.traffic_light_behavior: 0 +env.max_traffic_control_observations: 0 + +# Training +train.total_timesteps: 1_000_000_000 + +# Disable the nuPlan-based evaluators (keep validation_gigaflow, which is CARLA). +eval.validation_replay.enabled: 0 +eval.behaviors_full_dir.enabled: 0 +eval.behaviors_hard_stop.enabled: 0 +eval.behaviors_highway_straight.enabled: 0 +eval.behaviors_lane_change.enabled: 0 +eval.behaviors_merge.enabled: 0 +eval.behaviors_parked_cars.enabled: 0 +eval.behaviors_roundabout.enabled: 0 +eval.behaviors_stopped_traffic.enabled: 0 +eval.behaviors_traffic_light_green.enabled: 0 +eval.behaviors_traffic_light_stop.enabled: 0 +eval.behaviors_unprotected_left.enabled: 0 +eval.behaviors_unprotected_right.enabled: 0 + +# W&B +wandb: True +wandb_project: puffer_drive +wandb_group: Nightly Test From 7d84e783993f78ed656840fae10784e094737014 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 15:03:21 -0400 Subject: [PATCH 18/21] scripts: single-agent wandb_group Nightly_Test (no space) submit_cluster.py joins the inner command into a bash -c string without quoting arg values, so a space in --wandb-group split the arg and crashed argparse. Co-Authored-By: Claude Opus 4.7 --- scripts/cluster_configs/single_agent_speed_run.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/cluster_configs/single_agent_speed_run.yaml b/scripts/cluster_configs/single_agent_speed_run.yaml index 0d8bbc153f..1916431c9e 100644 --- a/scripts/cluster_configs/single_agent_speed_run.yaml +++ b/scripts/cluster_configs/single_agent_speed_run.yaml @@ -53,7 +53,8 @@ eval.behaviors_traffic_light_stop.enabled: 0 eval.behaviors_unprotected_left.enabled: 0 eval.behaviors_unprotected_right.enabled: 0 -# W&B +# W&B. Group has no space: submit_cluster.py joins the inner command into a +# bash -c string without quoting arg values, so a space would split the arg. 
wandb: True wandb_project: puffer_drive -wandb_group: Nightly Test +wandb_group: Nightly_Test From 6a8926b4cdcaf7604805d65be73a24ff4bd31b1c Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 15:05:14 -0400 Subject: [PATCH 19/21] scripts: retire hand-rolled single_agent_speed_run.sbatch Superseded by the submit_cluster.py path (scripts/cluster_configs/single_agent_speed_run.yaml), which provides per-run code isolation, container wrapping, and the heartbeat. The hand-rolled sbatch ran from the shared checkout with no isolation, which is what let a rebuild SIGBUS the live seeds. Co-Authored-By: Claude Opus 4.7 --- scripts/single_agent_speed_run.sbatch | 98 --------------------------- 1 file changed, 98 deletions(-) delete mode 100755 scripts/single_agent_speed_run.sbatch diff --git a/scripts/single_agent_speed_run.sbatch b/scripts/single_agent_speed_run.sbatch deleted file mode 100755 index 948a4db27c..0000000000 --- a/scripts/single_agent_speed_run.sbatch +++ /dev/null @@ -1,98 +0,0 @@ -#!/bin/bash -# Single-agent "speed run" training -- nightly 3-seed sweep. -# -# One controlled agent per environment on a single CARLA map (Town02), with many -# parallel environments, and goals sampled at random drivable points anywhere on -# the map (goal_placement=1) -- so the goal can be far away and in any direction, -# including behind the agent. On reaching a goal a new random one is drawn, so the -# agent does continuous point-to-point runs across the town. -# -# Launches 3 seeds as a SLURM job array, all logged to wandb group "Nightly Test". 
-# -# Launch: -# sbatch scripts/single_agent_speed_run.sbatch -# -# Override scheduling / scale / seeds without editing this file: -# sbatch --partition=a100_tandon --account=torch_pr_924_tandon_advanced scripts/single_agent_speed_run.sbatch -# NUM_AGENTS=2048 sbatch scripts/single_agent_speed_run.sbatch -# SEED_BASE=100 sbatch scripts/single_agent_speed_run.sbatch # seeds 100,101,102 -# -#SBATCH --job-name=single_agent_speed_run -#SBATCH --account=torch_pr_924_tandon_advanced -#SBATCH --partition=h200_tandon -#SBATCH --array=0-2 -#SBATCH --gres=gpu:1 -#SBATCH --cpus-per-task=16 -#SBATCH --mem=128gb -#SBATCH --time=12:00:00 - -set -euo pipefail - -# Tunables (override via the environment, e.g. NUM_AGENTS=2048 sbatch ...). -NUM_AGENTS="${NUM_AGENTS:-1024}" # number of single-agent environments -MAX_GOAL_POSITION="${MAX_GOAL_POSITION:-1000}" # obs goal-vector normalization (m); cover Town02's extent -MAX_WAYPOINT_SPACING="${MAX_WAYPOINT_SPACING:-1000000}" # random-goal upper distance bound; huge = anywhere on map -TOTAL_TIMESTEPS="${TOTAL_TIMESTEPS:-1000000000}" # cap the run at 1B steps -WANDB_GROUP="${WANDB_GROUP:-Nightly Test}" -SEED_BASE="${SEED_BASE:-0}" -SEED=$((SEED_BASE + SLURM_ARRAY_TASK_ID)) # one seed per array task: SEED_BASE .. SEED_BASE+2 -LAUNCH_DATE="$(date +%Y-%m-%d)" # stamp the wandb run name with the launch date -WANDB_NAME="${LAUNCH_DATE}_single_agent_seed${SEED}" - -# Disable the nuPlan-based evaluators (replay + behavior buckets); they are -# irrelevant to single-map CARLA training and spawn their own envs. validation_gigaflow -# (CARLA) stays on. enabled 0 -> bool(0)=False in the eval manager. Note: dotted CLI -# flags use hyphens (the argparser maps _ -> -), so section names are hyphenated here. 
-DISABLE_EVALS="" -for ev in validation-replay behaviors-full-dir behaviors-hard-stop behaviors-highway-straight \ - behaviors-lane-change behaviors-merge behaviors-parked-cars behaviors-roundabout \ - behaviors-stopped-traffic behaviors-traffic-light-green behaviors-traffic-light-stop \ - behaviors-unprotected-left behaviors-unprotected-right; do - DISABLE_EVALS="$DISABLE_EVALS --eval.$ev.enabled 0" -done - -OVERLAY="/scratch/$USER/images/PufferDrive/overlay-15GB-500K.ext3" -IMAGE="/share/apps/images/cuda12.8.1-cudnn9.8.0-ubuntu24.04.2.sif" - -singularity exec --nv --overlay "${OVERLAY}:ro" "$IMAGE" bash -c ' - source /scratch/'"$USER"'/venvs/pufferdrive/bin/activate - export TORCH_CUDA_ARCH_LIST="8.0;8.9;9.0" - export PYTHONNOUSERSITE=1 - export WANDB_NAME='"$WANDB_NAME"' # wandb run name (no name= in WandbLogger -> env var wins) - NCCL_DIR=$(compgen -G "/scratch/'"$USER"'/venvs/pufferdrive/lib/python3.12/site-packages/nvidia/nccl/lib" 2>/dev/null) \ - && export LD_LIBRARY_PATH="$NCCL_DIR:$LD_LIBRARY_PATH" - cd /scratch/'"$USER"'/code/PufferDrive - - # Single-map dir holding only Town02, derived from the bundled carla maps. - # Atomic create (temp + mv) so concurrent array tasks never read a partial file. - DEST=pufferlib/resources/drive/binaries/carla_town02/opendrive__Town02.bin - if [ ! -e "$DEST" ]; then - mkdir -p pufferlib/resources/drive/binaries/carla_town02 - TMP=$(mktemp pufferlib/resources/drive/binaries/carla_town02/Town02.XXXXXX) - cp pufferlib/resources/drive/binaries/carla/opendrive__Town02.bin "$TMP" && mv -f "$TMP" "$DEST" - fi - - python scripts/gpu_heartbeat.py > /tmp/gpu_heartbeat_'"$SLURM_JOB_ID"'.log 2>&1 & - HEARTBEAT_PID=$! 
- - python -m pufferlib.pufferl train puffer_drive \ - --env.map-dir pufferlib/resources/drive/binaries/carla_town02 \ - --env.num-maps 1 \ - --env.max-agents-per-env 1 \ - --env.num-agents '"$NUM_AGENTS"' \ - --env.use-map-cache 1 \ - --env.traffic-light-behavior 0 \ - --env.max-traffic-control-observations 0 \ - --env.goal-placement 1 \ - --env.num-target-waypoints 1 \ - --env.min-waypoint-spacing 0 \ - --env.max-waypoint-spacing '"$MAX_WAYPOINT_SPACING"' \ - --env.max-goal-position '"$MAX_GOAL_POSITION"' \ - --train.total-timesteps '"$TOTAL_TIMESTEPS"' \ - --train.seed '"$SEED"' '"$DISABLE_EVALS"' \ - --wandb --wandb-project puffer_drive --wandb-group "'"$WANDB_GROUP"'" --tag seed'"$SEED"' - TRAIN_EXIT=$? - - kill $HEARTBEAT_PID 2>/dev/null || true - exit $TRAIN_EXIT -' From 1f911ca45a01554d97f5474d7c0d9b13c7bb9a97 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 17:32:43 -0400 Subject: [PATCH 20/21] scripts: single_agent_no_lane_vel program_config (lane+velocity reward ablation) Self-contained variant of single_agent_speed_run.yaml with lane (lane_align, lane_center) and velocity (vel_align, velocity, overspeed) rewards zeroed, so the ablation is defined by a config file rather than ad-hoc --args. seed stays on --args. Co-Authored-By: Claude Opus 4.7 --- .../single_agent_no_lane_vel.yaml | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 scripts/cluster_configs/single_agent_no_lane_vel.yaml diff --git a/scripts/cluster_configs/single_agent_no_lane_vel.yaml b/scripts/cluster_configs/single_agent_no_lane_vel.yaml new file mode 100644 index 0000000000..7eacccb88a --- /dev/null +++ b/scripts/cluster_configs/single_agent_no_lane_vel.yaml @@ -0,0 +1,64 @@ +# Single-agent "speed run" ablation: lane + velocity reward shaping removed. 
+# Same as single_agent_speed_run.yaml (Town02, one agent, random goals, lights +# off) but with all lane-keeping and velocity rewards zeroed, so the policy is +# shaped only by goal-reaching + the safety penalties (collision/offroad/comfort). +# Program configs don't inherit, so this duplicates the base; keep the two in sync. +# +# Launch (per-launch knobs like the seed stay on --args): +# source /scratch/$USER/venvs/pufferdrive/bin/activate +# python scripts/submit_cluster.py \ +# --save_dir /scratch/$USER/runs --prefix $(date +%Y-%m-%d)_no_lane_vel \ +# --compute_config scripts/cluster_configs/nyu_greene.yaml \ +# --program_config scripts/cluster_configs/single_agent_no_lane_vel.yaml \ +# --container --heartbeat --account ACCOUNT --partition PARTITION --time 720 \ +# --args train.seed=0:1:2 + +# Environment: single agent per env, Town02 only, shared map cache +env.simulation_mode: gigaflow +env.map_dir: pufferlib/resources/drive/binaries/carla_town02 +env.num_maps: 1 +env.min_agents_per_env: 1 +env.max_agents_per_env: 1 +env.num_agents: 1024 +env.use_map_cache: 1 + +# Goals: one random goal anywhere, no upper distance cap. +env.goal_placement: 1 +env.num_target_waypoints: 1 +env.min_waypoint_spacing: 0 +env.max_waypoint_spacing: 1000000 +env.max_goal_position: 1000 + +# Traffic lights fully off: not observed, not scored, no reward penalty. +env.traffic_light_behavior: 0 +env.max_traffic_control_observations: 0 + +# Ablation: lane-keeping + velocity reward shaping removed (goal + safety only). +env.reward_lane_align: 0 +env.reward_lane_center: 0 +env.reward_vel_align: 0 +env.reward_velocity: 0 +env.reward_overspeed: 0 + +# Training +train.total_timesteps: 1_000_000_000 + +# Disable the nuPlan-based evaluators (keep validation_gigaflow, which is CARLA). 
+eval.validation_replay.enabled: 0 +eval.behaviors_full_dir.enabled: 0 +eval.behaviors_hard_stop.enabled: 0 +eval.behaviors_highway_straight.enabled: 0 +eval.behaviors_lane_change.enabled: 0 +eval.behaviors_merge.enabled: 0 +eval.behaviors_parked_cars.enabled: 0 +eval.behaviors_roundabout.enabled: 0 +eval.behaviors_stopped_traffic.enabled: 0 +eval.behaviors_traffic_light_green.enabled: 0 +eval.behaviors_traffic_light_stop.enabled: 0 +eval.behaviors_unprotected_left.enabled: 0 +eval.behaviors_unprotected_right.enabled: 0 + +# W&B (group has no space; see single_agent_speed_run.yaml note) +wandb: True +wandb_project: puffer_drive +wandb_group: no_lane_vel From 7d931effcc5f895cb3ddb921f2c8048020f15a83 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 23 May 2026 17:59:24 -0400 Subject: [PATCH 21/21] drive.ini: single_agent_obs renders full 1280-step episodes Override the inherited validation_gigaflow scenario_length=500 / render_max_steps=300 so the obs render matches the single-agent training episode length (1280 steps). Co-Authored-By: Claude Opus 4.7 --- pufferlib/config/ocean/drive.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pufferlib/config/ocean/drive.ini b/pufferlib/config/ocean/drive.ini index ef5c48cc14..4c14e9baec 100644 --- a/pufferlib/config/ocean/drive.ini +++ b/pufferlib/config/ocean/drive.ini @@ -327,8 +327,10 @@ env.num_target_waypoints = 1 env.min_waypoint_spacing = 0 env.max_waypoint_spacing = 1000000 env.max_goal_position = 1000 +env.scenario_length = 1280 eval.num_scenarios = 8 eval.render_num_scenarios = 8 +eval.render_max_steps = 1280 ; --------------------------------------------------------------------------- ; Driving-behaviour evaluation: nuPlan scenes labeled by scene type. Each