From 0318f097925d43509acdf56abaf192f4e569fb04 Mon Sep 17 00:00:00 2001 From: Tim-phant Date: Wed, 20 May 2026 08:48:52 +0000 Subject: [PATCH 1/7] Add Boxoban incremental loader groundwork --- ocean/boxoban/binding.c | 4 ++ ocean/boxoban/boxoban.h | 4 ++ ocean/boxoban/boxoban_maps.h | 101 ++++++++++++++++++++++++++++++++++- 3 files changed, 108 insertions(+), 1 deletion(-) diff --git a/ocean/boxoban/binding.c b/ocean/boxoban/binding.c index 86c6b341d8..e06fa85579 100644 --- a/ocean/boxoban/binding.c +++ b/ocean/boxoban/binding.c @@ -17,6 +17,10 @@ void my_init(Env* env, Dict* kwargs) { env->max_steps = (int)dict_get(kwargs, "max_steps")->value; env->int_r_coeff = (float)dict_get(kwargs, "int_r_coeff")->value; env->target_loss_pen_coeff = (float)dict_get(kwargs, "target_loss_pen_coeff")->value; + env->curriculum_mode = (env->difficulty_id == 5); + env->curriculum_difficulty = 0; + env->largest_solved_difficulty = -1; + env->episode_maps_solved = 0; init(env); } diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h index 35b61a2529..d88f96968b 100644 --- a/ocean/boxoban/boxoban.h +++ b/ocean/boxoban/boxoban.h @@ -62,6 +62,10 @@ typedef struct { Client* client; int win; float episode_return; + int curriculum_mode; // 1 when using incremental difficulty mode + int curriculum_difficulty; // current active difficulty in curriculum mode + int largest_solved_difficulty; // highest difficulty solved this episode, -1 if none + int episode_maps_solved; // number of puzzles solved this episode } Boxoban; void ensure_map_loaded(void); diff --git a/ocean/boxoban/boxoban_maps.h b/ocean/boxoban/boxoban_maps.h index c633c37307..3433a637b3 100644 --- a/ocean/boxoban/boxoban_maps.h +++ b/ocean/boxoban/boxoban_maps.h @@ -25,6 +25,22 @@ extern size_t PUZZLE_COUNT; extern size_t PUZZLE_SIZE; extern size_t PUZZLE_OBS_BYTES; +#define BOXOBAN_DIFFICULTY_BASIC 0 +#define BOXOBAN_DIFFICULTY_EASY 1 +#define BOXOBAN_DIFFICULTY_MEDIUM 2 +#define BOXOBAN_DIFFICULTY_HARD 3 +#define BOXOBAN_DIFFICULTY_UNFILTERED 4 +#define BOXOBAN_DIFFICULTY_INCREMENTAL 5 + +#define BOXOBAN_INCREMENTAL_MIN_DIFFICULTY BOXOBAN_DIFFICULTY_BASIC +#define BOXOBAN_INCREMENTAL_MAX_DIFFICULTY BOXOBAN_DIFFICULTY_HARD +#define BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES 4 + +extern uint8_t *INCREMENTAL_MAP_BASES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES]; +extern size_t INCREMENTAL_MAP_FILESIZES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES]; +extern size_t INCREMENTAL_PUZZLE_COUNTS[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES]; + +int boxoban_ensure_bin_for_difficulty(const char* difficulty, char* out_path, size_t out_cap); int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap); int boxoban_set_map_path(const char *path); int boxoban_difficulty_id_from_name(const char* difficulty_name); @@ -38,6 +54,9 @@ size_t MAP_FILESIZE = 0; size_t PUZZLE_COUNT = 0; size_t PUZZLE_SIZE = BOXOBAN_PUZZLE_BYTES; size_t PUZZLE_OBS_BYTES = BOXOBAN_PUZZLE_OBS_BYTES; +uint8_t *INCREMENTAL_MAP_BASES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0}; +size_t INCREMENTAL_MAP_FILESIZES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0}; +size_t INCREMENTAL_PUZZLE_COUNTS[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0}; static char* BOXOBAN_MAP_PATH = NULL; static const char* BOXOBAN_LEVEL_ROOT = "resources/boxoban/levels"; @@ -325,7 +344,7 @@ static int boxoban_bin_path(const char* difficulty, char* out_path, size_t out_c return 0; } -int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) { +int boxoban_ensure_bin_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) { if (difficulty == NULL || out_path == NULL) { return -1; } @@ -356,12 +375,92 @@ int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, fprintf(stdout, "[Boxoban] Generated %zu puzzles for '%s' at %s\n", puzzle_count, difficulty, out_path); } + return 0; +} + +int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) { + if (boxoban_ensure_bin_for_difficulty(difficulty, out_path, out_cap) != 0) { + return -1; + } if (boxoban_set_map_path(out_path) != 0) { return -1; } return 0; } +static int boxoban_ensure_incremental_bins(void) { + const char* difficulties[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = { + "basic", + "easy", + "medium", + "hard", + }; + + for (int i = 0; i < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; i++) { + char bin_path[512]; + if (boxoban_ensure_bin_for_difficulty(difficulties[i], bin_path, sizeof(bin_path)) != 0) { + return -1; + } + } + + return 0; +} + +static int boxoban_load_incremental_bin_slot(int slot, const char* difficulty) { + char bin_path[512]; + int fd; + struct stat st; + uint8_t* map_base; + + if (slot < 0 || slot >= BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) { + return -1; + } + if (boxoban_ensure_bin_for_difficulty(difficulty, bin_path, sizeof(bin_path)) != 0) { + return -1; + } + + fd = open(bin_path, O_RDONLY); + if (fd < 0) { + return -1; + } + if (fstat(fd, &st) != 0) { + close(fd); + return -1; + } + if ((size_t)st.st_size % PUZZLE_SIZE != 0) { + close(fd); + return -1; + } + + map_base = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (map_base == MAP_FAILED) { + return -1; + } + + INCREMENTAL_MAP_BASES[slot] = map_base; + INCREMENTAL_MAP_FILESIZES[slot] = st.st_size; + INCREMENTAL_PUZZLE_COUNTS[slot] = (size_t)st.st_size / PUZZLE_SIZE; + return 0; +} + +static int boxoban_load_incremental_bins(void) { + const char* difficulties[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = { + "basic", + "easy", + "medium", + "hard", + }; + + for (int i = 0; i < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; i++) { + if (boxoban_load_incremental_bin_slot(i, difficulties[i]) != 0) { + return -1; + } + } + + return 0; +} + static void reset_map_cache(void) { if (MAP_BASE != NULL && MAP_BASE != MAP_FAILED && MAP_FILESIZE > 0) { munmap(MAP_BASE, MAP_FILESIZE); From 0fdb8e3a3915d71303ef2ffe9af3434ddc636cfc Mon Sep 17 00:00:00 2001 From: Tim-phant Date: Wed, 20 May 2026 09:02:53 +0000 Subject: [PATCH 2/7] Add incremental Boxoban progression; keep non-incremental behavior unchanged --- ocean/boxoban/boxoban.h | 67 ++++++++++++++++++++++++++++++------ ocean/boxoban/boxoban_maps.h | 31 +++++++---------- 2 files changed, 69 insertions(+), 29 deletions(-) diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h index d88f96968b..4e8e496e75 100644 --- a/ocean/boxoban/boxoban.h +++ b/ocean/boxoban/boxoban.h @@ -71,6 +71,15 @@ typedef struct { void ensure_map_loaded(void); static int boxoban_configure_maps_from_env(Boxoban* env) { + if (env->difficulty_id == BOXOBAN_DIFFICULTY_INCREMENTAL) { + reset_incremental_map_cache(); + if (boxoban_load_incremental_bins() != 0) { + fprintf(stderr, "Failed to load incremental Boxoban map bins\n"); + return -1; + } + return 0; + } + if (env->difficulty_id == -1) { return 0; } @@ -111,8 +120,8 @@ static inline unsigned char get_intermediate_reward_status(Boxoban *env, int x, return env->intermediate_rewards[(y)*env->size + (x)]; } -static inline const uint32_t get_random_puzzle_idx(const Boxoban *env) { - int idx = rand_r(&env->rng) % PUZZLE_COUNT; +static inline const uint32_t get_random_puzzle_idx(const Boxoban *env, size_t puzzle_count) { + int idx = rand_r(&env->rng) % puzzle_count; return idx; } @@ -134,11 +143,20 @@ void init (Boxoban* env) { void add_log(Boxoban* env) { - float denom = (float)env->n_boxes; - float num = (float)env->on_target; - float perf = (env->win== 1) ? 1.0f : 0.0f; + float perf; + float score; + if (env->curriculum_mode) { + score = 0.0f; + if (env->largest_solved_difficulty >= 0) { + score = (float)(env->largest_solved_difficulty + 1); + } + perf = score / (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; + } else { + perf = (env->win == 1) ? 1.0f : 0.0f; + score = perf; + } env->log.perf += perf; - env->log.score += perf; + env->log.score += score; env->log.episode_length += env->tick; env->log.episode_return += env->episode_return; env->log.on_targets += env->on_target; @@ -153,10 +171,16 @@ bool clear(Boxoban* env, int x, int y) { return (get_entity(env, WALLS, x, y) == 0) && (get_entity(env, BOXES, x, y) == 0); } -// Required function -void c_reset(Boxoban* env) { - const uint32_t i = get_random_puzzle_idx(env); - const uint8_t* puzzle = MAP_BASE + (size_t)i * PUZZLE_SIZE; +static void load_random_puzzle(Boxoban* env) { + const uint8_t* map_base = MAP_BASE; + size_t puzzle_count = PUZZLE_COUNT; + if (env->curriculum_mode) { + map_base = INCREMENTAL_MAP_BASES[env->curriculum_difficulty]; + puzzle_count = INCREMENTAL_PUZZLE_COUNTS[env->curriculum_difficulty]; + } + + const uint32_t i = get_random_puzzle_idx(env, puzzle_count); + const uint8_t* puzzle = map_base + (size_t)i * PUZZLE_SIZE; memcpy(env->observations, puzzle, PUZZLE_OBS_BYTES); const uint8_t* meta = puzzle + PUZZLE_OBS_BYTES; @@ -168,10 +192,20 @@ void c_reset(Boxoban* env) { memcpy(env->intermediate_rewards, env->observations + TARGET * env->size * env->size,env->size * env->size); +} +// Required function +void c_reset(Boxoban* env) { env->tick = 0; env->win = 0; env->episode_return = 0; + if (env->curriculum_mode) { + env->curriculum_difficulty = 0; + env->largest_solved_difficulty = -1; + env->episode_maps_solved = 0; + } + + load_random_puzzle(env); if (!env->initialized) { env->tick = rand_r(&env->rng) % env->max_steps; @@ -264,10 +298,21 @@ void c_step(Boxoban* env) { //Terminals if (env->on_target == env->n_targets) { - env->terminals[0] = 1; env->rewards[0] += 1.0; env->win = 1; env->episode_return += env->rewards[0]; + if (env->curriculum_mode) { + if (env->curriculum_difficulty > env->largest_solved_difficulty) { + env->largest_solved_difficulty = env->curriculum_difficulty; + } + env->episode_maps_solved += 1; + if (env->curriculum_difficulty < BOXOBAN_INCREMENTAL_MAX_DIFFICULTY) { + env->curriculum_difficulty += 1; + } + load_random_puzzle(env); + return; + } + env->terminals[0] = 1; add_log(env); c_reset(env); return; diff --git a/ocean/boxoban/boxoban_maps.h b/ocean/boxoban/boxoban_maps.h index 3433a637b3..7f2db7d470 100644 --- a/ocean/boxoban/boxoban_maps.h +++ b/ocean/boxoban/boxoban_maps.h @@ -388,24 +388,6 @@ int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, return 0; } -static int boxoban_ensure_incremental_bins(void) { - const char* difficulties[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = { - "basic", - "easy", - "medium", - "hard", - }; - - for (int i = 0; i < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; i++) { - char bin_path[512]; - if (boxoban_ensure_bin_for_difficulty(difficulties[i], bin_path, sizeof(bin_path)) != 0) { - return -1; - } - } - - return 0; -} - static int boxoban_load_incremental_bin_slot(int slot, const char* difficulty) { char bin_path[512]; int fd; @@ -461,6 +443,19 @@ static int boxoban_load_incremental_bins(void) { return 0; } +static void reset_incremental_map_cache(void) { + for (int i = 0; i < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; i++) { + if (INCREMENTAL_MAP_BASES[i] != NULL && + INCREMENTAL_MAP_BASES[i] != MAP_FAILED && + INCREMENTAL_MAP_FILESIZES[i] > 0) { + munmap(INCREMENTAL_MAP_BASES[i], INCREMENTAL_MAP_FILESIZES[i]); + } + INCREMENTAL_MAP_BASES[i] = NULL; + INCREMENTAL_MAP_FILESIZES[i] = 0; + INCREMENTAL_PUZZLE_COUNTS[i] = 0; + } +} + static void reset_map_cache(void) { if (MAP_BASE != NULL && MAP_BASE != MAP_FAILED && MAP_FILESIZE > 0) { munmap(MAP_BASE, MAP_FILESIZE); From ed16ec6a4fa6f58dd52796cfd2ed22ccc57bdddf Mon Sep 17 00:00:00 2001 From: Tim-phant Date: Wed, 20 May 2026 09:16:36 +0000 Subject: [PATCH 3/7] Track Boxoban per-puzzle timeout and final puzzle tick --- ocean/boxoban/binding.c | 1 + ocean/boxoban/boxoban.h | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ocean/boxoban/binding.c b/ocean/boxoban/binding.c index e06fa85579..1378d8fb75 100644 --- a/ocean/boxoban/binding.c +++ b/ocean/boxoban/binding.c @@ -30,4 +30,5 @@ void my_log(Log* log, Dict* out) { dict_set(out, "episode_return", log->episode_return); dict_set(out, "episode_length", log->episode_length); dict_set(out, "targets_hit", log->on_targets); + dict_set(out, "final_puzzle_tick", log->puzzle_ticks); } diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h index 4e8e496e75..a50cb626a1 100644 --- a/ocean/boxoban/boxoban.h +++ b/ocean/boxoban/boxoban.h @@ -24,6 +24,7 @@ typedef struct { float episode_length; // Recommended metric: number of steps of agent episode // Any extra fields you add here may be exported to Python in binding.c float on_targets; // Number of targets currently boxed + float puzzle_ticks; // Steps spent on the final puzzle of the episode float n; // Required as the last field } Log; @@ -48,6 +49,7 @@ typedef struct { int size; int num_agents; int tick; + int puzzle_tick; int max_steps; int agent_x; int agent_y; @@ -145,6 +147,7 @@ void init (Boxoban* env) { void add_log(Boxoban* env) { float perf; float score; + float targets_hit = 0.0f; if (env->curriculum_mode) { score = 0.0f; if (env->largest_solved_difficulty >= 0) { @@ -155,11 +158,15 @@ void add_log(Boxoban* env) { perf = (env->win == 1) ? 1.0f : 0.0f; score = perf; } + if (env->n_targets > 0) { + targets_hit = (float)env->on_target / (float)env->n_targets; + } env->log.perf += perf; env->log.score += score; env->log.episode_length += env->tick; env->log.episode_return += env->episode_return; - env->log.on_targets += env->on_target; + env->log.on_targets += targets_hit; + env->log.puzzle_ticks += env->puzzle_tick; env->log.n++; } @@ -192,11 +199,13 @@ static void load_random_puzzle(Boxoban* env) { memcpy(env->intermediate_rewards, env->observations + TARGET * env->size * env->size,env->size * env->size); + env->puzzle_tick = 0; } // Required function void c_reset(Boxoban* env) { env->tick = 0; + env->puzzle_tick = 0; env->win = 0; env->episode_return = 0; if (env->curriculum_mode) { @@ -209,6 +218,7 @@ void c_reset(Boxoban* env) { if (!env->initialized) { env->tick = rand_r(&env->rng) % env->max_steps; + env->puzzle_tick = env->tick; env->initialized = true; } } @@ -281,6 +291,7 @@ int take_action(Boxoban* env, int action) { // Required function void c_step(Boxoban* env) { env->tick += 1; + env->puzzle_tick += 1; env->terminals[0] = 0; env->rewards[0] = 0.0; @@ -318,7 +329,7 @@ void c_step(Boxoban* env) { return; } - if (env->tick >= env->max_steps) { + if (env->puzzle_tick >= env->max_steps) { env->terminals[0] = 1; env->rewards[0] -= 1.0; env->episode_return += env->rewards[0]; From c2d9f3f14195c08e6ecfd8d266000e8114ffe839 Mon Sep 17 00:00:00 2001 From: Tim-phant Date: Wed, 20 May 2026 09:19:05 +0000 Subject: [PATCH 4/7] made default incremental --- config/boxoban.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/boxoban.ini b/config/boxoban.ini index f076ab74b6..a0e72e7c01 100644 --- a/config/boxoban.ini +++ b/config/boxoban.ini @@ -8,7 +8,7 @@ num_threads = 1 [env] num_agents = 1 -difficulty = 2 +difficulty = 5 int_r_coeff = 0.25 target_loss_pen_coeff = 0 max_steps = 150 From 47bafcec82df2986d9973c37ac1395d8ba7ecd24 Mon Sep 17 00:00:00 2001 From: Tim-phant Date: Wed, 20 May 2026 12:19:37 +0000 Subject: [PATCH 5/7] Make incremental Boxoban perf reflect frontier progress --- ocean/boxoban/boxoban.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h index a50cb626a1..d1918a656e 100644 --- a/ocean/boxoban/boxoban.h +++ b/ocean/boxoban/boxoban.h @@ -148,19 +148,23 @@ void add_log(Boxoban* env) { float perf; float score; float targets_hit = 0.0f; + if (env->n_targets > 0) { + targets_hit = (float)env->on_target / (float)env->n_targets; + } if (env->curriculum_mode) { score = 0.0f; if (env->largest_solved_difficulty >= 0) { score = (float)(env->largest_solved_difficulty + 1); } - perf = score / (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; + if (score >= (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) { + perf = 1.0f; + } else { + perf = (score + targets_hit) / (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; + } } else { perf = (env->win == 1) ? 1.0f : 0.0f; score = perf; } - if (env->n_targets > 0) { - targets_hit = (float)env->on_target / (float)env->n_targets; - } env->log.perf += perf; env->log.score += score; env->log.episode_length += env->tick; From acd7d44b7f58824064552007e26df88e5c62d04d Mon Sep 17 00:00:00 2001 From: Tim-phant Date: Wed, 20 May 2026 12:38:12 +0000 Subject: [PATCH 6/7] Adjust incremental Boxoban perf normalization --- ocean/boxoban/boxoban.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h index d1918a656e..1aa76d2fb2 100644 --- a/ocean/boxoban/boxoban.h +++ b/ocean/boxoban/boxoban.h @@ -156,11 +156,8 @@ void add_log(Boxoban* env) { if (env->largest_solved_difficulty >= 0) { score = (float)(env->largest_solved_difficulty + 1); } - if (score >= (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) { - perf = 1.0f; - } else { - perf = (score + targets_hit) / (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; - } + perf = (score + targets_hit) / + (float)(BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES + 1); } else { perf = (env->win == 1) ? 1.0f : 0.0f; score = perf; From b65b94dd7b401b295cf5c9dfd910428cbffa430a Mon Sep 17 00:00:00 2001 From: Tim-phant Date: Sat, 23 May 2026 19:40:25 +0000 Subject: [PATCH 7/7] fix sweep , breakout config for sweeps --- config/boxoban.ini | 57 +++++++++++++++++++++++----------------------- pufferlib/sweep.py | 3 ++- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/config/boxoban.ini b/config/boxoban.ini index a0e72e7c01..b35e6aeac8 100644 --- a/config/boxoban.ini +++ b/config/boxoban.ini @@ -2,9 +2,9 @@ env_name = boxoban [vec] -total_agents = 32768 -num_buffers = 4 -num_threads = 1 +total_agents = 4096 +num_buffers = 8 +num_threads = 8 [env] num_agents = 1 @@ -14,31 +14,32 @@ target_loss_pen_coeff = 0 max_steps = 150 [policy] -hidden_size = 512 -num_layers = 3.32422 -expansion_factor = 1 +num_layers = 2 +hidden_size = 64 [train] gpus = 1 -seed = 42 -total_timesteps = 880580001 -learning_rate = 0.00134234 -anneal_lr = 1 -min_lr_ratio = 0.37872 -gamma = 0.989717 -gae_lambda = 0.759273 -replay_ratio = 1.6234 -clip_coef = 0.01 -vf_coef = 5 -vf_clip_coef = 5 -max_grad_norm = 1.20325 -ent_coef = 0.000188411 -beta1 = 0.995526 -beta2 = 0.999536 -eps = 1e-14 -minibatch_size = 32768 -horizon = 64 -vtrace_rho_clip = 3.13347 -vtrace_c_clip = 2.75328 -prio_alpha = 0.453827 -prio_beta0 = 0.765589 +total_timesteps = 94_000_000 +beta1 = 0.7279714073125252 +beta2 = 0.9986265112492152 +clip_coef = 0.6746497927896418 +ent_coef = 0.0033240721522812535 +eps = 0.00008339460257113628 +gae_lambda = 0.948721675814334 +gamma = 0.9721246598992744 +learning_rate = 0.1 +max_grad_norm = 1.8109182724544075 +minibatch_size = 65_536 +prio_alpha = 0.1 +prio_beta0 = 0.8247156461060179 +replay_ratio = 1.4242098997083206 +vf_clip_coef = 1.2291681640124468 +vf_coef = 1.2195502588297364 +vtrace_c_clip = 1.0830442742115065 +vtrace_rho_clip = 2.1017317041552603 + +[sweep] +gpus = 1 +max_suggestion_cost = 10800 + + diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py index 36e27bf42a..01453a63d7 100644 --- a/pufferlib/sweep.py +++ b/pufferlib/sweep.py @@ -146,7 +146,8 @@ def _params_from_puffer_sweep(sweep_config, only_include=None): for name, param in sweep_config.items(): if name in ('method', 'metric', 'metric_distribution', 'goal', 'downsample', 'use_gpu', 'prune_pareto', - 'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs'): + 'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs', 'match_enemy_model_path', 'match_num_games', + 'match_enemy_hidden_size', 'match_enemy_num_layers'): continue assert isinstance(param, dict), f'Param {name} is not a dict'