From 0318f097925d43509acdf56abaf192f4e569fb04 Mon Sep 17 00:00:00 2001
From: Tim-phant <ha24582@bristol.ac.uk>
Date: Wed, 20 May 2026 08:48:52 +0000
Subject: [PATCH 1/7] Add Boxoban incremental loader groundwork

---
 ocean/boxoban/binding.c      |   4 ++
 ocean/boxoban/boxoban.h      |   4 ++
 ocean/boxoban/boxoban_maps.h | 101 ++++++++++++++++++++++++++++++++++-
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/ocean/boxoban/binding.c b/ocean/boxoban/binding.c
index 86c6b341d8..e06fa85579 100644
--- a/ocean/boxoban/binding.c
+++ b/ocean/boxoban/binding.c
@@ -17,6 +17,10 @@ void my_init(Env* env, Dict* kwargs) {
     env->max_steps = (int)dict_get(kwargs, "max_steps")->value;
     env->int_r_coeff = (float)dict_get(kwargs, "int_r_coeff")->value;
     env->target_loss_pen_coeff = (float)dict_get(kwargs, "target_loss_pen_coeff")->value;
+    env->curriculum_mode = (env->difficulty_id == 5);
+    env->curriculum_difficulty = 0;
+    env->largest_solved_difficulty = -1;
+    env->episode_maps_solved = 0;
     init(env);
 }
 
diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h
index 35b61a2529..d88f96968b 100644
--- a/ocean/boxoban/boxoban.h
+++ b/ocean/boxoban/boxoban.h
@@ -62,6 +62,10 @@ typedef struct {
     Client* client;
     int win;
     float episode_return;
+    int curriculum_mode; // 1 when using incremental difficulty mode
+    int curriculum_difficulty; // current active difficulty in curriculum mode
+    int largest_solved_difficulty; // highest difficulty solved this episode, -1 if none
+    int episode_maps_solved; // number of puzzles solved this episode
 } Boxoban;
 
 void ensure_map_loaded(void);
diff --git a/ocean/boxoban/boxoban_maps.h b/ocean/boxoban/boxoban_maps.h
index c633c37307..3433a637b3 100644
--- a/ocean/boxoban/boxoban_maps.h
+++ b/ocean/boxoban/boxoban_maps.h
@@ -25,6 +25,22 @@ extern size_t PUZZLE_COUNT;
 extern size_t PUZZLE_SIZE;
 extern size_t PUZZLE_OBS_BYTES;
 
+#define BOXOBAN_DIFFICULTY_BASIC 0 // numeric difficulty ids; BASIC..HARD also serve as incremental slot indices
+#define BOXOBAN_DIFFICULTY_EASY 1
+#define BOXOBAN_DIFFICULTY_MEDIUM 2
+#define BOXOBAN_DIFFICULTY_HARD 3
+#define BOXOBAN_DIFFICULTY_UNFILTERED 4
+#define BOXOBAN_DIFFICULTY_INCREMENTAL 5 // selects the incremental (curriculum) loader rather than a single map set
+
+#define BOXOBAN_INCREMENTAL_MIN_DIFFICULTY BOXOBAN_DIFFICULTY_BASIC // first difficulty used in incremental mode
+#define BOXOBAN_INCREMENTAL_MAX_DIFFICULTY BOXOBAN_DIFFICULTY_HARD // last difficulty used in incremental mode
+#define BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES 4 // count of MIN..MAX inclusive; must match the difficulty-name tables in the loaders
+
+extern uint8_t *INCREMENTAL_MAP_BASES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES]; // per-slot mmap base, NULL when not loaded
+extern size_t INCREMENTAL_MAP_FILESIZES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES]; // per-slot mapped file size in bytes
+extern size_t INCREMENTAL_PUZZLE_COUNTS[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES]; // per-slot file size / PUZZLE_SIZE
+
+int boxoban_ensure_bin_for_difficulty(const char* difficulty, char* out_path, size_t out_cap); // returns 0 on success, -1 on failure
 int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap);
 int boxoban_set_map_path(const char *path);
 int boxoban_difficulty_id_from_name(const char* difficulty_name);
@@ -38,6 +54,9 @@ size_t MAP_FILESIZE = 0;
 size_t PUZZLE_COUNT = 0;
 size_t PUZZLE_SIZE = BOXOBAN_PUZZLE_BYTES;
 size_t PUZZLE_OBS_BYTES = BOXOBAN_PUZZLE_OBS_BYTES;
+uint8_t *INCREMENTAL_MAP_BASES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
+size_t INCREMENTAL_MAP_FILESIZES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
+size_t INCREMENTAL_PUZZLE_COUNTS[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
 static char* BOXOBAN_MAP_PATH = NULL;
 static const char* BOXOBAN_LEVEL_ROOT = "resources/boxoban/levels";
 
@@ -325,7 +344,7 @@ static int boxoban_bin_path(const char* difficulty, char* out_path, size_t out_c
     return 0;
 }
 
-int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
+int boxoban_ensure_bin_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
     if (difficulty == NULL || out_path == NULL) {
         return -1;
     }
@@ -356,12 +375,92 @@ int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path,
         fprintf(stdout, "[Boxoban] Generated %zu puzzles for '%s' at %s\n", puzzle_count, difficulty, out_path);
     }
 
+    return 0;
+}
+
+int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) { // ensure the bin, then hand out_path to boxoban_set_map_path(); 0 on success, -1 on failure
+    if (boxoban_ensure_bin_for_difficulty(difficulty, out_path, out_cap) != 0) { // also rejects NULL difficulty/out_path
+        return -1;
+    }
     if (boxoban_set_map_path(out_path) != 0) {
         return -1;
     }
     return 0;
 }
 
+static int boxoban_ensure_incremental_bins(void) {
+    const char* difficulties[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {
+        "basic",
+        "easy",
+        "medium",
+        "hard",
+    };
+
+    for (int i = 0; i < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; i++) {
+        char bin_path[512];
+        if (boxoban_ensure_bin_for_difficulty(difficulties[i], bin_path, sizeof(bin_path)) != 0) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+static int boxoban_load_incremental_bin_slot(int slot, const char* difficulty) {
+    char bin_path[512];
+    int fd;
+    struct stat st;
+    uint8_t* map_base;
+    // Map the bin file for `difficulty` read-only and record base/size/count in `slot`; returns 0 on success, -1 on any failure.
+    if (slot < 0 || slot >= BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) { // slot indexes the INCREMENTAL_* arrays
+        return -1;
+    }
+    if (boxoban_ensure_bin_for_difficulty(difficulty, bin_path, sizeof(bin_path)) != 0) { // bin_path is opened below, so this call is relied on to fill it
+        return -1;
+    }
+
+    fd = open(bin_path, O_RDONLY);
+    if (fd < 0) {
+        return -1;
+    }
+    if (fstat(fd, &st) != 0) {
+        close(fd);
+        return -1;
+    }
+    if ((size_t)st.st_size % PUZZLE_SIZE != 0) { // file must be a whole number of PUZZLE_SIZE-byte records
+        close(fd);
+        return -1;
+    }
+
+    map_base = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); // whole file, read-only, private
+    close(fd); // fd is not used again; only the mapping is kept
+    if (map_base == MAP_FAILED) {
+        return -1;
+    }
+
+    INCREMENTAL_MAP_BASES[slot] = map_base; // NOTE(review): a previous mapping in this slot is overwritten here, not unmapped
+    INCREMENTAL_MAP_FILESIZES[slot] = st.st_size; // length of the mapping in bytes
+    INCREMENTAL_PUZZLE_COUNTS[slot] = (size_t)st.st_size / PUZZLE_SIZE;
+    return 0;
+}
+
+static int boxoban_load_incremental_bins(void) { // load one bin per incremental slot; 0 on success, -1 on the first failure
+    const char* difficulties[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = { // array index is the slot index passed to the loader
+        "basic",
+        "easy",
+        "medium",
+        "hard",
+    };
+
+    for (int i = 0; i < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; i++) {
+        if (boxoban_load_incremental_bin_slot(i, difficulties[i]) != 0) {
+            return -1; // slots loaded before the failing one are left mapped; nothing is rolled back here
+        }
+    }
+
+    return 0;
+}
+
 static void reset_map_cache(void) {
     if (MAP_BASE != NULL && MAP_BASE != MAP_FAILED && MAP_FILESIZE > 0) {
         munmap(MAP_BASE, MAP_FILESIZE);

From 0fdb8e3a3915d71303ef2ffe9af3434ddc636cfc Mon Sep 17 00:00:00 2001
From: Tim-phant <ha24582@bristol.ac.uk>
Date: Wed, 20 May 2026 09:02:53 +0000
Subject: [PATCH 2/7] Add incremental Boxoban progression; keep non-incremental
 behavior unchanged

---
 ocean/boxoban/boxoban.h      | 67 ++++++++++++++++++++++++++++++------
 ocean/boxoban/boxoban_maps.h | 31 +++++++----------
 2 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h
index d88f96968b..4e8e496e75 100644
--- a/ocean/boxoban/boxoban.h
+++ b/ocean/boxoban/boxoban.h
@@ -71,6 +71,15 @@ typedef struct {
 void ensure_map_loaded(void);
 
 static int boxoban_configure_maps_from_env(Boxoban* env) {
+    if (env->difficulty_id == BOXOBAN_DIFFICULTY_INCREMENTAL) {
+        reset_incremental_map_cache();
+        if (boxoban_load_incremental_bins() != 0) {
+            fprintf(stderr, "Failed to load incremental Boxoban map bins\n");
+            return -1;
+        }
+        return 0;
+    }
+
     if (env->difficulty_id == -1) {
         return 0;
     }
@@ -111,8 +120,8 @@ static inline unsigned char get_intermediate_reward_status(Boxoban *env, int x,
     return env->intermediate_rewards[(y)*env->size + (x)];
 }
 
-static inline const uint32_t get_random_puzzle_idx(const Boxoban *env) {
-    int idx = rand_r(&env->rng) % PUZZLE_COUNT;
+static inline const uint32_t get_random_puzzle_idx(const Boxoban *env, size_t puzzle_count) { // random index in [0, puzzle_count)
+    int idx = rand_r(&env->rng) % puzzle_count; // caller must pass puzzle_count > 0: a zero count divides by zero here
     return idx;
 }
 
@@ -134,11 +143,20 @@ void init (Boxoban* env) {
 
 
 void add_log(Boxoban* env) {
-    float denom = (float)env->n_boxes;
-    float num = (float)env->on_target;
-    float perf = (env->win== 1) ? 1.0f : 0.0f;
+    float perf;
+    float score;
+    if (env->curriculum_mode) {
+        score = 0.0f;
+        if (env->largest_solved_difficulty >= 0) {
+            score = (float)(env->largest_solved_difficulty + 1);
+        }
+        perf = score / (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES;
+    } else {
+        perf = (env->win == 1) ? 1.0f : 0.0f;
+        score = perf;
+    }
     env->log.perf += perf;
-    env->log.score += perf;
+    env->log.score += score;
     env->log.episode_length += env->tick;
     env->log.episode_return += env->episode_return;
     env->log.on_targets += env->on_target;
@@ -153,10 +171,16 @@ bool clear(Boxoban* env, int x, int y) {
     return (get_entity(env, WALLS, x, y) == 0) && (get_entity(env, BOXES, x, y) == 0);
 }
 
-// Required function
-void c_reset(Boxoban* env) {
-    const uint32_t i = get_random_puzzle_idx(env);
-    const uint8_t* puzzle = MAP_BASE + (size_t)i * PUZZLE_SIZE;
+static void load_random_puzzle(Boxoban* env) {
+    const uint8_t* map_base = MAP_BASE;
+    size_t puzzle_count = PUZZLE_COUNT;
+    if (env->curriculum_mode) {
+        map_base = INCREMENTAL_MAP_BASES[env->curriculum_difficulty];
+        puzzle_count = INCREMENTAL_PUZZLE_COUNTS[env->curriculum_difficulty];
+    }
+
+    const uint32_t i = get_random_puzzle_idx(env, puzzle_count);
+    const uint8_t* puzzle = map_base + (size_t)i * PUZZLE_SIZE;
     memcpy(env->observations, puzzle, PUZZLE_OBS_BYTES);
 
     const uint8_t* meta = puzzle + PUZZLE_OBS_BYTES;
@@ -168,10 +192,20 @@ void c_reset(Boxoban* env) {
 
     memcpy(env->intermediate_rewards,
             env->observations + TARGET * env->size * env->size,env->size * env->size);
+}
 
+// Required function
+void c_reset(Boxoban* env) {
     env->tick = 0;
     env->win = 0;
     env->episode_return = 0;
+    if (env->curriculum_mode) {
+        env->curriculum_difficulty = 0;
+        env->largest_solved_difficulty = -1;
+        env->episode_maps_solved = 0;
+    }
+
+    load_random_puzzle(env);
 
     if (!env->initialized) {
         env->tick = rand_r(&env->rng) % env->max_steps;
@@ -264,10 +298,21 @@ void c_step(Boxoban* env) {
 
     //Terminals
     if (env->on_target == env->n_targets) {
-        env->terminals[0] = 1;
         env->rewards[0] += 1.0;
         env->win = 1;
         env->episode_return += env->rewards[0];
+        if (env->curriculum_mode) {
+            if (env->curriculum_difficulty > env->largest_solved_difficulty) {
+                env->largest_solved_difficulty = env->curriculum_difficulty;
+            }
+            env->episode_maps_solved += 1;
+            if (env->curriculum_difficulty < BOXOBAN_INCREMENTAL_MAX_DIFFICULTY) {
+                env->curriculum_difficulty += 1;
+            }
+            load_random_puzzle(env);
+            return;
+        }
+        env->terminals[0] = 1;
         add_log(env);
         c_reset(env);
         return;
diff --git a/ocean/boxoban/boxoban_maps.h b/ocean/boxoban/boxoban_maps.h
index 3433a637b3..7f2db7d470 100644
--- a/ocean/boxoban/boxoban_maps.h
+++ b/ocean/boxoban/boxoban_maps.h
@@ -388,24 +388,6 @@ int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path,
     return 0;
 }
 
-static int boxoban_ensure_incremental_bins(void) {
-    const char* difficulties[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {
-        "basic",
-        "easy",
-        "medium",
-        "hard",
-    };
-
-    for (int i = 0; i < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; i++) {
-        char bin_path[512];
-        if (boxoban_ensure_bin_for_difficulty(difficulties[i], bin_path, sizeof(bin_path)) != 0) {
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
 static int boxoban_load_incremental_bin_slot(int slot, const char* difficulty) {
     char bin_path[512];
     int fd;
@@ -461,6 +443,19 @@ static int boxoban_load_incremental_bins(void) {
     return 0;
 }
 
+static void reset_incremental_map_cache(void) { // unmap every incremental slot that holds a mapping and zero its bookkeeping
+    for (int i = 0; i < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES; i++) {
+        if (INCREMENTAL_MAP_BASES[i] != NULL &&
+                INCREMENTAL_MAP_BASES[i] != MAP_FAILED &&
+                INCREMENTAL_MAP_FILESIZES[i] > 0) { // only munmap slots that look like live mappings
+            munmap(INCREMENTAL_MAP_BASES[i], INCREMENTAL_MAP_FILESIZES[i]);
+        }
+        INCREMENTAL_MAP_BASES[i] = NULL; // cleared even when nothing was mapped, so a second call skips munmap
+        INCREMENTAL_MAP_FILESIZES[i] = 0;
+        INCREMENTAL_PUZZLE_COUNTS[i] = 0;
+    }
+}
+
 static void reset_map_cache(void) {
     if (MAP_BASE != NULL && MAP_BASE != MAP_FAILED && MAP_FILESIZE > 0) {
         munmap(MAP_BASE, MAP_FILESIZE);

From ed16ec6a4fa6f58dd52796cfd2ed22ccc57bdddf Mon Sep 17 00:00:00 2001
From: Tim-phant <ha24582@bristol.ac.uk>
Date: Wed, 20 May 2026 09:16:36 +0000
Subject: [PATCH 3/7] Track Boxoban per-puzzle timeout and final puzzle tick

---
 ocean/boxoban/binding.c |  1 +
 ocean/boxoban/boxoban.h | 15 +++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/ocean/boxoban/binding.c b/ocean/boxoban/binding.c
index e06fa85579..1378d8fb75 100644
--- a/ocean/boxoban/binding.c
+++ b/ocean/boxoban/binding.c
@@ -30,4 +30,5 @@ void my_log(Log* log, Dict* out) {
     dict_set(out, "episode_return", log->episode_return);
     dict_set(out, "episode_length", log->episode_length);
     dict_set(out, "targets_hit", log->on_targets);
+    dict_set(out, "final_puzzle_tick", log->puzzle_ticks);
 }
diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h
index 4e8e496e75..a50cb626a1 100644
--- a/ocean/boxoban/boxoban.h
+++ b/ocean/boxoban/boxoban.h
@@ -24,6 +24,7 @@ typedef struct {
     float episode_length; // Recommended metric: number of steps of agent episode
     // Any extra fields you add here may be exported to Python in binding.c
     float on_targets; // Number of targets currently boxed
+    float puzzle_ticks; // Steps spent on the final puzzle of the episode
     float n; // Required as the last field 
 } Log;
 
@@ -48,6 +49,7 @@ typedef struct {
     int size;
     int num_agents;
     int tick;
+    int puzzle_tick;
     int max_steps;
     int agent_x;
     int agent_y;
@@ -145,6 +147,7 @@ void init (Boxoban* env) {
 void add_log(Boxoban* env) {
     float perf;
     float score;
+    float targets_hit = 0.0f;
     if (env->curriculum_mode) {
         score = 0.0f;
         if (env->largest_solved_difficulty >= 0) {
@@ -155,11 +158,15 @@ void add_log(Boxoban* env) {
         perf = (env->win == 1) ? 1.0f : 0.0f;
         score = perf;
     }
+    if (env->n_targets > 0) {
+        targets_hit = (float)env->on_target / (float)env->n_targets;
+    }
     env->log.perf += perf;
     env->log.score += score;
     env->log.episode_length += env->tick;
     env->log.episode_return += env->episode_return;
-    env->log.on_targets += env->on_target;
+    env->log.on_targets += targets_hit;
+    env->log.puzzle_ticks += env->puzzle_tick;
     env->log.n++;
 }
 
@@ -192,11 +199,13 @@ static void load_random_puzzle(Boxoban* env) {
 
     memcpy(env->intermediate_rewards,
             env->observations + TARGET * env->size * env->size,env->size * env->size);
+    env->puzzle_tick = 0;
 }
 
 // Required function
 void c_reset(Boxoban* env) {
     env->tick = 0;
+    env->puzzle_tick = 0;
     env->win = 0;
     env->episode_return = 0;
     if (env->curriculum_mode) {
@@ -209,6 +218,7 @@ void c_reset(Boxoban* env) {
 
     if (!env->initialized) {
         env->tick = rand_r(&env->rng) % env->max_steps;
+        env->puzzle_tick = env->tick;
         env->initialized = true;
     }
 }
@@ -281,6 +291,7 @@ int take_action(Boxoban* env, int action) {
 // Required function
 void c_step(Boxoban* env) {
     env->tick += 1;
+    env->puzzle_tick += 1;
     env->terminals[0] = 0;
     env->rewards[0] = 0.0;
        
@@ -318,7 +329,7 @@ void c_step(Boxoban* env) {
         return;
     }
 
-    if (env->tick >= env->max_steps) {
+    if (env->puzzle_tick >= env->max_steps) {
         env->terminals[0] = 1;
         env->rewards[0] -= 1.0; 
         env->episode_return += env->rewards[0];

From c2d9f3f14195c08e6ecfd8d266000e8114ffe839 Mon Sep 17 00:00:00 2001
From: Tim-phant <ha24582@bristol.ac.uk>
Date: Wed, 20 May 2026 09:19:05 +0000
Subject: [PATCH 4/7] Make incremental the default Boxoban difficulty

---
 config/boxoban.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/boxoban.ini b/config/boxoban.ini
index f076ab74b6..a0e72e7c01 100644
--- a/config/boxoban.ini
+++ b/config/boxoban.ini
@@ -8,7 +8,7 @@ num_threads = 1
 
 [env]
 num_agents = 1
-difficulty = 2
+difficulty = 5
 int_r_coeff = 0.25
 target_loss_pen_coeff = 0
 max_steps = 150

From 47bafcec82df2986d9973c37ac1395d8ba7ecd24 Mon Sep 17 00:00:00 2001
From: Tim-phant <ha24582@bristol.ac.uk>
Date: Wed, 20 May 2026 12:19:37 +0000
Subject: [PATCH 5/7] Make incremental Boxoban perf reflect frontier progress

---
 ocean/boxoban/boxoban.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h
index a50cb626a1..d1918a656e 100644
--- a/ocean/boxoban/boxoban.h
+++ b/ocean/boxoban/boxoban.h
@@ -148,19 +148,23 @@ void add_log(Boxoban* env) {
     float perf;
     float score;
     float targets_hit = 0.0f;
+    if (env->n_targets > 0) {
+        targets_hit = (float)env->on_target / (float)env->n_targets;
+    }
     if (env->curriculum_mode) {
         score = 0.0f;
         if (env->largest_solved_difficulty >= 0) {
             score = (float)(env->largest_solved_difficulty + 1);
         }
-        perf = score / (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES;
+        if (score >= (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) {
+            perf = 1.0f;
+        } else {
+            perf = (score + targets_hit) / (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES;
+        }
     } else {
         perf = (env->win == 1) ? 1.0f : 0.0f;
         score = perf;
     }
-    if (env->n_targets > 0) {
-        targets_hit = (float)env->on_target / (float)env->n_targets;
-    }
     env->log.perf += perf;
     env->log.score += score;
     env->log.episode_length += env->tick;

From acd7d44b7f58824064552007e26df88e5c62d04d Mon Sep 17 00:00:00 2001
From: Tim-phant <ha24582@bristol.ac.uk>
Date: Wed, 20 May 2026 12:38:12 +0000
Subject: [PATCH 6/7] Adjust incremental Boxoban perf normalization

---
 ocean/boxoban/boxoban.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/ocean/boxoban/boxoban.h b/ocean/boxoban/boxoban.h
index d1918a656e..1aa76d2fb2 100644
--- a/ocean/boxoban/boxoban.h
+++ b/ocean/boxoban/boxoban.h
@@ -156,11 +156,8 @@ void add_log(Boxoban* env) {
         if (env->largest_solved_difficulty >= 0) {
             score = (float)(env->largest_solved_difficulty + 1);
         }
-        if (score >= (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) {
-            perf = 1.0f;
-        } else {
-            perf = (score + targets_hit) / (float)BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES;
-        }
+        perf = (score + targets_hit) /
+            (float)(BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES + 1);
     } else {
         perf = (env->win == 1) ? 1.0f : 0.0f;
         score = perf;

From b65b94dd7b401b295cf5c9dfd910428cbffa430a Mon Sep 17 00:00:00 2001
From: Tim-phant <ha24582@bristol.ac.uk>
Date: Sat, 23 May 2026 19:40:25 +0000
Subject: [PATCH 7/7] fix sweep, breakout config for sweeps

---
 config/boxoban.ini | 57 +++++++++++++++++++++++-----------------------
 pufferlib/sweep.py |  3 ++-
 2 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/config/boxoban.ini b/config/boxoban.ini
index a0e72e7c01..b35e6aeac8 100644
--- a/config/boxoban.ini
+++ b/config/boxoban.ini
@@ -2,9 +2,9 @@
 env_name = boxoban
 
 [vec]
-total_agents = 32768
-num_buffers = 4
-num_threads = 1
+total_agents = 4096
+num_buffers = 8
+num_threads = 8
 
 [env]
 num_agents = 1
@@ -14,31 +14,32 @@ target_loss_pen_coeff = 0
 max_steps = 150
 
 [policy]
-hidden_size = 512
-num_layers = 3.32422
-expansion_factor = 1
+num_layers = 2
+hidden_size = 64
 
 [train]
 gpus = 1
-seed = 42
-total_timesteps = 880580001
-learning_rate = 0.00134234
-anneal_lr = 1
-min_lr_ratio = 0.37872
-gamma = 0.989717
-gae_lambda = 0.759273
-replay_ratio = 1.6234
-clip_coef = 0.01
-vf_coef = 5
-vf_clip_coef = 5
-max_grad_norm = 1.20325
-ent_coef = 0.000188411
-beta1 = 0.995526
-beta2 = 0.999536
-eps = 1e-14
-minibatch_size = 32768
-horizon = 64
-vtrace_rho_clip = 3.13347
-vtrace_c_clip = 2.75328
-prio_alpha = 0.453827
-prio_beta0 = 0.765589
+total_timesteps = 94_000_000
+beta1 = 0.7279714073125252
+beta2 = 0.9986265112492152
+clip_coef = 0.6746497927896418
+ent_coef = 0.0033240721522812535
+eps = 0.00008339460257113628
+gae_lambda = 0.948721675814334
+gamma = 0.9721246598992744
+learning_rate = 0.1
+max_grad_norm = 1.8109182724544075
+minibatch_size = 65_536
+prio_alpha = 0.1
+prio_beta0 = 0.8247156461060179
+replay_ratio = 1.4242098997083206
+vf_clip_coef = 1.2291681640124468
+vf_coef = 1.2195502588297364
+vtrace_c_clip = 1.0830442742115065
+vtrace_rho_clip = 2.1017317041552603
+
+[sweep]
+gpus = 1
+max_suggestion_cost = 10800
+
+
diff --git a/pufferlib/sweep.py b/pufferlib/sweep.py
index 36e27bf42a..01453a63d7 100644
--- a/pufferlib/sweep.py
+++ b/pufferlib/sweep.py
@@ -146,7 +146,8 @@ def _params_from_puffer_sweep(sweep_config, only_include=None):
 
     for name, param in sweep_config.items():
         if name in ('method', 'metric', 'metric_distribution', 'goal', 'downsample', 'use_gpu', 'prune_pareto',
-                    'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs'):
+                    'sweep_only', 'max_suggestion_cost', 'early_stop_quantile', 'gpus', 'max_runs', 'match_enemy_model_path', 'match_num_games',
+                  'match_enemy_hidden_size', 'match_enemy_num_layers'):
             continue
 
         assert isinstance(param, dict), f'Param {name} is not a dict'