Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/boxoban.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ num_threads = 1

[env]
num_agents = 1
difficulty = 2
difficulty = 5
int_r_coeff = 0.25
target_loss_pen_coeff = 0
max_steps = 150
Expand Down
5 changes: 5 additions & 0 deletions ocean/boxoban/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ void my_init(Env* env, Dict* kwargs) {
env->max_steps = (int)dict_get(kwargs, "max_steps")->value;
env->int_r_coeff = (float)dict_get(kwargs, "int_r_coeff")->value;
env->target_loss_pen_coeff = (float)dict_get(kwargs, "target_loss_pen_coeff")->value;
env->curriculum_mode = (env->difficulty_id == 5);
env->curriculum_difficulty = 0;
env->largest_solved_difficulty = -1;
env->episode_maps_solved = 0;
init(env);
}

Expand All @@ -26,4 +30,5 @@ void my_log(Log* log, Dict* out) {
dict_set(out, "episode_return", log->episode_return);
dict_set(out, "episode_length", log->episode_length);
dict_set(out, "targets_hit", log->on_targets);
dict_set(out, "final_puzzle_tick", log->puzzle_ticks);
}
87 changes: 74 additions & 13 deletions ocean/boxoban/boxoban.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ typedef struct {
float episode_length; // Recommended metric: number of steps of agent episode
// Any extra fields you add here may be exported to Python in binding.c
float on_targets; // Number of targets currently boxed
float puzzle_ticks; // Steps spent on the final puzzle of the episode
float n; // Required as the last field
} Log;

Expand All @@ -48,6 +49,7 @@ typedef struct {
int size;
int num_agents;
int tick;
int puzzle_tick;
int max_steps;
int agent_x;
int agent_y;
Expand All @@ -62,11 +64,24 @@ typedef struct {
Client* client;
int win;
float episode_return;
int curriculum_mode; // 1 when using incremental difficulty mode
int curriculum_difficulty; // current active difficulty in curriculum mode
int largest_solved_difficulty; // highest difficulty solved this episode, -1 if none
int episode_maps_solved; // number of puzzles solved this episode
} Boxoban;

void ensure_map_loaded(void);

static int boxoban_configure_maps_from_env(Boxoban* env) {
if (env->difficulty_id == BOXOBAN_DIFFICULTY_INCREMENTAL) {
reset_incremental_map_cache();
if (boxoban_load_incremental_bins() != 0) {
fprintf(stderr, "Failed to load incremental Boxoban map bins\n");
return -1;
}
return 0;
}

if (env->difficulty_id == -1) {
return 0;
}
Expand Down Expand Up @@ -107,8 +122,8 @@ static inline unsigned char get_intermediate_reward_status(Boxoban *env, int x,
return env->intermediate_rewards[(y)*env->size + (x)];
}

static inline const uint32_t get_random_puzzle_idx(const Boxoban *env) {
int idx = rand_r(&env->rng) % PUZZLE_COUNT;
static inline const uint32_t get_random_puzzle_idx(const Boxoban *env, size_t puzzle_count) {
int idx = rand_r(&env->rng) % puzzle_count;
return idx;
}

Expand All @@ -130,14 +145,29 @@ void init (Boxoban* env) {


void add_log(Boxoban* env) {
float denom = (float)env->n_boxes;
float num = (float)env->on_target;
float perf = (env->win== 1) ? 1.0f : 0.0f;
float perf;
float score;
float targets_hit = 0.0f;
if (env->n_targets > 0) {
targets_hit = (float)env->on_target / (float)env->n_targets;
}
if (env->curriculum_mode) {
score = 0.0f;
if (env->largest_solved_difficulty >= 0) {
score = (float)(env->largest_solved_difficulty + 1);
}
perf = (score + targets_hit) /
(float)(BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES + 1);
} else {
perf = (env->win == 1) ? 1.0f : 0.0f;
score = perf;
}
env->log.perf += perf;
env->log.score += perf;
env->log.score += score;
env->log.episode_length += env->tick;
env->log.episode_return += env->episode_return;
env->log.on_targets += env->on_target;
env->log.on_targets += targets_hit;
env->log.puzzle_ticks += env->puzzle_tick;
env->log.n++;
}

Expand All @@ -149,10 +179,16 @@ bool clear(Boxoban* env, int x, int y) {
return (get_entity(env, WALLS, x, y) == 0) && (get_entity(env, BOXES, x, y) == 0);
}

// Required function
void c_reset(Boxoban* env) {
const uint32_t i = get_random_puzzle_idx(env);
const uint8_t* puzzle = MAP_BASE + (size_t)i * PUZZLE_SIZE;
static void load_random_puzzle(Boxoban* env) {
const uint8_t* map_base = MAP_BASE;
size_t puzzle_count = PUZZLE_COUNT;
if (env->curriculum_mode) {
map_base = INCREMENTAL_MAP_BASES[env->curriculum_difficulty];
puzzle_count = INCREMENTAL_PUZZLE_COUNTS[env->curriculum_difficulty];
}

const uint32_t i = get_random_puzzle_idx(env, puzzle_count);
const uint8_t* puzzle = map_base + (size_t)i * PUZZLE_SIZE;
memcpy(env->observations, puzzle, PUZZLE_OBS_BYTES);

const uint8_t* meta = puzzle + PUZZLE_OBS_BYTES;
Expand All @@ -164,13 +200,26 @@ void c_reset(Boxoban* env) {

memcpy(env->intermediate_rewards,
env->observations + TARGET * env->size * env->size,env->size * env->size);
env->puzzle_tick = 0;
}

// Required function
void c_reset(Boxoban* env) {
env->tick = 0;
env->puzzle_tick = 0;
env->win = 0;
env->episode_return = 0;
if (env->curriculum_mode) {
env->curriculum_difficulty = 0;
env->largest_solved_difficulty = -1;
env->episode_maps_solved = 0;
}

load_random_puzzle(env);

if (!env->initialized) {
env->tick = rand_r(&env->rng) % env->max_steps;
env->puzzle_tick = env->tick;
env->initialized = true;
}
}
Expand Down Expand Up @@ -243,6 +292,7 @@ int take_action(Boxoban* env, int action) {
// Required function
void c_step(Boxoban* env) {
env->tick += 1;
env->puzzle_tick += 1;
env->terminals[0] = 0;
env->rewards[0] = 0.0;

Expand All @@ -260,16 +310,27 @@ void c_step(Boxoban* env) {

//Terminals
if (env->on_target == env->n_targets) {
env->terminals[0] = 1;
env->rewards[0] += 1.0;
env->win = 1;
env->episode_return += env->rewards[0];
if (env->curriculum_mode) {
if (env->curriculum_difficulty > env->largest_solved_difficulty) {
env->largest_solved_difficulty = env->curriculum_difficulty;
}
env->episode_maps_solved += 1;
if (env->curriculum_difficulty < BOXOBAN_INCREMENTAL_MAX_DIFFICULTY) {
env->curriculum_difficulty += 1;
}
load_random_puzzle(env);
return;
}
env->terminals[0] = 1;
add_log(env);
c_reset(env);
return;
}

if (env->tick >= env->max_steps) {
if (env->puzzle_tick >= env->max_steps) {
env->terminals[0] = 1;
env->rewards[0] -= 1.0;
env->episode_return += env->rewards[0];
Expand Down
96 changes: 95 additions & 1 deletion ocean/boxoban/boxoban_maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@ extern size_t PUZZLE_COUNT;
extern size_t PUZZLE_SIZE;
extern size_t PUZZLE_OBS_BYTES;

#define BOXOBAN_DIFFICULTY_BASIC 0
#define BOXOBAN_DIFFICULTY_EASY 1
#define BOXOBAN_DIFFICULTY_MEDIUM 2
#define BOXOBAN_DIFFICULTY_HARD 3
#define BOXOBAN_DIFFICULTY_UNFILTERED 4
#define BOXOBAN_DIFFICULTY_INCREMENTAL 5

#define BOXOBAN_INCREMENTAL_MIN_DIFFICULTY BOXOBAN_DIFFICULTY_BASIC
#define BOXOBAN_INCREMENTAL_MAX_DIFFICULTY BOXOBAN_DIFFICULTY_HARD
#define BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES 4

extern uint8_t *INCREMENTAL_MAP_BASES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES];
extern size_t INCREMENTAL_MAP_FILESIZES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES];
extern size_t INCREMENTAL_PUZZLE_COUNTS[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES];

int boxoban_ensure_bin_for_difficulty(const char* difficulty, char* out_path, size_t out_cap);
int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap);
int boxoban_set_map_path(const char *path);
int boxoban_difficulty_id_from_name(const char* difficulty_name);
Expand All @@ -38,6 +54,9 @@ size_t MAP_FILESIZE = 0;
size_t PUZZLE_COUNT = 0;
size_t PUZZLE_SIZE = BOXOBAN_PUZZLE_BYTES;
size_t PUZZLE_OBS_BYTES = BOXOBAN_PUZZLE_OBS_BYTES;
uint8_t *INCREMENTAL_MAP_BASES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
size_t INCREMENTAL_MAP_FILESIZES[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
size_t INCREMENTAL_PUZZLE_COUNTS[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {0};
static char* BOXOBAN_MAP_PATH = NULL;
static const char* BOXOBAN_LEVEL_ROOT = "resources/boxoban/levels";

Expand Down Expand Up @@ -325,7 +344,7 @@ static int boxoban_bin_path(const char* difficulty, char* out_path, size_t out_c
return 0;
}

int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
int boxoban_ensure_bin_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
if (difficulty == NULL || out_path == NULL) {
return -1;
}
Expand Down Expand Up @@ -356,12 +375,87 @@ int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path,
fprintf(stdout, "[Boxoban] Generated %zu puzzles for '%s' at %s\n", puzzle_count, difficulty, out_path);
}

return 0;
}

/*
 * Two-step helper: resolve the bin file for `difficulty`, then register it.
 *
 * Step 1 forwards `difficulty`, `out_path` and `out_cap` to
 * boxoban_ensure_bin_for_difficulty(). Step 2 passes the same `out_path`
 * buffer to boxoban_set_map_path(). Step 2 is not attempted when step 1
 * reports failure.
 *
 * Returns 0 only when both steps return 0; any non-zero result from either
 * step is collapsed to -1.
 */
int boxoban_prepare_maps_for_difficulty(const char* difficulty, char* out_path, size_t out_cap) {
    const int ensure_rc = boxoban_ensure_bin_for_difficulty(difficulty, out_path, out_cap);
    if (ensure_rc != 0) {
        return -1;
    }

    const int select_rc = boxoban_set_map_path(out_path);
    return (select_rc == 0) ? 0 : -1;
}

/*
 * Map the bin file of one difficulty read-only into an incremental slot.
 *
 * slot        Index into INCREMENTAL_MAP_BASES, INCREMENTAL_MAP_FILESIZES and
 *             INCREMENTAL_PUZZLE_COUNTS; must lie in
 *             [0, BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES).
 * difficulty  Name forwarded to boxoban_ensure_bin_for_difficulty(), which
 *             fills in the path of the file that gets mapped.
 *
 * Returns 0 on success and -1 on any failure (bad slot, PUZZLE_SIZE of zero,
 * path resolution failure, open/fstat/mmap failure, empty file, or a file
 * size that is not a whole number of PUZZLE_SIZE records).
 *
 * On failure the slot is left exactly as it was. On success, a mapping that
 * was already stored in the slot is unmapped before it is replaced, so
 * loading the same slot twice does not leak the earlier mapping.
 */
static int boxoban_load_incremental_bin_slot(int slot, const char* difficulty) {
    char bin_path[512];
    struct stat st;

    if (slot < 0 || slot >= BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) {
        return -1;
    }
    /* PUZZLE_SIZE is a mutable global; guard the modulo/division below. */
    if (PUZZLE_SIZE == 0) {
        return -1;
    }
    if (boxoban_ensure_bin_for_difficulty(difficulty, bin_path, sizeof(bin_path)) != 0) {
        return -1;
    }

    int fd = open(bin_path, O_RDONLY);
    if (fd < 0) {
        return -1;
    }
    if (fstat(fd, &st) != 0) {
        close(fd);
        return -1;
    }
    /*
     * Reject empty files explicitly instead of relying on mmap() refusing a
     * zero length: a slot with zero puzzles must never be published, because
     * the puzzle count is later used as a modulus when picking a puzzle.
     */
    if (st.st_size <= 0 || (size_t)st.st_size % PUZZLE_SIZE != 0) {
        close(fd);
        return -1;
    }

    const size_t map_len = (size_t)st.st_size;
    void* mapping = mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, fd, 0);
    /* The mapping stays valid after the descriptor is closed. */
    close(fd);
    if (mapping == MAP_FAILED) {
        return -1;
    }

    /* Release whatever the slot held before so the old mapping is not leaked. */
    if (INCREMENTAL_MAP_BASES[slot] != NULL &&
        INCREMENTAL_MAP_BASES[slot] != MAP_FAILED &&
        INCREMENTAL_MAP_FILESIZES[slot] > 0) {
        munmap(INCREMENTAL_MAP_BASES[slot], INCREMENTAL_MAP_FILESIZES[slot]);
    }

    INCREMENTAL_MAP_BASES[slot] = mapping;
    INCREMENTAL_MAP_FILESIZES[slot] = map_len;
    INCREMENTAL_PUZZLE_COUNTS[slot] = map_len / PUZZLE_SIZE;
    return 0;
}

/*
 * Load every incremental difficulty into its slot, in table order.
 *
 * Entry i of the name table below is passed to
 * boxoban_load_incremental_bin_slot() together with slot index i.
 *
 * Returns 0 when every slot loads, -1 as soon as one load fails. Loading
 * stops at the first failure; slots loaded before it are not undone here.
 */
static int boxoban_load_incremental_bins(void) {
    const char* slot_names[BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES] = {
        "basic",
        "easy",
        "medium",
        "hard",
    };

    int slot = 0;
    while (slot < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) {
        const int rc = boxoban_load_incremental_bin_slot(slot, slot_names[slot]);
        if (rc != 0) {
            return -1;
        }
        ++slot;
    }

    return 0;
}

/*
 * Drop every incremental slot back to its empty state.
 *
 * For each slot, munmap() is called only when the stored base is neither
 * NULL nor MAP_FAILED and the recorded file size is non-zero. Regardless of
 * whether anything was unmapped, the slot's base, file size and puzzle count
 * are then cleared.
 */
static void reset_incremental_map_cache(void) {
    int slot = 0;
    while (slot < BOXOBAN_INCREMENTAL_NUM_DIFFICULTIES) {
        uint8_t* base = INCREMENTAL_MAP_BASES[slot];
        const size_t length = INCREMENTAL_MAP_FILESIZES[slot];

        const int nothing_mapped = (base == NULL) || (base == MAP_FAILED) || (length == 0);
        if (!nothing_mapped) {
            munmap(base, length);
        }

        INCREMENTAL_PUZZLE_COUNTS[slot] = 0;
        INCREMENTAL_MAP_FILESIZES[slot] = 0;
        INCREMENTAL_MAP_BASES[slot] = NULL;
        ++slot;
    }
}

static void reset_map_cache(void) {
if (MAP_BASE != NULL && MAP_BASE != MAP_FAILED && MAP_FILESIZE > 0) {
munmap(MAP_BASE, MAP_FILESIZE);
Expand Down
Loading