Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
464 changes: 310 additions & 154 deletions build.sh

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions config/default.ini
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ ent_coef = 0.001
beta1 = 0.95
beta2 = 0.999
eps = 1e-12
overlap = 0
cpu_inference = 0
train_fp16 = 0
minibatch_size = 8192
horizon = 64
vtrace_rho_clip = 1.0
Expand Down
21 changes: 16 additions & 5 deletions pufferlib/pufferl.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,14 @@ def _train(env_name, args, sweep_obj=None, result_queue=None, verbose=False):
log_dir = os.path.join(args['log_dir'], args['env_name'])
os.makedirs(log_dir, exist_ok=True)

# Write config-only stub at trial start so hung/crashed trials leave a
# post-mortem trace. Overwritten with full {config, metrics} at trial end.
# default=str handles bytes (nccl_id is b'' before being popped below) and
# any numpy scalars that protein may have leaked into args.
log_path = os.path.join(log_dir, run_id + '.json')
with open(log_path, 'w') as f:
json.dump({**args, 'metrics': {}, 'status': 'pending'}, f, default=str)

try:
pufferl = backend.create_pufferl(args)
except RuntimeError as e:
Expand Down Expand Up @@ -295,11 +303,9 @@ def _train(env_name, args, sweep_obj=None, result_queue=None, verbose=False):
for k in metrics:
metrics[k][-1] = all_logs[-1][k]

# Save own log: config + downsampled results
log_dir = os.path.join(args['log_dir'], args['env_name'])
os.makedirs(log_dir, exist_ok=True)
with open(os.path.join(log_dir, run_id + '.json'), 'w') as f:
json.dump({**args, 'metrics': metrics}, f)
# Save own log: config + downsampled results (overwrites pending stub)
with open(log_path, 'w') as f:
json.dump({**args, 'metrics': metrics, 'status': 'completed'}, f, default=str)

if args['wandb']:
if sweep_obj is None and model_path: # Don't spam uploads during sweeps
Expand Down Expand Up @@ -403,6 +409,11 @@ def eval(env_name, args=None, load_path=None):
args = args or load_config(env_name)
args['reset_state'] = False
args['train']['horizon'] = 1
# Eval batches are total_agents*1, so cap minibatch to that to satisfy
# the divisibility check. Training-time minibatch may be larger.
eval_batch = args['vec']['total_agents']
if args['train']['minibatch_size'] > eval_batch:
args['train']['minibatch_size'] = eval_batch

backend = _resolve_backend(args)
pufferl = backend.create_pufferl(args)
Expand Down
22 changes: 19 additions & 3 deletions src/bindings.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,26 @@

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <cstring>
#include <stdexcept>
#include <string>
#include "pufferlib.cu"

#define _PUFFER_STRINGIFY(x) #x
#define PUFFER_STRINGIFY(x) _PUFFER_STRINGIFY(x)

namespace py = pybind11;

static void assert_static_env_name_matches(void) {
const char* binding_env_name = PUFFER_STRINGIFY(ENV_NAME);
const char* static_env_name = get_static_env_name();
if (strcmp(binding_env_name, static_env_name) != 0) {
throw std::runtime_error(
std::string("compiled _C env mismatch: binding env_name=") +
binding_env_name + ", static_env_name=" + static_env_name);
}
}

// Wrapper functions for Python bindings
pybind11::dict puf_log(pybind11::object pufferl_obj) {
auto& pufferl = pufferl_obj.cast<PuffeRL&>();
Expand Down Expand Up @@ -106,7 +119,7 @@ pybind11::dict puf_eval_log(pybind11::object pufferl_obj) {
pufferl.last_log_step = pufferl.global_step;

pybind11::dict env_dict;
Dict* env_out = create_dict(32);
Dict* env_out = create_dict(64);
static_vec_eval_log(pufferl.vec, env_out);
for (int i = 0; i < env_out->size; i++) {
env_dict[env_out->items[i].key] = env_out->items[i].value;
Expand Down Expand Up @@ -248,7 +261,7 @@ Dict* py_dict_to_c_dict(py::dict py_dict) {
}

// ============================================================================
// Python-facing VecEnv: wraps StaticVec for use from python_pufferl.py.
// Python-facing VecEnv wrapper.
// After vec_step(), GPU buffers are current — Python wraps them zero-copy
// with torch.from_blob(ptr, shape, dtype, device='cuda').
// ============================================================================
Expand Down Expand Up @@ -318,7 +331,7 @@ void cpu_vec_step_py(VecEnv& ve, long long actions_ptr) {
}

py::dict vec_log(VecEnv& ve) {
Dict* out = create_dict(32);
Dict* out = create_dict(64);
static_vec_log(ve.vec, out);
py::dict result;
for (int i = 0; i < out->size; i++) {
Expand Down Expand Up @@ -410,6 +423,8 @@ std::unique_ptr<PuffeRL> create_pufferl(py::dict args) {
}

PYBIND11_MODULE(_C, m) {
assert_static_env_name_matches();

// Multi-GPU: generate NCCL unique ID (call on rank 0, pass bytes to all ranks)
m.def("get_nccl_id", []() {
ncclUniqueId id;
Expand Down Expand Up @@ -454,6 +469,7 @@ PYBIND11_MODULE(_C, m) {

m.attr("precision_bytes") = (int)sizeof(precision_t);
m.attr("env_name") = PUFFER_STRINGIFY(ENV_NAME);
m.attr("static_env_name") = get_static_env_name();
m.attr("gpu") = 1;

// Core functions
Expand Down
17 changes: 16 additions & 1 deletion src/bindings_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <stdexcept>
#include <string>

#define _PUFFER_STRINGIFY(x) #x
#define PUFFER_STRINGIFY(x) _PUFFER_STRINGIFY(x)
Expand All @@ -12,6 +14,16 @@

namespace py = pybind11;

// Verifies that the env name baked into this binding at compile time
// (ENV_NAME, stringified) is the same string the static library reports via
// get_static_env_name(). Returns normally on a match.
//
// Throws std::runtime_error naming both values when they differ.
//
// The comparison goes through std::string rather than strcmp so this function
// needs only <string> and <stdexcept>, and does not rely on <cstring> being
// pulled in transitively by another header.
static void assert_static_env_name_matches(void) {
    const std::string binding_env_name = PUFFER_STRINGIFY(ENV_NAME);
    const char* static_env_name = get_static_env_name();
    if (binding_env_name != static_env_name) {
        throw std::runtime_error(
            "compiled _C env mismatch: binding env_name=" + binding_env_name +
            ", static_env_name=" + static_env_name);
    }
}

// Stub out CUDA functions that the static lib references (dead code when gpu=0)
extern "C" {
typedef int cudaError_t;
Expand Down Expand Up @@ -141,7 +153,7 @@ static void cpu_vec_step_py(VecEnv& ve, long long actions_ptr) {
}

static py::dict vec_log(VecEnv& ve) {
Dict* out = create_dict(32);
Dict* out = create_dict(64);
static_vec_log(ve.vec, out);
py::dict result;
for (int i = 0; i < out->size; i++)
Expand All @@ -161,8 +173,11 @@ static void vec_close(VecEnv& ve) {
// ============================================================================

PYBIND11_MODULE(_C, m) {
assert_static_env_name_matches();

m.attr("precision_bytes") = 4;
m.attr("env_name") = PUFFER_STRINGIFY(ENV_NAME);
m.attr("static_env_name") = get_static_env_name();
m.attr("gpu") = 0;

m.def("puff_advantage_cpu", &py_puff_advantage_cpu);
Expand Down
Loading