48 changes: 42 additions & 6 deletions src/lib.rs
@@ -112,6 +112,13 @@ static MAX_THREADS: AtomicUsize = AtomicUsize::new(0);
static OVERFLOW_COUNT: AtomicUsize = AtomicUsize::new(0);
static OVERFLOW_BYTES: AtomicUsize = AtomicUsize::new(0);

/// Nested-phase counter. `begin_phase()` increments it; `end_phase()`
/// decrements. Only the outermost begin (0 → 1) bumps GENERATION and
/// flips ARENA_ACTIVE; only the outermost end (1 → 0) deactivates.
/// This makes nested `begin_phase()` calls compose without the inner
/// begin recycling the outer phase's slab data.
static PHASE_DEPTH: AtomicUsize = AtomicUsize::new(0);

/// Allocations smaller than this go to System even during active phases.
/// Routes registry / hashmap / injector-block-sized allocations away from
/// the arena, so library state that outlives a phase doesn't land in
@@ -196,19 +203,39 @@ fn ensure_region() -> usize {
/// or copy into a `Vec` allocated outside any phase).
pub fn begin_phase() {
ensure_region();
GENERATION.fetch_add(1, Ordering::Release);
ARENA_ACTIVE.store(true, Ordering::Release);
// Only the outermost begin bumps GENERATION and activates the arena.
// Nested begin_phase() calls just bump the depth counter; without this
// guard, an inner begin would recycle the slab and silently overwrite
// the outer phase's bump-allocated data.
if PHASE_DEPTH.fetch_add(1, Ordering::AcqRel) == 0 {
GENERATION.fetch_add(1, Ordering::Release);
ARENA_ACTIVE.store(true, Ordering::Release);
}
}

/// Deactivates the arena. New allocations go to the system allocator; existing arena
/// pointers stay valid until the next `begin_phase()` resets the slabs.
///
/// With the `rayon-flush` feature (default), this also drains rayon's internal
/// queues to release any crossbeam-deque blocks allocated during the phase.
///
/// Calls beyond the matching `begin_phase()` (i.e., when no phase is
/// active) are tolerated and have no effect.
pub fn end_phase() {
ARENA_ACTIVE.store(false, Ordering::Release);
#[cfg(feature = "rayon-flush")]
flush_rayon();
// Only the outermost end (depth 1 → 0) tears down. Saturate at zero so
// unbalanced end_phase() calls are no-ops rather than wrapping around.
let prev = PHASE_DEPTH.fetch_update(Ordering::AcqRel, Ordering::Acquire, |d| {
if d == 0 {
None
} else {
Some(d - 1)
}
});
if let Ok(1) = prev {
ARENA_ACTIVE.store(false, Ordering::Release);
#[cfg(feature = "rayon-flush")]
flush_rayon();
}
}

/// Drains rayon's crossbeam-deque injector to release blocks allocated during
@@ -396,7 +423,16 @@ unsafe impl GlobalAlloc for ZkAllocator {
let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, layout.align()) };
let new_ptr = unsafe { self.alloc(new_layout) };
if !new_ptr.is_null() {
unsafe { std::ptr::copy_nonoverlapping(ptr, new_ptr, layout.size()) };
// Use `ptr::copy` (memmove) instead of `copy_nonoverlapping`:
// when reallocating an arena pointer across a phase boundary,
// the cold-path slab reset (or fast-path bump after reset) can
// hand back a pointer that aliases or partially overlaps the
// source. `copy_nonoverlapping` is UB on overlap; `copy`
// handles it correctly. Some memcpy implementations tolerate
// small overlaps, but glibc's forward-direction memcpy visibly
// corrupts the overlap constructed in tests/test_realloc_overlap.rs,
// and the language-level UB would also surface under miri or
// future codegen.
unsafe { std::ptr::copy(ptr, new_ptr, layout.size()) };
unsafe { self.dealloc(ptr, layout) };
}
new_ptr
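For reviewers: a minimal usage sketch (not part of this diff) of the nesting semantics the new doc comments describe. It assumes only the public `zk_alloc::begin_phase()` / `zk_alloc::end_phase()` API shown above and a `ZkAllocator` installed as the global allocator; the sizes and the function name are illustrative.

// Sketch: how nested phases compose under the PHASE_DEPTH counter.
// (Assumes `#[global_allocator] static A: zk_alloc::ZkAllocator`, as in the tests below.)
fn nested_phase_sketch() {
    zk_alloc::begin_phase();              // depth 0 -> 1: bumps GENERATION, activates the arena
    let outer = vec![0xA1u8; 8192];       // bump-allocated in the slab (>= MIN_ARENA_BYTES)

    zk_alloc::begin_phase();              // depth 1 -> 2: no GENERATION bump, slab is NOT recycled
    let inner = vec![0xB2u8; 8192];       // lands after `outer` in the same slab
    drop(inner);
    zk_alloc::end_phase();                // depth 2 -> 1: arena stays active

    assert!(outer.iter().all(|&b| b == 0xA1)); // outer data survives the nested phase
    zk_alloc::end_phase();                // depth 1 -> 0: deactivates the arena (and flushes
                                          // rayon when the `rayon-flush` feature is enabled)
    zk_alloc::end_phase();                // unbalanced extra call: saturates at 0, no-op
}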
48 changes: 48 additions & 0 deletions tests/test_nested_phase.rs
@@ -0,0 +1,48 @@
//! hunt-1: Nested phases silently corrupt outer phase data.
//!
//! Hypothesis: nested `begin_phase()` calls bump GENERATION, which marks
//! every thread's slab as needing reset. On the next allocation in the
//! inner phase, ARENA_PTR is reset to the slab base — overwriting any
//! data the outer phase had bump-allocated there.
//!
//! There is no nesting counter. The inner `end_phase()` flips
//! ARENA_ACTIVE to false, after which any allocations in the remainder
//! of the outer phase silently land in System (a different correctness
//! issue covered separately).
//!
//! Existing `tests/test_phase_guard.rs::nested_phase_guards_compose`
//! only verifies that nested guards don't panic — it never allocates
//! anything in the outer phase, so this corruption is invisible there.

#[global_allocator]
static A: zk_alloc::ZkAllocator = zk_alloc::ZkAllocator;

#[test]
fn nested_phase_does_not_corrupt_outer() {
// Allocation big enough to land in the arena (>= MIN_ARENA_BYTES, default 4096).
zk_alloc::begin_phase();
let outer: Vec<u8> = vec![0xA1_u8; 8192];
let outer_ptr = outer.as_ptr() as usize;

// Nested begin_phase. GENERATION bumps; the next arena alloc on
// this thread will reset ARENA_PTR back to slab base.
zk_alloc::begin_phase();
let inner: Vec<u8> = vec![0xB2_u8; 8192];
let inner_ptr = inner.as_ptr() as usize;
let inner_first_byte = inner[0];
zk_alloc::end_phase();

zk_alloc::end_phase();

eprintln!("outer_ptr=0x{outer_ptr:x} inner_ptr=0x{inner_ptr:x}");

// If nested phases are sound, outer's bytes are still 0xA1.
let pos = outer.iter().position(|&b| b != 0xA1);
let _ = inner_first_byte;
assert!(
pos.is_none(),
"outer phase data corrupted at offset {}: nested begin_phase bumped \
GENERATION and the inner allocation recycled the outer's slab region",
pos.unwrap()
);
}
55 changes: 32 additions & 23 deletions tests/test_panic_phase.rs
@@ -1,18 +1,21 @@
//! Scenario 3: panic unwinding through a phase boundary.
//!
//! There is no RAII guard around begin_phase()/end_phase(). If a panic
//! propagates out of phase code without reaching end_phase(), ARENA_ACTIVE
//! stays true. Subsequent "post-phase" allocations land in arena and get
//! silently recycled on the next begin_phase().
//! There is no RAII guard around bare begin_phase()/end_phase(). If a
//! panic propagates out of phase code without reaching end_phase(),
//! PHASE_DEPTH stays > 0 and ARENA_ACTIVE stays true.
//!
//! This is a plain API hazard: the recovery path of any prove_with_panic
//! pattern is unsafe.
//! After the depth-counter fix, this means: subsequent begin_phase() calls
//! in the recovery path nest under the orphaned phase rather than recycling
//! the slab. The slab keeps growing across "iterations" until it overflows
//! to System — a memory leak, not silent corruption. Either way the
//! recovery path is broken; PhaseGuard is the supported way to make panic
//! recovery safe (see test_phase_guard.rs).

#[global_allocator]
static A: zk_alloc::ZkAllocator = zk_alloc::ZkAllocator;

#[test]
fn panic_inside_phase_leaves_arena_active() {
fn panic_without_phase_guard_orphans_phase_depth() {
use std::panic;

// Suppress default panic print to minimize incidental allocations between
@@ -23,42 +26,48 @@ fn panic_inside_phase_leaves_arena_active() {
zk_alloc::begin_phase();
let r = panic::catch_unwind(panic::AssertUnwindSafe(|| panic!("simulated")));
assert!(r.is_err());
// No end_phase reached. ARENA_ACTIVE is still true.
// No end_phase reached. PHASE_DEPTH still 1, ARENA_ACTIVE still true.

// This Vec lands in arena (since arena is still active and 8192 >=
// MIN_ARENA_BYTES default 4096).
// Lands in arena (active=true, 8192 >= MIN_ARENA_BYTES default 4096).
let post_panic: Vec<u8> = vec![0xCC; 8192];
let post_panic_ptr = post_panic.as_ptr() as usize;

// Begin the next phase (e.g., next iteration of a prove loop). Arena
// resets — anything allocated during the "ghost" phase between panic
// and now gets recycled.
// Begin the next "iteration". With the depth-counter fix this nests
// under the orphaned phase (depth 1 → 2) instead of bumping
// GENERATION, so the slab is NOT reset and post_panic survives.
zk_alloc::begin_phase();
// Span enough of the slab to cover post_panic's offset, regardless of
// how many small bumps the panic introduced.
let big: Vec<u8> = vec![0x33; 1 << 20];
let big_ptr = big.as_ptr() as usize;
let big_end = big_ptr + big.len();
zk_alloc::end_phase();

let _ = panic::take_hook();

let in_big_range = post_panic_ptr >= big_ptr && post_panic_ptr < big_end;
let post_in_big = post_panic_ptr >= big_ptr && post_panic_ptr < big_end;
let big_overlaps_post =
big_ptr < post_panic_ptr + post_panic.len() && big_ptr + big.len() > post_panic_ptr;
let observed = post_panic[0];

eprintln!(
"post_panic_ptr=0x{post_panic_ptr:x} big=[0x{big_ptr:x}, 0x{big_end:x}); \
in_range={in_big_range} observed=0x{observed:02x}"
post_in_big={post_in_big} big_overlaps_post={big_overlaps_post} \
observed=0x{observed:02x}"
);

// With the depth-counter fix, the second begin_phase nests rather than
// recycling. post_panic is preserved and big is allocated *after* it
// in the same slab, so they do not overlap.
assert!(
in_big_range,
"post-panic Vec didn't land in arena's slab — test layout assumption broken"
!big_overlaps_post,
"depth-counter fix failed: nested begin_phase recycled the slab and \
big overlapped post_panic"
);
assert_eq!(
observed, 0x33,
"expected post-panic Vec contents to be recycled by next begin_phase \
(arena was still active after the panic) — got 0x{observed:02x}"
observed, 0xCC,
"post-panic Vec was corrupted; depth-counter fix should prevent the \
next begin_phase from recycling the slab. got 0x{observed:02x}"
);
eprintln!("BUG REPRODUCED: panic without end_phase leaves arena active; post-panic allocations recycled silently.");

// Drain the orphaned depth so subsequent tests see a clean state.
zk_alloc::end_phase();
}
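The doc comment above points at PhaseGuard as the supported recovery pattern, but its API is not part of this diff. The following is only a hypothetical RAII sketch built on the bare `begin_phase()`/`end_phase()` calls, showing why a Drop-based guard keeps PHASE_DEPTH balanced even when the phase body panics; the type and function names are illustrative.

// Hypothetical guard, for illustration only — not the crate's PhaseGuard.
struct PhaseScope;

impl PhaseScope {
    fn enter() -> PhaseScope {
        zk_alloc::begin_phase();
        PhaseScope
    }
}

impl Drop for PhaseScope {
    fn drop(&mut self) {
        // Drop runs during unwinding too, so the matching end_phase()
        // always executes and PHASE_DEPTH cannot be orphaned by a panic.
        zk_alloc::end_phase();
    }
}

fn prove_iteration() {
    let _phase = PhaseScope::enter();
    // ... phase work that may panic ...
}   // end_phase() runs here whether the body returned or unwound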
88 changes: 88 additions & 0 deletions tests/test_realloc_overlap.rs
@@ -0,0 +1,88 @@
//! hunt-2: realloc growth across a phase boundary can produce a destination
//! that partially overlaps the source. The current realloc uses
//! `copy_nonoverlapping`, which is UB on overlap and — with glibc's
//! forward-direction memcpy — corrupts the upper-half source bytes by
//! clobbering them before they're read.
//!
//! Construction:
//! 1. Phase N: alloc p1 at slab_base, size 8 KiB, fill with 0xC1.
//! 2. end_phase.
//! 3. Phase N+1 begin.
//! 4. First alloc in this phase resets the slab (cold path) and lands at
//! slab_base; we make it 4 KiB and fill with 0xD0. ARENA_PTR now sits
//! at slab_base + 4 KiB.
//! 5. Realloc p1 to grow to 16 KiB. The arena fast path returns
//! ARENA_PTR = slab_base + 4 KiB, so:
//!        src = [base, base+8K) — bytes: 0xD0 (first 4K, overwritten by py)
//! + 0xC1 (last 4K, untouched)
//! dst = [base+4K, base+12K)
//! overlap region = [base+4K, base+8K) — 4 KiB
//! 6. Realloc executes `copy_nonoverlapping(src, dst, 8192)`. With glibc's
//! forward-direction memcpy this writes src's first half (0xD0) into
//! the overlap region, clobbering the upper half of src BEFORE it's
//! read for dst[4K..8K]. Result: dst[4K..8K] is 0xD0 instead of 0xC1.
//! 7. With memmove (`ptr::copy`), the backward-direction copy preserves
//! src bytes correctly: dst[4K..8K] = 0xC1.
//!
//! This test is sensitive to the platform's memcpy direction. On
//! x86_64-glibc the forward-direction implementation reliably corrupts;
//! the test passes after switching to `ptr::copy`.

use std::alloc::{GlobalAlloc, Layout};

#[global_allocator]
static A: zk_alloc::ZkAllocator = zk_alloc::ZkAllocator;

static ZK: zk_alloc::ZkAllocator = zk_alloc::ZkAllocator;

#[test]
fn realloc_partial_overlap_preserves_source_bytes() {
// Phase N: claim slab via fresh 8 KiB alloc, fill with 0xC1.
zk_alloc::begin_phase();
let layout1 = Layout::from_size_align(8192, 8).unwrap();
let p1 = unsafe { ZK.alloc(layout1) };
assert!(!p1.is_null(), "phase-N alloc returned null");
unsafe { std::ptr::write_bytes(p1, 0xC1, 8192) };
zk_alloc::end_phase();

// Phase N+1: prime ARENA_PTR by allocating 4 KiB at slab_base. This
// runs the cold path (gen mismatch) which resets ARENA_PTR to base.
zk_alloc::begin_phase();
let layout_y = Layout::from_size_align(4096, 8).unwrap();
let py = unsafe { ZK.alloc(layout_y) };
assert!(!py.is_null(), "py alloc returned null");
unsafe { std::ptr::write_bytes(py, 0xD0, 4096) };

// Now realloc p1 to grow to 16 KiB. New ptr lands at slab_base + 4 KiB
// (fast path, ARENA_PTR == base + 4K), partially overlapping p1.
let p2 = unsafe { ZK.realloc(p1, layout1, 16384) };
assert!(!p2.is_null(), "realloc returned null");

println!(
"p1=0x{:x} py=0x{:x} p2=0x{:x}",
p1 as usize, py as usize, p2 as usize
);

// Read back p2's first 8 KiB: the bytes the user expects are the
// contents of src at the time of the call, i.e. 0xD0 for [0,4K) and
// 0xC1 for [4K,8K).
let bytes = unsafe { std::slice::from_raw_parts(p2, 8192) };
let lower_d0 = bytes[..4096].iter().all(|&b| b == 0xD0);
let upper_c1 = bytes[4096..].iter().all(|&b| b == 0xC1);

let upper_first_bad = bytes[4096..].iter().position(|&b| b != 0xC1);
eprintln!("lower_d0={lower_d0} upper_c1={upper_c1} upper_first_bad={upper_first_bad:?}");

assert!(
lower_d0,
"lower 4K of p2 should hold py's 0xD0 bytes (src had 0xD0 for [0,4K))"
);
assert!(
upper_c1,
"upper 4K of p2 was corrupted: expected 0xC1 (src's upper half) but \
forward-direction memcpy in copy_nonoverlapping clobbered the \
overlapping source range. Switch to ptr::copy (memmove)."
);

zk_alloc::end_phase();
}
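A small, safe worked example of the copy-direction point the comments above make, shrunk from 4 KiB halves to 4-byte halves. The buffer name, sizes, and function name are illustrative and not taken from the crate.

// Miniature of the test layout: src = buf[0..8] (first half 0xD0, second
// half 0xC1), dst = buf[4..12], so [4..8) overlaps.
fn copy_direction_demo() {
    let mut buf = vec![0u8; 12];
    buf[..4].fill(0xD0);
    buf[4..8].fill(0xC1);
    // Forward byte-by-byte copy — what a forward-direction memcpy does here.
    for i in 0..8 {
        buf[4 + i] = buf[i];
    }
    // The overlap clobbered src's upper half before it was read:
    assert_eq!(&buf[8..12], &[0xD0; 4]); // corrupted — should have carried 0xC1

    // memmove semantics (ptr::copy / copy_within) pick the safe direction.
    let mut buf2 = vec![0u8; 12];
    buf2[..4].fill(0xD0);
    buf2[4..8].fill(0xC1);
    buf2.copy_within(0..8, 4);
    assert_eq!(&buf2[8..12], &[0xC1; 4]); // src's upper half preserved
}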