diff --git a/src/lib.rs b/src/lib.rs
index ec7b4fd..0327247 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -112,6 +112,13 @@ static MAX_THREADS: AtomicUsize = AtomicUsize::new(0);
 static OVERFLOW_COUNT: AtomicUsize = AtomicUsize::new(0);
 static OVERFLOW_BYTES: AtomicUsize = AtomicUsize::new(0);
 
+/// Nested-phase counter. `begin_phase()` increments it; `end_phase()`
+/// decrements. Only the outermost begin (0 → 1) bumps GENERATION and
+/// flips ARENA_ACTIVE; only the outermost end (1 → 0) deactivates.
+/// This makes nested `begin_phase()` calls compose without the inner
+/// begin recycling the outer phase's slab data.
+static PHASE_DEPTH: AtomicUsize = AtomicUsize::new(0);
+
 /// Allocations smaller than this go to System even during active phases.
 /// Routes registry / hashmap / injector-block-sized allocations away from
 /// the arena, so library state that outlives a phase doesn't land in
@@ -196,8 +203,14 @@ fn ensure_region() -> usize {
 /// or copy into a `Vec` allocated outside any phase).
 pub fn begin_phase() {
     ensure_region();
-    GENERATION.fetch_add(1, Ordering::Release);
-    ARENA_ACTIVE.store(true, Ordering::Release);
+    // Only the outermost begin bumps GENERATION and activates the arena.
+    // Nested begin_phase() calls just bump the depth counter; without this
+    // guard, an inner begin would recycle the slab and silently overwrite
+    // the outer phase's bump-allocated data.
+    if PHASE_DEPTH.fetch_add(1, Ordering::AcqRel) == 0 {
+        GENERATION.fetch_add(1, Ordering::Release);
+        ARENA_ACTIVE.store(true, Ordering::Release);
+    }
 }
 
 /// Deactivates the arena. New allocations go to the system allocator; existing arena
@@ -205,10 +218,24 @@ pub fn begin_phase() {
 ///
 /// With the `rayon-flush` feature (default), this also drains rayon's internal
 /// queues to release any crossbeam-deque blocks allocated during the phase.
+///
+/// Calls beyond the matching `begin_phase()` (i.e., when no phase is
+/// active) are tolerated and have no effect.
 pub fn end_phase() {
-    ARENA_ACTIVE.store(false, Ordering::Release);
-    #[cfg(feature = "rayon-flush")]
-    flush_rayon();
+    // Only the outermost end (depth 1 → 0) tears down. Saturate at zero so
+    // unbalanced end_phase() calls are no-ops rather than wrapping around.
+    let prev = PHASE_DEPTH.fetch_update(Ordering::AcqRel, Ordering::Acquire, |d| {
+        if d == 0 {
+            None
+        } else {
+            Some(d - 1)
+        }
+    });
+    if let Ok(1) = prev {
+        ARENA_ACTIVE.store(false, Ordering::Release);
+        #[cfg(feature = "rayon-flush")]
+        flush_rayon();
+    }
 }
 
 /// Drains rayon's crossbeam-deque injector to release blocks allocated during
@@ -396,7 +423,16 @@ unsafe impl GlobalAlloc for ZkAllocator {
         let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, layout.align()) };
         let new_ptr = unsafe { self.alloc(new_layout) };
         if !new_ptr.is_null() {
-            unsafe { std::ptr::copy_nonoverlapping(ptr, new_ptr, layout.size()) };
+            // Use `ptr::copy` (memmove) instead of `copy_nonoverlapping`:
+            // when reallocating an arena pointer across a phase boundary,
+            // the cold-path slab reset (or fast-path bump after reset) can
+            // hand back a pointer that aliases or partially overlaps the
+            // source. `copy_nonoverlapping` is UB on overlap; `copy`
+            // handles it correctly. Modern x86_64 memcpy implementations
+            // happen to be safe for short overlaps in practice, but the
+            // language-level UB is real and would surface under miri or
+            // future codegen.
+            unsafe { std::ptr::copy(ptr, new_ptr, layout.size()) };
             unsafe { self.dealloc(ptr, layout) };
         }
         new_ptr
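Taken together, the two lib.rs hunks implement a small state machine. The sketch below restates it in isolation so the transitions are easy to audit; DEPTH and ACTIVE are illustrative stand-ins for the crate's statics, and `checked_sub` replaces the patch's explicit `if d == 0 { None }` arm (the two are equivalent):

    use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};

    static DEPTH: AtomicUsize = AtomicUsize::new(0);
    static ACTIVE: AtomicBool = AtomicBool::new(false);

    fn begin() {
        // Only the outermost begin (0 -> 1) activates; nested callers
        // just deepen the counter.
        if DEPTH.fetch_add(1, Ordering::AcqRel) == 0 {
            ACTIVE.store(true, Ordering::Release);
        }
    }

    fn end() {
        // fetch_update returns Err(current) when the closure yields None,
        // so a surplus end() at depth 0 saturates instead of wrapping.
        let prev = DEPTH.fetch_update(Ordering::AcqRel, Ordering::Acquire, |d| d.checked_sub(1));
        if prev == Ok(1) {
            ACTIVE.store(false, Ordering::Release);
        }
    }

    fn main() {
        begin();
        begin(); // nested: no reactivation (no generation bump in the real crate)
        end();   // 2 -> 1: still inside the outer phase
        assert!(ACTIVE.load(Ordering::Acquire));
        end();   // 1 -> 0: deactivates
        assert!(!ACTIVE.load(Ordering::Acquire));
        end();   // unbalanced: no-op, depth stays 0
        assert_eq!(DEPTH.load(Ordering::Acquire), 0);
    }

Using `fetch_update` for the decrement avoids a separate load-then-store race: the saturation check and the decrement are one atomic read-modify-write.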
diff --git a/tests/test_nested_phase.rs b/tests/test_nested_phase.rs
new file mode 100644
index 0000000..1b123a3
--- /dev/null
+++ b/tests/test_nested_phase.rs
@@ -0,0 +1,48 @@
+//! hunt-1: Nested phases silently corrupt outer phase data.
+//!
+//! Hypothesis: nested `begin_phase()` calls bump GENERATION, which marks
+//! every thread's slab as needing reset. On the next allocation in the
+//! inner phase, ARENA_PTR is reset to the slab base — overwriting any
+//! data the outer phase had bump-allocated there.
+//!
+//! There is no nesting counter. The inner `end_phase()` flips
+//! ARENA_ACTIVE to false, after which any allocations in the remainder
+//! of the outer phase silently land in System (a different correctness
+//! issue covered separately).
+//!
+//! Existing `tests/test_phase_guard.rs::nested_phase_guards_compose`
+//! only verifies that nested guards don't panic — it never allocates
+//! anything in the outer phase, so this corruption is invisible there.
+
+#[global_allocator]
+static A: zk_alloc::ZkAllocator = zk_alloc::ZkAllocator;
+
+#[test]
+fn nested_phase_does_not_corrupt_outer() {
+    // Allocation big enough to land in arena (>= MIN_ARENA_BYTES = 4096).
+    zk_alloc::begin_phase();
+    let outer: Vec<u8> = vec![0xA1_u8; 8192];
+    let outer_ptr = outer.as_ptr() as usize;
+
+    // Nested begin_phase. GENERATION bumps; the next arena alloc on
+    // this thread will reset ARENA_PTR back to slab base.
+    zk_alloc::begin_phase();
+    let inner: Vec<u8> = vec![0xB2_u8; 8192];
+    let inner_ptr = inner.as_ptr() as usize;
+    let inner_first_byte = inner[0];
+    zk_alloc::end_phase();
+
+    zk_alloc::end_phase();
+
+    eprintln!("outer_ptr=0x{outer_ptr:x} inner_ptr=0x{inner_ptr:x}");
+
+    // If nested phases are sound, outer's bytes are still 0xA1.
+    let pos = outer.iter().position(|&b| b != 0xA1);
+    let _ = inner_first_byte;
+    assert!(
+        pos.is_none(),
+        "outer phase data corrupted at offset {}: nested begin_phase bumped \
+         GENERATION and the inner allocation recycled the outer's slab region",
+        pos.unwrap()
+    );
+}
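The test above exercises the allocator directly; in application terms, the property it protects is that a callee may open its own phase without coordinating with its caller. A hedged caller-side sketch (`prove_round` and `commit_columns` are hypothetical functions; only `begin_phase`/`end_phase` come from the patch):

    // Hypothetical caller-side code composing phases.
    fn commit_columns() -> Vec<u64> {
        zk_alloc::begin_phase(); // nested: depth 1 -> 2, slab NOT recycled
        let cols: Vec<u64> = (0..1024u64).map(|i| i * i).collect(); // 8 KiB: arena
        zk_alloc::end_phase(); // depth 2 -> 1, arena stays active for the caller
        cols
    }

    fn prove_round() -> u64 {
        zk_alloc::begin_phase(); // outermost: recycles the slab once per round
        let transcript: Vec<u64> = vec![7; 2048]; // outer-phase arena data
        let cols = commit_columns(); // must not clobber `transcript`
        let sum: u64 = transcript.iter().sum::<u64>() + cols.iter().sum::<u64>();
        zk_alloc::end_phase(); // depth 1 -> 0, deactivates
        sum // return a plain value, not arena-backed storage
    }

Note that `prove_round` returns a scalar rather than an arena-allocated `Vec`, matching the doc comment's advice that phase data should not outlive the next `begin_phase()`.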
diff --git a/tests/test_panic_phase.rs b/tests/test_panic_phase.rs
index c03c1b1..4cef194 100644
--- a/tests/test_panic_phase.rs
+++ b/tests/test_panic_phase.rs
@@ -1,18 +1,21 @@
 //! Scenario 3: panic unwinding through a phase boundary.
 //!
-//! There is no RAII guard around begin_phase()/end_phase(). If a panic
-//! propagates out of phase code without reaching end_phase(), ARENA_ACTIVE
-//! stays true. Subsequent "post-phase" allocations land in arena and get
-//! silently recycled on the next begin_phase().
+//! There is no RAII guard around bare begin_phase()/end_phase(). If a
+//! panic propagates out of phase code without reaching end_phase(),
+//! PHASE_DEPTH stays > 0 and ARENA_ACTIVE stays true.
 //!
-//! This is a plain API hazard: the recovery path of any prove_with_panic
-//! pattern is unsafe.
+//! After the depth-counter fix, this means: subsequent begin_phase() calls
+//! in the recovery path nest under the orphaned phase rather than recycling
+//! the slab. The slab keeps growing across "iterations" until it overflows
+//! to System — a memory leak, not silent corruption. Either way the
+//! recovery path is broken; PhaseGuard is the supported way to make panic
+//! recovery safe (see test_phase_guard.rs).
 
 #[global_allocator]
 static A: zk_alloc::ZkAllocator = zk_alloc::ZkAllocator;
 
 #[test]
-fn panic_inside_phase_leaves_arena_active() {
+fn panic_without_phase_guard_orphans_phase_depth() {
     use std::panic;
 
     // Suppress default panic print to minimize incidental allocations between
@@ -23,19 +26,16 @@
     zk_alloc::begin_phase();
     let r = panic::catch_unwind(panic::AssertUnwindSafe(|| panic!("simulated")));
     assert!(r.is_err());
-    // No end_phase reached. ARENA_ACTIVE is still true.
+    // No end_phase reached. PHASE_DEPTH still 1, ARENA_ACTIVE still true.
 
-    // This Vec lands in arena (since arena is still active and 8192 >=
-    // MIN_ARENA_BYTES default 4096).
+    // Lands in arena (active=true, 8192 >= MIN_ARENA_BYTES default 4096).
     let post_panic: Vec<u8> = vec![0xCC; 8192];
     let post_panic_ptr = post_panic.as_ptr() as usize;
 
-    // Begin the next phase (e.g., next iteration of a prove loop). Arena
-    // resets — anything allocated during the "ghost" phase between panic
-    // and now gets recycled.
+    // Begin the next "iteration". With the depth-counter fix this nests
+    // under the orphaned phase (depth 1 → 2) instead of bumping
+    // GENERATION, so the slab is NOT reset and post_panic survives.
     zk_alloc::begin_phase();
-    // Span enough of the slab to cover post_panic's offset, regardless of
-    // how many small bumps the panic introduced.
     let big: Vec<u8> = vec![0x33; 1 << 20];
     let big_ptr = big.as_ptr() as usize;
     let big_end = big_ptr + big.len();
@@ -43,22 +43,31 @@
     let _ = panic::take_hook();
 
-    let in_big_range = post_panic_ptr >= big_ptr && post_panic_ptr < big_end;
+    let post_in_big = post_panic_ptr >= big_ptr && post_panic_ptr < big_end;
+    let big_overlaps_post =
+        big_ptr < post_panic_ptr + post_panic.len() && big_ptr + big.len() > post_panic_ptr;
     let observed = post_panic[0];
     eprintln!(
         "post_panic_ptr=0x{post_panic_ptr:x} big=[0x{big_ptr:x}, 0x{big_end:x}); \
-         in_range={in_big_range} observed=0x{observed:02x}"
+         post_in_big={post_in_big} big_overlaps_post={big_overlaps_post} \
+         observed=0x{observed:02x}"
     );
 
+    // With the depth-counter fix, the second begin_phase nests rather than
+    // recycling. post_panic is preserved and big is allocated *after* it
+    // in the same slab, so they do not overlap.
     assert!(
-        in_big_range,
-        "post-panic Vec didn't land in arena's slab — test layout assumption broken"
+        !big_overlaps_post,
+        "depth-counter fix failed: nested begin_phase recycled the slab and \
+         big overlapped post_panic"
     );
 
     assert_eq!(
-        observed, 0x33,
-        "expected post-panic Vec contents to be recycled by next begin_phase \
-         (arena was still active after the panic) — got 0x{observed:02x}"
+        observed, 0xCC,
+        "post-panic Vec was corrupted; depth-counter fix should prevent the \
+         next begin_phase from recycling the slab. got 0x{observed:02x}"
    );
-    eprintln!("BUG REPRODUCED: panic without end_phase leaves arena active; post-panic allocations recycled silently.");
+
+    // Drain the orphaned depth so subsequent tests see a clean state.
+    zk_alloc::end_phase();
 }
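The comments above lean on PhaseGuard for panic safety, but the guard itself is not part of this diff. The stand-in below (illustrative `PhaseScope`, built only from the public `begin_phase`/`end_phase`) shows the RAII shape that keeps PHASE_DEPTH balanced across unwinding:

    // Illustrative RAII guard, not the crate's PhaseGuard.
    struct PhaseScope;

    impl PhaseScope {
        fn enter() -> PhaseScope {
            zk_alloc::begin_phase();
            PhaseScope
        }
    }

    impl Drop for PhaseScope {
        fn drop(&mut self) {
            // Drop runs during unwinding too, so a panic cannot orphan
            // the depth counter the way the bare calls in this test do.
            zk_alloc::end_phase();
        }
    }

    fn prove_step() {
        let _phase = PhaseScope::enter();
        // ... phase work; a panic here still reaches end_phase via Drop ...
    }

Because `end_phase()` now saturates at depth 0, a guard like this is also safe to mix with manual calls: a double teardown is a no-op rather than an underflow.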
diff --git a/tests/test_realloc_overlap.rs b/tests/test_realloc_overlap.rs
new file mode 100644
index 0000000..e680654
--- /dev/null
+++ b/tests/test_realloc_overlap.rs
@@ -0,0 +1,88 @@
+//! hunt-2: realloc growth across a phase boundary can produce a destination
+//! that partially overlaps the source. The current realloc uses
+//! `copy_nonoverlapping`, which is UB on overlap and — with glibc's
+//! forward-direction memcpy — corrupts the upper-half source bytes by
+//! clobbering them before they're read.
+//!
+//! Construction:
+//! 1. Phase N: alloc p1 at slab_base, size 8 KiB, fill with 0xC1.
+//! 2. end_phase.
+//! 3. Phase N+1 begin.
+//! 4. First alloc in this phase resets the slab (cold path) and lands at
+//!    slab_base; we make it 4 KiB and fill with 0xD0. ARENA_PTR now sits
+//!    at slab_base + 4 KiB.
+//! 5. Realloc p1 to grow to 16 KiB. The arena fast path returns
+//!    ARENA_PTR = slab_base + 4 KiB, so:
+//!      src = [base, base+8K)  — bytes: 0xD0 (first 4K, py overwrote)
+//!                                    + 0xC1 (last 4K, untouched)
+//!      dst = [base+4K, base+12K)
+//!      overlap region = [base+4K, base+8K) — 4 KiB
+//! 6. Realloc executes `copy_nonoverlapping(src, dst, 8192)`. With glibc's
+//!    forward-direction memcpy this writes src's first half (0xD0) into
+//!    the overlap region, clobbering the upper half of src BEFORE it's
+//!    read for dst[4K..8K]. Result: dst[4K..8K] is 0xD0 instead of 0xC1.
+//! 7. With memmove (`ptr::copy`), the backward-direction copy preserves
+//!    src bytes correctly: dst[4K..8K] = 0xC1.
+//!
+//! This test is sensitive to the platform's memcpy direction. On
+//! x86_64-glibc the forward-direction implementation reliably corrupts;
+//! the test passes after switching to `ptr::copy`.
+
+use std::alloc::{GlobalAlloc, Layout};
+
+#[global_allocator]
+static A: zk_alloc::ZkAllocator = zk_alloc::ZkAllocator;
+
+static ZK: zk_alloc::ZkAllocator = zk_alloc::ZkAllocator;
+
+#[test]
+fn realloc_partial_overlap_preserves_source_bytes() {
+    // Phase N: claim slab via fresh 8 KiB alloc, fill with 0xC1.
+    zk_alloc::begin_phase();
+    let layout1 = Layout::from_size_align(8192, 8).unwrap();
+    let p1 = unsafe { ZK.alloc(layout1) };
+    assert!(!p1.is_null(), "phase-N alloc returned null");
+    unsafe { std::ptr::write_bytes(p1, 0xC1, 8192) };
+    zk_alloc::end_phase();
+
+    // Phase N+1: prime ARENA_PTR by allocating 4 KiB at slab_base. This
+    // runs the cold path (gen mismatch) which resets ARENA_PTR to base.
+    zk_alloc::begin_phase();
+    let layout_y = Layout::from_size_align(4096, 8).unwrap();
+    let py = unsafe { ZK.alloc(layout_y) };
+    assert!(!py.is_null(), "py alloc returned null");
+    unsafe { std::ptr::write_bytes(py, 0xD0, 4096) };
+
+    // Now realloc p1 to grow to 16 KiB. New ptr lands at slab_base + 4 KiB
+    // (fast path, ARENA_PTR == base + 4K), partially overlapping p1.
+    let p2 = unsafe { ZK.realloc(p1, layout1, 16384) };
+    assert!(!p2.is_null(), "realloc returned null");
+
+    println!(
+        "p1=0x{:x} py=0x{:x} p2=0x{:x}",
+        p1 as usize, py as usize, p2 as usize
+    );
+
+    // Read back p2's first 8 KiB: the bytes the user expects are the
+    // contents of src at the time of the call, i.e. 0xD0 for [0,4K) and
+    // 0xC1 for [4K,8K).
+    let bytes = unsafe { std::slice::from_raw_parts(p2, 8192) };
+    let lower_d0 = bytes[..4096].iter().all(|&b| b == 0xD0);
+    let upper_c1 = bytes[4096..].iter().all(|&b| b == 0xC1);
+
+    let upper_first_bad = bytes[4096..].iter().position(|&b| b != 0xC1);
+    eprintln!("lower_d0={lower_d0} upper_c1={upper_c1} upper_first_bad={upper_first_bad:?}");
+
+    assert!(
+        lower_d0,
+        "lower 4K of p2 should hold py's 0xD0 bytes (src had 0xD0 for [0,4K))"
+    );
+    assert!(
+        upper_c1,
+        "upper 4K of p2 was corrupted: expected 0xC1 (src's upper half) but \
+         forward-direction memcpy in copy_nonoverlapping clobbered the \
+         overlapping source range. Switch to ptr::copy (memmove)."
+    );
+
+    zk_alloc::end_phase();
+}
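The overlap rule this test depends on can be demonstrated without the allocator at all. A self-contained sketch on a plain array, using the same dst = src + offset shape as the cross-phase realloc above:

    fn main() {
        let mut buf = [0u8; 8];
        buf[..4].copy_from_slice(&[0xD0; 4]); // src lower half
        buf[4..].copy_from_slice(&[0xC1; 4]); // src upper half

        unsafe {
            let base = buf.as_mut_ptr();
            // dst = src + 2: a 6-byte copy whose ranges overlap by 4 bytes,
            // the same shape as dst = slab_base + bump offset in the test.
            std::ptr::copy(base as *const u8, base.add(2), 6);
        }

        // memmove semantics: dst receives src's ORIGINAL bytes despite the
        // overlap. The same call via copy_nonoverlapping would be UB, and a
        // naive forward byte copy would yield D0 D0 D0 D0 D0 D0 here.
        assert_eq!(&buf[2..], &[0xD0, 0xD0, 0xD0, 0xD0, 0xC1, 0xC1]);
    }

This is the language-level contract the lib.rs hunk cites: `ptr::copy` documents memmove behavior for overlapping ranges, while `copy_nonoverlapping` requires disjoint ranges regardless of what a particular libc memcpy happens to do.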