diff --git a/0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch b/0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch index 8600286..10c1850 100644 --- a/0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch +++ b/0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch @@ -1,8 +1,60 @@ +diff --git a/src/firecracker/src/api_server/request/snapshot.rs b/src/firecracker/src/api_server/request/snapshot.rs +index 4a96292d1..df6600603 100644 +--- a/src/firecracker/src/api_server/request/snapshot.rs ++++ b/src/firecracker/src/api_server/request/snapshot.rs +@@ -96,6 +96,7 @@ fn parse_put_snapshot_load(body: &Body) -> Result { + // either `mem_file_path` or `mem_backend` field is always specified. + backend_path: snapshot_config.mem_file_path.unwrap(), + backend_type: MemBackendType::File, ++ shared: false, + } + } + }; +@@ -179,6 +180,7 @@ mod tests { + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::File, ++ shared: false, + }, + enable_diff_snapshots: false, + resume_vm: false, +@@ -209,6 +211,7 @@ mod tests { + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::File, ++ shared: false, + }, + enable_diff_snapshots: true, + resume_vm: false, +@@ -239,6 +242,7 @@ mod tests { + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::Uffd, ++ shared: false, + }, + enable_diff_snapshots: false, + resume_vm: true, +@@ -275,6 +279,7 @@ mod tests { + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::Uffd, ++ shared: false, + }, + enable_diff_snapshots: false, + resume_vm: true, +@@ -305,6 +310,7 @@ mod tests { + mem_backend: MemBackendConfig { + backend_path: PathBuf::from("bar"), + backend_type: MemBackendType::File, ++ shared: false, + }, + enable_diff_snapshots: false, + resume_vm: true, diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs -index feb53c0e1..ecb688cc5 100644 +index aeacadeb6..5ddcc1dc4 100644 --- a/src/vmm/src/persist.rs +++ b/src/vmm/src/persist.rs -@@ -449,8 +449,13 @@ pub fn restore_from_snapshot( +@@ -384,8 +384,13 @@ pub fn restore_from_snapshot( .into()); } ( @@ -18,24 +70,29 @@ index feb53c0e1..ecb688cc5 100644 None, ) } -@@ -512,9 +517,11 @@ fn guest_memory_from_file( +@@ -451,9 +456,15 @@ fn guest_memory_from_file( mem_file_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, + shared: bool, ) -> Result, GuestMemoryFromFileError> { - let mem_file = File::open(mem_file_path)?; +- let mem_file = File::open(mem_file_path)?; - let guest_mem = memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages)?; ++ let mem_file = if shared { ++ OpenOptions::new().read(true).write(true).open(mem_file_path)? ++ } else { ++ File::open(mem_file_path)? ++ }; + let guest_mem = + memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages, shared)?; Ok(guest_mem) } diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs -index 4f312f88f..220316abf 100644 +index 27a7841d5..9796daaf9 100644 --- a/src/vmm/src/vmm_config/snapshot.rs +++ b/src/vmm/src/vmm_config/snapshot.rs -@@ -130,6 +130,16 @@ pub struct MemBackendConfig { +@@ -107,6 +107,16 @@ pub struct MemBackendConfig { pub backend_path: PathBuf, /// Specifies the guest memory backend type. pub backend_type: MemBackendType, @@ -53,10 +110,10 @@ index 4f312f88f..220316abf 100644 /// The microVM state options. diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs -index 7bff6ad07..1d632a31f 100644 +index 19367f7f9..390f1d7e3 100644 --- a/src/vmm/src/vstate/memory.rs +++ b/src/vmm/src/vstate/memory.rs -@@ -575,10 +575,17 @@ pub fn anonymous( +@@ -124,12 +124,24 @@ pub fn anonymous( /// Creates a GuestMemoryMmap given a `file` containing the data /// and a `state` containing mapping information. @@ -72,21 +129,13 @@ index 7bff6ad07..1d632a31f 100644 track_dirty_pages: bool, + shared: bool, ) -> Result, MemoryError> { - let regions: Vec<_> = regions.collect(); - let memory_size = regions -@@ -593,9 +600,14 @@ pub fn snapshot_file( - return Err(MemoryError::OffsetTooLarge); - } - +- create(regions, libc::MAP_PRIVATE, Some(file), track_dirty_pages) + let mmap_flags = if shared { + libc::MAP_SHARED + } else { + libc::MAP_PRIVATE + }; - create( - regions.into_iter(), -- libc::MAP_PRIVATE, -+ mmap_flags, - Some(file), - track_dirty_pages, - ) ++ create(regions, mmap_flags, Some(file), track_dirty_pages) + } + + /// Defines the interface for snapshotting memory. diff --git a/DESIGN-v0.4-PHASE3-SPIKE.md b/DESIGN-v0.4-PHASE3-SPIKE.md index c2693f2..1b90ca5 100644 --- a/DESIGN-v0.4-PHASE3-SPIKE.md +++ b/DESIGN-v0.4-PHASE3-SPIKE.md @@ -1,6 +1,6 @@ # v0.4 Phase 3 spike — Firecracker integration options -**Status:** RESOLVED — picked **Option B + light parallel Option A**. The forkd-patched Firecracker fork lives at [`deeplethe/firecracker:forkd-v0.4-mem-backend-shared`](https://github.com/deeplethe/firecracker/tree/forkd-v0.4-mem-backend-shared) with the proposal's diff applied (commit [`b30f1ba`](https://github.com/deeplethe/firecracker/commit/b30f1ba)). Operational details and rebase plan in [`docs/VENDORED-FIRECRACKER.md`](./docs/VENDORED-FIRECRACKER.md). Upstream conversation tracked at [`firecracker-microvm/firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) — filed 2026-05-25, currently awaiting first-response (FC's typical SLA for external feature requests is 30-90 days, per [#5768 precedent](https://github.com/firecracker-microvm/firecracker/issues/5768)). The vendored fork decouples forkd v0.4 progress from upstream timing. +**Status:** RESOLVED — picked **Option B + light parallel Option A**. The forkd-patched Firecracker fork lives at [`deeplethe/firecracker:forkd-v0.4-mem-backend-shared-v1.12`](https://github.com/deeplethe/firecracker/tree/forkd-v0.4-mem-backend-shared-v1.12) (3 commits on top of the v1.12.0 tag: main patch + struct-literal build fix + O_RDWR fix). See [`docs/VENDORED-FIRECRACKER.md`](./docs/VENDORED-FIRECRACKER.md) for the operational details (build, rebase plan, the earlier `forkd-v0.4-mem-backend-shared` v1.16-dev branch kept around for upstream-PR archeology). Upstream conversation tracked at [`firecracker-microvm/firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) — filed 2026-05-25, currently awaiting first-response (FC's typical SLA for external feature requests is 30-90 days, per [#5768 precedent](https://github.com/firecracker-microvm/firecracker/issues/5768)). The vendored fork decouples forkd v0.4 progress from upstream timing. --- diff --git a/crates/forkd-vmm/src/lib.rs b/crates/forkd-vmm/src/lib.rs index 3509d81..79290ea 100644 --- a/crates/forkd-vmm/src/lib.rs +++ b/crates/forkd-vmm/src/lib.rs @@ -312,7 +312,7 @@ pub enum MemoryBackend { }, /// v0.4 live-fork backing. Each restored child gets its own memfd /// populated from the snapshot's memory.bin; the patched - /// Firecracker (deeplethe/firecracker:forkd-v0.4-mem-backend-shared, + /// Firecracker (deeplethe/firecracker:forkd-v0.4-mem-backend-shared-v1.12, /// see `docs/VENDORED-FIRECRACKER.md`) mmaps the memfd with /// `MAP_SHARED` so the controller can later arm /// `UFFDIO_WRITEPROTECT` on the same backing and capture dirty diff --git a/docs/VENDORED-FIRECRACKER.md b/docs/VENDORED-FIRECRACKER.md index c16a075..90457e3 100644 --- a/docs/VENDORED-FIRECRACKER.md +++ b/docs/VENDORED-FIRECRACKER.md @@ -6,36 +6,40 @@ forkd v0.4's live-fork primitive needs `MAP_SHARED` on the snapshot-restore memory mapping so that an external snapshot manager — `forkd-controller`, in our case — can observe guest memory writes through its own `mmap` of the same memfd. The current upstream Firecracker hardcodes `MAP_PRIVATE` in `src/vmm/src/vstate/memory.rs::snapshot_file`; without `MAP_SHARED`, guest writes copy-on-write into FC's private pages and never propagate back to the controller's view. -The fix is a ~33-line patch adding an opt-in `shared: bool` field to `MemBackendConfig`. The patch is filed upstream as [`firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) (proposal docs: [`FIRECRACKER-UPSTREAM-PROPOSAL.md`](../FIRECRACKER-UPSTREAM-PROPOSAL.md), full diff: [`0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch`](../0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch)). While that upstream conversation runs, forkd vendors the patch on a Firecracker fork so v0.4 isn't blocked. +The fix is a ~44-line patch adding an opt-in `shared: bool` field to `MemBackendConfig`. The patch is filed upstream as [`firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) (proposal docs: [`FIRECRACKER-UPSTREAM-PROPOSAL.md`](../FIRECRACKER-UPSTREAM-PROPOSAL.md), full diff: [`0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch`](../0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch)). While that upstream conversation runs, forkd vendors the patch on a Firecracker fork so v0.4 isn't blocked. ## Where the fork lives -[**`deeplethe/firecracker`**](https://github.com/deeplethe/firecracker), branch [`forkd-v0.4-mem-backend-shared`](https://github.com/deeplethe/firecracker/tree/forkd-v0.4-mem-backend-shared). +[**`deeplethe/firecracker`**](https://github.com/deeplethe/firecracker), branch [`forkd-v0.4-mem-backend-shared-v1.12`](https://github.com/deeplethe/firecracker/tree/forkd-v0.4-mem-backend-shared-v1.12) (matches the FC version forkd produces snapshots with). -- Forked from upstream `main` at the commit cited in the proposal. -- One commit on the branch: the proposal's diff applied verbatim. No other deviations from upstream. -- The branch will be rebased onto upstream `main` periodically until upstream lands its own version of the patch (or we hear back that they don't want it; in which case the fork becomes our permanent home for it). +- Forked from upstream `v1.12.0` tag. +- Three commits on the branch (combined ~44 lines): + - `cc3632b72 feat(mem_backend): opt-in shared: true for MAP_SHARED snapshot restore` — main patch (API field + mmap-flag plumbing). + - `f3b299ff7 fix(snapshot): add shared: false to MemBackendConfig literals (build fix)` — fills in `shared: false` at six unpatched struct-literal sites so the tree still compiles. + - `fe2b39026 fix(snapshot): open mem_file with O_RDWR when shared=true` — without this, `mmap(..., MAP_SHARED, ...)` returns `EACCES` on a read-only fd. +- The earlier `forkd-v0.4-mem-backend-shared` branch (rebased on upstream `main`/v1.16-dev) is kept for the upstream PR diff; the v1.12 branch is the one forkd actually builds against. +- The branch will be rebased onto upstream `v1.12.x` (or whichever tag forkd snapshots with) until upstream lands its own version of the patch. ## When upstream lands the patch -If [`firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) merges, we delete the `forkd-v0.4-mem-backend-shared` branch on our fork and point forkd's build path back at upstream. The fork stops being a maintenance burden the moment upstream has the equivalent. +If [`firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) merges, we delete both `forkd-v0.4-mem-backend-shared` branches on our fork and point forkd's build path back at upstream. The fork stops being a maintenance burden the moment upstream has the equivalent. If upstream declines or never responds (the [historical pattern for FC's external-tool features](https://github.com/firecracker-microvm/firecracker/issues/5768) — sparse-memory snapshots was closed as "use existing diff snapshots") then the fork keeps running and forkd documents its FC dependency as "deeplethe/firecracker, not vanilla." ## Building the patched binary ```bash -git clone -b forkd-v0.4-mem-backend-shared https://github.com/deeplethe/firecracker.git +git clone -b forkd-v0.4-mem-backend-shared-v1.12 https://github.com/deeplethe/firecracker.git cd firecracker tools/devtool build --release # Binary lands at: build/cargo_target/x86_64-unknown-linux-musl/release/firecracker ``` -Build prerequisites are the same as upstream Firecracker (Docker + tools/devtool). +Build prerequisites are the same as upstream Firecracker (Docker + tools/devtool). The branch builds clean — no manual patch application required. ## Smoke-checking the patch worked -`firecracker-patch/test-shared.sh` (in `space/` next to `forkd/`) is a 30-line bash script that loads an existing forkd snapshot with the patched binary, walks `/proc//maps`, and reports whether the `memory.bin` mapping is `rw-s` (MAP_SHARED — patch active) vs `rw-p` (MAP_PRIVATE — unpatched FC). +[`scripts/dev/test-shared.sh`](../scripts/dev/test-shared.sh) loads an existing forkd snapshot with the patched binary once per `shared` flag value, walks `/proc//maps`, and reports whether the `memory.bin` mapping is `rw-s` (MAP_SHARED — patch active) vs `rw-p` (MAP_PRIVATE — unpatched FC). Requires an existing forkd snapshot at `~/.local/share/forkd/snapshots/coding-agent-fork-prewarm-v1` (override via `SNAP_DIR=…`) and the patched binary on `$FC_BIN`. Expected output: @@ -46,19 +50,21 @@ shared=true → rw-s # MAP_SHARED (what the patch enables) ## What forkd needs to do next -The patched FC binary is necessary but not sufficient for v0.4. Three remaining pieces of forkd-side work, each scoped as a separate PR sequence: +The patched FC binary is necessary but not sufficient for v0.4. -1. **`forkd-vmm` memfd-backed spawn path** (Phase 5a, ~1 week). `forkd-controller` `memfd_create()`s a region, populates it with the snapshot's `memory.bin`, then hands FC `/proc/self/fd/` as `mem_backend.backend_path` with `shared: true`. Tracking issue to be filed. -2. **Default-on memfd backing on supported kernels** (Phase 5b, ~3 days). Auto-detects kernel ≥ 5.7 with `uffd_wp`-on-shmem support; falls back to file-backed otherwise. See `forkd doctor` checks 15-16 in [`DESIGN-v0.4-USER-API.md`](../DESIGN-v0.4-USER-API.md). -3. **`mode="live"` BRANCH path** (Phase 6, ~2 weeks). The asynchronous dirty-page copier — the actual live-fork primitive. See [`DESIGN-v0.4.md`](../DESIGN-v0.4.md) for the kernel mechanics. +- **Phase 5a — `forkd-vmm` memfd helper** (done, PR #186): `forkd-controller` `memfd_create()`s a region, populates it with the snapshot's `memory.bin`, exposes `/proc/self/fd/` as a backend path. +- **Phase 5b — memfd in restore_many_with** (done, PR #187): per-child memfd + per-child JSON body with `"shared": true`, gated on `MemoryBackend::MemfdShared`. +- **Phase 5c — patch verification** (done, 2026-05-29): `test-shared.sh` confirms `shared: true` produces `rw-s` (MAP_SHARED) and `shared: false` still produces `rw-p` (MAP_PRIVATE) on the v1.12 patched binary. +- **Phase 6 — `mode="live"` BRANCH path** (next, ~2 weeks): the asynchronous dirty-page copier — the actual live-fork primitive. See [`DESIGN-v0.4.md`](../DESIGN-v0.4.md) for the kernel mechanics. -Phases 5a → 5b → 6 are sequential. 7-9 (REST/CLI/SDK plumbing, doctor checks, benchmarks) are mostly parallel once the runtime path works. +7–9 (REST/CLI/SDK plumbing, doctor checks, benchmarks) are mostly parallel once Phase 6 lands. ## Risk: upstream rebase conflicts -The patch touches three files in `src/vmm/src/`: -- `persist.rs` — adds a `shared` parameter through `guest_memory_from_file` -- `vmm_config/snapshot.rs` — adds the `shared` field to `MemBackendConfig` -- `vstate/memory.rs` — chooses `MAP_SHARED` vs `MAP_PRIVATE` based on the flag +The patch touches four files: +- `src/vmm/src/persist.rs` — adds a `shared` parameter through `guest_memory_from_file`; opens `mem_file` with `O_RDWR` when `shared=true` so `mmap(MAP_SHARED, PROT_WRITE)` doesn't return `EACCES`. +- `src/vmm/src/vmm_config/snapshot.rs` — adds the `shared` field to `MemBackendConfig` (default `false`). +- `src/vmm/src/vstate/memory.rs` — chooses `MAP_SHARED` vs `MAP_PRIVATE` based on the flag. +- `src/firecracker/src/api_server/request/snapshot.rs` — request-parsing side of the new field (struct-literal sites get `shared: false` to keep compatibility). These are stable interfaces by FC standards. Upstream rebases should be clean unless FC restructures the snapshot path itself, which their release notes haven't signaled. If a rebase ever fails, the patch is small enough to re-apply by hand in ~10 minutes. diff --git a/scripts/dev/test-shared.sh b/scripts/dev/test-shared.sh new file mode 100644 index 0000000..277a0b8 --- /dev/null +++ b/scripts/dev/test-shared.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +# Verify the FC mem_backend.shared patch by checking /proc/PID/maps. +# +# Expected: +# shared=false -> rw-p (MAP_PRIVATE, unchanged FC behavior) +# shared=true -> rw-s (MAP_SHARED, what the patch enables) + +set -euo pipefail + +FC_BIN="${FC_BIN:-$HOME/firecracker/build/cargo_target/release/firecracker}" +SNAP_DIR="${SNAP_DIR:-$HOME/.local/share/forkd/snapshots/coding-agent-fork-prewarm-v1}" + +cleanup_pids=() +trap 'for p in "${cleanup_pids[@]}"; do sudo kill -9 "$p" 2>/dev/null || true; done' EXIT + +run_one() { + local shared=$1 + local label=$2 + local workdir + workdir=$(mktemp -d -t fc-shared-test-XXXX) + local sock="$workdir/api.sock" + local logfile="$workdir/fc.log" + + echo "=== run with shared=$shared ($label) ===" + sudo touch "$logfile" + sudo "$FC_BIN" --api-sock "$sock" --log-path "$logfile" --level Debug >"$workdir/stdout.log" 2>&1 & + local sudo_pid=$! + cleanup_pids+=("$sudo_pid") + sleep 0.5 + + # Resolve the REAL firecracker PID (sudo's direct child) + local fc_pid + fc_pid=$(pgrep -P "$sudo_pid" || true) + if [[ -z "$fc_pid" ]]; then + echo " could not resolve real FC PID under sudo $sudo_pid" + cat "$workdir/stdout.log" + return 1 + fi + cleanup_pids+=("$fc_pid") + echo " FC pid = $fc_pid (sudo wrapper $sudo_pid)" + + local body + body=$(cat < HTTP $http_status" + if [[ "$http_status" != "204" && "$http_status" != "200" ]]; then + echo " curl body: $(sudo cat "$workdir/curl.out")" + return 1 + fi + + echo " /proc/$fc_pid/maps lines matching memory.bin:" + sudo grep memory.bin "/proc/$fc_pid/maps" | head -3 | sed 's/^/ /' + local perms + perms=$(sudo grep memory.bin "/proc/$fc_pid/maps" | head -1 | awk '{print $2}') + local share_flag="${perms:3:1}" + echo " perms column = $perms (share_flag = '$share_flag')" + + # Clean up immediately so we do not pile up FCs across runs + sudo kill -9 "$fc_pid" 2>/dev/null || true + sudo kill -9 "$sudo_pid" 2>/dev/null || true + wait "$sudo_pid" 2>/dev/null || true + + if [[ "$shared" == "false" && "$share_flag" == "p" ]]; then + echo " OK shared=false -> MAP_PRIVATE (expected)" + return 0 + elif [[ "$shared" == "true" && "$share_flag" == "s" ]]; then + echo " OK shared=true -> MAP_SHARED (PATCH WORKS)" + return 0 + else + echo " FAIL: unexpected share_flag '$share_flag' for shared=$shared" + return 2 + fi +} + +run_one false "stock-compatible default" +echo +run_one true "patched behavior" + +echo +echo "PATCH VERIFIED"