Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 70 additions & 21 deletions 0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch
Original file line number Diff line number Diff line change
@@ -1,8 +1,60 @@
diff --git a/src/firecracker/src/api_server/request/snapshot.rs b/src/firecracker/src/api_server/request/snapshot.rs
index 4a96292d1..df6600603 100644
--- a/src/firecracker/src/api_server/request/snapshot.rs
+++ b/src/firecracker/src/api_server/request/snapshot.rs
@@ -96,6 +96,7 @@ fn parse_put_snapshot_load(body: &Body) -> Result<ParsedRequest, RequestError> {
// either `mem_file_path` or `mem_backend` field is always specified.
backend_path: snapshot_config.mem_file_path.unwrap(),
backend_type: MemBackendType::File,
+ shared: false,
}
}
};
@@ -179,6 +180,7 @@ mod tests {
mem_backend: MemBackendConfig {
backend_path: PathBuf::from("bar"),
backend_type: MemBackendType::File,
+ shared: false,
},
enable_diff_snapshots: false,
resume_vm: false,
@@ -209,6 +211,7 @@ mod tests {
mem_backend: MemBackendConfig {
backend_path: PathBuf::from("bar"),
backend_type: MemBackendType::File,
+ shared: false,
},
enable_diff_snapshots: true,
resume_vm: false,
@@ -239,6 +242,7 @@ mod tests {
mem_backend: MemBackendConfig {
backend_path: PathBuf::from("bar"),
backend_type: MemBackendType::Uffd,
+ shared: false,
},
enable_diff_snapshots: false,
resume_vm: true,
@@ -275,6 +279,7 @@ mod tests {
mem_backend: MemBackendConfig {
backend_path: PathBuf::from("bar"),
backend_type: MemBackendType::Uffd,
+ shared: false,
},
enable_diff_snapshots: false,
resume_vm: true,
@@ -305,6 +310,7 @@ mod tests {
mem_backend: MemBackendConfig {
backend_path: PathBuf::from("bar"),
backend_type: MemBackendType::File,
+ shared: false,
},
enable_diff_snapshots: false,
resume_vm: true,
diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs
index feb53c0e1..ecb688cc5 100644
index aeacadeb6..5ddcc1dc4 100644
--- a/src/vmm/src/persist.rs
+++ b/src/vmm/src/persist.rs
@@ -449,8 +449,13 @@ pub fn restore_from_snapshot(
@@ -384,8 +384,13 @@ pub fn restore_from_snapshot(
.into());
}
(
Expand All @@ -18,24 +70,29 @@ index feb53c0e1..ecb688cc5 100644
None,
)
}
@@ -512,9 +517,11 @@ fn guest_memory_from_file(
@@ -451,9 +456,15 @@ fn guest_memory_from_file(
mem_file_path: &Path,
mem_state: &GuestMemoryState,
track_dirty_pages: bool,
+ shared: bool,
) -> Result<Vec<GuestRegionMmap>, GuestMemoryFromFileError> {
let mem_file = File::open(mem_file_path)?;
- let mem_file = File::open(mem_file_path)?;
- let guest_mem = memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages)?;
+ let mem_file = if shared {
+ OpenOptions::new().read(true).write(true).open(mem_file_path)?
+ } else {
+ File::open(mem_file_path)?
+ };
+ let guest_mem =
+ memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages, shared)?;
Ok(guest_mem)
}

diff --git a/src/vmm/src/vmm_config/snapshot.rs b/src/vmm/src/vmm_config/snapshot.rs
index 4f312f88f..220316abf 100644
index 27a7841d5..9796daaf9 100644
--- a/src/vmm/src/vmm_config/snapshot.rs
+++ b/src/vmm/src/vmm_config/snapshot.rs
@@ -130,6 +130,16 @@ pub struct MemBackendConfig {
@@ -107,6 +107,16 @@ pub struct MemBackendConfig {
pub backend_path: PathBuf,
/// Specifies the guest memory backend type.
pub backend_type: MemBackendType,
Expand All @@ -53,10 +110,10 @@ index 4f312f88f..220316abf 100644

/// The microVM state options.
diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs
index 7bff6ad07..1d632a31f 100644
index 19367f7f9..390f1d7e3 100644
--- a/src/vmm/src/vstate/memory.rs
+++ b/src/vmm/src/vstate/memory.rs
@@ -575,10 +575,17 @@ pub fn anonymous(
@@ -124,12 +124,24 @@ pub fn anonymous(

/// Creates a GuestMemoryMmap given a `file` containing the data
/// and a `state` containing mapping information.
Expand All @@ -72,21 +129,13 @@ index 7bff6ad07..1d632a31f 100644
track_dirty_pages: bool,
+ shared: bool,
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
let regions: Vec<_> = regions.collect();
let memory_size = regions
@@ -593,9 +600,14 @@ pub fn snapshot_file(
return Err(MemoryError::OffsetTooLarge);
}

- create(regions, libc::MAP_PRIVATE, Some(file), track_dirty_pages)
+ let mmap_flags = if shared {
+ libc::MAP_SHARED
+ } else {
+ libc::MAP_PRIVATE
+ };
create(
regions.into_iter(),
- libc::MAP_PRIVATE,
+ mmap_flags,
Some(file),
track_dirty_pages,
)
+ create(regions, mmap_flags, Some(file), track_dirty_pages)
}

/// Defines the interface for snapshotting memory.
2 changes: 1 addition & 1 deletion DESIGN-v0.4-PHASE3-SPIKE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# v0.4 Phase 3 spike — Firecracker integration options

**Status:** RESOLVED — picked **Option B + light parallel Option A**. The forkd-patched Firecracker fork lives at [`deeplethe/firecracker:forkd-v0.4-mem-backend-shared`](https://github.com/deeplethe/firecracker/tree/forkd-v0.4-mem-backend-shared) with the proposal's diff applied (commit [`b30f1ba`](https://github.com/deeplethe/firecracker/commit/b30f1ba)). Operational details and rebase plan in [`docs/VENDORED-FIRECRACKER.md`](./docs/VENDORED-FIRECRACKER.md). Upstream conversation tracked at [`firecracker-microvm/firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) — filed 2026-05-25, currently awaiting first-response (FC's typical SLA for external feature requests is 30-90 days, per [#5768 precedent](https://github.com/firecracker-microvm/firecracker/issues/5768)). The vendored fork decouples forkd v0.4 progress from upstream timing.
**Status:** RESOLVED — picked **Option B + light parallel Option A**. The forkd-patched Firecracker fork lives at [`deeplethe/firecracker:forkd-v0.4-mem-backend-shared-v1.12`](https://github.com/deeplethe/firecracker/tree/forkd-v0.4-mem-backend-shared-v1.12) (3 commits on top of the v1.12.0 tag: main patch + struct-literal build fix + O_RDWR fix). See [`docs/VENDORED-FIRECRACKER.md`](./docs/VENDORED-FIRECRACKER.md) for the operational details (build, rebase plan, the earlier `forkd-v0.4-mem-backend-shared` v1.16-dev branch kept around for upstream-PR archeology). Upstream conversation tracked at [`firecracker-microvm/firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) — filed 2026-05-25, currently awaiting first-response (FC's typical SLA for external feature requests is 30-90 days, per [#5768 precedent](https://github.com/firecracker-microvm/firecracker/issues/5768)). The vendored fork decouples forkd v0.4 progress from upstream timing.

---

Expand Down
2 changes: 1 addition & 1 deletion crates/forkd-vmm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ pub enum MemoryBackend {
},
/// v0.4 live-fork backing. Each restored child gets its own memfd
/// populated from the snapshot's memory.bin; the patched
/// Firecracker (deeplethe/firecracker:forkd-v0.4-mem-backend-shared,
/// Firecracker (deeplethe/firecracker:forkd-v0.4-mem-backend-shared-v1.12,
/// see `docs/VENDORED-FIRECRACKER.md`) mmaps the memfd with
/// `MAP_SHARED` so the controller can later arm
/// `UFFDIO_WRITEPROTECT` on the same backing and capture dirty
Expand Down
42 changes: 24 additions & 18 deletions docs/VENDORED-FIRECRACKER.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,40 @@

forkd v0.4's live-fork primitive needs `MAP_SHARED` on the snapshot-restore memory mapping so that an external snapshot manager — `forkd-controller`, in our case — can observe guest memory writes through its own `mmap` of the same memfd. The current upstream Firecracker hardcodes `MAP_PRIVATE` in `src/vmm/src/vstate/memory.rs::snapshot_file`; without `MAP_SHARED`, guest writes copy-on-write into FC's private pages and never propagate back to the controller's view.

The fix is a ~33-line patch adding an opt-in `shared: bool` field to `MemBackendConfig`. The patch is filed upstream as [`firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) (proposal docs: [`FIRECRACKER-UPSTREAM-PROPOSAL.md`](../FIRECRACKER-UPSTREAM-PROPOSAL.md), full diff: [`0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch`](../0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch)). While that upstream conversation runs, forkd vendors the patch on a Firecracker fork so v0.4 isn't blocked.
The fix is a ~44-line patch adding an opt-in `shared: bool` field to `MemBackendConfig`. The patch is filed upstream as [`firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) (proposal docs: [`FIRECRACKER-UPSTREAM-PROPOSAL.md`](../FIRECRACKER-UPSTREAM-PROPOSAL.md), full diff: [`0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch`](../0001-feat-mem-backend-shared-option-for-MAP-SHARED.patch)). While that upstream conversation runs, forkd vendors the patch on a Firecracker fork so v0.4 isn't blocked.

## Where the fork lives

[**`deeplethe/firecracker`**](https://github.com/deeplethe/firecracker), branch [`forkd-v0.4-mem-backend-shared`](https://github.com/deeplethe/firecracker/tree/forkd-v0.4-mem-backend-shared).
[**`deeplethe/firecracker`**](https://github.com/deeplethe/firecracker), branch [`forkd-v0.4-mem-backend-shared-v1.12`](https://github.com/deeplethe/firecracker/tree/forkd-v0.4-mem-backend-shared-v1.12) (matches the FC version forkd produces snapshots with).

- Forked from upstream `main` at the commit cited in the proposal.
- One commit on the branch: the proposal's diff applied verbatim. No other deviations from upstream.
- The branch will be rebased onto upstream `main` periodically until upstream lands its own version of the patch (or we hear back that they don't want it; in which case the fork becomes our permanent home for it).
- Forked from upstream `v1.12.0` tag.
- Three commits on the branch (combined ~44 lines):
- `cc3632b72 feat(mem_backend): opt-in shared: true for MAP_SHARED snapshot restore` — main patch (API field + mmap-flag plumbing).
- `f3b299ff7 fix(snapshot): add shared: false to MemBackendConfig literals (build fix)` — fills in `shared: false` at six unpatched struct-literal sites so the tree still compiles.
- `fe2b39026 fix(snapshot): open mem_file with O_RDWR when shared=true` — without this, `mmap(..., MAP_SHARED, ...)` returns `EACCES` on a read-only fd.
- The earlier `forkd-v0.4-mem-backend-shared` branch (rebased on upstream `main`/v1.16-dev) is kept for the upstream PR diff; the v1.12 branch is the one forkd actually builds against.
- The branch will be rebased onto upstream `v1.12.x` (or whichever tag forkd snapshots with) until upstream lands its own version of the patch.

## When upstream lands the patch

If [`firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) merges, we delete the `forkd-v0.4-mem-backend-shared` branch on our fork and point forkd's build path back at upstream. The fork stops being a maintenance burden the moment upstream has the equivalent.
If [`firecracker#5912`](https://github.com/firecracker-microvm/firecracker/issues/5912) merges, we delete both `forkd-v0.4-mem-backend-shared` branches on our fork and point forkd's build path back at upstream. The fork stops being a maintenance burden the moment upstream has the equivalent.

If upstream declines or never responds (the [historical pattern for FC's external-tool features](https://github.com/firecracker-microvm/firecracker/issues/5768) — sparse-memory snapshots was closed as "use existing diff snapshots") then the fork keeps running and forkd documents its FC dependency as "deeplethe/firecracker, not vanilla."

## Building the patched binary

```bash
git clone -b forkd-v0.4-mem-backend-shared https://github.com/deeplethe/firecracker.git
git clone -b forkd-v0.4-mem-backend-shared-v1.12 https://github.com/deeplethe/firecracker.git
cd firecracker
tools/devtool build --release
# Binary lands at: build/cargo_target/x86_64-unknown-linux-musl/release/firecracker
```

Build prerequisites are the same as upstream Firecracker (Docker + tools/devtool).
Build prerequisites are the same as upstream Firecracker (Docker + tools/devtool). The branch builds clean — no manual patch application required.

## Smoke-checking the patch worked

`firecracker-patch/test-shared.sh` (in `space/` next to `forkd/`) is a 30-line bash script that loads an existing forkd snapshot with the patched binary, walks `/proc/<fc_pid>/maps`, and reports whether the `memory.bin` mapping is `rw-s` (MAP_SHARED — patch active) vs `rw-p` (MAP_PRIVATE — unpatched FC).
[`scripts/dev/test-shared.sh`](../scripts/dev/test-shared.sh) loads an existing forkd snapshot with the patched binary once per `shared` flag value, walks `/proc/<fc_pid>/maps`, and reports whether the `memory.bin` mapping is `rw-s` (MAP_SHARED — patch active) vs `rw-p` (MAP_PRIVATE — unpatched FC). Requires an existing forkd snapshot at `~/.local/share/forkd/snapshots/coding-agent-fork-prewarm-v1` (override via `SNAP_DIR=…`) and the patched binary on `$FC_BIN`.

Expected output:

Expand All @@ -46,19 +50,21 @@ shared=true → rw-s # MAP_SHARED (what the patch enables)

## What forkd needs to do next

The patched FC binary is necessary but not sufficient for v0.4. Three remaining pieces of forkd-side work, each scoped as a separate PR sequence:
The patched FC binary is necessary but not sufficient for v0.4.

1. **`forkd-vmm` memfd-backed spawn path** (Phase 5a, ~1 week). `forkd-controller` `memfd_create()`s a region, populates it with the snapshot's `memory.bin`, then hands FC `/proc/self/fd/<N>` as `mem_backend.backend_path` with `shared: true`. Tracking issue to be filed.
2. **Default-on memfd backing on supported kernels** (Phase 5b, ~3 days). Auto-detects kernel ≥ 5.7 with `uffd_wp`-on-shmem support; falls back to file-backed otherwise. See `forkd doctor` checks 15-16 in [`DESIGN-v0.4-USER-API.md`](../DESIGN-v0.4-USER-API.md).
3. **`mode="live"` BRANCH path** (Phase 6, ~2 weeks). The asynchronous dirty-page copier — the actual live-fork primitive. See [`DESIGN-v0.4.md`](../DESIGN-v0.4.md) for the kernel mechanics.
- **Phase 5a — `forkd-vmm` memfd helper** (done, PR #186): `forkd-controller` `memfd_create()`s a region, populates it with the snapshot's `memory.bin`, exposes `/proc/self/fd/<N>` as a backend path.
- **Phase 5b — memfd in restore_many_with** (done, PR #187): per-child memfd + per-child JSON body with `"shared": true`, gated on `MemoryBackend::MemfdShared`.
- **Phase 5c — patch verification** (done, 2026-05-29): `test-shared.sh` confirms `shared: true` produces `rw-s` (MAP_SHARED) and `shared: false` still produces `rw-p` (MAP_PRIVATE) on the v1.12 patched binary.
- **Phase 6 — `mode="live"` BRANCH path** (next, ~2 weeks): the asynchronous dirty-page copier — the actual live-fork primitive. See [`DESIGN-v0.4.md`](../DESIGN-v0.4.md) for the kernel mechanics.

Phases 5a → 5b → 6 are sequential. 7-9 (REST/CLI/SDK plumbing, doctor checks, benchmarks) are mostly parallel once the runtime path works.
7–9 (REST/CLI/SDK plumbing, doctor checks, benchmarks) are mostly parallel once Phase 6 lands.

## Risk: upstream rebase conflicts

The patch touches three files in `src/vmm/src/`:
- `persist.rs` — adds a `shared` parameter through `guest_memory_from_file`
- `vmm_config/snapshot.rs` — adds the `shared` field to `MemBackendConfig`
- `vstate/memory.rs` — chooses `MAP_SHARED` vs `MAP_PRIVATE` based on the flag
The patch touches four files:
- `src/vmm/src/persist.rs` — adds a `shared` parameter through `guest_memory_from_file`; opens `mem_file` with `O_RDWR` when `shared=true` so `mmap(MAP_SHARED, PROT_WRITE)` doesn't return `EACCES`.
- `src/vmm/src/vmm_config/snapshot.rs` — adds the `shared` field to `MemBackendConfig` (default `false`).
- `src/vmm/src/vstate/memory.rs` — chooses `MAP_SHARED` vs `MAP_PRIVATE` based on the flag.
- `src/firecracker/src/api_server/request/snapshot.rs` — request-parsing side of the new field (struct-literal sites get `shared: false` to keep compatibility).

These are stable interfaces by FC standards. Upstream rebases should be clean unless FC restructures the snapshot path itself, which their release notes haven't signaled. If a rebase ever fails, the patch is small enough to re-apply by hand in ~10 minutes.
93 changes: 93 additions & 0 deletions scripts/dev/test-shared.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env bash
# Verify the FC mem_backend.shared patch by checking /proc/PID/maps.
#
# Expected:
# shared=false -> rw-p (MAP_PRIVATE, unchanged FC behavior)
# shared=true -> rw-s (MAP_SHARED, what the patch enables)

set -euo pipefail

FC_BIN="${FC_BIN:-$HOME/firecracker/build/cargo_target/release/firecracker}"
SNAP_DIR="${SNAP_DIR:-$HOME/.local/share/forkd/snapshots/coding-agent-fork-prewarm-v1}"

cleanup_pids=()
trap 'for p in "${cleanup_pids[@]}"; do sudo kill -9 "$p" 2>/dev/null || true; done' EXIT

run_one() {
local shared=$1
local label=$2
local workdir
workdir=$(mktemp -d -t fc-shared-test-XXXX)
local sock="$workdir/api.sock"
local logfile="$workdir/fc.log"

echo "=== run with shared=$shared ($label) ==="
sudo touch "$logfile"
sudo "$FC_BIN" --api-sock "$sock" --log-path "$logfile" --level Debug >"$workdir/stdout.log" 2>&1 &
local sudo_pid=$!
cleanup_pids+=("$sudo_pid")
sleep 0.5

# Resolve the REAL firecracker PID (sudo's direct child)
local fc_pid
fc_pid=$(pgrep -P "$sudo_pid" || true)
if [[ -z "$fc_pid" ]]; then
echo " could not resolve real FC PID under sudo $sudo_pid"
cat "$workdir/stdout.log"
return 1
fi
cleanup_pids+=("$fc_pid")
echo " FC pid = $fc_pid (sudo wrapper $sudo_pid)"

local body
body=$(cat <<JSON
{
"snapshot_path": "$SNAP_DIR/vmstate",
"mem_backend": {
"backend_path": "$SNAP_DIR/memory.bin",
"backend_type": "File",
"shared": $shared
},
"enable_diff_snapshots": false,
"resume_vm": false
}
JSON
)
local http_status
http_status=$(sudo curl --unix-socket "$sock" -s -o "$workdir/curl.out" -w "%{http_code}" -X PUT "http://localhost/snapshot/load" -H "Content-Type: application/json" -d "$body" || echo curl_failed)
echo " /snapshot/load -> HTTP $http_status"
if [[ "$http_status" != "204" && "$http_status" != "200" ]]; then
echo " curl body: $(sudo cat "$workdir/curl.out")"
return 1
fi

echo " /proc/$fc_pid/maps lines matching memory.bin:"
sudo grep memory.bin "/proc/$fc_pid/maps" | head -3 | sed 's/^/ /'
local perms
perms=$(sudo grep memory.bin "/proc/$fc_pid/maps" | head -1 | awk '{print $2}')
local share_flag="${perms:3:1}"
echo " perms column = $perms (share_flag = '$share_flag')"

# Clean up immediately so we do not pile up FCs across runs
sudo kill -9 "$fc_pid" 2>/dev/null || true
sudo kill -9 "$sudo_pid" 2>/dev/null || true
wait "$sudo_pid" 2>/dev/null || true

if [[ "$shared" == "false" && "$share_flag" == "p" ]]; then
echo " OK shared=false -> MAP_PRIVATE (expected)"
return 0
elif [[ "$shared" == "true" && "$share_flag" == "s" ]]; then
echo " OK shared=true -> MAP_SHARED (PATCH WORKS)"
return 0
else
echo " FAIL: unexpected share_flag '$share_flag' for shared=$shared"
return 2
fi
}

run_one false "stock-compatible default"
echo
run_one true "patched behavior"

echo
echo "PATCH VERIFIED"
Loading