Skip to content

Commit 58d7858

Browse files
committed
feat(vm): deploy NemoClaw helm chart inside gateway VM
Enable full NemoClaw control plane deployment inside the libkrun microVM so e2e tests can run against the VM instead of Docker.

Build-time (build-rootfs.sh):
- Package helm chart and inject into k3s static charts directory
- Copy HelmChart CR and agent-sandbox manifests into rootfs
- Pull and save arm64 container images as tarballs for airgap boot

Boot-time (gateway-init.sh):
- Enable flannel CNI (remove --flannel-backend=none and related flags)
- Deploy bundled manifests to k3s auto-deploy directory
- Patch HelmChart CR for VM context (pullPolicy, SSH placeholders)
- Ensure DNS fallback when DHCP doesn't configure resolv.conf

Post-boot (lib.rs):
- Wait for navigator namespace created by Helm controller
- Generate PKI and apply TLS secrets via host kubectl
- Store cluster metadata and mTLS creds for CLI/SDK access
- Set 'gateway' as active cluster for e2e test discovery

Also bump VM to 8GB RAM / 4 vCPUs, add port 30051 forwarding, fix nemoclaw wrapper fingerprint to include navigator-vm crate, and add test:e2e:vm mise task.
1 parent d23ff61 commit 58d7858

File tree

8 files changed

+379
-23
lines changed

8 files changed

+379
-23
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/navigator-bootstrap/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
pub mod build;
55
pub mod image;
66

7-
mod constants;
7+
pub mod constants;
88
mod docker;
99
mod kubeconfig;
1010
mod metadata;
11-
mod mtls;
11+
pub mod mtls;
1212
pub mod paths;
13-
mod pki;
13+
pub mod pki;
1414
pub(crate) mod push;
1515
mod runtime;
1616

crates/navigator-vm/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@ name = "navigator_vm"
1515
path = "src/lib.rs"
1616

1717
[dependencies]
18+
base64 = "0.22"
1819
libc = "0.2"
1920
miette = { workspace = true }
21+
navigator-bootstrap = { path = "../navigator-bootstrap" }
22+
serde_json = "1"
2023
thiserror = { workspace = true }
2124

2225
[lints]

crates/navigator-vm/scripts/build-rootfs.sh

Lines changed: 85 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44

55
# Build an aarch64 Ubuntu rootfs for the gateway microVM.
66
#
7-
# Produces a rootfs with k3s pre-installed, plus the gateway-init.sh script
8-
# that runs as PID 1 inside the libkrun VM.
7+
# Produces a rootfs with k3s pre-installed, the NemoClaw helm chart and
8+
# manifests baked in, and container images pre-loaded for airgap boot.
99
#
1010
# Usage:
1111
# ./crates/navigator-vm/scripts/build-rootfs.sh [output_dir]
1212
#
13-
# Requires: Docker (or compatible container runtime), curl
13+
# Requires: Docker (or compatible container runtime), curl, helm
1414

1515
set -euo pipefail
1616

@@ -25,8 +25,19 @@ IMAGE_TAG="krun-rootfs:gateway"
2525
K3S_VERSION="${K3S_VERSION:-v1.29.8+k3s1}"
2626
K3S_VERSION="${K3S_VERSION//-k3s/+k3s}"
2727

28+
# Project root (three levels up from crates/navigator-vm/scripts/)
29+
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
30+
31+
# Container images to pre-load into k3s (arm64).
32+
# Registry base and tag for the navigator images; both can be overridden from
# the environment.
#
# The tag deliberately lives in NAVIGATOR_IMAGE_TAG rather than IMAGE_TAG:
# IMAGE_TAG is already assigned earlier in this script ("krun-rootfs:gateway",
# the rootfs builder image). Reusing it here meant the ":-latest" default could
# never apply and produced invalid references such as
# ".../server:krun-rootfs:gateway".
IMAGE_REPO_BASE="${IMAGE_REPO_BASE:-d1i0nduu2f6qxk.cloudfront.net/navigator}"
NAVIGATOR_IMAGE_TAG="${NAVIGATOR_IMAGE_TAG:-latest}"
SERVER_IMAGE="${IMAGE_REPO_BASE}/server:${NAVIGATOR_IMAGE_TAG}"
SANDBOX_IMAGE="${IMAGE_REPO_BASE}/sandbox:${NAVIGATOR_IMAGE_TAG}"
# The agent-sandbox controller is pinned to a fixed upstream release.
AGENT_SANDBOX_IMAGE="registry.k8s.io/agent-sandbox/agent-sandbox-controller:v0.1.0"
37+
2838
echo "==> Building gateway rootfs"
2939
echo " k3s version: ${K3S_VERSION}"
40+
echo " Images: ${SERVER_IMAGE}, ${SANDBOX_IMAGE}"
3041
echo " Output: ${ROOTFS_DIR}"
3142

3243
# ── Download k3s binary (outside Docker — much faster) ─────────────────
@@ -92,6 +103,69 @@ chmod +x "${ROOTFS_DIR}/srv/gateway-init.sh"
92103
cp "${SCRIPT_DIR}/hello-server.py" "${ROOTFS_DIR}/srv/hello-server.py"
93104
chmod +x "${ROOTFS_DIR}/srv/hello-server.py"
94105

106+
# ── Package and inject helm chart ────────────────────────────────────
107+
108+
# Source chart in the repository and its destination inside the rootfs
# (the k3s static charts directory).
HELM_CHART_DIR="${PROJECT_ROOT}/deploy/helm/navigator"
CHART_DEST="${ROOTFS_DIR}/var/lib/rancher/k3s/server/static/charts"

# A missing chart directory is only a warning; the build carries on without it.
if [ ! -d "${HELM_CHART_DIR}" ]; then
  echo "WARNING: Helm chart not found at ${HELM_CHART_DIR}, skipping"
else
  echo "==> Packaging helm chart..."
  mkdir -p "${CHART_DEST}"
  helm package "${HELM_CHART_DIR}" -d "${CHART_DEST}"
  # Report the file name(s) of the packaged chart archive(s).
  echo " $(ls "${CHART_DEST}"/*.tgz 2>/dev/null | xargs -I{} basename {})"
fi
119+
120+
# ── Inject Kubernetes manifests ──────────────────────────────────────
121+
# These are copied to /opt/navigator/manifests/ (staging). gateway-init.sh
122+
# copies them to /var/lib/rancher/k3s/server/manifests/ at boot so the
123+
# k3s Helm Controller auto-deploys them.
124+
125+
# Repository directory holding the manifests, and the staging directory
# inside the rootfs that receives them.
MANIFEST_SRC="${PROJECT_ROOT}/deploy/kube/manifests"
MANIFEST_DEST="${ROOTFS_DIR}/opt/navigator/manifests"

echo "==> Injecting Kubernetes manifests..."
mkdir -p "${MANIFEST_DEST}"

# Stage each expected manifest; a missing one is reported but is not fatal.
for manifest in navigator-helmchart.yaml agent-sandbox.yaml; do
  if [ ! -f "${MANIFEST_SRC}/${manifest}" ]; then
    echo "WARNING: ${manifest} not found in ${MANIFEST_SRC}"
    continue
  fi
  cp "${MANIFEST_SRC}/${manifest}" "${MANIFEST_DEST}/"
  echo " ${manifest}"
done
139+
140+
# ── Pre-load container images ────────────────────────────────────────
141+
# Pull arm64 images and save as tarballs in the k3s airgap images
142+
# directory. k3s auto-imports from /var/lib/rancher/k3s/agent/images/
143+
# on startup, so no internet access is needed at boot time.
144+
145+
IMAGES_DIR="${ROOTFS_DIR}/var/lib/rancher/k3s/agent/images"
146+
mkdir -p "${IMAGES_DIR}"
147+
148+
echo "==> Pre-loading container images (arm64)..."
149+
150+
#######################################
# Pull an arm64 container image and export it as a tarball.
# An existing tarball at the destination is treated as a cache hit and
# docker is not invoked.
# Arguments:
#   $1 - image reference to pull
#   $2 - path of the tarball to write
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 on success or cache hit; non-zero if `docker pull`, `docker save`
#   or the final rename fails.
#######################################
pull_and_save() {
  local image="$1"
  local output="$2"
  local partial="${output}.partial"

  if [ -f "${output}" ]; then
    echo " cached: $(basename "${output}")"
    return 0
  fi

  echo " pulling: ${image}..."
  docker pull --platform linux/arm64 "${image}" --quiet || return
  echo " saving: $(basename "${output}")..."
  # Export under a scratch name and rename only on success. Writing straight
  # to ${output} would let a failed or interrupted save leave a truncated
  # tarball that the cache check above accepts on the next run. A leftover
  # scratch file from an interrupted run is simply overwritten next time.
  if ! docker save "${image}" -o "${partial}"; then
    rm -f -- "${partial}"
    return 1
  fi
  mv -- "${partial}" "${output}"
}
164+
165+
pull_and_save "${SERVER_IMAGE}" "${IMAGES_DIR}/navigator-server.tar"
166+
pull_and_save "${SANDBOX_IMAGE}" "${IMAGES_DIR}/navigator-sandbox.tar"
167+
pull_and_save "${AGENT_SANDBOX_IMAGE}" "${IMAGES_DIR}/agent-sandbox-controller.tar"
168+
95169
# ── Verify ────────────────────────────────────────────────────────────
96170

97171
if [ ! -f "${ROOTFS_DIR}/usr/local/bin/k3s" ]; then
@@ -102,6 +176,14 @@ fi
102176
echo ""
103177
echo "==> Rootfs ready at: ${ROOTFS_DIR}"
104178
echo " Size: $(du -sh "${ROOTFS_DIR}" | cut -f1)"
179+
180+
# Show image sizes
181+
# List every pre-loaded image tarball together with its on-disk size.
echo " Images:"
for img in "${IMAGES_DIR}"/*.tar; do
  # When nothing matches, the glob stays literal; skip that non-file entry.
  if [ ! -f "$img" ]; then
    continue
  fi
  echo " ${img##*/}: $(du -sh "$img" | cut -f1)"
done
186+
105187
echo ""
106188
echo "Next steps:"
107189
echo " 1. Run: ncl gateway"

crates/navigator-vm/scripts/gateway-init.sh

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
# Init script for the gateway microVM. Runs as PID 1 inside the libkrun VM.
66
#
7-
# Mounts essential virtual filesystems, then execs k3s server.
7+
# Mounts essential virtual filesystems, deploys bundled manifests (helm chart,
8+
# agent-sandbox controller), then execs k3s server.
89

910
set -e
1011

@@ -80,6 +81,14 @@ DHCP_SCRIPT
8081
ip route add default via 192.168.127.1 2>/dev/null || true
8182
fi
8283

84+
# Ensure DNS is configured. DHCP should have set /etc/resolv.conf,
85+
# but if it didn't (or static fallback was used), provide a default.
86+
if ! [ -s /etc/resolv.conf ]; then
  echo "[gateway-init] no DNS configured, using public DNS"
  # resolv.conf is missing or empty: write two public resolvers, one per line.
  printf 'nameserver %s\n' 8.8.8.8 8.8.4.4 > /etc/resolv.conf
fi
91+
8392
# Read back the IP we got (from DHCP or static).
8493
NODE_IP=$(ip -4 addr show eth0 | grep -oP 'inet \K[^/]+' || echo "192.168.127.2")
8594
echo "[gateway-init] eth0 IP: $NODE_IP"
@@ -110,15 +119,53 @@ rm -f /var/lib/rancher/k3s/server/kine.sock 2>/dev/null || true
110119
find /var/lib/rancher/k3s -name '*.sock' -delete 2>/dev/null || true
111120
find /run -name '*.sock' -delete 2>/dev/null || true
112121

122+
# ── Deploy bundled manifests ────────────────────────────────────────────
123+
# Copy manifests from the staging directory to the k3s auto-deploy path.
124+
# This mirrors the approach in cluster-entrypoint.sh for the Docker path.
125+
126+
# k3s auto-deploy directory and the staging directory holding the bundled
# manifests.
K3S_MANIFESTS="/var/lib/rancher/k3s/server/manifests"
BUNDLED_MANIFESTS="/opt/navigator/manifests"

mkdir -p "$K3S_MANIFESTS"

if [ -d "$BUNDLED_MANIFESTS" ]; then
  echo "[gateway-init] deploying bundled manifests..."

  # Copy every bundled YAML file into the auto-deploy directory.
  for manifest in "$BUNDLED_MANIFESTS"/*.yaml; do
    [ -f "$manifest" ] || continue
    cp "$manifest" "$K3S_MANIFESTS/"
    echo " ${manifest##*/}"
  done

  # Drop navigator-*/agent-* manifests left over from an earlier boot that
  # are no longer part of the bundle.
  for existing in "$K3S_MANIFESTS"/navigator-*.yaml "$K3S_MANIFESTS"/agent-*.yaml; do
    [ -f "$existing" ] || continue
    stale_name=${existing##*/}
    if [ -f "$BUNDLED_MANIFESTS/$stale_name" ]; then
      continue
    fi
    echo " removing stale: $stale_name"
    rm -f "$existing"
  done
fi
150+
151+
# Patch the HelmChart manifest for VM deployment.
152+
HELMCHART="$K3S_MANIFESTS/navigator-helmchart.yaml"
if [ -f "$HELMCHART" ]; then
  echo "[gateway-init] patching HelmChart manifest..."
  # One in-place pass with three independent substitutions:
  #   1. switch the pull policy to IfNotPresent so pre-loaded images are used
  #      instead of pulling from a registry;
  #   2. replace the SSH gateway host placeholder with an empty string;
  #   3. replace the SSH gateway port placeholder with 0.
  sed -i \
    -e 's|pullPolicy: Always|pullPolicy: IfNotPresent|' \
    -e 's|sshGatewayHost: __SSH_GATEWAY_HOST__|sshGatewayHost: ""|g' \
    -e 's|sshGatewayPort: __SSH_GATEWAY_PORT__|sshGatewayPort: 0|g' \
    "$HELMCHART"
fi
161+
113162
# ── Start k3s ──────────────────────────────────────────────────────────
114163

115164
echo "[gateway-init] starting k3s server..."
116165
exec /usr/local/bin/k3s server \
117166
--disable=traefik \
118167
--write-kubeconfig-mode=644 \
119168
--node-ip="$NODE_IP" \
120-
--flannel-backend=none \
121-
--disable-network-policy \
122-
--disable-kube-proxy \
123169
--kube-apiserver-arg=bind-address=0.0.0.0 \
170+
--resolv-conf=/etc/resolv.conf \
124171
--tls-san=localhost,127.0.0.1,10.0.2.15,192.168.127.2

0 commit comments

Comments
 (0)