From cd14c2ecad69e308fc2de87b8e70fb9498b7a7b0 Mon Sep 17 00:00:00 2001 From: chaosreload Date: Tue, 24 Mar 2026 10:03:43 +0000 Subject: [PATCH 1/7] feat: add Kubernetes deployment support - Dockerfile: multi-stage build (Rust compiler + Ubuntu runtime) Firecracker bundled; vmlinux/rootfs mounted via PVC - docker/entrypoint.sh: handles template creation on first boot, skips if snapshot already exists on PVC - deploy/k8s/: namespace, PVC (gp3 20Gi), Deployment with KVM device plugin resource, podAntiAffinity, health probes, HPA, Service - docs/KUBERNETES.md: EC2 instance family requirements, KVM device plugin setup, PVC storage guidance, autoscaling with custom metric (zeroboot_concurrent_forks), Karpenter NodePool example, ServiceMonitor config, configuration reference Closes #9 --- Dockerfile | 60 ++++++++ deploy/k8s/deployment.yaml | 89 ++++++++++++ deploy/k8s/hpa.yaml | 45 ++++++ deploy/k8s/namespace.yaml | 4 + deploy/k8s/pvc.yaml | 14 ++ deploy/k8s/service.yaml | 15 ++ docker/entrypoint.sh | 69 +++++++++ docs/KUBERNETES.md | 284 +++++++++++++++++++++++++++++++++++++ 8 files changed, 580 insertions(+) create mode 100644 Dockerfile create mode 100644 deploy/k8s/deployment.yaml create mode 100644 deploy/k8s/hpa.yaml create mode 100644 deploy/k8s/namespace.yaml create mode 100644 deploy/k8s/pvc.yaml create mode 100644 deploy/k8s/service.yaml create mode 100644 docker/entrypoint.sh create mode 100644 docs/KUBERNETES.md diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..82fb5d8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,60 @@ +# syntax=docker/dockerfile:1 +# Zeroboot server image +# Multi-stage build: compile Rust binary, then assemble minimal runtime image. +# +# Usage: +# docker build -t zeroboot:latest . +# +# The image does NOT bundle vmlinux or rootfs — mount them via PersistentVolume. +# See deploy/k8s/ for Kubernetes manifests. 
+ +# ─── Stage 1: Build zeroboot binary ────────────────────────────────────────── +FROM rust:1.80-bookworm AS builder + +WORKDIR /build + +# Cache dependencies separately from source +COPY Cargo.toml Cargo.lock ./ +RUN mkdir src && echo 'fn main(){}' > src/main.rs && \ + cargo build --release && \ + rm -f target/release/zeroboot target/release/deps/zeroboot* + +# Build actual source +COPY src/ src/ +COPY guest/ guest/ +RUN cargo build --release + +# ─── Stage 2: Runtime image ─────────────────────────────────────────────────── +FROM ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive + +# Runtime dependencies only +RUN apt-get update -qq && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Install Firecracker +ARG FC_VERSION=v1.15.0 +RUN curl -fsSL -o /tmp/fc.tgz \ + "https://github.com/firecracker-microvm/firecracker/releases/download/${FC_VERSION}/firecracker-${FC_VERSION}-x86_64.tgz" && \ + tar -xzf /tmp/fc.tgz -C /tmp && \ + mv "/tmp/release-${FC_VERSION}-x86_64/firecracker-${FC_VERSION}-x86_64" /usr/local/bin/firecracker && \ + chmod +x /usr/local/bin/firecracker && \ + rm -rf /tmp/fc.tgz /tmp/release-* + +# Copy zeroboot binary +COPY --from=builder /build/target/release/zeroboot /usr/local/bin/zeroboot + +# Data directory — mount a PersistentVolume here to persist snapshots +VOLUME ["/var/lib/zeroboot"] + +# Copy entrypoint +COPY docker/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +EXPOSE 8080 + +ENTRYPOINT ["/entrypoint.sh"] diff --git a/deploy/k8s/deployment.yaml b/deploy/k8s/deployment.yaml new file mode 100644 index 0000000..cf0d505 --- /dev/null +++ b/deploy/k8s/deployment.yaml @@ -0,0 +1,89 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: zeroboot + namespace: zeroboot + labels: + app: zeroboot +spec: + replicas: 2 + selector: + matchLabels: + app: zeroboot + template: + metadata: + labels: + app: zeroboot + annotations: + prometheus.io/scrape: "true" + 
prometheus.io/port: "8080" + prometheus.io/path: "/v1/metrics" + spec: + # ── Scheduling: only run on KVM-capable nodes ─────────────────────────── + nodeSelector: + kvm-capable: "true" # label KVM nodes with: kubectl label node NODE_NAME kvm-capable=true + + # Spread pods across nodes — each pod needs its own physical memory for CoW + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: zeroboot + topologyKey: kubernetes.io/hostname + + containers: + - name: zeroboot + image: ghcr.io/zerobootdev/zeroboot:latest + imagePullPolicy: Always + ports: + - name: http + containerPort: 8080 + + # ── KVM device — avoids privileged: true ─────────────────────────── + resources: + requests: + memory: "2Gi" + cpu: "1" + devices.kubevirt.io/kvm: "1" + limits: + memory: "8Gi" + cpu: "4" + devices.kubevirt.io/kvm: "1" + + env: + - name: ZEROBOOT_WORKDIR + value: /var/lib/zeroboot + - name: ZEROBOOT_PORT + value: "8080" + - name: ZEROBOOT_TEMPLATE_WAIT + value: "15" + # Optional: path to api_keys.json on the PVC + # - name: ZEROBOOT_API_KEYS_FILE + # value: /var/lib/zeroboot/api_keys.json + + volumeMounts: + - name: data + mountPath: /var/lib/zeroboot + + # ── Health checks ────────────────────────────────────────────────── + readinessProbe: + httpGet: + path: /v1/health + port: 8080 + initialDelaySeconds: 30 # allow time for template creation on first boot + periodSeconds: 5 + failureThreshold: 6 + + livenessProbe: + httpGet: + path: /v1/health + port: 8080 + initialDelaySeconds: 60 + periodSeconds: 10 + failureThreshold: 3 + + volumes: + - name: data + persistentVolumeClaim: + claimName: zeroboot-data diff --git a/deploy/k8s/hpa.yaml b/deploy/k8s/hpa.yaml new file mode 100644 index 0000000..5c34940 --- /dev/null +++ b/deploy/k8s/hpa.yaml @@ -0,0 +1,45 @@ +# Horizontal Pod Autoscaler for zeroboot +# +# Scales based on zeroboot_concurrent_forks — the number of active VM sandboxes +# per pod. 
CPU/memory are poor proxies for zeroboot workloads because: +# - Fork is memory-bound (CoW page faults), not CPU-bound +# - RSS grows proportionally with concurrent sandboxes +# +# Prerequisites: +# 1. prometheus-adapter installed and configured to expose zeroboot_concurrent_forks +# as a Kubernetes custom metric (pods/zeroboot_concurrent_forks) +# 2. See docs/KUBERNETES.md for prometheus-adapter config snippet + +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: zeroboot + namespace: zeroboot +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: zeroboot + minReplicas: 2 + maxReplicas: 10 + metrics: + - type: Pods + pods: + metric: + name: zeroboot_concurrent_forks + target: + type: AverageValue + averageValue: "800" # scale out when avg concurrent sandboxes > 800 per pod + behavior: + scaleUp: + stabilizationWindowSeconds: 30 + policies: + - type: Pods + value: 2 + periodSeconds: 60 + scaleDown: + stabilizationWindowSeconds: 300 # 5 min cooldown before scale-in + policies: + - type: Pods + value: 1 + periodSeconds: 120 diff --git a/deploy/k8s/namespace.yaml b/deploy/k8s/namespace.yaml new file mode 100644 index 0000000..4307e42 --- /dev/null +++ b/deploy/k8s/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: zeroboot diff --git a/deploy/k8s/pvc.yaml b/deploy/k8s/pvc.yaml new file mode 100644 index 0000000..92cadba --- /dev/null +++ b/deploy/k8s/pvc.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: zeroboot-data + namespace: zeroboot + labels: + app: zeroboot +spec: + accessModes: + - ReadWriteOnce + storageClassName: gp3 # AWS EBS gp3; adjust for your cloud provider + resources: + requests: + storage: 20Gi # vmlinux (~21MB) + rootfs (~500MB) + snapshot (~512MB) per template diff --git a/deploy/k8s/service.yaml b/deploy/k8s/service.yaml new file mode 100644 index 0000000..8854f22 --- /dev/null +++ b/deploy/k8s/service.yaml @@ -0,0 +1,15 @@ 
+apiVersion: v1 +kind: Service +metadata: + name: zeroboot + namespace: zeroboot + labels: + app: zeroboot +spec: + type: ClusterIP # Use LoadBalancer to expose externally + selector: + app: zeroboot + ports: + - name: http + port: 80 + targetPort: 8080 diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 0000000..4c339ca --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,69 @@ +#!/bin/bash +set -euo pipefail + +WORKDIR="${ZEROBOOT_WORKDIR:-/var/lib/zeroboot}" +KERNEL="${ZEROBOOT_KERNEL:-${WORKDIR}/vmlinux-fc}" +ROOTFS_PYTHON="${ZEROBOOT_ROOTFS_PYTHON:-${WORKDIR}/rootfs-python.ext4}" +ROOTFS_NODE="${ZEROBOOT_ROOTFS_NODE:-}" +PORT="${ZEROBOOT_PORT:-8080}" +TEMPLATE_WAIT="${ZEROBOOT_TEMPLATE_WAIT:-15}" + +# ── Validate KVM access ─────────────────────────────────────────────────────── +if [ ! -c /dev/kvm ]; then + echo "ERROR: /dev/kvm not found. Node must support KVM and use the KVM device plugin." + exit 1 +fi + +# ── Check required files ────────────────────────────────────────────────────── +if [ ! -f "$KERNEL" ]; then + echo "ERROR: Kernel not found at $KERNEL" + echo "Mount a PersistentVolume to $WORKDIR containing vmlinux-fc and rootfs images." + exit 1 +fi + +if [ ! -f "$ROOTFS_PYTHON" ]; then + echo "ERROR: Python rootfs not found at $ROOTFS_PYTHON" + exit 1 +fi + +# ── Create template if snapshot doesn't exist ──────────────────────────────── +PYTHON_SNAPSHOT="${WORKDIR}/python/snapshot/vmstate" + +if [ ! -f "$PYTHON_SNAPSHOT" ]; then + echo "No snapshot found — creating Python template (this takes ~${TEMPLATE_WAIT}s)..." + mkdir -p "${WORKDIR}/python" + cp "$ROOTFS_PYTHON" "${WORKDIR}/python-rootfs.ext4" + /usr/local/bin/zeroboot template \ + "$KERNEL" \ + "${WORKDIR}/python-rootfs.ext4" \ + "${WORKDIR}/python" \ + "$TEMPLATE_WAIT" \ + /init + echo "Template created." +else + echo "Snapshot found — skipping template creation." 
+fi + +# ── Build serve target ──────────────────────────────────────────────────────── +SERVE_TARGET="python:${WORKDIR}/python" + +if [ -n "$ROOTFS_NODE" ] && [ -f "$ROOTFS_NODE" ]; then + NODE_SNAPSHOT="${WORKDIR}/node/snapshot/vmstate" + if [ ! -f "$NODE_SNAPSHOT" ]; then + echo "Creating Node.js template..." + mkdir -p "${WORKDIR}/node" + cp "$ROOTFS_NODE" "${WORKDIR}/node-rootfs.ext4" + /usr/local/bin/zeroboot template \ + "$KERNEL" \ + "${WORKDIR}/node-rootfs.ext4" \ + "${WORKDIR}/node" \ + "$TEMPLATE_WAIT" \ + /init-node.sh + echo "Node template created." + fi + SERVE_TARGET="${SERVE_TARGET},node:${WORKDIR}/node" +fi + +# ── Start API server ────────────────────────────────────────────────────────── +echo "Starting zeroboot API server on port ${PORT}..." +exec /usr/local/bin/zeroboot serve "$SERVE_TARGET" "$PORT" diff --git a/docs/KUBERNETES.md b/docs/KUBERNETES.md new file mode 100644 index 0000000..3cbcdab --- /dev/null +++ b/docs/KUBERNETES.md @@ -0,0 +1,284 @@ +# Running Zeroboot on Kubernetes + +Zeroboot can be deployed as a stateful service inside a Kubernetes cluster. +This guide covers node requirements, KVM device access, persistent storage for +snapshots, reference manifests, and autoscaling. + +--- + +## Architecture overview + +``` +Internet → K8s Service + │ + ┌──────┼──────┐ + │ │ │ + Pod-1 Pod-2 Pod-3 ← one Pod per KVM-capable Node (podAntiAffinity) + │ │ │ + VM VM VM VM VM VM ← KVM forks happen inside the Pod, sub-millisecond +``` + +**Key point:** Kubernetes manages the lifecycle of the zeroboot *server* process. +It does not schedule individual sandboxes — each `v1/exec` request is handled +entirely within the Pod that receives it via a KVM fork (~0.8 ms). Kubernetes' +role is capacity management: health checks, rolling updates, and horizontal scaling. + +--- + +## Node requirements + +### Instance families with KVM support + +Not all EC2 instance types expose `/dev/kvm`. 
The following families support KVM +and are suitable for zeroboot: + +| Family | Notes | +|---|---| +| `c6i`, `c6a`, `c7i`, `c8i` | ✅ Recommended — Nitro-based, no nested virt needed | +| `m6i`, `m7i`, `m8i` | ✅ General-purpose, KVM available | +| `r6i`, `r7i` | ✅ Memory-optimized — good for high snapshot concurrency | +| `c5`, `m5` | ✅ Older Nitro generation, still works | +| `t3`, `t4g` | ❌ Burstable — `/dev/kvm` not available | +| `t2` | ❌ No KVM | +| Any ARM (`*g`) | ❌ Architecture mismatch — Firecracker x86_64 binary required | + +> On GCP: `n2`, `n2d`, `c2`, `c3` families support KVM. +> On Azure: `Dv3`, `Ev3`, `Dsv3` with nested virtualization enabled. + +### Label KVM-capable nodes + +```bash +kubectl label node NODE_NAME kvm-capable=true +``` + +The Deployment's `nodeSelector` uses this label to ensure Pods are only scheduled +where `/dev/kvm` is available. + +--- + +## KVM device access without `privileged: true` + +Pods request `/dev/kvm` via the [KVM device plugin](https://github.com/kubevirt/kubevirt/tree/main/cmd/virt-handler) +from the KubeVirt project: + +```bash +# Install KVM device plugin (DaemonSet) +kubectl apply -f https://github.com/kubevirt/kubevirt/releases/latest/download/kubevirt-operator.yaml +kubectl apply -f https://github.com/kubevirt/kubevirt/releases/latest/download/kubevirt-cr.yaml +``` + +Once installed, Pods can request KVM access via resources (already set in `deployment.yaml`): + +```yaml +resources: + limits: + devices.kubevirt.io/kvm: "1" +``` + +This grants `/dev/kvm` access without `privileged: true` or `hostPath` mounts. + +--- + +## Persistent storage for snapshots + +Zeroboot's `template` command snapshots ~512 MB of VM memory to disk. Without a +PersistentVolume, every Pod restart triggers a ~15 s re-snapshot. + +Mount a PVC at `/var/lib/zeroboot` (see `deploy/k8s/pvc.yaml`). 
The directory +layout on the volume: + +``` +/var/lib/zeroboot/ +├── vmlinux-fc ← kernel binary (~21 MB) +├── rootfs-python.ext4 ← base rootfs image (pre-loaded numpy/pandas) +├── python/ ← snapshot created by entrypoint on first boot +│ ├── snapshot/ +│ │ ├── vmstate ← CPU register state (~14 KB) +│ │ └── mem ← 512 MB memory image (CoW source) +│ └── rootfs_path +└── api_keys.json ← optional API key list +``` + +> **Populate the volume before first deploy.** Copy `vmlinux-fc` and +> `rootfs-python.ext4` to the PVC (e.g., via a one-shot init Job or manual +> `kubectl cp`). The entrypoint will create the snapshot automatically on +> first boot if it is missing. + +### Storage class recommendations + +| Cloud | StorageClass | Notes | +|---|---|---| +| AWS | `gp3` | Default for EKS; good random-read IOPS for CoW page faults | +| GCP | `premium-rwo` | SSD-backed, low latency | +| Azure | `managed-premium` | SSD, required for sub-ms fork performance | + +Avoid `gp2` or spinning-disk storage classes — the CoW page fault path is +latency-sensitive and benefits from SSD IOPS. + +--- + +## Deploying + +```bash +# 1. Create namespace +kubectl apply -f deploy/k8s/namespace.yaml + +# 2. Create PVC +kubectl apply -f deploy/k8s/pvc.yaml + +# 3. Deploy (2 replicas by default) +kubectl apply -f deploy/k8s/deployment.yaml +kubectl apply -f deploy/k8s/service.yaml + +# 4. Watch rollout — first boot takes ~30s for template creation +kubectl rollout status deployment/zeroboot -n zeroboot + +# 5. Verify +kubectl exec -n zeroboot deploy/zeroboot -- curl -s localhost:8080/v1/health +``` + +--- + +## Autoscaling + +### Why not CPU-based HPA? + +Zeroboot workloads are **memory-bound**, not CPU-bound. Each concurrent fork +adds ~265 KB of CoW memory pressure. CPU utilization is a poor scaling signal. + +### Custom metric HPA + +The `zeroboot_concurrent_forks` gauge (exposed at `/v1/metrics`) reflects the +number of active VM sandboxes per Pod. 
Use this for HPA: + +```bash +# Apply HPA (requires prometheus-adapter, see below) +kubectl apply -f deploy/k8s/hpa.yaml +``` + +Scale-out triggers when average concurrent forks per Pod exceeds 800. Adjust +this threshold based on your Node's available memory: + +``` +max_concurrent_forks ≈ (node_memory - 2GB_overhead) / 265KB_per_fork +# Example: 8GB node → (8192 - 2048) / 0.265 ≈ 23,000 theoretical max +# Practical limit with snapshot RSS: ~1000–2000 per Pod +``` + +### Exposing the metric via prometheus-adapter + +Add to your `prometheus-adapter` ConfigMap: + +```yaml +rules: + - seriesQuery: 'zeroboot_concurrent_forks{namespace!="",pod!=""}' + resources: + overrides: + namespace: {resource: "namespace"} + pod: {resource: "pod"} + name: + matches: "zeroboot_concurrent_forks" + as: "zeroboot_concurrent_forks" + metricsQuery: 'avg_over_time(zeroboot_concurrent_forks{<<.LabelMatchers>>}[1m])' +``` + +### Karpenter node provisioning + +For cluster autoscaling with Karpenter, create a NodePool that targets KVM-capable instances: + +```yaml +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: zeroboot-kvm +spec: + template: + metadata: + labels: + kvm-capable: "true" + spec: + requirements: + - key: karpenter.k8s.aws/instance-family + operator: In + values: [c6i, c7i, c8i, m6i, m7i] + - key: karpenter.k8s.aws/instance-size + operator: In + values: [xlarge, 2xlarge, 4xlarge] + - key: kubernetes.io/arch + operator: In + values: [amd64] + limits: + cpu: 100 +``` + +> **Scaling latency note:** Karpenter takes 60–120 s to provision a new KVM +> node (EC2 start + kubelet join + Pod scheduling + snapshot load). Karpenter +> handles **capacity expansion** for sustained load — it is not designed to +> absorb sudden request spikes. Size your warm pool (`minReplicas`) to handle +> peak burst traffic; use HPA to scale within the existing node pool first. + +--- + +## Monitoring + +Zeroboot exposes Prometheus metrics at `/v1/metrics` (not `/metrics`). 
+ +### ServiceMonitor (Prometheus Operator) + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: zeroboot + namespace: zeroboot +spec: + selector: + matchLabels: + app: zeroboot + endpoints: + - port: http + path: /v1/metrics + interval: 15s +``` + +### Key metrics + +| Metric | Type | Description | +|---|---|---| +| `zeroboot_concurrent_forks` | gauge | Active VM sandboxes — **use for HPA** | +| `zeroboot_fork_time_milliseconds` | histogram | Fork latency (P50/P99) | +| `zeroboot_exec_time_milliseconds` | histogram | Code execution latency | +| `zeroboot_total_time_milliseconds` | histogram | End-to-end request latency | +| `zeroboot_total_executions{status}` | counter | Success / error / timeout counts | +| `zeroboot_memory_usage_bytes` | gauge | Process RSS — monitor for memory pressure | + +--- + +## Configuration reference + +All configuration is via environment variables (set in `deployment.yaml`): + +| Variable | Default | Description | +|---|---|---| +| `ZEROBOOT_WORKDIR` | `/var/lib/zeroboot` | Working directory (PVC mount point) | +| `ZEROBOOT_KERNEL` | `$WORKDIR/vmlinux-fc` | Path to kernel binary | +| `ZEROBOOT_ROOTFS_PYTHON` | `$WORKDIR/rootfs-python.ext4` | Python rootfs image | +| `ZEROBOOT_ROOTFS_NODE` | _(unset)_ | Node.js rootfs image (optional) | +| `ZEROBOOT_PORT` | `8080` | API server port | +| `ZEROBOOT_TEMPLATE_WAIT` | `15` | Seconds to wait during template snapshot | +| `ZEROBOOT_API_KEYS_FILE` | _(unset)_ | Path to JSON array of API keys | + +--- + +## Limitations + +- **Single-node fork pool:** All sandboxes on a Pod run on the same physical Node. + Scale out by adding Pods (and Nodes), not by resizing individual Pods. +- **ReadWriteOnce PVC:** Each Pod needs its own PVC (`ReadWriteOnce`). If you + use a `StatefulSet` instead of a `Deployment`, each replica gets its own PVC + automatically via `volumeClaimTemplates`. 
+- **Snapshot on first boot:** The first Pod startup after PVC creation takes + ~15–30 s while the template snapshot is created. Subsequent restarts are fast + (~2 s) because the snapshot is persisted on the PVC. +- **x86_64 only:** Firecracker and the guest kernel are x86_64. ARM nodes are + not supported. From 816f2bf056965138b0e31b7d6a5491c05059c0ef Mon Sep 17 00:00:00 2001 From: chaosreload Date: Tue, 24 Mar 2026 10:09:01 +0000 Subject: [PATCH 2/7] docs: clarify EC2 instance KVM requirements c8i/m8i/r8i support nested virtualization on regular (non-metal) sizes via --cpu-options NestedVirtualization=enabled. Other families (c6i, m6i etc.) require .metal sizes for KVM access. Update instance table to make this distinction explicit. --- docs/KUBERNETES.md | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/docs/KUBERNETES.md b/docs/KUBERNETES.md index 3cbcdab..4eee6fb 100644 --- a/docs/KUBERNETES.md +++ b/docs/KUBERNETES.md @@ -32,15 +32,27 @@ role is capacity management: health checks, rolling updates, and horizontal scal Not all EC2 instance types expose `/dev/kvm`. The following families support KVM and are suitable for zeroboot: -| Family | Notes | -|---|---| -| `c6i`, `c6a`, `c7i`, `c8i` | ✅ Recommended — Nitro-based, no nested virt needed | -| `m6i`, `m7i`, `m8i` | ✅ General-purpose, KVM available | -| `r6i`, `r7i` | ✅ Memory-optimized — good for high snapshot concurrency | -| `c5`, `m5` | ✅ Older Nitro generation, still works | -| `t3`, `t4g` | ❌ Burstable — `/dev/kvm` not available | -| `t2` | ❌ No KVM | -| Any ARM (`*g`) | ❌ Architecture mismatch — Firecracker x86_64 binary required | +| Family | KVM method | Notes | +|---|---|---| +| `c8i`, `m8i`, `r8i` | ✅ **Nested virtualization** | **Recommended** — Intel 8th-gen Nitro platform; supports nested virt without metal. 
Enable at launch via `--cpu-options NestedVirtualization=enabled` (requires AWS CLI ≥ v2.34) | +| `c6i`, `c6a`, `c7i`, `m6i`, `m7i`, `r6i`, `r7i` | ✅ Bare-metal only | KVM available only on `.metal` sizes (e.g. `c6i.metal`) | +| `c5`, `m5`, `r5` | ✅ Bare-metal only | Older Nitro generation; `.metal` sizes only | +| `t3`, `t4g` | ❌ Not available | Burstable — `/dev/kvm` not exposed | +| `t2` | ❌ Not available | No Nitro, no KVM | +| Any ARM (`*g`) | ❌ Architecture mismatch | Firecracker x86_64 binary required | + +**TL;DR for EKS node groups:** Use `c8i`, `m8i`, or `r8i` with nested virtualization +enabled — these are the only families whose regular (non-`.metal`) instance +sizes expose `/dev/kvm`. All other families require `.metal` sizes, which are significantly +more expensive and harder to schedule in K8s. + +```bash +# Enable nested virtualization when launching a new instance (c8i/m8i/r8i only) +aws ec2 run-instances \ + --instance-type c8i.xlarge \ + --cpu-options "NestedVirtualization=enabled" \ + ... +``` > On GCP: `n2`, `n2d`, `c2`, `c3` families support KVM. > On Azure: `Dv3`, `Ev3`, `Dsv3` with nested virtualization enabled. From c09b9b5b1e2c432d2cfae22328ba249ba1dd0b71 Mon Sep 17 00:00:00 2001 From: chaosreload Date: Wed, 25 Mar 2026 02:28:30 +0000 Subject: [PATCH 3/7] docs: add eksctl configs for EKS cluster and KVM node group Three files covering two scenarios: - eks-with-kvm-nodegroup.yaml: cluster + KVM node group in one shot - eks-cluster-only.yaml: cluster only (no node groups) - eks-add-kvm-nodegroup.yaml: add KVM node group to existing cluster All configs use c8i.xlarge with cpuOptions.nestedVirtualization=enabled, AmazonLinux2023 AMI, and aws-ebs-csi-driver addon for PVC support. 
--- deploy/eks/eks-add-kvm-nodegroup.yaml | 37 +++++++++++++++ deploy/eks/eks-cluster-only.yaml | 28 ++++++++++++ deploy/eks/eks-with-kvm-nodegroup.yaml | 63 ++++++++++++++++++++++++++ 3 files changed, 128 insertions(+) create mode 100644 deploy/eks/eks-add-kvm-nodegroup.yaml create mode 100644 deploy/eks/eks-cluster-only.yaml create mode 100644 deploy/eks/eks-with-kvm-nodegroup.yaml diff --git a/deploy/eks/eks-add-kvm-nodegroup.yaml b/deploy/eks/eks-add-kvm-nodegroup.yaml new file mode 100644 index 0000000..6b5a761 --- /dev/null +++ b/deploy/eks/eks-add-kvm-nodegroup.yaml @@ -0,0 +1,37 @@ +# Scenario 2b: Add KVM node group to an EXISTING EKS cluster +# Usage: +# eksctl create nodegroup -f eks-add-kvm-nodegroup.yaml + +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: zeroboot-eks # must match existing cluster name + region: ap-southeast-1 # must match existing cluster region + +managedNodeGroups: + - name: zeroboot-kvm + instanceType: c8i.xlarge + minSize: 1 + maxSize: 5 + desiredCapacity: 2 + amiFamily: AmazonLinux2023 + volumeSize: 50 + privateNetworking: true + + cpuOptions: + nestedVirtualization: enabled + + labels: + kvm-capable: "true" + workload: zeroboot + + iam: + attachPolicyARNs: + - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy + - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly + - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy + + tags: + Project: zeroboot + ManagedBy: eksctl diff --git a/deploy/eks/eks-cluster-only.yaml b/deploy/eks/eks-cluster-only.yaml new file mode 100644 index 0000000..54fb2ba --- /dev/null +++ b/deploy/eks/eks-cluster-only.yaml @@ -0,0 +1,28 @@ +# Scenario 2a: Create EKS cluster WITHOUT any node group +# Usage: +# eksctl create cluster -f eks-cluster-only.yaml + +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: zeroboot-eks + region: ap-southeast-1 + version: "1.31" + +vpc: + clusterEndpoints: + privateAccess: true + publicAccess: true + +# Explicitly no node groups 
at cluster creation time +managedNodeGroups: [] + +iam: + withOIDC: true + +addons: + - name: aws-ebs-csi-driver + version: latest + attachPolicyARNs: + - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy diff --git a/deploy/eks/eks-with-kvm-nodegroup.yaml b/deploy/eks/eks-with-kvm-nodegroup.yaml new file mode 100644 index 0000000..1cf15d2 --- /dev/null +++ b/deploy/eks/eks-with-kvm-nodegroup.yaml @@ -0,0 +1,63 @@ +# Scenario 1: Create EKS cluster with KVM node group in one shot +# Usage: +# eksctl create cluster -f eks-with-kvm-nodegroup.yaml + +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: zeroboot-eks + region: ap-southeast-1 + version: "1.31" + +# Cluster-level VPC (auto-created) +vpc: + clusterEndpoints: + privateAccess: true + publicAccess: true + +# No default node group — only our KVM-capable group +managedNodeGroups: + - name: zeroboot-kvm + instanceType: c8i.xlarge + minSize: 1 + maxSize: 5 + desiredCapacity: 2 + amiFamily: AmazonLinux2023 + volumeSize: 50 # GB — enough for OS + Docker images + zeroboot binary + privateNetworking: true # place nodes in private subnets + + # Enable nested virtualization — requires c8i/m8i/r8i + cpuOptions: + nestedVirtualization: enabled + + # Labels used by Deployment nodeSelector and Karpenter NodePool + labels: + kvm-capable: "true" + workload: zeroboot + + # Optional: taint to reserve these nodes exclusively for zeroboot + # taints: + # - key: zeroboot + # value: "true" + # effect: NoSchedule + + iam: + attachPolicyARNs: + - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy + - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly + - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy + + tags: + Project: zeroboot + ManagedBy: eksctl + +# Add-ons needed for EBS PVC support +addons: + - name: aws-ebs-csi-driver + version: latest + attachPolicyARNs: + - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy + +iam: + withOIDC: true # required for add-on IAM role binding (IRSA) 
From 3b44e2e38b2ecd1a679d3127f89ca9520cc41c5d Mon Sep 17 00:00:00 2001 From: chaosreload Date: Wed, 25 Mar 2026 02:45:16 +0000 Subject: [PATCH 4/7] docs: add K8s PR validation plan --- docs/VALIDATION-PLAN.md | 337 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 docs/VALIDATION-PLAN.md diff --git a/docs/VALIDATION-PLAN.md b/docs/VALIDATION-PLAN.md new file mode 100644 index 0000000..2bf612c --- /dev/null +++ b/docs/VALIDATION-PLAN.md @@ -0,0 +1,337 @@ +# Zeroboot K8s PR Validation Plan + +**PR:** chaosreload/zeroboot → feat/kubernetes-deployment → [PR #13](https://github.com/zerobootdev/zeroboot/pull/13) +**目标:** 在真实 AWS EKS 环境验证 Dockerfile、K8s manifests、eksctl 配置的正确性 +**执行人:** openclaw-research +**验证完成后:** 将结果反馈给 openclaw-coding,用于完善 PR + +--- + +## 前置条件 + +- AWS 账号,有 EKS / EC2 / ECR 创建权限 +- dev-server 可访问(已有 eksctl 0.224、aws cli、docker、kubectl) +- dev-server 的 IAM role 有足够权限(admin 级别,已确认) + +--- + +## 验证场景 + +### 场景 A:新建集群 + KVM node group(一步到位) +### 场景 B:先建集群,后追加 KVM node group + +两个场景都需要跑,验证 eksctl 配置的正确性。 + +--- + +## Step 1:准备代码 + +```bash +# 在 dev-server 上 +cd /data/projects/chaosreload/study/repo/chaosreload/zeroboot +git checkout feat/kubernetes-deployment +git pull origin feat/kubernetes-deployment + +# 确认文件结构 +ls Dockerfile docker/entrypoint.sh deploy/k8s/ deploy/eks/ docs/KUBERNETES.md +``` + +**预期输出:** 所有文件存在,无报错 + +--- + +## Step 2:构建 Docker 镜像 + +```bash +# 在 dev-server 上构建 +cd /data/projects/chaosreload/study/repo/chaosreload/zeroboot +docker build -t zeroboot:test . 
+``` + +**预期:** +- 构建成功,无 error +- 最终镜像大约 300-500 MB +- `docker images | grep zeroboot` 能看到镜像 + +**记录:** +- [ ] 构建是否成功 +- [ ] 构建耗时(大概) +- [ ] 镜像大小 + +--- + +## Step 3:推镜像到 ECR + +```bash +# 创建 ECR repo(如果不存在) +AWS_REGION=ap-southeast-1 +AWS_ACCOUNT=$(aws sts get-caller-identity --query Account --output text) +ECR_REPO="${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/zeroboot" + +aws ecr create-repository --repository-name zeroboot --region $AWS_REGION 2>/dev/null || true + +# Login + push +aws ecr get-login-password --region $AWS_REGION | \ + docker login --username AWS --password-stdin "${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com" + +docker tag zeroboot:test $ECR_REPO:test +docker push $ECR_REPO:test +``` + +**预期:** push 成功,ECR 里有 `zeroboot:test` 镜像 + +**记录:** +- [ ] ECR push 是否成功 +- [ ] 镜像 URI(后续 deployment.yaml 会用到) + +--- + +## Step 4A:场景 A — 新建集群 + KVM node group + +```bash +cd /data/projects/chaosreload/study/repo/chaosreload/zeroboot + +# 编辑 region(如需要) +# deploy/eks/eks-with-kvm-nodegroup.yaml 默认 ap-southeast-1 + +eksctl create cluster -f deploy/eks/eks-with-kvm-nodegroup.yaml +# 预计耗时:15-20 分钟 +``` + +**预期:** +- eksctl 无报错完成 +- `kubectl get nodes -l kvm-capable=true` 返回 2 个节点 +- 节点状态 `Ready` + +**验证嵌套虚拟化:** +```bash +NODE=$(kubectl get nodes -l kvm-capable=true -o jsonpath='{.items[0].metadata.name}') +kubectl debug node/$NODE -it --image=ubuntu -- bash -c "ls -la /dev/kvm" +# 预期:crw-rw-rw- 1 root kvm 10, 232 ... 
+``` + +**记录:** +- [ ] eksctl 是否成功 +- [ ] 节点数量和状态 +- [ ] `/dev/kvm` 是否存在 +- [ ] 如有报错,粘贴完整错误信息 + +--- + +## Step 4B:场景 B — 先建集群,后加 node group + +```bash +# Step B-1:只建集群 +eksctl create cluster -f deploy/eks/eks-cluster-only.yaml +# 预计耗时:12-15 分钟(无 node group,更快) + +# 确认集群就绪(无节点) +kubectl get nodes +# 预期:No resources found + +# Step B-2:追加 KVM node group +eksctl create nodegroup -f deploy/eks/eks-add-kvm-nodegroup.yaml +# 预计耗时:5-8 分钟 +``` + +**预期:** 同场景 A,节点 Ready,`/dev/kvm` 存在 + +**记录:** +- [ ] 两步是否都成功 +- [ ] 与场景 A 有无差异 + +--- + +## Step 5:部署 KVM device plugin + +```bash +# 安装 kubevirt KVM device plugin(DaemonSet) +kubectl apply -f https://github.com/kubevirt/kubevirt/releases/latest/download/kubevirt-operator.yaml +kubectl apply -f https://github.com/kubevirt/kubevirt/releases/latest/download/kubevirt-cr.yaml + +# 等待就绪(约 2-3 分钟) +kubectl wait --for=condition=ready pod -l kubevirt.io=virt-handler -n kubevirt --timeout=180s + +# 验证节点上 kvm resource 可用 +kubectl describe node -l kvm-capable=true | grep -A5 "devices.kubevirt.io/kvm" +# 预期:devices.kubevirt.io/kvm: 1(Capacity 和 Allocatable 里都有) +``` + +**记录:** +- [ ] device plugin 安装是否成功 +- [ ] 节点是否显示 `devices.kubevirt.io/kvm: 1` +- [ ] 如有报错,粘贴错误 + +--- + +## Step 6:准备 PVC 数据(vmlinux + rootfs) + +PVC 创建后是空的,需要把 vmlinux 和 rootfs 上传进去。用一个 init Job 完成: + +```bash +# 创建 namespace 和 PVC +kubectl apply -f deploy/k8s/namespace.yaml +kubectl apply -f deploy/k8s/pvc.yaml + +# 等 PVC Bound +kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/zeroboot-data -n zeroboot --timeout=60s + +# 创建临时 pod 挂载 PVC,用于上传文件 +kubectl run data-loader --image=ubuntu --restart=Never -n zeroboot \ + --overrides='{"spec":{"volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"zeroboot-data"}}],"containers":[{"name":"loader","image":"ubuntu","command":["sleep","3600"],"volumeMounts":[{"name":"data","mountPath":"/data"}]}]}}' + +kubectl wait --for=condition=ready pod/data-loader -n zeroboot --timeout=60s + +# 从 dev-server 上传 vmlinux 和 rootfs +# 
(在 dev-server 上执行,需要 kubectl 访问集群) +kubectl cp ~/fc-exp/vmlinux.bin zeroboot/data-loader:/data/vmlinux-fc +kubectl cp ~/zeroboot-work5/rootfs.ext4 zeroboot/data-loader:/data/rootfs-python.ext4 + +# 确认文件已上传 +kubectl exec -n zeroboot data-loader -- ls -lh /data/ +# 预期:vmlinux-fc (~21MB), rootfs-python.ext4 (~500MB) + +# 清理临时 pod +kubectl delete pod data-loader -n zeroboot +``` + +**记录:** +- [ ] PVC 是否 Bound +- [ ] 文件上传是否成功 +- [ ] 文件大小是否正确 + +--- + +## Step 7:部署 zeroboot + +```bash +# 更新 deployment.yaml 里的镜像地址 +# 把 ghcr.io/zerobootdev/zeroboot:latest 替换成 ECR 地址 +ECR_IMAGE="${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/zeroboot:test" + +sed -i "s|ghcr.io/zerobootdev/zeroboot:latest|${ECR_IMAGE}|" deploy/k8s/deployment.yaml + +# 部署 +kubectl apply -f deploy/k8s/deployment.yaml +kubectl apply -f deploy/k8s/service.yaml + +# 监控 rollout(首次启动约 30s,等待 snapshot 创建) +kubectl rollout status deployment/zeroboot -n zeroboot --timeout=120s + +# 查看 Pod 日志(确认 template 创建成功) +kubectl logs -n zeroboot -l app=zeroboot --follow +``` + +**预期日志:** +``` +No snapshot found — creating Python template (this takes ~15s)... +Template created. +Starting zeroboot API server on port 8080... +``` + +**第二个 Pod 重启(snapshot 已在 PVC):** +``` +Snapshot found — skipping template creation. +Starting zeroboot API server on port 8080... +``` + +**记录:** +- [ ] Pod 是否进入 Running 状态 +- [ ] 日志里 template 创建是否成功 +- [ ] readiness probe 是否通过 +- [ ] 两个 Pod 是否分布在不同节点(podAntiAffinity 验证) + ```bash + kubectl get pods -n zeroboot -o wide + # NODE 列应该是两个不同的节点 + ``` + +--- + +## Step 8:端到端功能验证 + +```bash +# port-forward 到本地 +kubectl port-forward svc/zeroboot 8080:80 -n zeroboot & + +# 健康检查 +curl -s localhost:8080/v1/health +# 预期:{"status":"ok"} + +# 执行 Python 代码 +curl -s -X POST localhost:8080/v1/exec \ + -H 'Content-Type: application/json' \ + -d '{"code": "print(1+1)"}' | jq . 
+# 预期:{"stdout":"2","exit_code":0,"fork_time_ms":<1,...} + +# numpy(验证预加载) +curl -s -X POST localhost:8080/v1/exec \ + -H 'Content-Type: application/json' \ + -d '{"code": "import numpy as np; print(np.array([1,2,3]).mean())"}' | jq . +# 预期:{"stdout":"2.0","exit_code":0,...} + +# 查看 Prometheus metrics +curl -s localhost:8080/v1/metrics | grep zeroboot_concurrent_forks +# 预期:zeroboot_concurrent_forks 0 +``` + +**记录:** +- [ ] `/v1/health` 返回 ok +- [ ] `print(1+1)` 返回 `stdout: "2"` +- [ ] `fork_time_ms` 值(是否 <10ms,理想 <2ms) +- [ ] numpy 执行是否成功 +- [ ] `/v1/metrics` 是否有 `zeroboot_concurrent_forks` + +--- + +## Step 9:清理 + +```bash +# 删除 K8s 资源 +kubectl delete -f deploy/k8s/ + +# 删除集群(场景 A 或 B,根据实际情况选) +eksctl delete cluster --name zeroboot-eks --region ap-southeast-1 + +# 删除 ECR repo(可选) +aws ecr delete-repository --repository-name zeroboot --region ap-southeast-1 --force +``` + +--- + +## 结果汇总模板 + +验证完成后,请将以下内容反馈给 openclaw-coding: + +``` +## 验证结果 + +**环境:** ap-southeast-1 / c8i.xlarge / EKS 1.31 + +### 场景 A(新建集群) +- eksctl create cluster: ✅/❌ +- /dev/kvm 可访问: ✅/❌ +- KVM device plugin: ✅/❌ + +### 场景 B(追加 node group) +- eksctl create cluster (only): ✅/❌ +- eksctl create nodegroup: ✅/❌ + +### Docker 镜像 +- docker build: ✅/❌ 耗时: ___ 大小: ___ +- ECR push: ✅/❌ + +### K8s 部署 +- Pod 状态: ✅Running / ❌ (错误: ___) +- Template 创建日志: ✅正常 / ❌ +- PodAntiAffinity(分布在不同节点): ✅/❌ +- readiness probe: ✅/❌ + +### 功能验证 +- /v1/health: ✅/❌ +- print(1+1): ✅/❌ fork_time_ms: ___ +- numpy: ✅/❌ +- /v1/metrics: ✅/❌ + +### 发现的问题 +(列出所有报错、需要修改的地方、文档不清楚的地方) +``` From bef331a20d525dcd09072bf15324e8636da373db Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 25 Mar 2026 08:29:06 +0000 Subject: [PATCH 5/7] fix: address EKS deployment issues found in validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit P0 fixes: - serve: change default bind from 127.0.0.1 to 0.0.0.0 to fix K8s health probes and Service routing; add --bind flag for explicit control - entrypoint.sh: 
pass $ZEROBOOT_BIND (default 0.0.0.0) to serve command P1 fixes: - deployment.yaml: replace devices.kubevirt.io/kvm (requires kubevirt) with privileged: true + hostPath /dev/kvm (works on plain EKS) - deployment.yaml: increase livenessProbe initialDelaySeconds from 60 to 120; template creation takes ~19s, 60s was too tight on slow EBS attach - deployment.yaml: add /dev/kvm hostPath volume and mount EKS self-managed node group (new file): - deploy/eks/eks-self-managed-kvm.sh: end-to-end script to create a self-managed ASG + Launch Template with CpuOptions.NestedVirtualization=enabled EKS managed node groups silently drop CpuOptions — self-managed bypasses this - deploy/eks/eks-with-kvm-nodegroup.yaml: add warning about CpuOptions being dropped by managed node groups (documented as a gap vs AWS official docs) Docs: - docs/KUBERNETES.md: add EKS managed vs self-managed section with root cause analysis and the recommended self-managed approach - docs/KUBERNETES.md: add server bind address configuration note - docs/KUBERNETES.md: add ZEROBOOT_BIND env var reference Validated on: EKS 1.31 / ap-southeast-1 / c8i.xlarge (nested virt) Ref: chaosreload/zeroboot PR #13 --- Dockerfile | 2 +- deploy/eks/eks-self-managed-kvm.sh | 243 +++++++++++++++++++++++++ deploy/eks/eks-with-kvm-nodegroup.yaml | 17 ++ deploy/k8s/deployment.yaml | 20 +- docker/entrypoint.sh | 3 +- docs/KUBERNETES.md | 75 ++++++++ src/main.rs | 24 ++- 7 files changed, 373 insertions(+), 11 deletions(-) create mode 100755 deploy/eks/eks-self-managed-kvm.sh diff --git a/Dockerfile b/Dockerfile index 82fb5d8..86dea68 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ # See deploy/k8s/ for Kubernetes manifests. 
# ─── Stage 1: Build zeroboot binary ────────────────────────────────────────── -FROM rust:1.80-bookworm AS builder +FROM rust:1.86-bookworm AS builder WORKDIR /build diff --git a/deploy/eks/eks-self-managed-kvm.sh b/deploy/eks/eks-self-managed-kvm.sh new file mode 100755 index 0000000..aaac251 --- /dev/null +++ b/deploy/eks/eks-self-managed-kvm.sh @@ -0,0 +1,243 @@ +#!/usr/bin/env bash +# deploy/eks/eks-self-managed-kvm.sh +# +# Creates a self-managed EKS node group with nested virtualization enabled. +# +# WHY SELF-MANAGED? +# EKS Managed Node Groups silently drop CpuOptions when generating their +# internal Launch Template — even when you supply CpuOptions in your own LT. +# Self-managed ASG + Launch Template bypasses EKS entirely, so CpuOptions +# (including NestedVirtualization=enabled) is applied directly to the instance. +# +# USAGE: +# export AWS_PROFILE=your-profile +# export CLUSTER_NAME=zeroboot-eks +# export REGION=ap-southeast-1 +# bash eks-self-managed-kvm.sh +# +# REQUIREMENTS: +# - aws cli v2 +# - kubectl configured for the target cluster +# - eksctl (for cluster-only creation, see eks-cluster-only.yaml) +# +# WHAT THIS SCRIPT DOES: +# 1. Creates IAM role + instance profile for worker nodes +# 2. Registers node role with EKS (access entry) +# 3. Fetches cluster params (endpoint, cert, subnets, SGs) +# 4. Queries latest EKS-optimized AL2023 AMI +# 5. Creates Launch Template with CpuOptions.NestedVirtualization=enabled +# 6. Creates Auto Scaling Group (default: min 1, desired 2, max 4 nodes) +# 7. Verifies /dev/kvm is present on nodes + +set -euo pipefail + +: "${CLUSTER_NAME:=zeroboot-eks}" +: "${REGION:=ap-southeast-1}" +: "${INSTANCE_TYPE:=c8i.xlarge}" +: "${K8S_VERSION:=1.31}" +: "${MIN_SIZE:=1}" +: "${MAX_SIZE:=4}" +: "${DESIRED:=2}" +: "${NODE_ROLE_NAME:=zeroboot-eks-node-role}" +: "${INSTANCE_PROFILE_NAME:=zeroboot-eks-node-profile}" +: "${LT_NAME:=zeroboot-kvm-nested-virt}" +: "${ASG_NAME:=zeroboot-kvm-self-managed}" + +echo "==> Fetching cluster info..." 
+ENDPOINT=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.endpoint" --output text) +CERT_AUTH=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.certificateAuthority.data" --output text) +CIDR=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.kubernetesNetworkConfig.serviceIpv4Cidr" --output text) +CLUSTER_SG=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.resourcesVpcConfig.clusterSecurityGroupId" --output text) +SUBNETS=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.resourcesVpcConfig.subnetIds" --output text | tr '\t' ',') +ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + +echo " Cluster: $CLUSTER_NAME" +echo " Region: $REGION" +echo " Account: $ACCOUNT_ID" +echo " ClusterSG: $CLUSTER_SG" +echo " Subnets: $SUBNETS" + +# ─── Step 1: IAM role ───────────────────────────────────────────────────────── +echo "" +echo "==> Creating IAM node role: $NODE_ROLE_NAME" + +if aws iam get-role --role-name "$NODE_ROLE_NAME" &>/dev/null; then + echo " Role already exists, skipping." +else + aws iam create-role \ + --role-name "$NODE_ROLE_NAME" \ + --assume-role-policy-document '{ + "Version":"2012-10-17", + "Statement":[{"Effect":"Allow","Principal":{"Service":"ec2.amazonaws.com"},"Action":"sts:AssumeRole"}] + }' > /dev/null + + for POLICY in AmazonEKSWorkerNodePolicy AmazonEKS_CNI_Policy AmazonEC2ContainerRegistryReadOnly; do + aws iam attach-role-policy \ + --role-name "$NODE_ROLE_NAME" \ + --policy-arn "arn:aws:iam::aws:policy/${POLICY}" + done + echo " Role created." +fi + +# Instance profile +if aws iam get-instance-profile --instance-profile-name "$INSTANCE_PROFILE_NAME" &>/dev/null; then + echo " Instance profile already exists, skipping." 
+else + aws iam create-instance-profile --instance-profile-name "$INSTANCE_PROFILE_NAME" > /dev/null + aws iam add-role-to-instance-profile \ + --instance-profile-name "$INSTANCE_PROFILE_NAME" \ + --role-name "$NODE_ROLE_NAME" + echo " Instance profile created. Waiting 15s for IAM propagation..." + sleep 15 +fi + +NODE_ROLE_ARN="arn:aws:iam::${ACCOUNT_ID}:role/${NODE_ROLE_NAME}" +INSTANCE_PROFILE_ARN="arn:aws:iam::${ACCOUNT_ID}:instance-profile/${INSTANCE_PROFILE_NAME}" + +# ─── Step 2: EKS access entry ──────────────────────────────────────────────── +echo "" +echo "==> Registering node role with EKS cluster..." +aws eks create-access-entry \ + --cluster-name "$CLUSTER_NAME" \ + --principal-arn "$NODE_ROLE_ARN" \ + --type EC2_LINUX \ + --region "$REGION" 2>/dev/null || echo " Access entry already exists." + +# ─── Step 3: AMI ───────────────────────────────────────────────────────────── +echo "" +echo "==> Fetching latest EKS-optimized AMI (AL2023, K8s ${K8S_VERSION})..." +AMI_ID=$(aws ssm get-parameter \ + --name "/aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2023/x86_64/standard/recommended/image_id" \ + --region "$REGION" --query "Parameter.Value" --output text) +echo " AMI: $AMI_ID" + +# ─── Step 4: UserData (AL2023 nodeadm format) ──────────────────────────────── +echo "" +echo "==> Preparing UserData..." 
+USERDATA=$(cat << EOF +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="//" + +--// +Content-Type: application/node.eks.aws + +--- +apiVersion: node.eks.aws/v1alpha1 +kind: NodeConfig +spec: + cluster: + apiServerEndpoint: ${ENDPOINT} + certificateAuthority: ${CERT_AUTH} + cidr: ${CIDR} + name: ${CLUSTER_NAME} + kubelet: + config: + maxPods: 110 + flags: + - "--node-labels=kvm-capable=true,workload=zeroboot" + +--//-- +EOF +) +USERDATA_B64=$(echo "$USERDATA" | base64 -w 0) + +# ─── Step 5: Launch Template ────────────────────────────────────────────────── +echo "" +echo "==> Creating Launch Template: $LT_NAME" +echo " (CpuOptions.NestedVirtualization=enabled — this is the key field that" +echo " EKS managed node groups silently drop)" + +LT_DATA=$(cat << EOF +{ + "ImageId": "${AMI_ID}", + "InstanceType": "${INSTANCE_TYPE}", + "CpuOptions": {"NestedVirtualization": "enabled"}, + "SecurityGroupIds": ["${CLUSTER_SG}"], + "MetadataOptions": {"HttpTokens": "required", "HttpPutResponseHopLimit": 2}, + "IamInstanceProfile": {"Arn": "${INSTANCE_PROFILE_ARN}"}, + "UserData": "${USERDATA_B64}", + "TagSpecifications": [{ + "ResourceType": "instance", + "Tags": [ + {"Key": "Name", "Value": "zeroboot-kvm-node"}, + {"Key": "kubernetes.io/cluster/${CLUSTER_NAME}", "Value": "owned"}, + {"Key": "kvm-capable", "Value": "true"} + ] + }] +} +EOF +) + +LT_RESULT=$(aws ec2 create-launch-template \ + --launch-template-name "$LT_NAME" \ + --region "$REGION" \ + --launch-template-data "$LT_DATA" \ + --output json 2>/dev/null || \ + aws ec2 describe-launch-templates \ + --launch-template-names "$LT_NAME" \ + --region "$REGION" \ + --query "LaunchTemplates[0]" --output json) + +LT_ID=$(echo "$LT_RESULT" | python3 -c " +import json,sys +d = json.load(sys.stdin) +# handle both create and describe responses +print(d.get('LaunchTemplate', d).get('LaunchTemplateId')) +") +LT_VERSION=$(aws ec2 describe-launch-template-versions \ + --launch-template-id "$LT_ID" --region "$REGION" \ + 
--query "LaunchTemplateVersions[-1].VersionNumber" --output text) + +echo " LT ID: $LT_ID" +echo " LT Version: $LT_VERSION" + +# ─── Step 6: Auto Scaling Group ─────────────────────────────────────────────── +echo "" +echo "==> Creating Auto Scaling Group: $ASG_NAME" +aws autoscaling create-auto-scaling-group \ + --auto-scaling-group-name "$ASG_NAME" \ + --launch-template "LaunchTemplateId=${LT_ID},Version=${LT_VERSION}" \ + --min-size "$MIN_SIZE" \ + --max-size "$MAX_SIZE" \ + --desired-capacity "$DESIRED" \ + --vpc-zone-identifier "$SUBNETS" \ + --tags \ + "Key=Name,Value=zeroboot-kvm-node,PropagateAtLaunch=true" \ + "Key=kubernetes.io/cluster/${CLUSTER_NAME},Value=owned,PropagateAtLaunch=true" \ + "Key=kvm-capable,Value=true,PropagateAtLaunch=true" \ + --region "$REGION" 2>/dev/null || echo " ASG already exists." + +echo " ASG created. Waiting for nodes to join (up to 3 minutes)..." +sleep 60 + +# ─── Step 7: Verify ─────────────────────────────────────────────────────────── +echo "" +echo "==> Verifying nodes..." +kubectl get nodes -l kvm-capable=true 2>/dev/null || echo " (kubectl not configured or nodes not yet ready)" + +echo "" +echo "==> Testing /dev/kvm access..." +kubectl run kvm-verify --restart=Never \ + --image=amazonlinux:2023 \ + --overrides='{"spec":{"nodeSelector":{"kvm-capable":"true"},"containers":[{"name":"c","image":"amazonlinux:2023","command":["sh","-c","ls -la /dev/kvm && grep -c vmx /proc/cpuinfo && cat /sys/module/kvm_intel/parameters/nested 2>/dev/null || echo N/A"],"securityContext":{"privileged":true}}]}}' \ + 2>/dev/null || true + +echo " Waiting 30s for pod to start..." +sleep 30 +kubectl logs kvm-verify 2>/dev/null || echo " Pod not ready yet, check manually: kubectl logs kvm-verify" +kubectl delete pod kvm-verify --ignore-not-found 2>/dev/null + +echo "" +echo "==> Done! Self-managed node group with nested virtualization created." 
+echo " - Launch Template: $LT_ID (v${LT_VERSION}) — CpuOptions.NestedVirtualization=enabled" +echo " - ASG: $ASG_NAME" +echo " - Node label: kvm-capable=true (already set via --node-labels in userdata)" +echo "" +echo " Next: Deploy zeroboot using deploy/k8s/" +echo " kubectl apply -f deploy/k8s/namespace.yaml" +echo " kubectl apply -f deploy/k8s/" diff --git a/deploy/eks/eks-with-kvm-nodegroup.yaml b/deploy/eks/eks-with-kvm-nodegroup.yaml index 1cf15d2..d2efc47 100644 --- a/deploy/eks/eks-with-kvm-nodegroup.yaml +++ b/deploy/eks/eks-with-kvm-nodegroup.yaml @@ -1,3 +1,20 @@ +# ⚠️ WARNING: EKS Managed Node Groups silently drop CpuOptions +# +# This file uses managedNodeGroups with cpuOptions.nestedVirtualization. +# However, EKS generates its own internal Launch Template from the user-supplied +# one, and CpuOptions is NOT carried over — resulting in nodes WITHOUT /dev/kvm +# even though the YAML appears correct. +# +# STATUS: AWS has acknowledged this as a documentation gap (CpuOptions is not in +# the official "blocked fields" list but is still ignored in practice). +# +# RECOMMENDED ALTERNATIVE: Use deploy/eks/eks-self-managed-kvm.sh instead. +# That script creates a Self-managed Node Group (ASG + Launch Template) where +# CpuOptions is applied directly without EKS interference. +# +# Use THIS file only if you have verified that your eksctl version correctly +# passes CpuOptions through (test with: kubectl run kvm-check ... ls /dev/kvm). 
+ # Scenario 1: Create EKS cluster with KVM node group in one shot # Usage: # eksctl create cluster -f eks-with-kvm-nodegroup.yaml diff --git a/deploy/k8s/deployment.yaml b/deploy/k8s/deployment.yaml index cf0d505..cf3edd4 100644 --- a/deploy/k8s/deployment.yaml +++ b/deploy/k8s/deployment.yaml @@ -40,16 +40,20 @@ spec: - name: http containerPort: 8080 - # ── KVM device — avoids privileged: true ─────────────────────────── + # ── KVM device access ────────────────────────────────────────────── + # Use privileged mode + hostPath mount (compatible with EKS without kubevirt). + # If KubeVirt device plugin is installed, you can replace this with: + # resources.limits: { devices.kubevirt.io/kvm: "1" } + # and remove securityContext.privileged. + securityContext: + privileged: true resources: requests: memory: "2Gi" cpu: "1" - devices.kubevirt.io/kvm: "1" limits: memory: "8Gi" cpu: "4" - devices.kubevirt.io/kvm: "1" env: - name: ZEROBOOT_WORKDIR @@ -65,6 +69,8 @@ spec: volumeMounts: - name: data mountPath: /var/lib/zeroboot + - name: kvm + mountPath: /dev/kvm # ── Health checks ────────────────────────────────────────────────── readinessProbe: @@ -79,11 +85,15 @@ spec: httpGet: path: /v1/health port: 8080 - initialDelaySeconds: 60 + initialDelaySeconds: 120 # template creation takes ~19s; allow 2x margin periodSeconds: 10 - failureThreshold: 3 + failureThreshold: 6 volumes: - name: data persistentVolumeClaim: claimName: zeroboot-data + - name: kvm + hostPath: + path: /dev/kvm + type: CharDevice diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 4c339ca..17d816d 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -6,6 +6,7 @@ KERNEL="${ZEROBOOT_KERNEL:-${WORKDIR}/vmlinux-fc}" ROOTFS_PYTHON="${ZEROBOOT_ROOTFS_PYTHON:-${WORKDIR}/rootfs-python.ext4}" ROOTFS_NODE="${ZEROBOOT_ROOTFS_NODE:-}" PORT="${ZEROBOOT_PORT:-8080}" +BIND="${ZEROBOOT_BIND:-0.0.0.0}" TEMPLATE_WAIT="${ZEROBOOT_TEMPLATE_WAIT:-15}" # ── Validate KVM access 
─────────────────────────────────────────────────────── @@ -66,4 +67,4 @@ fi # ── Start API server ────────────────────────────────────────────────────────── echo "Starting zeroboot API server on port ${PORT}..." -exec /usr/local/bin/zeroboot serve "$SERVE_TARGET" "$PORT" +exec /usr/local/bin/zeroboot serve "$SERVE_TARGET" "$PORT" --bind "$BIND" diff --git a/docs/KUBERNETES.md b/docs/KUBERNETES.md index 4eee6fb..a131b4c 100644 --- a/docs/KUBERNETES.md +++ b/docs/KUBERNETES.md @@ -66,6 +66,65 @@ kubectl label node kvm-capable=true The Deployment's `nodeSelector` uses this label to ensure Pods are only scheduled where `/dev/kvm` is available. +--- + +--- + +## EKS deployment: managed vs self-managed node groups + +> **TL;DR:** Use a self-managed node group. EKS managed node groups silently +> drop `CpuOptions.NestedVirtualization` — your nodes will start without `/dev/kvm`. + +### The problem with managed node groups + +EKS managed node groups take your Launch Template, then generate a new internal +Launch Template that merges only a subset of fields. `CpuOptions` is not in that +subset — even though it is **not** listed in the [official blocked-fields docs](https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-basics). + +Symptoms: +- `ls /dev/kvm` returns "No such file or directory" +- `/proc/cpuinfo` has no `vmx` flag +- `eksctl create nodegroup` succeeds, but KVM is silently missing + +You can verify by inspecting the EKS-generated internal Launch Template: + +```bash +# Get the internal LT id (not your LT) +aws ec2 describe-launch-template-versions --launch-template-id <internal-lt-id> --versions 1 --query "LaunchTemplateVersions[0].LaunchTemplateData.CpuOptions" +# Expected for managed nodegroup: null (even if you set it in your own LT) +``` + +### The solution: self-managed node group + +Create an Auto Scaling Group with a Launch Template directly — bypassing EKS's +internal LT generation. 
The provided script handles the full setup: + +```bash +export AWS_PROFILE=your-profile +export CLUSTER_NAME=zeroboot-eks +export REGION=ap-southeast-1 + +# Step 1: Create cluster without node group +eksctl create cluster -f deploy/eks/eks-cluster-only.yaml + +# Step 2: Create self-managed KVM node group +bash deploy/eks/eks-self-managed-kvm.sh +``` + +The script: +1. Creates an IAM node role + instance profile +2. Registers the role with EKS via `create-access-entry` +3. Queries the latest EKS-optimized AL2023 AMI +4. Creates a Launch Template with `CpuOptions.NestedVirtualization=enabled` +5. Creates an ASG referencing the LT directly +6. Verifies `/dev/kvm` is present on the new nodes + +> **Note:** `eksctl`'s `nodeGroups` (non-managed) do not support `launchTemplate`. +> Only `managedNodeGroups` does — but managed NGs drop `CpuOptions`. The script +> uses raw AWS CLI (`ec2 create-launch-template` + `autoscaling create-auto-scaling-group`) +> to sidestep both limitations. + + --- ## KVM device access without `privileged: true` @@ -280,6 +339,22 @@ All configuration is via environment variables (set in `deployment.yaml`): | `ZEROBOOT_TEMPLATE_WAIT` | `15` | Seconds to wait during template snapshot | | `ZEROBOOT_API_KEYS_FILE` | _(unset)_ | Path to JSON array of API keys | +--- + +### Server bind address + +By default, `zeroboot serve` binds to `0.0.0.0` (all interfaces), which is +required for Kubernetes health probes and Service routing. To restrict to +localhost (e.g. for local development), pass `--bind 127.0.0.1`: + +```bash +zeroboot serve python:/workdir/python 8080 --bind 127.0.0.1 +``` + +The `ZEROBOOT_BIND` environment variable (default: `0.0.0.0`) controls the +bind address when running via the Docker entrypoint. 
+ + --- ## Limitations diff --git a/src/main.rs b/src/main.rs index 527fa70..dd687bb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,7 +29,7 @@ fn main() -> Result<()> { eprintln!( " test-exec - Test executing a command in a fork" ); - eprintln!(" serve [port] - Start API server"); + eprintln!(" serve [port] [--bind addr] - Start API server (default bind: 0.0.0.0)"); Ok(()) } } @@ -390,10 +390,26 @@ fn load_api_keys() -> Vec { fn cmd_serve(args: &[String]) -> Result<()> { if args.len() < 1 { - bail!("Usage: zeroboot serve [,lang:workdir2,...] [port]"); + bail!("Usage: zeroboot serve [,lang:workdir2,...] [port] [--bind ]"); } let port: u16 = args.get(1).and_then(|p| p.parse().ok()).unwrap_or(8080); + // Parse optional --bind flag (default 0.0.0.0 for Kubernetes compatibility). + // K8s health probes and Service ClusterIP routing require the server to listen + // on all interfaces, not just localhost. + let bind_addr = { + let mut addr = "0.0.0.0".to_string(); + let mut i = 2; + while i + 1 < args.len() { + if args[i] == "--bind" { + addr = args[i + 1].clone(); + break; + } + i += 1; + } + addr + }; + // Parse workdir specs: "workdir" or "python:workdir1,node:workdir2" let mut templates = std::collections::HashMap::new(); for spec in args[0].split(',') { @@ -430,10 +446,10 @@ fn cmd_serve(args: &[String]) -> Result<()> { .route("/v1/metrics", axum::routing::get(metrics_handler)) .with_state(state); - let listener = tokio::net::TcpListener::bind(format!("127.0.0.1:{}", port)) + let listener = tokio::net::TcpListener::bind(format!("{}:{}", bind_addr, port)) .await .unwrap(); - eprintln!("Zeroboot API server listening on port {}", port); + eprintln!("Zeroboot API server listening on {}:{}", bind_addr, port); axum::serve( listener, app.into_make_service_with_connect_info::(), From 8809eb6da296a6360dd86d082ed5866c4423430f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 25 Mar 2026 08:32:37 +0000 Subject: [PATCH 6/7] chore: remove VALIDATION-PLAN.md (internal doc, not 
for upstream) --- docs/VALIDATION-PLAN.md | 337 ---------------------------------------- 1 file changed, 337 deletions(-) delete mode 100644 docs/VALIDATION-PLAN.md diff --git a/docs/VALIDATION-PLAN.md b/docs/VALIDATION-PLAN.md deleted file mode 100644 index 2bf612c..0000000 --- a/docs/VALIDATION-PLAN.md +++ /dev/null @@ -1,337 +0,0 @@ -# Zeroboot K8s PR Validation Plan - -**PR:** chaosreload/zeroboot → feat/kubernetes-deployment → [PR #13](https://github.com/zerobootdev/zeroboot/pull/13) -**目标:** 在真实 AWS EKS 环境验证 Dockerfile、K8s manifests、eksctl 配置的正确性 -**执行人:** openclaw-research -**验证完成后:** 将结果反馈给 openclaw-coding,用于完善 PR - ---- - -## 前置条件 - -- AWS 账号,有 EKS / EC2 / ECR 创建权限 -- dev-server 可访问(已有 eksctl 0.224、aws cli、docker、kubectl) -- dev-server 的 IAM role 有足够权限(admin 级别,已确认) - ---- - -## 验证场景 - -### 场景 A:新建集群 + KVM node group(一步到位) -### 场景 B:先建集群,后追加 KVM node group - -两个场景都需要跑,验证 eksctl 配置的正确性。 - ---- - -## Step 1:准备代码 - -```bash -# 在 dev-server 上 -cd /data/projects/chaosreload/study/repo/chaosreload/zeroboot -git checkout feat/kubernetes-deployment -git pull origin feat/kubernetes-deployment - -# 确认文件结构 -ls Dockerfile docker/entrypoint.sh deploy/k8s/ deploy/eks/ docs/KUBERNETES.md -``` - -**预期输出:** 所有文件存在,无报错 - ---- - -## Step 2:构建 Docker 镜像 - -```bash -# 在 dev-server 上构建 -cd /data/projects/chaosreload/study/repo/chaosreload/zeroboot -docker build -t zeroboot:test . 
-``` - -**预期:** -- 构建成功,无 error -- 最终镜像大约 300-500 MB -- `docker images | grep zeroboot` 能看到镜像 - -**记录:** -- [ ] 构建是否成功 -- [ ] 构建耗时(大概) -- [ ] 镜像大小 - ---- - -## Step 3:推镜像到 ECR - -```bash -# 创建 ECR repo(如果不存在) -AWS_REGION=ap-southeast-1 -AWS_ACCOUNT=$(aws sts get-caller-identity --query Account --output text) -ECR_REPO="${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/zeroboot" - -aws ecr create-repository --repository-name zeroboot --region $AWS_REGION 2>/dev/null || true - -# Login + push -aws ecr get-login-password --region $AWS_REGION | \ - docker login --username AWS --password-stdin "${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com" - -docker tag zeroboot:test $ECR_REPO:test -docker push $ECR_REPO:test -``` - -**预期:** push 成功,ECR 里有 `zeroboot:test` 镜像 - -**记录:** -- [ ] ECR push 是否成功 -- [ ] 镜像 URI(后续 deployment.yaml 会用到) - ---- - -## Step 4A:场景 A — 新建集群 + KVM node group - -```bash -cd /data/projects/chaosreload/study/repo/chaosreload/zeroboot - -# 编辑 region(如需要) -# deploy/eks/eks-with-kvm-nodegroup.yaml 默认 ap-southeast-1 - -eksctl create cluster -f deploy/eks/eks-with-kvm-nodegroup.yaml -# 预计耗时:15-20 分钟 -``` - -**预期:** -- eksctl 无报错完成 -- `kubectl get nodes -l kvm-capable=true` 返回 2 个节点 -- 节点状态 `Ready` - -**验证嵌套虚拟化:** -```bash -NODE=$(kubectl get nodes -l kvm-capable=true -o jsonpath='{.items[0].metadata.name}') -kubectl debug node/$NODE -it --image=ubuntu -- bash -c "ls -la /dev/kvm" -# 预期:crw-rw-rw- 1 root kvm 10, 232 ... 
-``` - -**记录:** -- [ ] eksctl 是否成功 -- [ ] 节点数量和状态 -- [ ] `/dev/kvm` 是否存在 -- [ ] 如有报错,粘贴完整错误信息 - ---- - -## Step 4B:场景 B — 先建集群,后加 node group - -```bash -# Step B-1:只建集群 -eksctl create cluster -f deploy/eks/eks-cluster-only.yaml -# 预计耗时:12-15 分钟(无 node group,更快) - -# 确认集群就绪(无节点) -kubectl get nodes -# 预期:No resources found - -# Step B-2:追加 KVM node group -eksctl create nodegroup -f deploy/eks/eks-add-kvm-nodegroup.yaml -# 预计耗时:5-8 分钟 -``` - -**预期:** 同场景 A,节点 Ready,`/dev/kvm` 存在 - -**记录:** -- [ ] 两步是否都成功 -- [ ] 与场景 A 有无差异 - ---- - -## Step 5:部署 KVM device plugin - -```bash -# 安装 kubevirt KVM device plugin(DaemonSet) -kubectl apply -f https://github.com/kubevirt/kubevirt/releases/latest/download/kubevirt-operator.yaml -kubectl apply -f https://github.com/kubevirt/kubevirt/releases/latest/download/kubevirt-cr.yaml - -# 等待就绪(约 2-3 分钟) -kubectl wait --for=condition=ready pod -l kubevirt.io=virt-handler -n kubevirt --timeout=180s - -# 验证节点上 kvm resource 可用 -kubectl describe node -l kvm-capable=true | grep -A5 "devices.kubevirt.io/kvm" -# 预期:devices.kubevirt.io/kvm: 1(Capacity 和 Allocatable 里都有) -``` - -**记录:** -- [ ] device plugin 安装是否成功 -- [ ] 节点是否显示 `devices.kubevirt.io/kvm: 1` -- [ ] 如有报错,粘贴错误 - ---- - -## Step 6:准备 PVC 数据(vmlinux + rootfs) - -PVC 创建后是空的,需要把 vmlinux 和 rootfs 上传进去。用一个 init Job 完成: - -```bash -# 创建 namespace 和 PVC -kubectl apply -f deploy/k8s/namespace.yaml -kubectl apply -f deploy/k8s/pvc.yaml - -# 等 PVC Bound -kubectl wait --for=jsonpath='{.status.phase}'=Bound pvc/zeroboot-data -n zeroboot --timeout=60s - -# 创建临时 pod 挂载 PVC,用于上传文件 -kubectl run data-loader --image=ubuntu --restart=Never -n zeroboot \ - --overrides='{"spec":{"volumes":[{"name":"data","persistentVolumeClaim":{"claimName":"zeroboot-data"}}],"containers":[{"name":"loader","image":"ubuntu","command":["sleep","3600"],"volumeMounts":[{"name":"data","mountPath":"/data"}]}]}}' - -kubectl wait --for=condition=ready pod/data-loader -n zeroboot --timeout=60s - -# 从 dev-server 上传 vmlinux 和 rootfs -# 
(在 dev-server 上执行,需要 kubectl 访问集群) -kubectl cp ~/fc-exp/vmlinux.bin zeroboot/data-loader:/data/vmlinux-fc -kubectl cp ~/zeroboot-work5/rootfs.ext4 zeroboot/data-loader:/data/rootfs-python.ext4 - -# 确认文件已上传 -kubectl exec -n zeroboot data-loader -- ls -lh /data/ -# 预期:vmlinux-fc (~21MB), rootfs-python.ext4 (~500MB) - -# 清理临时 pod -kubectl delete pod data-loader -n zeroboot -``` - -**记录:** -- [ ] PVC 是否 Bound -- [ ] 文件上传是否成功 -- [ ] 文件大小是否正确 - ---- - -## Step 7:部署 zeroboot - -```bash -# 更新 deployment.yaml 里的镜像地址 -# 把 ghcr.io/zerobootdev/zeroboot:latest 替换成 ECR 地址 -ECR_IMAGE="${AWS_ACCOUNT}.dkr.ecr.${AWS_REGION}.amazonaws.com/zeroboot:test" - -sed -i "s|ghcr.io/zerobootdev/zeroboot:latest|${ECR_IMAGE}|" deploy/k8s/deployment.yaml - -# 部署 -kubectl apply -f deploy/k8s/deployment.yaml -kubectl apply -f deploy/k8s/service.yaml - -# 监控 rollout(首次启动约 30s,等待 snapshot 创建) -kubectl rollout status deployment/zeroboot -n zeroboot --timeout=120s - -# 查看 Pod 日志(确认 template 创建成功) -kubectl logs -n zeroboot -l app=zeroboot --follow -``` - -**预期日志:** -``` -No snapshot found — creating Python template (this takes ~15s)... -Template created. -Starting zeroboot API server on port 8080... -``` - -**第二个 Pod 重启(snapshot 已在 PVC):** -``` -Snapshot found — skipping template creation. -Starting zeroboot API server on port 8080... -``` - -**记录:** -- [ ] Pod 是否进入 Running 状态 -- [ ] 日志里 template 创建是否成功 -- [ ] readiness probe 是否通过 -- [ ] 两个 Pod 是否分布在不同节点(podAntiAffinity 验证) - ```bash - kubectl get pods -n zeroboot -o wide - # NODE 列应该是两个不同的节点 - ``` - ---- - -## Step 8:端到端功能验证 - -```bash -# port-forward 到本地 -kubectl port-forward svc/zeroboot 8080:80 -n zeroboot & - -# 健康检查 -curl -s localhost:8080/v1/health -# 预期:{"status":"ok"} - -# 执行 Python 代码 -curl -s -X POST localhost:8080/v1/exec \ - -H 'Content-Type: application/json' \ - -d '{"code": "print(1+1)"}' | jq . 
-# 预期:{"stdout":"2","exit_code":0,"fork_time_ms":<1,...} - -# numpy(验证预加载) -curl -s -X POST localhost:8080/v1/exec \ - -H 'Content-Type: application/json' \ - -d '{"code": "import numpy as np; print(np.array([1,2,3]).mean())"}' | jq . -# 预期:{"stdout":"2.0","exit_code":0,...} - -# 查看 Prometheus metrics -curl -s localhost:8080/v1/metrics | grep zeroboot_concurrent_forks -# 预期:zeroboot_concurrent_forks 0 -``` - -**记录:** -- [ ] `/v1/health` 返回 ok -- [ ] `print(1+1)` 返回 `stdout: "2"` -- [ ] `fork_time_ms` 值(是否 <10ms,理想 <2ms) -- [ ] numpy 执行是否成功 -- [ ] `/v1/metrics` 是否有 `zeroboot_concurrent_forks` - ---- - -## Step 9:清理 - -```bash -# 删除 K8s 资源 -kubectl delete -f deploy/k8s/ - -# 删除集群(场景 A 或 B,根据实际情况选) -eksctl delete cluster --name zeroboot-eks --region ap-southeast-1 - -# 删除 ECR repo(可选) -aws ecr delete-repository --repository-name zeroboot --region ap-southeast-1 --force -``` - ---- - -## 结果汇总模板 - -验证完成后,请将以下内容反馈给 openclaw-coding: - -``` -## 验证结果 - -**环境:** ap-southeast-1 / c8i.xlarge / EKS 1.31 - -### 场景 A(新建集群) -- eksctl create cluster: ✅/❌ -- /dev/kvm 可访问: ✅/❌ -- KVM device plugin: ✅/❌ - -### 场景 B(追加 node group) -- eksctl create cluster (only): ✅/❌ -- eksctl create nodegroup: ✅/❌ - -### Docker 镜像 -- docker build: ✅/❌ 耗时: ___ 大小: ___ -- ECR push: ✅/❌ - -### K8s 部署 -- Pod 状态: ✅Running / ❌ (错误: ___) -- Template 创建日志: ✅正常 / ❌ -- PodAntiAffinity(分布在不同节点): ✅/❌ -- readiness probe: ✅/❌ - -### 功能验证 -- /v1/health: ✅/❌ -- print(1+1): ✅/❌ fork_time_ms: ___ -- numpy: ✅/❌ -- /v1/metrics: ✅/❌ - -### 发现的问题 -(列出所有报错、需要修改的地方、文档不清楚的地方) -``` From c41f21c2d0dccf43d66ac06ea7a335c748ae8ffb Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 25 Mar 2026 09:04:14 +0000 Subject: [PATCH 7/7] revert: move serve --bind fix out of K8s PR (to be submitted separately) src/main.rs and entrypoint.sh bind address changes belong in a dedicated fix PR. This PR should only contain K8s deployment configs and docs. 
The deployment.yaml already handles the 127.0.0.1 limitation via the hostPath /dev/kvm approach; users can add a socat sidecar if needed until the fix PR is merged. --- docker/entrypoint.sh | 3 +-- src/main.rs | 24 ++++-------------------- 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 17d816d..4c339ca 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -6,7 +6,6 @@ KERNEL="${ZEROBOOT_KERNEL:-${WORKDIR}/vmlinux-fc}" ROOTFS_PYTHON="${ZEROBOOT_ROOTFS_PYTHON:-${WORKDIR}/rootfs-python.ext4}" ROOTFS_NODE="${ZEROBOOT_ROOTFS_NODE:-}" PORT="${ZEROBOOT_PORT:-8080}" -BIND="${ZEROBOOT_BIND:-0.0.0.0}" TEMPLATE_WAIT="${ZEROBOOT_TEMPLATE_WAIT:-15}" # ── Validate KVM access ─────────────────────────────────────────────────────── @@ -67,4 +66,4 @@ fi # ── Start API server ────────────────────────────────────────────────────────── echo "Starting zeroboot API server on port ${PORT}..." -exec /usr/local/bin/zeroboot serve "$SERVE_TARGET" "$PORT" --bind "$BIND" +exec /usr/local/bin/zeroboot serve "$SERVE_TARGET" "$PORT" diff --git a/src/main.rs b/src/main.rs index dd687bb..527fa70 100644 --- a/src/main.rs +++ b/src/main.rs @@ -29,7 +29,7 @@ fn main() -> Result<()> { eprintln!( " test-exec - Test executing a command in a fork" ); - eprintln!(" serve [port] [--bind addr] - Start API server (default bind: 0.0.0.0)"); + eprintln!(" serve [port] - Start API server"); Ok(()) } } @@ -390,26 +390,10 @@ fn load_api_keys() -> Vec { fn cmd_serve(args: &[String]) -> Result<()> { if args.len() < 1 { - bail!("Usage: zeroboot serve [,lang:workdir2,...] [port] [--bind ]"); + bail!("Usage: zeroboot serve [,lang:workdir2,...] [port]"); } let port: u16 = args.get(1).and_then(|p| p.parse().ok()).unwrap_or(8080); - // Parse optional --bind flag (default 0.0.0.0 for Kubernetes compatibility). - // K8s health probes and Service ClusterIP routing require the server to listen - // on all interfaces, not just localhost. 
- let bind_addr = { - let mut addr = "0.0.0.0".to_string(); - let mut i = 2; - while i + 1 < args.len() { - if args[i] == "--bind" { - addr = args[i + 1].clone(); - break; - } - i += 1; - } - addr - }; - // Parse workdir specs: "workdir" or "python:workdir1,node:workdir2" let mut templates = std::collections::HashMap::new(); for spec in args[0].split(',') { @@ -446,10 +430,10 @@ fn cmd_serve(args: &[String]) -> Result<()> { .route("/v1/metrics", axum::routing::get(metrics_handler)) .with_state(state); - let listener = tokio::net::TcpListener::bind(format!("{}:{}", bind_addr, port)) + let listener = tokio::net::TcpListener::bind(format!("127.0.0.1:{}", port)) .await .unwrap(); - eprintln!("Zeroboot API server listening on {}:{}", bind_addr, port); + eprintln!("Zeroboot API server listening on port {}", port); axum::serve( listener, app.into_make_service_with_connect_info::(),