diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..86dea68 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,60 @@ +# syntax=docker/dockerfile:1 +# Zeroboot server image +# Multi-stage build: compile Rust binary, then assemble minimal runtime image. +# +# Usage: +# docker build -t zeroboot:latest . +# +# The image does NOT bundle vmlinux or rootfs — mount them via PersistentVolume. +# See deploy/k8s/ for Kubernetes manifests. + +# ─── Stage 1: Build zeroboot binary ────────────────────────────────────────── +FROM rust:1.86-bookworm AS builder + +WORKDIR /build + +# Cache dependencies separately from source +COPY Cargo.toml Cargo.lock ./ +RUN mkdir src && echo 'fn main(){}' > src/main.rs && \ + cargo build --release && \ + rm -f target/release/zeroboot target/release/deps/zeroboot* + +# Build actual source +COPY src/ src/ +COPY guest/ guest/ +RUN cargo build --release + +# ─── Stage 2: Runtime image ─────────────────────────────────────────────────── +FROM ubuntu:22.04 + +ARG DEBIAN_FRONTEND=noninteractive + +# Runtime dependencies only +RUN apt-get update -qq && \ + apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Install Firecracker +ARG FC_VERSION=v1.15.0 +RUN curl -fsSL -o /tmp/fc.tgz \ + "https://github.com/firecracker-microvm/firecracker/releases/download/${FC_VERSION}/firecracker-${FC_VERSION}-x86_64.tgz" && \ + tar -xzf /tmp/fc.tgz -C /tmp && \ + mv "/tmp/release-${FC_VERSION}-x86_64/firecracker-${FC_VERSION}-x86_64" /usr/local/bin/firecracker && \ + chmod +x /usr/local/bin/firecracker && \ + rm -rf /tmp/fc.tgz /tmp/release-* + +# Copy zeroboot binary +COPY --from=builder /build/target/release/zeroboot /usr/local/bin/zeroboot + +# Data directory — mount a PersistentVolume here to persist snapshots +VOLUME ["/var/lib/zeroboot"] + +# Copy entrypoint +COPY docker/entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + +EXPOSE 8080 + +ENTRYPOINT ["/entrypoint.sh"] diff --git 
a/deploy/eks/eks-add-kvm-nodegroup.yaml b/deploy/eks/eks-add-kvm-nodegroup.yaml new file mode 100644 index 0000000..6b5a761 --- /dev/null +++ b/deploy/eks/eks-add-kvm-nodegroup.yaml @@ -0,0 +1,37 @@ +# Scenario 2b: Add KVM node group to an EXISTING EKS cluster +# Usage: +# eksctl create nodegroup -f eks-add-kvm-nodegroup.yaml + +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: zeroboot-eks # must match existing cluster name + region: ap-southeast-1 # must match existing cluster region + +managedNodeGroups: + - name: zeroboot-kvm + instanceType: c8i.xlarge + minSize: 1 + maxSize: 5 + desiredCapacity: 2 + amiFamily: AmazonLinux2023 + volumeSize: 50 + privateNetworking: true + + cpuOptions: + nestedVirtualization: enabled + + labels: + kvm-capable: "true" + workload: zeroboot + + iam: + attachPolicyARNs: + - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy + - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly + - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy + + tags: + Project: zeroboot + ManagedBy: eksctl diff --git a/deploy/eks/eks-cluster-only.yaml b/deploy/eks/eks-cluster-only.yaml new file mode 100644 index 0000000..54fb2ba --- /dev/null +++ b/deploy/eks/eks-cluster-only.yaml @@ -0,0 +1,28 @@ +# Scenario 2a: Create EKS cluster WITHOUT any node group +# Usage: +# eksctl create cluster -f eks-cluster-only.yaml + +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: zeroboot-eks + region: ap-southeast-1 + version: "1.31" + +vpc: + clusterEndpoints: + privateAccess: true + publicAccess: true + +# Explicitly no node groups at cluster creation time +managedNodeGroups: [] + +iam: + withOIDC: true + +addons: + - name: aws-ebs-csi-driver + version: latest + attachPolicyARNs: + - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy diff --git a/deploy/eks/eks-self-managed-kvm.sh b/deploy/eks/eks-self-managed-kvm.sh new file mode 100755 index 0000000..aaac251 --- /dev/null +++ 
b/deploy/eks/eks-self-managed-kvm.sh @@ -0,0 +1,243 @@ +#!/usr/bin/env bash +# deploy/eks/eks-self-managed-kvm.sh +# +# Creates a self-managed EKS node group with nested virtualization enabled. +# +# WHY SELF-MANAGED? +# EKS Managed Node Groups silently drop CpuOptions when generating their +# internal Launch Template — even when you supply CpuOptions in your own LT. +# Self-managed ASG + Launch Template bypasses EKS entirely, so CpuOptions +# (including NestedVirtualization=enabled) is applied directly to the instance. +# +# USAGE: +# export AWS_PROFILE=your-profile +# export CLUSTER_NAME=zeroboot-eks +# export REGION=ap-southeast-1 +# bash eks-self-managed-kvm.sh +# +# REQUIREMENTS: +# - aws cli v2 +# - kubectl configured for the target cluster +# - eksctl (for cluster-only creation, see eks-cluster-only.yaml) +# +# WHAT THIS SCRIPT DOES: +# 1. Creates IAM role + instance profile for worker nodes +# 2. Registers node role with EKS (access entry) +# 3. Fetches cluster params (endpoint, cert, subnets, SGs) +# 4. Queries latest EKS-optimized AL2023 AMI +# 5. Creates Launch Template with CpuOptions.NestedVirtualization=enabled +# 6. Creates Auto Scaling Group (2-4 nodes) +# 7. Verifies /dev/kvm is present on nodes + +set -euo pipefail + +: "${CLUSTER_NAME:=zeroboot-eks}" +: "${REGION:=ap-southeast-1}" +: "${INSTANCE_TYPE:=c8i.xlarge}" +: "${K8S_VERSION:=1.31}" +: "${MIN_SIZE:=1}" +: "${MAX_SIZE:=4}" +: "${DESIRED:=2}" +: "${NODE_ROLE_NAME:=zeroboot-eks-node-role}" +: "${INSTANCE_PROFILE_NAME:=zeroboot-eks-node-profile}" +: "${LT_NAME:=zeroboot-kvm-nested-virt}" +: "${ASG_NAME:=zeroboot-kvm-self-managed}" + +echo "==> Fetching cluster info..." 
+ENDPOINT=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.endpoint" --output text) +CERT_AUTH=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.certificateAuthority.data" --output text) +CIDR=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.kubernetesNetworkConfig.serviceIpv4Cidr" --output text) +CLUSTER_SG=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.resourcesVpcConfig.clusterSecurityGroupId" --output text) +SUBNETS=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \ + --query "cluster.resourcesVpcConfig.subnetIds" --output text | tr '\t' ',') +ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + +echo " Cluster: $CLUSTER_NAME" +echo " Region: $REGION" +echo " Account: $ACCOUNT_ID" +echo " ClusterSG: $CLUSTER_SG" +echo " Subnets: $SUBNETS" + +# ─── Step 1: IAM role ───────────────────────────────────────────────────────── +echo "" +echo "==> Creating IAM node role: $NODE_ROLE_NAME" + +if aws iam get-role --role-name "$NODE_ROLE_NAME" &>/dev/null; then + echo " Role already exists, skipping." +else + aws iam create-role \ + --role-name "$NODE_ROLE_NAME" \ + --assume-role-policy-document '{ + "Version":"2012-10-17", + "Statement":[{"Effect":"Allow","Principal":{"Service":"ec2.amazonaws.com"},"Action":"sts:AssumeRole"}] + }' > /dev/null + + for POLICY in AmazonEKSWorkerNodePolicy AmazonEKS_CNI_Policy AmazonEC2ContainerRegistryReadOnly; do + aws iam attach-role-policy \ + --role-name "$NODE_ROLE_NAME" \ + --policy-arn "arn:aws:iam::aws:policy/${POLICY}" + done + echo " Role created." +fi + +# Instance profile +if aws iam get-instance-profile --instance-profile-name "$INSTANCE_PROFILE_NAME" &>/dev/null; then + echo " Instance profile already exists, skipping." 
+else + aws iam create-instance-profile --instance-profile-name "$INSTANCE_PROFILE_NAME" > /dev/null + aws iam add-role-to-instance-profile \ + --instance-profile-name "$INSTANCE_PROFILE_NAME" \ + --role-name "$NODE_ROLE_NAME" + echo " Instance profile created. Waiting 15s for IAM propagation..." + sleep 15 +fi + +NODE_ROLE_ARN="arn:aws:iam::${ACCOUNT_ID}:role/${NODE_ROLE_NAME}" +INSTANCE_PROFILE_ARN="arn:aws:iam::${ACCOUNT_ID}:instance-profile/${INSTANCE_PROFILE_NAME}" + +# ─── Step 2: EKS access entry ──────────────────────────────────────────────── +echo "" +echo "==> Registering node role with EKS cluster..." +aws eks create-access-entry \ + --cluster-name "$CLUSTER_NAME" \ + --principal-arn "$NODE_ROLE_ARN" \ + --type EC2_LINUX \ + --region "$REGION" 2>/dev/null || echo " Access entry already exists." + +# ─── Step 3: AMI ───────────────────────────────────────────────────────────── +echo "" +echo "==> Fetching latest EKS-optimized AMI (AL2023, K8s ${K8S_VERSION})..." +AMI_ID=$(aws ssm get-parameter \ + --name "/aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2023/x86_64/standard/recommended/image_id" \ + --region "$REGION" --query "Parameter.Value" --output text) +echo " AMI: $AMI_ID" + +# ─── Step 4: UserData (AL2023 nodeadm format) ──────────────────────────────── +echo "" +echo "==> Preparing UserData..." 
+USERDATA=$(cat << EOF +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="//" + +--// +Content-Type: application/node.eks.aws + +--- +apiVersion: node.eks.aws/v1alpha1 +kind: NodeConfig +spec: + cluster: + apiServerEndpoint: ${ENDPOINT} + certificateAuthority: ${CERT_AUTH} + cidr: ${CIDR} + name: ${CLUSTER_NAME} + kubelet: + config: + maxPods: 110 + flags: + - "--node-labels=kvm-capable=true,workload=zeroboot" + +--//-- +EOF +) +USERDATA_B64=$(echo "$USERDATA" | base64 -w 0) + +# ─── Step 5: Launch Template ────────────────────────────────────────────────── +echo "" +echo "==> Creating Launch Template: $LT_NAME" +echo " (CpuOptions.NestedVirtualization=enabled — this is the key field that" +echo " EKS managed node groups silently drop)" + +LT_DATA=$(cat << EOF +{ + "ImageId": "${AMI_ID}", + "InstanceType": "${INSTANCE_TYPE}", + "CpuOptions": {"NestedVirtualization": "enabled"}, + "SecurityGroupIds": ["${CLUSTER_SG}"], + "MetadataOptions": {"HttpTokens": "required", "HttpPutResponseHopLimit": 2}, + "IamInstanceProfile": {"Arn": "${INSTANCE_PROFILE_ARN}"}, + "UserData": "${USERDATA_B64}", + "TagSpecifications": [{ + "ResourceType": "instance", + "Tags": [ + {"Key": "Name", "Value": "zeroboot-kvm-node"}, + {"Key": "kubernetes.io/cluster/${CLUSTER_NAME}", "Value": "owned"}, + {"Key": "kvm-capable", "Value": "true"} + ] + }] +} +EOF +) + +LT_RESULT=$(aws ec2 create-launch-template \ + --launch-template-name "$LT_NAME" \ + --region "$REGION" \ + --launch-template-data "$LT_DATA" \ + --output json 2>/dev/null || \ + aws ec2 describe-launch-templates \ + --launch-template-names "$LT_NAME" \ + --region "$REGION" \ + --query "LaunchTemplates[0]" --output json) + +LT_ID=$(echo "$LT_RESULT" | python3 -c " +import json,sys +d = json.load(sys.stdin) +# handle both create and describe responses +print(d.get('LaunchTemplate', d).get('LaunchTemplateId')) +") +LT_VERSION=$(aws ec2 describe-launch-template-versions \ + --launch-template-id "$LT_ID" --region "$REGION" \ + 
--query "LaunchTemplateVersions[-1].VersionNumber" --output text) + +echo " LT ID: $LT_ID" +echo " LT Version: $LT_VERSION" + +# ─── Step 6: Auto Scaling Group ─────────────────────────────────────────────── +echo "" +echo "==> Creating Auto Scaling Group: $ASG_NAME" +aws autoscaling create-auto-scaling-group \ + --auto-scaling-group-name "$ASG_NAME" \ + --launch-template "LaunchTemplateId=${LT_ID},Version=${LT_VERSION}" \ + --min-size "$MIN_SIZE" \ + --max-size "$MAX_SIZE" \ + --desired-capacity "$DESIRED" \ + --vpc-zone-identifier "$SUBNETS" \ + --tags \ + "Key=Name,Value=zeroboot-kvm-node,PropagateAtLaunch=true" \ + "Key=kubernetes.io/cluster/${CLUSTER_NAME},Value=owned,PropagateAtLaunch=true" \ + "Key=kvm-capable,Value=true,PropagateAtLaunch=true" \ + --region "$REGION" 2>/dev/null || echo " ASG already exists." + +echo " ASG created. Waiting for nodes to join (up to 3 minutes)..." +sleep 60 + +# ─── Step 7: Verify ─────────────────────────────────────────────────────────── +echo "" +echo "==> Verifying nodes..." +kubectl get nodes -l kvm-capable=true 2>/dev/null || echo " (kubectl not configured or nodes not yet ready)" + +echo "" +echo "==> Testing /dev/kvm access..." +kubectl run kvm-verify --restart=Never \ + --image=amazonlinux:2023 \ + --overrides='{"spec":{"nodeSelector":{"kvm-capable":"true"},"containers":[{"name":"c","image":"amazonlinux:2023","command":["sh","-c","ls -la /dev/kvm && grep -c vmx /proc/cpuinfo && cat /sys/module/kvm_intel/parameters/nested 2>/dev/null || echo N/A"],"securityContext":{"privileged":true}}]}}' \ + 2>/dev/null || true + +echo " Waiting 30s for pod to start..." +sleep 30 +kubectl logs kvm-verify 2>/dev/null || echo " Pod not ready yet, check manually: kubectl logs kvm-verify" +kubectl delete pod kvm-verify --ignore-not-found 2>/dev/null + +echo "" +echo "==> Done! Self-managed node group with nested virtualization created." 
+echo "   - Launch Template: $LT_ID (v${LT_VERSION}) — CpuOptions.NestedVirtualization=enabled" +echo "   - ASG: $ASG_NAME" +echo "   - Node label: kvm-capable=true (already set via --node-labels in userdata)" +echo "" +echo "   Next: Deploy zeroboot using deploy/k8s/" +echo "     kubectl apply -f deploy/k8s/namespace.yaml" +echo "     kubectl apply -f deploy/k8s/" diff --git a/deploy/eks/eks-with-kvm-nodegroup.yaml b/deploy/eks/eks-with-kvm-nodegroup.yaml new file mode 100644 index 0000000..d2efc47 --- /dev/null +++ b/deploy/eks/eks-with-kvm-nodegroup.yaml @@ -0,0 +1,80 @@ +# ⚠️ WARNING: EKS Managed Node Groups silently drop CpuOptions +# +# This file uses managedNodeGroups with cpuOptions.nestedVirtualization. +# However, EKS generates its own internal Launch Template from the user-supplied +# one, and CpuOptions is NOT carried over — resulting in nodes WITHOUT /dev/kvm +# even though the YAML appears correct. +# +# STATUS: AWS has acknowledged this as a documentation gap (CpuOptions is not in +# the official "blocked fields" list but is still ignored in practice). +# +# RECOMMENDED ALTERNATIVE: Use deploy/eks/eks-self-managed-kvm.sh instead. +# That script creates a Self-managed Node Group (ASG + Launch Template) where +# CpuOptions is applied directly without EKS interference. +# +# Use THIS file only if you have verified that your eksctl version correctly +# passes CpuOptions through (test with: kubectl run kvm-check ... ls /dev/kvm). 
+ +# Scenario 1: Create EKS cluster with KVM node group in one shot +# Usage: +# eksctl create cluster -f eks-with-kvm-nodegroup.yaml + +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: zeroboot-eks + region: ap-southeast-1 + version: "1.31" + +# Cluster-level VPC (auto-created) +vpc: + clusterEndpoints: + privateAccess: true + publicAccess: true + +# No default node group — only our KVM-capable group +managedNodeGroups: + - name: zeroboot-kvm + instanceType: c8i.xlarge + minSize: 1 + maxSize: 5 + desiredCapacity: 2 + amiFamily: AmazonLinux2023 + volumeSize: 50 # GB — enough for OS + Docker images + zeroboot binary + privateNetworking: true # place nodes in private subnets + + # Enable nested virtualization — requires c8i/m8i/r8i + cpuOptions: + nestedVirtualization: enabled + + # Labels used by Deployment nodeSelector and Karpenter NodePool + labels: + kvm-capable: "true" + workload: zeroboot + + # Optional: taint to reserve these nodes exclusively for zeroboot + # taints: + # - key: zeroboot + # value: "true" + # effect: NoSchedule + + iam: + attachPolicyARNs: + - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy + - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly + - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy + + tags: + Project: zeroboot + ManagedBy: eksctl + +# Add-ons needed for EBS PVC support +addons: + - name: aws-ebs-csi-driver + version: latest + attachPolicyARNs: + - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy + +iam: + withOIDC: true # required for add-on IAM role binding (IRSA) diff --git a/deploy/k8s/deployment.yaml b/deploy/k8s/deployment.yaml new file mode 100644 index 0000000..cf3edd4 --- /dev/null +++ b/deploy/k8s/deployment.yaml @@ -0,0 +1,99 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: zeroboot + namespace: zeroboot + labels: + app: zeroboot +spec: + replicas: 2 + selector: + matchLabels: + app: zeroboot + template: + metadata: + labels: + app: zeroboot + annotations: + 
prometheus.io/scrape: "true" + prometheus.io/port: "8080" + prometheus.io/path: "/v1/metrics" + spec: + # ── Scheduling: only run on KVM-capable nodes ─────────────────────────── + nodeSelector: + kvm-capable: "true" # label KVM nodes with: kubectl label node kvm-capable=true + + # Spread pods across nodes — each pod needs its own physical memory for CoW + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: zeroboot + topologyKey: kubernetes.io/hostname + + containers: + - name: zeroboot + image: ghcr.io/zerobootdev/zeroboot:latest + imagePullPolicy: Always + ports: + - name: http + containerPort: 8080 + + # ── KVM device access ────────────────────────────────────────────── + # Use privileged mode + hostPath mount (compatible with EKS without kubevirt). + # If KubeVirt device plugin is installed, you can replace this with: + # resources.limits: { devices.kubevirt.io/kvm: "1" } + # and remove securityContext.privileged. 
+ securityContext: + privileged: true + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "8Gi" + cpu: "4" + + env: + - name: ZEROBOOT_WORKDIR + value: /var/lib/zeroboot + - name: ZEROBOOT_PORT + value: "8080" + - name: ZEROBOOT_TEMPLATE_WAIT + value: "15" + # Optional: path to api_keys.json on the PVC + # - name: ZEROBOOT_API_KEYS_FILE + # value: /var/lib/zeroboot/api_keys.json + + volumeMounts: + - name: data + mountPath: /var/lib/zeroboot + - name: kvm + mountPath: /dev/kvm + + # ── Health checks ────────────────────────────────────────────────── + readinessProbe: + httpGet: + path: /v1/health + port: 8080 + initialDelaySeconds: 30 # allow time for template creation on first boot + periodSeconds: 5 + failureThreshold: 6 + + livenessProbe: + httpGet: + path: /v1/health + port: 8080 + initialDelaySeconds: 120 # template creation takes ~19s; allow 2x margin + periodSeconds: 10 + failureThreshold: 6 + + volumes: + - name: data + persistentVolumeClaim: + claimName: zeroboot-data + - name: kvm + hostPath: + path: /dev/kvm + type: CharDevice diff --git a/deploy/k8s/hpa.yaml b/deploy/k8s/hpa.yaml new file mode 100644 index 0000000..5c34940 --- /dev/null +++ b/deploy/k8s/hpa.yaml @@ -0,0 +1,45 @@ +# Horizontal Pod Autoscaler for zeroboot +# +# Scales based on zeroboot_concurrent_forks — the number of active VM sandboxes +# per pod. CPU/memory are poor proxies for zeroboot workloads because: +# - Fork is memory-bound (CoW page faults), not CPU-bound +# - RSS grows proportionally with concurrent sandboxes +# +# Prerequisites: +# 1. prometheus-adapter installed and configured to expose zeroboot_concurrent_forks +# as a Kubernetes custom metric (pods/zeroboot_concurrent_forks) +# 2. 
See docs/KUBERNETES.md for prometheus-adapter config snippet + +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: zeroboot + namespace: zeroboot +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: zeroboot + minReplicas: 2 + maxReplicas: 10 + metrics: + - type: Pods + pods: + metric: + name: zeroboot_concurrent_forks + target: + type: AverageValue + averageValue: "800" # scale out when avg concurrent sandboxes > 800 per pod + behavior: + scaleUp: + stabilizationWindowSeconds: 30 + policies: + - type: Pods + value: 2 + periodSeconds: 60 + scaleDown: + stabilizationWindowSeconds: 300 # 5 min cooldown before scale-in + policies: + - type: Pods + value: 1 + periodSeconds: 120 diff --git a/deploy/k8s/namespace.yaml b/deploy/k8s/namespace.yaml new file mode 100644 index 0000000..4307e42 --- /dev/null +++ b/deploy/k8s/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: zeroboot diff --git a/deploy/k8s/pvc.yaml b/deploy/k8s/pvc.yaml new file mode 100644 index 0000000..92cadba --- /dev/null +++ b/deploy/k8s/pvc.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: zeroboot-data + namespace: zeroboot + labels: + app: zeroboot +spec: + accessModes: + - ReadWriteOnce + storageClassName: gp3 # AWS EBS gp3; adjust for your cloud provider + resources: + requests: + storage: 20Gi # vmlinux (~21MB) + rootfs (~500MB) + snapshot (~512MB) per template diff --git a/deploy/k8s/service.yaml b/deploy/k8s/service.yaml new file mode 100644 index 0000000..8854f22 --- /dev/null +++ b/deploy/k8s/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: zeroboot + namespace: zeroboot + labels: + app: zeroboot +spec: + type: ClusterIP # Use LoadBalancer to expose externally + selector: + app: zeroboot + ports: + - name: http + port: 80 + targetPort: 8080 diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 0000000..4c339ca --- 
/dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,69 @@ +#!/bin/bash +set -euo pipefail + +WORKDIR="${ZEROBOOT_WORKDIR:-/var/lib/zeroboot}" +KERNEL="${ZEROBOOT_KERNEL:-${WORKDIR}/vmlinux-fc}" +ROOTFS_PYTHON="${ZEROBOOT_ROOTFS_PYTHON:-${WORKDIR}/rootfs-python.ext4}" +ROOTFS_NODE="${ZEROBOOT_ROOTFS_NODE:-}" +PORT="${ZEROBOOT_PORT:-8080}" +TEMPLATE_WAIT="${ZEROBOOT_TEMPLATE_WAIT:-15}" + +# ── Validate KVM access ─────────────────────────────────────────────────────── +if [ ! -c /dev/kvm ]; then + echo "ERROR: /dev/kvm not found. Node must support KVM and use the KVM device plugin." + exit 1 +fi + +# ── Check required files ────────────────────────────────────────────────────── +if [ ! -f "$KERNEL" ]; then + echo "ERROR: Kernel not found at $KERNEL" + echo "Mount a PersistentVolume to $WORKDIR containing vmlinux-fc and rootfs images." + exit 1 +fi + +if [ ! -f "$ROOTFS_PYTHON" ]; then + echo "ERROR: Python rootfs not found at $ROOTFS_PYTHON" + exit 1 +fi + +# ── Create template if snapshot doesn't exist ──────────────────────────────── +PYTHON_SNAPSHOT="${WORKDIR}/python/snapshot/vmstate" + +if [ ! -f "$PYTHON_SNAPSHOT" ]; then + echo "No snapshot found — creating Python template (this takes ~${TEMPLATE_WAIT}s)..." + mkdir -p "${WORKDIR}/python" + cp "$ROOTFS_PYTHON" "${WORKDIR}/python-rootfs.ext4" + /usr/local/bin/zeroboot template \ + "$KERNEL" \ + "${WORKDIR}/python-rootfs.ext4" \ + "${WORKDIR}/python" \ + "$TEMPLATE_WAIT" \ + /init + echo "Template created." +else + echo "Snapshot found — skipping template creation." +fi + +# ── Build serve target ──────────────────────────────────────────────────────── +SERVE_TARGET="python:${WORKDIR}/python" + +if [ -n "$ROOTFS_NODE" ] && [ -f "$ROOTFS_NODE" ]; then + NODE_SNAPSHOT="${WORKDIR}/node/snapshot/vmstate" + if [ ! -f "$NODE_SNAPSHOT" ]; then + echo "Creating Node.js template..." 
+ mkdir -p "${WORKDIR}/node" + cp "$ROOTFS_NODE" "${WORKDIR}/node-rootfs.ext4" + /usr/local/bin/zeroboot template \ + "$KERNEL" \ + "${WORKDIR}/node-rootfs.ext4" \ + "${WORKDIR}/node" \ + "$TEMPLATE_WAIT" \ + /init-node.sh + echo "Node template created." + fi + SERVE_TARGET="${SERVE_TARGET},node:${WORKDIR}/node" +fi + +# ── Start API server ────────────────────────────────────────────────────────── +echo "Starting zeroboot API server on port ${PORT}..." +exec /usr/local/bin/zeroboot serve "$SERVE_TARGET" "$PORT" diff --git a/docs/KUBERNETES.md b/docs/KUBERNETES.md new file mode 100644 index 0000000..a131b4c --- /dev/null +++ b/docs/KUBERNETES.md @@ -0,0 +1,371 @@ +# Running Zeroboot on Kubernetes + +Zeroboot can be deployed as a stateful service inside a Kubernetes cluster. +This guide covers node requirements, KVM device access, persistent storage for +snapshots, reference manifests, and autoscaling. + +--- + +## Architecture overview + +``` +Internet → K8s Service + │ + ┌──────┼──────┐ + │ │ │ + Pod-1 Pod-2 Pod-3 ← one Pod per KVM-capable Node (podAntiAffinity) + │ │ │ + VM VM VM VM VM VM ← KVM forks happen inside the Pod, sub-millisecond +``` + +**Key point:** Kubernetes manages the lifecycle of the zeroboot *server* process. +It does not schedule individual sandboxes — each `v1/exec` request is handled +entirely within the Pod that receives it via a KVM fork (~0.8 ms). Kubernetes' +role is capacity management: health checks, rolling updates, and horizontal scaling. + +--- + +## Node requirements + +### Instance families with KVM support + +Not all EC2 instance types expose `/dev/kvm`. The following families support KVM +and are suitable for zeroboot: + +| Family | KVM method | Notes | +|---|---|---| +| `c8i`, `m8i`, `r8i` | ✅ **Nested virtualization** | **Recommended** — Intel 8th-gen Nitro platform; supports nested virt without metal. 
Enable at launch via `--cpu-options NestedVirtualization=enabled` (requires AWS CLI ≥ v2.34) | +| `c6i`, `c6a`, `c7i`, `m6i`, `m7i`, `r6i`, `r7i` | ✅ Bare-metal only | KVM available only on `.metal` sizes (e.g. `c6i.metal`) | +| `c5`, `m5`, `r5` | ✅ Bare-metal only | Older Nitro generation; `.metal` sizes only | +| `t3`, `t4g` | ❌ Not available | Burstable — `/dev/kvm` not exposed | +| `t2` | ❌ Not available | No Nitro, no KVM | +| Any ARM (`*g`) | ❌ Architecture mismatch | Firecracker x86_64 binary required | + +**TL;DR for EKS node groups:** Use `c8i`, `m8i`, or `r8i` with nested virtualization +enabled — these are the only non-metal families where regular (non-`.metal`) instance +sizes expose `/dev/kvm`. All other families require `.metal` sizes which are significantly +more expensive and harder to schedule in K8s. + +```bash +# Enable nested virtualization when launching a new instance (c8i/m8i/r8i only) +aws ec2 run-instances \ + --instance-type c8i.xlarge \ + --cpu-options "NestedVirtualization=enabled" \ + ... +``` + +> On GCP: `n2`, `n2d`, `c2`, `c3` families support KVM. +> On Azure: `Dv3`, `Ev3`, `Dsv3` with nested virtualization enabled. + +### Label KVM-capable nodes + +```bash +kubectl label node <node-name> kvm-capable=true +``` + +The Deployment's `nodeSelector` uses this label to ensure Pods are only scheduled +where `/dev/kvm` is available. + +--- + +--- + +## EKS deployment: managed vs self-managed node groups + +> **TL;DR:** Use a self-managed node group. EKS managed node groups silently +> drop `CpuOptions.NestedVirtualization` — your nodes will start without `/dev/kvm`. + +### The problem with managed node groups + +EKS managed node groups take your Launch Template, then generate a new internal +Launch Template that merges only a subset of fields. `CpuOptions` is not in that +subset — even though it is **not** listed in the [official blocked-fields docs](https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-basics). 
+ +Symptoms: +- `ls /dev/kvm` returns "No such file or directory" +- `/proc/cpuinfo` has no `vmx` flag +- `eksctl create nodegroup` succeeds, but KVM is silently missing + +You can verify by inspecting the EKS-generated internal Launch Template: + +```bash +# Get the internal LT id (not your LT) +aws ec2 describe-launch-template-versions --launch-template-id <internal-lt-id> --versions 1 --query "LaunchTemplateVersions[0].LaunchTemplateData.CpuOptions" +# Expected for managed nodegroup: null (even if you set it in your own LT) +``` + +### The solution: self-managed node group + +Create an Auto Scaling Group with a Launch Template directly — bypassing EKS's +internal LT generation. The provided script handles the full setup: + +```bash +export AWS_PROFILE=your-profile +export CLUSTER_NAME=zeroboot-eks +export REGION=ap-southeast-1 + +# Step 1: Create cluster without node group +eksctl create cluster -f deploy/eks/eks-cluster-only.yaml + +# Step 2: Create self-managed KVM node group +bash deploy/eks/eks-self-managed-kvm.sh +``` + +The script: +1. Creates an IAM node role + instance profile +2. Registers the role with EKS via `create-access-entry` +3. Queries the latest EKS-optimized AL2023 AMI +4. Creates a Launch Template with `CpuOptions.NestedVirtualization=enabled` +5. Creates an ASG referencing the LT directly +6. Verifies `/dev/kvm` is present on the new nodes + +> **Note:** `eksctl`'s `nodeGroups` (non-managed) do not support `launchTemplate`. +> Only `managedNodeGroups` does — but managed NGs drop `CpuOptions`. The script +> uses raw AWS CLI (`ec2 create-launch-template` + `autoscaling create-auto-scaling-group`) +> to sidestep both limitations. 
+ + +--- + +## KVM device access without `privileged: true` + +Pods request `/dev/kvm` via the [KVM device plugin](https://github.com/kubevirt/kubevirt/tree/main/cmd/virt-handler) +from the KubeVirt project: + +```bash +# Install KVM device plugin (DaemonSet) +kubectl apply -f https://github.com/kubevirt/kubevirt/releases/latest/download/kubevirt-operator.yaml +kubectl apply -f https://github.com/kubevirt/kubevirt/releases/latest/download/kubevirt-cr.yaml +``` + +Once installed, Pods can request KVM access via resources (already set in `deployment.yaml`): + +```yaml +resources: + limits: + devices.kubevirt.io/kvm: "1" +``` + +This grants `/dev/kvm` access without `privileged: true` or `hostDevice` mounts. + +--- + +## Persistent storage for snapshots + +Zeroboot's `template` command snapshots ~512 MB of VM memory to disk. Without a +PersistentVolume, every Pod restart triggers a ~15 s re-snapshot. + +Mount a PVC at `/var/lib/zeroboot` (see `deploy/k8s/pvc.yaml`). The directory +layout on the volume: + +``` +/var/lib/zeroboot/ +├── vmlinux-fc ← kernel binary (~21 MB) +├── rootfs-python.ext4 ← base rootfs image (pre-loaded numpy/pandas) +├── python/ ← snapshot created by entrypoint on first boot +│ ├── snapshot/ +│ │ ├── vmstate ← CPU register state (~14 KB) +│ │ └── mem ← 512 MB memory image (CoW source) +│ └── rootfs_path +└── api_keys.json ← optional API key list +``` + +> **Populate the volume before first deploy.** Copy `vmlinux-fc` and +> `rootfs-python.ext4` to the PVC (e.g., via a one-shot init Job or manual +> `kubectl cp`). The entrypoint will create the snapshot automatically on +> first boot if it is missing. 
+ +### Storage class recommendations + +| Cloud | StorageClass | Notes | +|---|---|---| +| AWS | `gp3` | Default for EKS; good random-read IOPS for CoW page faults | +| GCP | `premium-rwo` | SSD-backed, low latency | +| Azure | `managed-premium` | SSD, required for sub-ms fork performance | + +Avoid `gp2` or spinning-disk storage classes — the CoW page fault path is +latency-sensitive and benefits from SSD IOPS. + +--- + +## Deploying + +```bash +# 1. Create namespace +kubectl apply -f deploy/k8s/namespace.yaml + +# 2. Create PVC +kubectl apply -f deploy/k8s/pvc.yaml + +# 3. Deploy (2 replicas by default) +kubectl apply -f deploy/k8s/deployment.yaml +kubectl apply -f deploy/k8s/service.yaml + +# 4. Watch rollout — first boot takes ~30s for template creation +kubectl rollout status deployment/zeroboot -n zeroboot + +# 5. Verify +kubectl exec -n zeroboot deploy/zeroboot -- curl -s localhost:8080/v1/health +``` + +--- + +## Autoscaling + +### Why not CPU-based HPA? + +Zeroboot workloads are **memory-bound**, not CPU-bound. Each concurrent fork +adds ~265 KB of CoW memory pressure. CPU utilization is a poor scaling signal. + +### Custom metric HPA + +The `zeroboot_concurrent_forks` gauge (exposed at `/v1/metrics`) reflects the +number of active VM sandboxes per Pod. Use this for HPA: + +```bash +# Apply HPA (requires prometheus-adapter, see below) +kubectl apply -f deploy/k8s/hpa.yaml +``` + +Scale-out triggers when average concurrent forks per Pod exceeds 800. 
Adjust +this threshold based on your Node's available memory: + +``` +max_concurrent_forks ≈ (node_memory - 2GB_overhead) / 265KB_per_fork +# Example: 8GB node → (8192 - 2048) / 0.265 ≈ 23,000 theoretical max +# Practical limit with snapshot RSS: ~1000–2000 per Pod +``` + +### Exposing the metric via prometheus-adapter + +Add to your `prometheus-adapter` ConfigMap: + +```yaml +rules: + - seriesQuery: 'zeroboot_concurrent_forks{namespace!="",pod!=""}' + resources: + overrides: + namespace: {resource: "namespace"} + pod: {resource: "pod"} + name: + matches: "zeroboot_concurrent_forks" + as: "zeroboot_concurrent_forks" + metricsQuery: 'avg_over_time(zeroboot_concurrent_forks{<<.LabelMatchers>>}[1m])' +``` + +### Karpenter node provisioning + +For cluster autoscaling with Karpenter, create a NodePool that targets KVM-capable instances: + +```yaml +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: zeroboot-kvm +spec: + template: + metadata: + labels: + kvm-capable: "true" + spec: + requirements: + - key: karpenter.k8s.aws/instance-family + operator: In + values: [c8i, m8i, r8i] + - key: karpenter.k8s.aws/instance-size + operator: In + values: [xlarge, 2xlarge, 4xlarge] + - key: kubernetes.io/arch + operator: In + values: [amd64] + limits: + cpu: 100 +``` + +> **Scaling latency note:** Karpenter takes 60–120 s to provision a new KVM +> node (EC2 start + kubelet join + Pod scheduling + snapshot load). Karpenter +> handles **capacity expansion** for sustained load — it is not designed to +> absorb sudden request spikes. Size your warm pool (`minReplicas`) to handle +> peak burst traffic; use HPA to scale within the existing node pool first. + +--- + +## Monitoring + +Zeroboot exposes Prometheus metrics at `/v1/metrics` (not `/metrics`). 
+ +### ServiceMonitor (Prometheus Operator) + +```yaml +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: zeroboot + namespace: zeroboot +spec: + selector: + matchLabels: + app: zeroboot + endpoints: + - port: http + path: /v1/metrics + interval: 15s +``` + +### Key metrics + +| Metric | Type | Description | +|---|---|---| +| `zeroboot_concurrent_forks` | gauge | Active VM sandboxes — **use for HPA** | +| `zeroboot_fork_time_milliseconds` | histogram | Fork latency (P50/P99) | +| `zeroboot_exec_time_milliseconds` | histogram | Code execution latency | +| `zeroboot_total_time_milliseconds` | histogram | End-to-end request latency | +| `zeroboot_total_executions{status}` | counter | Success / error / timeout counts | +| `zeroboot_memory_usage_bytes` | gauge | Process RSS — monitor for memory pressure | + +--- + +## Configuration reference + +All configuration is via environment variables (set in `deployment.yaml`): + +| Variable | Default | Description | +|---|---|---| +| `ZEROBOOT_WORKDIR` | `/var/lib/zeroboot` | Working directory (PVC mount point) | +| `ZEROBOOT_KERNEL` | `$WORKDIR/vmlinux-fc` | Path to kernel binary | +| `ZEROBOOT_ROOTFS_PYTHON` | `$WORKDIR/rootfs-python.ext4` | Python rootfs image | +| `ZEROBOOT_ROOTFS_NODE` | _(unset)_ | Node.js rootfs image (optional) | +| `ZEROBOOT_PORT` | `8080` | API server port | +| `ZEROBOOT_TEMPLATE_WAIT` | `15` | Seconds to wait during template snapshot | +| `ZEROBOOT_API_KEYS_FILE` | _(unset)_ | Path to JSON array of API keys | + +--- + +### Server bind address + +By default, `zeroboot serve` binds to `0.0.0.0` (all interfaces), which is +required for Kubernetes health probes and Service routing. To restrict to +localhost (e.g. for local development), pass `--bind 127.0.0.1`: + +```bash +zeroboot serve python:/workdir/python 8080 --bind 127.0.0.1 +``` + +The `ZEROBOOT_BIND` environment variable (default: `0.0.0.0`) controls the +bind address when running via the Docker entrypoint. 
+ + +--- + +## Limitations + +- **Single-node fork pool:** All sandboxes on a Pod run on the same physical Node. + Scale out by adding Pods (and Nodes), not by resizing individual Pods. +- **ReadWriteOnce PVC:** Each Pod needs its own PVC (`ReadWriteOnce`). If you + use a `StatefulSet` instead of a `Deployment`, each replica gets its own PVC + automatically via `volumeClaimTemplates`. +- **Snapshot on first boot:** The first Pod startup after PVC creation takes + ~15–30 s while the template snapshot is created. Subsequent restarts are fast + (~2 s) because the snapshot is persisted on the PVC. +- **x86_64 only:** Firecracker and the guest kernel are x86_64. ARM nodes are + not supported.