Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# syntax=docker/dockerfile:1
# Zeroboot server image
# Multi-stage build: compile Rust binary, then assemble minimal runtime image.
#
# Usage:
#   docker build -t zeroboot:latest .
#
# The image does NOT bundle vmlinux or rootfs — mount them via PersistentVolume.
# See deploy/k8s/ for Kubernetes manifests.

# ─── Stage 1: Build zeroboot binary ──────────────────────────────────────────
FROM rust:1.86-bookworm AS builder

WORKDIR /build

# Cache dependencies separately from source: compile a dummy main against the
# lockfile so the deps layer is reused, then delete the stale dummy binary and
# its dep artifacts so the real build below is forced to relink zeroboot.
COPY Cargo.toml Cargo.lock ./
RUN mkdir src && echo 'fn main(){}' > src/main.rs && \
    cargo build --release && \
    rm -f target/release/zeroboot target/release/deps/zeroboot*

# Build actual source
COPY src/ src/
COPY guest/ guest/
RUN cargo build --release

# ─── Stage 2: Runtime image ───────────────────────────────────────────────────
FROM ubuntu:22.04

# Runtime dependencies only. DEBIAN_FRONTEND is set per-RUN rather than via
# ENV so the noninteractive setting does not leak into the running container.
RUN DEBIAN_FRONTEND=noninteractive apt-get update -qq && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install Firecracker (pinned release).
# NOTE(review): the tarball is fetched without checksum/signature verification —
# consider pinning a sha256 and verifying it here for supply-chain safety.
ARG FC_VERSION=v1.15.0
RUN curl -fsSL -o /tmp/fc.tgz \
      "https://github.com/firecracker-microvm/firecracker/releases/download/${FC_VERSION}/firecracker-${FC_VERSION}-x86_64.tgz" && \
    tar -xzf /tmp/fc.tgz -C /tmp && \
    mv "/tmp/release-${FC_VERSION}-x86_64/firecracker-${FC_VERSION}-x86_64" /usr/local/bin/firecracker && \
    chmod +x /usr/local/bin/firecracker && \
    rm -rf /tmp/fc.tgz /tmp/release-*

# Copy zeroboot binary
COPY --from=builder /build/target/release/zeroboot /usr/local/bin/zeroboot

# Data directory — mount a PersistentVolume here to persist snapshots
VOLUME ["/var/lib/zeroboot"]

# Copy entrypoint with the execute bit set at COPY time; a follow-up
# `RUN chmod` would duplicate the file's bytes in an extra layer.
COPY --chmod=0755 docker/entrypoint.sh /entrypoint.sh

EXPOSE 8080

ENTRYPOINT ["/entrypoint.sh"]
37 changes: 37 additions & 0 deletions deploy/eks/eks-add-kvm-nodegroup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Scenario 2b: Add KVM node group to an EXISTING EKS cluster
# Usage:
#   eksctl create nodegroup -f eks-add-kvm-nodegroup.yaml

apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig

metadata:
  name: zeroboot-eks        # must match existing cluster name
  region: ap-southeast-1    # must match existing cluster region

managedNodeGroups:
  - name: zeroboot-kvm
    instanceType: c8i.xlarge
    minSize: 1
    maxSize: 5
    desiredCapacity: 2
    amiFamily: AmazonLinux2023
    volumeSize: 50              # root volume size in GiB
    privateNetworking: true     # nodes get no public IPs; place in private subnets

    # NOTE(review): the companion script deploy/eks/eks-self-managed-kvm.sh
    # states that EKS *managed* node groups silently drop CpuOptions when
    # generating their internal Launch Template. Confirm that nested
    # virtualization actually takes effect via this managed-node-group path
    # (check for /dev/kvm on a node) — otherwise use the self-managed script.
    cpuOptions:
      nestedVirtualization: enabled

    # Labels used by zeroboot's nodeSelector / scheduling (see deploy/k8s/).
    labels:
      kvm-capable: "true"
      workload: zeroboot

    iam:
      attachPolicyARNs:
        - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy
        - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly
        - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy

    tags:
      Project: zeroboot
      ManagedBy: eksctl
28 changes: 28 additions & 0 deletions deploy/eks/eks-cluster-only.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Scenario 2a: Create EKS cluster WITHOUT any node group
# (node groups are added later — see eks-add-kvm-nodegroup.yaml or
# eks-self-managed-kvm.sh)
# Usage:
#   eksctl create cluster -f eks-cluster-only.yaml

apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig

metadata:
  name: zeroboot-eks
  region: ap-southeast-1
  version: "1.31"            # Kubernetes control-plane version

vpc:
  clusterEndpoints:
    privateAccess: true      # API reachable from inside the VPC
    publicAccess: true       # ...and from the internet (kubectl from workstations)

# Explicitly no node groups at cluster creation time
managedNodeGroups: []

# Create the IAM OIDC provider (used for IAM roles for service accounts,
# e.g. by the EBS CSI driver addon below).
iam:
  withOIDC: true

addons:
  - name: aws-ebs-csi-driver
    version: latest          # NOTE(review): "latest" is not reproducible — consider pinning
    attachPolicyARNs:
      - arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy
243 changes: 243 additions & 0 deletions deploy/eks/eks-self-managed-kvm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
#!/usr/bin/env bash
# deploy/eks/eks-self-managed-kvm.sh
#
# Provisions a self-managed EKS node group whose instances boot with nested
# virtualization enabled.
#
# Why self-managed? EKS Managed Node Groups drop CpuOptions when they
# generate their internal Launch Template — even when the caller's own LT
# supplies it. Driving an ASG + Launch Template directly bypasses EKS's
# template generation, so CpuOptions (including NestedVirtualization=enabled)
# reaches the instances intact.
#
# Usage:
#   export AWS_PROFILE=your-profile
#   export CLUSTER_NAME=zeroboot-eks
#   export REGION=ap-southeast-1
#   bash eks-self-managed-kvm.sh
#
# Requires: aws cli v2, kubectl pointed at the target cluster, and eksctl
# (only for the cluster-only bootstrap — see eks-cluster-only.yaml).
#
# Steps performed:
#   1. IAM role + instance profile for worker nodes
#   2. EKS access entry registering the node role
#   3. Cluster parameter lookup (endpoint, CA, subnets, security groups)
#   4. Latest EKS-optimized AL2023 AMI lookup
#   5. Launch Template with CpuOptions.NestedVirtualization=enabled
#   6. Auto Scaling Group (defaults: min 1 / max 4 / desired 2)
#   7. /dev/kvm smoke test on the new nodes

set -euo pipefail

# Every knob is overridable from the environment; each falls back to the
# project default when unset or empty.
CLUSTER_NAME=${CLUSTER_NAME:-zeroboot-eks}
REGION=${REGION:-ap-southeast-1}
INSTANCE_TYPE=${INSTANCE_TYPE:-c8i.xlarge}
K8S_VERSION=${K8S_VERSION:-1.31}
MIN_SIZE=${MIN_SIZE:-1}
MAX_SIZE=${MAX_SIZE:-4}
DESIRED=${DESIRED:-2}
NODE_ROLE_NAME=${NODE_ROLE_NAME:-zeroboot-eks-node-role}
INSTANCE_PROFILE_NAME=${INSTANCE_PROFILE_NAME:-zeroboot-eks-node-profile}
LT_NAME=${LT_NAME:-zeroboot-kvm-nested-virt}
ASG_NAME=${ASG_NAME:-zeroboot-kvm-self-managed}

echo "==> Fetching cluster info..."
ENDPOINT=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \
--query "cluster.endpoint" --output text)
CERT_AUTH=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \
--query "cluster.certificateAuthority.data" --output text)
CIDR=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \
--query "cluster.kubernetesNetworkConfig.serviceIpv4Cidr" --output text)
CLUSTER_SG=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \
--query "cluster.resourcesVpcConfig.clusterSecurityGroupId" --output text)
SUBNETS=$(aws eks describe-cluster --name "$CLUSTER_NAME" --region "$REGION" \
--query "cluster.resourcesVpcConfig.subnetIds" --output text | tr '\t' ',')
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

echo " Cluster: $CLUSTER_NAME"
echo " Region: $REGION"
echo " Account: $ACCOUNT_ID"
echo " ClusterSG: $CLUSTER_SG"
echo " Subnets: $SUBNETS"

# ─── Step 1: IAM role ─────────────────────────────────────────────────────────
echo ""
echo "==> Creating IAM node role: $NODE_ROLE_NAME"

# Idempotent: reuse the role if a previous run already created it.
if aws iam get-role --role-name "$NODE_ROLE_NAME" &>/dev/null; then
  echo " Role already exists, skipping."
else
  # Trust policy allowing EC2 instances to assume the role.
  aws iam create-role \
    --role-name "$NODE_ROLE_NAME" \
    --assume-role-policy-document '{
"Version":"2012-10-17",
"Statement":[{"Effect":"Allow","Principal":{"Service":"ec2.amazonaws.com"},"Action":"sts:AssumeRole"}]
}' > /dev/null

  # Standard EKS worker-node managed policies: node join, VPC CNI, ECR pulls.
  for POLICY in AmazonEKSWorkerNodePolicy AmazonEKS_CNI_Policy AmazonEC2ContainerRegistryReadOnly; do
    aws iam attach-role-policy \
      --role-name "$NODE_ROLE_NAME" \
      --policy-arn "arn:aws:iam::aws:policy/${POLICY}"
  done
  echo " Role created."
fi

# Instance profile — the EC2-side wrapper that binds the role to instances.
if aws iam get-instance-profile --instance-profile-name "$INSTANCE_PROFILE_NAME" &>/dev/null; then
  echo " Instance profile already exists, skipping."
else
  aws iam create-instance-profile --instance-profile-name "$INSTANCE_PROFILE_NAME" > /dev/null
  aws iam add-role-to-instance-profile \
    --instance-profile-name "$INSTANCE_PROFILE_NAME" \
    --role-name "$NODE_ROLE_NAME"
  # IAM is eventually consistent — give the new profile time to propagate
  # before the Launch Template below references its ARN.
  echo " Instance profile created. Waiting 15s for IAM propagation..."
  sleep 15
fi

NODE_ROLE_ARN="arn:aws:iam::${ACCOUNT_ID}:role/${NODE_ROLE_NAME}"
INSTANCE_PROFILE_ARN="arn:aws:iam::${ACCOUNT_ID}:instance-profile/${INSTANCE_PROFILE_NAME}"

# ─── Step 2: EKS access entry ────────────────────────────────────────────────
echo ""
echo "==> Registering node role with EKS cluster..."
# Register the node role so kubelets using it can join the cluster.
# Distinguish "already registered" (ResourceInUseException — benign, rerun
# case) from real failures; the previous `2>/dev/null || echo` pattern masked
# permission and validation errors as "already exists".
if ! ACCESS_OUT=$(aws eks create-access-entry \
  --cluster-name "$CLUSTER_NAME" \
  --principal-arn "$NODE_ROLE_ARN" \
  --type EC2_LINUX \
  --region "$REGION" 2>&1); then
  if grep -q "ResourceInUseException" <<< "$ACCESS_OUT"; then
    echo " Access entry already exists."
  else
    echo "$ACCESS_OUT" >&2
    exit 1
  fi
fi

# ─── Step 3: AMI ─────────────────────────────────────────────────────────────
echo ""
echo "==> Fetching latest EKS-optimized AMI (AL2023, K8s ${K8S_VERSION})..."
# AWS publishes the recommended EKS-optimized AMI ID as a public SSM
# parameter, keyed by Kubernetes minor version and architecture.
AMI_ID=$(aws ssm get-parameter \
  --name "/aws/service/eks/optimized-ami/${K8S_VERSION}/amazon-linux-2023/x86_64/standard/recommended/image_id" \
  --region "$REGION" --query "Parameter.Value" --output text)
echo " AMI: $AMI_ID"

# ─── Step 4: UserData (AL2023 nodeadm format) ────────────────────────────────
echo ""
echo "==> Preparing UserData..."
# MIME multipart document wrapping a nodeadm NodeConfig. The heredoc is
# intentionally unquoted so the cluster parameters fetched above (ENDPOINT,
# CERT_AUTH, CIDR, CLUSTER_NAME) expand in place.
USERDATA=$(cat << EOF
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="//"

--//
Content-Type: application/node.eks.aws

---
apiVersion: node.eks.aws/v1alpha1
kind: NodeConfig
spec:
  cluster:
    apiServerEndpoint: ${ENDPOINT}
    certificateAuthority: ${CERT_AUTH}
    cidr: ${CIDR}
    name: ${CLUSTER_NAME}
  kubelet:
    config:
      maxPods: 110
    flags:
      - "--node-labels=kvm-capable=true,workload=zeroboot"

--//--
EOF
)
# `base64 -w 0` is GNU-coreutils-only; BSD/macOS base64 has no -w flag and
# the script would die there under `set -e`. Piping through tr strips the
# line wraps portably on both.
USERDATA_B64=$(echo "$USERDATA" | base64 | tr -d '\n')

# ─── Step 5: Launch Template ──────────────────────────────────────────────────
echo ""
echo "==> Creating Launch Template: $LT_NAME"
echo " (CpuOptions.NestedVirtualization=enabled — this is the key field that"
echo " EKS managed node groups silently drop)"

# JSON request body; unquoted heredoc so AMI / SG / instance-profile /
# UserData variables expand in place.
LT_DATA=$(cat << EOF
{
  "ImageId": "${AMI_ID}",
  "InstanceType": "${INSTANCE_TYPE}",
  "CpuOptions": {"NestedVirtualization": "enabled"},
  "SecurityGroupIds": ["${CLUSTER_SG}"],
  "MetadataOptions": {"HttpTokens": "required", "HttpPutResponseHopLimit": 2},
  "IamInstanceProfile": {"Arn": "${INSTANCE_PROFILE_ARN}"},
  "UserData": "${USERDATA_B64}",
  "TagSpecifications": [{
    "ResourceType": "instance",
    "Tags": [
      {"Key": "Name", "Value": "zeroboot-kvm-node"},
      {"Key": "kubernetes.io/cluster/${CLUSTER_NAME}", "Value": "owned"},
      {"Key": "kvm-capable", "Value": "true"}
    ]
  }]
}
EOF
)

# Create the LT if it does not exist yet, then resolve the ID and latest
# version with describe calls. This replaces the original python3 JSON
# parsing (an undocumented dependency that also printed the literal string
# "None" when the expected key was missing) with plain --query lookups.
if ! aws ec2 create-launch-template \
    --launch-template-name "$LT_NAME" \
    --region "$REGION" \
    --launch-template-data "$LT_DATA" \
    --output json > /dev/null 2>&1; then
  echo " Launch template already exists (or creation was rejected); reusing existing."
fi

LT_ID=$(aws ec2 describe-launch-templates \
  --launch-template-names "$LT_NAME" \
  --region "$REGION" \
  --query "LaunchTemplates[0].LaunchTemplateId" --output text)
LT_VERSION=$(aws ec2 describe-launch-template-versions \
  --launch-template-id "$LT_ID" --region "$REGION" \
  --query "LaunchTemplateVersions[-1].VersionNumber" --output text)

echo " LT ID: $LT_ID"
echo " LT Version: $LT_VERSION"

# ─── Step 6: Auto Scaling Group ───────────────────────────────────────────────
echo ""
echo "==> Creating Auto Scaling Group: $ASG_NAME"
aws autoscaling create-auto-scaling-group \
  --auto-scaling-group-name "$ASG_NAME" \
  --launch-template "LaunchTemplateId=${LT_ID},Version=${LT_VERSION}" \
  --min-size "$MIN_SIZE" \
  --max-size "$MAX_SIZE" \
  --desired-capacity "$DESIRED" \
  --vpc-zone-identifier "$SUBNETS" \
  --tags \
    "Key=Name,Value=zeroboot-kvm-node,PropagateAtLaunch=true" \
    "Key=kubernetes.io/cluster/${CLUSTER_NAME},Value=owned,PropagateAtLaunch=true" \
    "Key=kvm-capable,Value=true,PropagateAtLaunch=true" \
  --region "$REGION" 2>/dev/null || echo " ASG already exists."

echo " ASG created. Waiting for nodes to join (up to 3 minutes)..."
# Poll for a Ready node instead of the original flat `sleep 60`, which
# neither matched the advertised 3-minute budget nor returned early once a
# node had joined. 18 × 10s = 180s worst case.
for _ in $(seq 1 18); do
  if kubectl get nodes -l kvm-capable=true --no-headers 2>/dev/null | grep -q " Ready"; then
    break
  fi
  sleep 10
done

# ─── Step 7: Verify ───────────────────────────────────────────────────────────
echo ""
echo "==> Verifying nodes..."
# Best-effort: kubectl may not be configured in the shell running this script.
kubectl get nodes -l kvm-capable=true 2>/dev/null || echo " (kubectl not configured or nodes not yet ready)"

echo ""
echo "==> Testing /dev/kvm access..."
# One-shot privileged pod pinned to the new nodes via nodeSelector. Inside it:
# list /dev/kvm, count vmx flags in /proc/cpuinfo, and read the kvm_intel
# nested parameter (prints N/A when the module path is absent).
kubectl run kvm-verify --restart=Never \
  --image=amazonlinux:2023 \
  --overrides='{"spec":{"nodeSelector":{"kvm-capable":"true"},"containers":[{"name":"c","image":"amazonlinux:2023","command":["sh","-c","ls -la /dev/kvm && grep -c vmx /proc/cpuinfo && cat /sys/module/kvm_intel/parameters/nested 2>/dev/null || echo N/A"],"securityContext":{"privileged":true}}]}}' \
  2>/dev/null || true

# Fixed grace period for image pull + pod start; logs retrieval below is
# best-effort and tells the operator how to check manually if it loses the race.
echo " Waiting 30s for pod to start..."
sleep 30
kubectl logs kvm-verify 2>/dev/null || echo " Pod not ready yet, check manually: kubectl logs kvm-verify"
kubectl delete pod kvm-verify --ignore-not-found 2>/dev/null

echo ""
echo "==> Done! Self-managed node group with nested virtualization created."
echo " - Launch Template: $LT_ID (v${LT_VERSION}) — CpuOptions.NestedVirtualization=enabled"
echo " - ASG: $ASG_NAME"
echo " - Node label: kvm-capable=true (already set via --node-labels in userdata)"
echo ""
echo " Next: Deploy zeroboot using deploy/k8s/"
echo " kubectl apply -f deploy/k8s/namespace.yaml"
echo " kubectl apply -f deploy/k8s/"
Loading