Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions .github/workflows/qemu-emulator-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,23 @@ env:
jobs:
build:
name: Build QEMU Image (${{ matrix.arch }})
runs-on: ubicloud-standard-8
runs-on: ${{ matrix.runner }}
timeout-minutes: 120
strategy:
fail-fast: false
matrix:
include:
# amd64 runs natively under KVM on ubicloud's amd64 runner.
- arch: amd64
runner: ubicloud-standard-8
# arm64 runs under same-arch TCG on GitHub's native arm64 runner.
# No KVM (Azure Hyper-V doesn't expose nested virt on arm64) but
# same-arch TCG avoids the V8 JIT translation crashes that kill
# cross-arch TCG, and the smoke test is skipped on arm64 since
# the backend can't come up within any reasonable window under
# software emulation.
- arch: arm64
runner: ubuntu-24.04-arm

steps:
- uses: actions/checkout@v6
Expand All @@ -47,7 +56,20 @@ jobs:
- name: Install QEMU dependencies
run: |
sudo apt-get update
sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-utils genisoimage socat qemu-efi-aarch64
sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64

# Make /dev/kvm usable by the unprivileged runner user: install a udev rule
# that sets the kvm device to group "kvm" with mode 0666, reload the udev
# rules, and re-trigger the kvm device so the rule takes effect. A failing
# trigger is tolerated (`|| true`). The step never fails the job; it only
# prints whether /dev/kvm is writable or warns about the TCG fallback.
- name: Enable KVM access
run: |
echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' \
| sudo tee /etc/udev/rules.d/99-kvm4all.rules
sudo udevadm control --reload-rules
sudo udevadm trigger --name-match=kvm || true
ls -la /dev/kvm || echo "no /dev/kvm present"
if [ -w /dev/kvm ]; then
echo "KVM is writable — hardware acceleration will be used"
else
echo "WARNING: /dev/kvm is not writable — will fall back to TCG (very slow)"
fi

- name: Build QEMU image
run: |
Expand Down
37 changes: 28 additions & 9 deletions docker/local-emulator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,21 @@ RUN cp $(which qstash) /qstash-binary 2>/dev/null || \
{ echo "ERROR: qstash binary not found" >&2; exit 1; }


# ── Strip / compress service binaries (parallel stages) ──────────────────────

# Helper stage: gathers the service binaries under /out, strips symbols from
# all of them, and UPX-compresses every one except clickhouse. Other stages
# pick the results up with `COPY --from=upx-compress /out/<name> ...`.
FROM debian:trixie-slim AS upx-compress
# upx-ucl provides `upx`; binutils provides `strip`. The apt lists are removed
# in the same layer so they do not add to the layer size.
RUN apt-get update && apt-get install -y --no-install-recommends upx-ucl binutils && \
rm -rf /var/lib/apt/lists/*
# Collect each binary from its own source stage under a flat /out directory.
COPY --from=clickhouse-bin /usr/bin/clickhouse /out/clickhouse
COPY --from=svix-bin /usr/local/bin/svix-server /out/svix-server
COPY --from=minio-bin /usr/bin/minio /out/minio
COPY --from=mc-bin /usr/bin/mc /out/mc
COPY --from=qstash-bin /qstash-binary /out/qstash
# chmod u+w: presumably some copied binaries are read-only and strip/upx need
# to rewrite them in place — TODO confirm. clickhouse is passed to `strip`
# but is left out of the `upx -9` list (strip only).
RUN chmod u+w /out/* && \
strip --strip-all /out/clickhouse /out/minio /out/svix-server /out/mc /out/qstash && \
upx -9 /out/minio /out/svix-server /out/mc /out/qstash


# ── Final image ───────────────────────────────────────────────────────────────

FROM debian:trixie-slim
Expand Down Expand Up @@ -139,20 +154,20 @@ COPY --from=node-base /usr/local/bin/node /usr/local/bin/node
# Inbucket
COPY --from=inbucket-bin /opt/inbucket /opt/inbucket

# Svix
COPY --from=svix-bin /usr/local/bin/svix-server /usr/local/bin/svix-server
# Svix (UPX-compressed)
COPY --from=upx-compress /out/svix-server /usr/local/bin/svix-server

# ClickHouse
COPY --from=clickhouse-bin /usr/bin/clickhouse /usr/bin/clickhouse
# ClickHouse (stripped only)
COPY --from=upx-compress /out/clickhouse /usr/bin/clickhouse
RUN ln -sf /usr/bin/clickhouse /usr/bin/clickhouse-server && \
ln -sf /usr/bin/clickhouse /usr/bin/clickhouse-client

# MinIO
COPY --from=minio-bin /usr/bin/minio /usr/local/bin/minio
COPY --from=mc-bin /usr/bin/mc /usr/local/bin/mc
# MinIO (UPX-compressed)
COPY --from=upx-compress /out/minio /usr/local/bin/minio
COPY --from=upx-compress /out/mc /usr/local/bin/mc

# QStash
COPY --from=qstash-bin --chmod=755 /qstash-binary /usr/local/bin/qstash
# QStash (UPX-compressed)
COPY --from=upx-compress --chmod=755 /out/qstash /usr/local/bin/qstash

# App
WORKDIR /app
Expand All @@ -164,6 +179,10 @@ COPY --from=builder /app/apps/backend/node_modules ./apps/backend/node_modules
COPY --from=builder /app/apps/dashboard/.next/standalone ./
COPY --from=builder /app/apps/dashboard/.next/static ./apps/dashboard/.next/static
COPY --from=builder /app/apps/dashboard/public ./apps/dashboard/public
# Save the standalone-traced node_modules (runtime deps only) before the full
# migration-pruner copy overwrites it. The slim-docker-image step in the QEMU
# build restores this after migrations are baked in.
RUN cp -a /app/node_modules /app/node_modules.standalone 2>/dev/null || mkdir -p /app/node_modules.standalone
COPY --from=migration-pruner /pruned-node_modules ./node_modules
COPY --from=builder /app/packages ./packages

Expand Down
130 changes: 118 additions & 12 deletions docker/local-emulator/qemu/build-image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,29 @@ qemu_cmd_prefix_for_arch() {
case "$arch" in
arm64)
local accel="tcg"
local cpu="max"
if [ "$HOST_ARCH" = "arm64" ]; then
# Same-arch: prefer hardware acceleration and keep -cpu max. If no
# accelerator is available (e.g. Azure arm64 runners with no
# nested virt) we fall back to TCG. That is still fine here:
# same-arch TCG handles -cpu max correctly, and named CPU models
# have more TCG bugs than -cpu max does.
case "$HOST_OS" in
darwin) accel="hvf" ;;
linux) [ -w /dev/kvm ] && accel="kvm" ;;
esac
else
# Cross-arch TCG (amd64 host emulating arm64 guest): -cpu max
# advertises armv8.5+ features (PAC, BTI, SVE, LSE…) that V8
# emits JIT code for, but the host's TCG mistranslates some of
# those instructions across architectures and node crashes with
# SIGTRAP during migrations. Dropping to cortex-a72 limits V8
# to armv8.0-a which cross-arch TCG handles cleanly.
cpu="cortex-a72"
fi
local firmware
firmware="$(find_aarch64_firmware)"
echo "qemu-system-aarch64 -machine virt -accel $accel -cpu max -bios $firmware"
echo "qemu-system-aarch64 -machine virt -accel $accel -cpu $cpu -bios $firmware"
;;
amd64)
local accel="tcg"
Expand Down Expand Up @@ -176,6 +190,46 @@ prepare_bundle_artifacts() {
printf "%s" "$current_ids" > "$bundle_meta"
}

#######################################
# Report whether a provisioning marker has been emitted by the guest.
# Arguments:
#   $1 - path to the provision progress log
#   $2 - path to the serial console log
#   $3 - marker text; it must match one entire line exactly
# Returns:
#   0 if the marker is found in either log, 1 otherwise. Missing files and
#   read/tool errors are treated as "not found".
#######################################
contains_provision_marker() {
  local provision_log="$1"
  local serial_log="$2"
  local marker="$3"

  # `--` ends option parsing so a marker that begins with '-' is matched as
  # text instead of being interpreted as grep options.
  if [ -f "$provision_log" ] && grep -Fqx -- "$marker" "$provision_log" 2>/dev/null; then
    return 0
  fi

  # The serial log is filtered through `strings` so grep only sees runs of
  # printable characters, one per line (presumably because console output can
  # carry control bytes). `strings` feeds grep through process substitution
  # rather than a pipeline: `grep -q` exits at the first match, and in a
  # pipeline under `set -o pipefail` the resulting SIGPIPE in `strings` would
  # turn a successful match into a failure.
  if [ -f "$serial_log" ] &&
    grep -Fqx -- "$marker" < <(LC_ALL=C strings -a "$serial_log" 2>/dev/null) 2>/dev/null; then
    return 0
  fi

  return 1
}

#######################################
# Print the number of newline-terminated lines in a file.
# Arguments:
#   $1 - path to the file
# Outputs:
#   A non-negative integer on stdout. Prints 0 when the path is not a
#   regular file or when the count cannot be obtained as a plain number.
#######################################
line_count() {
  local file="$1"
  local n

  # Guard clause: anything that is not a regular file counts as zero lines.
  if [ ! -f "$file" ]; then
    printf '%s\n' 0
    return
  fi

  # Some `wc` implementations pad the number with blanks; strip them.
  n="$(wc -l < "$file" | tr -d '[:space:]')" || n=0

  # Fall back to 0 unless the result is a non-empty string of digits.
  case "$n" in
    *[!0-9]* | '') n=0 ;;
  esac

  printf '%s\n' "$n"
}

#######################################
# Copy the provisioning logs for one architecture into the image directory.
# Globals:
#   IMAGE_DIR (read) - destination directory
# Arguments:
#   $1 - architecture name, used in the destination file names
#   $2 - path to the serial console log
#   $3 - path to the provision progress log
# Returns:
#   Always 0; copy failures (e.g. a missing source file) are ignored.
#######################################
persist_provision_logs() {
  local arch="$1" serial_log="$2" provision_log="$3"
  local dest_prefix="$IMAGE_DIR/provision-emulator-${arch}"

  # Best effort: a log that cannot be copied must not abort the caller.
  cp "$serial_log" "${dest_prefix}.log" 2>/dev/null || :
  cp "$provision_log" "${dest_prefix}.progress.log" 2>/dev/null || :
}

build_one() {
local arch="$1"
local base_img="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2"
Expand All @@ -192,8 +246,12 @@ build_one() {
local bundle_iso="$tmp_dir/bundle.iso"
local bundle_dir="$tmp_dir/bundle"
local serial_log="$tmp_dir/serial.log"
local provision_log="$tmp_dir/provision.log"
local pidfile="$tmp_dir/qemu.pid"
local qemu_base pid elapsed
local qemu_base pid elapsed total_build_lines
local last_build_lines=0
local guest_exited=false
local guest_failed=false
local start_time=$SECONDS

cp "$base_img" "$tmp_img"
Expand All @@ -209,21 +267,28 @@ build_one() {

mkdir -p "$bundle_dir"
cp "$bundle_tgz" "$bundle_dir/img.tgz"
cp "$BUILD_ENV_FILE" "$bundle_dir/build.env"
# Tell the guest which arch it's being built for so cross-arch (TCG) builds
# can skip the smoke test, which isn't reliable under software emulation.
printf 'STACK_EMULATOR_BUILD_ARCH=%s\n' "$arch" > "$bundle_dir/build-arch.env"
make_iso_from_dir "$bundle_iso" "STACKBUNDLE" "$bundle_dir"

: > "$serial_log"
: > "$provision_log"
qemu_base="$(qemu_cmd_prefix_for_arch "$arch")"
log "QEMU command prefix (${arch}): $qemu_base"

# shellcheck disable=SC2086
$qemu_base \
-boot order=c \
-m "$RAM" \
-smp "$CPUS" \
-drive "file=$tmp_img,format=qcow2,if=virtio" \
-drive "file=$tmp_img,format=qcow2,if=virtio,discard=on,detect-zeroes=unmap" \
-drive "file=$seed_iso,format=raw,if=virtio,readonly=on" \
-drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \
-netdev user,id=net0 \
-device virtio-net-pci,netdev=net0 \
-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none" \
-serial "file:$serial_log" \
-display none \
-daemonize \
Expand All @@ -232,23 +297,62 @@ build_one() {
pid="$(cat "$pidfile")"
elapsed=0
while [ "$elapsed" -lt "$PROVISION_TIMEOUT" ]; do
if grep -q "STACK_CLOUD_INIT_DONE" "$serial_log" 2>/dev/null; then
if contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then
break
fi

if contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_FAILED"; then
guest_failed=true
break
fi

if [ -f "$provision_log" ]; then
total_build_lines="$(line_count "$provision_log")"
if [ "$total_build_lines" -gt "$last_build_lines" ]; then
echo ""
sed -n "$((last_build_lines + 1)),${total_build_lines}p" "$provision_log" 2>/dev/null | while IFS= read -r msg; do
if [ "$msg" = "STACK_CLOUD_INIT_DONE" ]; then
continue
fi
printf " [%3ds] %s\n" "$elapsed" "$msg"
done
last_build_lines="$total_build_lines"
fi
fi

if ! kill -0 "$pid" 2>/dev/null; then
guest_exited=true
break
fi

sleep 5
elapsed=$((SECONDS - start_time))
printf "\r [%3ds / %ds] provisioning emulator..." "$elapsed" "$PROVISION_TIMEOUT"
done
echo ""

if ! grep -q "STACK_CLOUD_INIT_DONE" "$serial_log" 2>/dev/null; then
err "Provisioning timed out for emulator (${arch})"
tail -50 "$serial_log" >&2 || true
if ! contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then
if [ "$guest_failed" = true ]; then
err "Guest provisioning reported failure for emulator (${arch})"
elif [ "$guest_exited" = true ]; then
err "Provisioning exited before completion for emulator (${arch})"
else
err "Provisioning timed out for emulator (${arch})"
fi

if [ -s "$provision_log" ]; then
tail -50 "$provision_log" >&2 || true
else
LC_ALL=C strings -a "$serial_log" 2>/dev/null | tail -50 >&2 || tail -50 "$serial_log" >&2 || true
fi

if kill -0 "$pid" 2>/dev/null; then
kill "$pid" 2>/dev/null || true
sleep 1
kill -9 "$pid" 2>/dev/null || true
fi

persist_provision_logs "$arch" "$serial_log" "$provision_log"
rm -rf "$tmp_dir"
exit 1
fi
Expand All @@ -266,19 +370,21 @@ build_one() {
kill -9 "$pid" 2>/dev/null || true
fi

cp "$tmp_img" "$final_img"
cp "$serial_log" "$IMAGE_DIR/provision-emulator-${arch}.log"
rm -rf "$tmp_dir"
persist_provision_logs "$arch" "$serial_log" "$provision_log"

log "Compressing final image (this may take several minutes)..."
qemu-img convert -p -O qcow2 -c "$final_img" "$final_img.tmp"
mv "$final_img.tmp" "$final_img"
qemu-img convert -p -O qcow2 -c "$tmp_img" "$final_img"
rm -rf "$tmp_dir"

local size
size="$(du -h "$final_img" | cut -f1)"
log "━━━ Emulator image ready: $final_img (${size}) ━━━"
}

log "Generating emulator build env file..."
node "$REPO_ROOT/docker/local-emulator/generate-env-development.mjs"
BUILD_ENV_FILE="$REPO_ROOT/docker/local-emulator/.env.development"

for arch in "${TARGET_ARCHS[@]}"; do
local_base="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2"
download_cloud_image "$arch" "$local_base"
Expand Down
Loading
Loading