Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions docker/local-emulator/qemu/build-image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,21 @@ qemu_cmd_prefix_for_arch() {
case "$arch" in
arm64)
local accel="tcg"
# Under TCG (software emulation, e.g. on an amd64 host or on an arm64 host
# without usable hvf/kvm), -cpu max advertises post-armv8.0 features (PAC,
# BTI, SVE, LSE atomics…) that V8 happily emits JIT code for, but QEMU TCG
# mistranslates some of those instructions and the node process crashes
# with SIGTRAP during migrations. Defaulting to cortex-a72 limits V8 to
# armv8.0-a, which TCG handles cleanly; the CPU model is raised to "max"
# only when hardware acceleration (hvf/kvm) is actually selected.
local cpu="cortex-a72"
if [ "$HOST_ARCH" = "arm64" ]; then
case "$HOST_OS" in
darwin) accel="hvf" ;;
linux) [ -w /dev/kvm ] && accel="kvm" ;;
darwin) accel="hvf"; cpu="max" ;;
linux) [ -w /dev/kvm ] && { accel="kvm"; cpu="max"; } ;;
esac
fi
local firmware
firmware="$(find_aarch64_firmware)"
echo "qemu-system-aarch64 -machine virt -accel $accel -cpu max -bios $firmware"
echo "qemu-system-aarch64 -machine virt -accel $accel -cpu $cpu -bios $firmware"
;;
amd64)
local accel="tcg"
Expand Down Expand Up @@ -254,6 +260,9 @@ build_one() {
mkdir -p "$bundle_dir"
cp "$bundle_tgz" "$bundle_dir/img.tgz"
cp "$BUILD_ENV_FILE" "$bundle_dir/build.env"
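# Tell the guest which arch the image is being built for. The guest skips
# the smoke test when this is arm64, because cross-arch (TCG) arm64 builds
# can't run it reliably under software emulation. Note that the guest keys
# off the target arch only, so natively accelerated arm64 builds skip the
# smoke test as well.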
printf 'STACK_EMULATOR_BUILD_ARCH=%s\n' "$arch" > "$bundle_dir/build-arch.env"
make_iso_from_dir "$bundle_iso" "STACKBUNDLE" "$bundle_dir"

: > "$serial_log"
Expand Down
120 changes: 78 additions & 42 deletions docker/local-emulator/qemu/cloud-init/emulator/user-data
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ write_files:
cp /mnt/stack-bundle/build.env /etc/stack-build.env
fi

# Copy per-arch build metadata (read later to skip the smoke test when the build arch is arm64)
if [ -f /mnt/stack-bundle/build-arch.env ]; then
cp /mnt/stack-bundle/build-arch.env /etc/stack-build-arch.env
fi

- path: /usr/local/bin/render-stack-env
permissions: '0755'
content: |
Expand Down Expand Up @@ -226,11 +231,26 @@ write_files:
log "init-services done (${elapsed}s)."

log "Running migrations..."
# Capture stdout+stderr so failures surface the actual node error in
# the host-visible provision log instead of being swallowed by the
# serial-only stream.
migrate_log="$(mktemp)"
set +e
docker exec \
--env-file /etc/stack-build.env \
--env-file /etc/stack-build-computed.env \
stack-build-init \
sh -c 'cd /app/apps/backend && node dist/db-migrations.mjs migrate && node dist/db-migrations.mjs seed'
sh -c 'cd /app/apps/backend && node dist/db-migrations.mjs migrate && node dist/db-migrations.mjs seed' \
> "$migrate_log" 2>&1
migrate_status=$?
set -e
if [ "$migrate_status" -ne 0 ]; then
log "MIGRATIONS FAILED (exit ${migrate_status}) — last 200 lines of migration output:"
tail -200 "$migrate_log" | while IFS= read -r line; do log "migrate: $line"; done || true
rm -f "$migrate_log"
exit "$migrate_status"
fi
rm -f "$migrate_log"
log "Migrations + seed complete."

log "Stopping deps container..."
Expand Down Expand Up @@ -258,52 +278,68 @@ write_files:
DOCKERFILE
log "Slim image built."

log "Running smoke test on slim image..."
docker run --rm --name smoke-test \
--network host \
--env-file /etc/stack-build.env \
--env-file /etc/stack-build-computed.env \
-e STACK_SKIP_MIGRATIONS=true \
-e STACK_SKIP_SEED_SCRIPT=true \
-e STACK_RUNTIME_WORK_DIR=/app \
-v stack-postgres-data:/data/postgres \
-v stack-redis-data:/data/redis \
-v stack-clickhouse-data:/data/clickhouse \
-v stack-minio-data:/data/minio \
-v stack-inbucket-data:/data/inbucket \
-d stack-local-emulator-slim

smoke_timeout=300
smoke_elapsed=0
smoke_passed=false
while [ "$smoke_elapsed" -lt "$smoke_timeout" ]; do
code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 http://127.0.0.1:8102/health?db=1 2>/dev/null || true)
if [ "$code" = "200" ]; then
smoke_passed=true
break
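# Determine build arch to decide whether to run the smoke test. Cross-arch
# (TCG) builds can't reliably run the Next.js backend inside the smoke
# test container: V8 JIT ↔ QEMU TCG mistranslations crash the process,
# and even with --jitless the backend is too slow to respond within any
# sane timeout. amd64 builds run under KVM and are unaffected.
# NOTE(review): the check below keys off the target arch alone, not the
# accelerator in use, so arm64 builds on a native arm64 host (hvf/kvm) skip
# the smoke test too — confirm this is intended.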
BUILD_ARCH=""
if [ -f /etc/stack-build-arch.env ]; then
# shellcheck disable=SC1091
. /etc/stack-build-arch.env
BUILD_ARCH="${STACK_EMULATOR_BUILD_ARCH:-}"
fi

if [ "$BUILD_ARCH" = "arm64" ]; then
log "Skipping smoke test: build arch is arm64 and cross-arch TCG can't reliably run the backend."
else
log "Running smoke test on slim image..."
docker run --rm --name smoke-test \
--network host \
--env-file /etc/stack-build.env \
--env-file /etc/stack-build-computed.env \
-e STACK_SKIP_MIGRATIONS=true \
-e STACK_SKIP_SEED_SCRIPT=true \
-e STACK_RUNTIME_WORK_DIR=/app \
-v stack-postgres-data:/data/postgres \
-v stack-redis-data:/data/redis \
-v stack-clickhouse-data:/data/clickhouse \
-v stack-minio-data:/data/minio \
-v stack-inbucket-data:/data/inbucket \
-d stack-local-emulator-slim

smoke_timeout=300
smoke_elapsed=0
smoke_passed=false
while [ "$smoke_elapsed" -lt "$smoke_timeout" ]; do
code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 http://127.0.0.1:8102/health?db=1 2>/dev/null || true)
if [ "$code" = "200" ]; then
smoke_passed=true
break
fi
sleep 2
smoke_elapsed=$((smoke_elapsed + 2))
done

if [ "$smoke_passed" = "false" ]; then
log "SMOKE TEST FAILED: backend /health?db=1 did not return 200 within ${smoke_timeout}s"
log "--- docker ps -a ---"
docker ps -a 2>&1 | while IFS= read -r line; do log "ps: $line"; done || true
log "--- smoke-test container logs (last 200 lines) ---"
docker logs --tail 200 smoke-test 2>&1 | while IFS= read -r line; do log "smoke-test: $line"; done || true
log "--- free -m ---"
free -m 2>&1 | while IFS= read -r line; do log "mem: $line"; done || true
log "--- curl -v /health?db=1 ---"
curl -v --max-time 5 http://127.0.0.1:8102/health?db=1 2>&1 | while IFS= read -r line; do log "curl: $line"; done || true
docker stop smoke-test 2>/dev/null || true
exit 1
fi
sleep 2
smoke_elapsed=$((smoke_elapsed + 2))
done

if [ "$smoke_passed" = "false" ]; then
log "SMOKE TEST FAILED: backend /health?db=1 did not return 200 within ${smoke_timeout}s"
log "--- docker ps -a ---"
docker ps -a 2>&1 | while IFS= read -r line; do log "ps: $line"; done || true
log "--- smoke-test container logs (last 200 lines) ---"
docker logs --tail 200 smoke-test 2>&1 | while IFS= read -r line; do log "smoke-test: $line"; done || true
log "--- free -m ---"
free -m 2>&1 | while IFS= read -r line; do log "mem: $line"; done || true
log "--- curl -v /health?db=1 ---"
curl -v --max-time 5 http://127.0.0.1:8102/health?db=1 2>&1 | while IFS= read -r line; do log "curl: $line"; done || true
docker stop smoke-test 2>/dev/null || true
exit 1
sleep 2
log "Smoke test passed (${smoke_elapsed}s)."
fi

docker stop smoke-test 2>/dev/null || true
sleep 2
log "Smoke test passed (${smoke_elapsed}s)."

log "Flattening image (docker export/import)..."
docker create --name flatten stack-local-emulator-slim /bin/true
docker export flatten | docker import \
Expand Down
Loading