From dbf0991fb743c49255162d4ce08262203cc4a7fb Mon Sep 17 00:00:00 2001 From: Test Date: Mon, 11 May 2026 10:40:08 -0500 Subject: [PATCH 1/3] fix(mac): gate native LiveKit bridge behind explicit profile --- docker-compose.mac.yml | 16 +++++++- src/package.json | 4 +- src/scripts/parallel-start.sh | 5 +++ src/workers/start-workers.sh | 67 ++++++++++++++++++++++++++++----- src/workers/workers-config.json | 10 +++++ 5 files changed, 89 insertions(+), 13 deletions(-) diff --git a/docker-compose.mac.yml b/docker-compose.mac.yml index 558c78268..63a54262d 100644 --- a/docker-compose.mac.yml +++ b/docker-compose.mac.yml @@ -24,7 +24,12 @@ # or localhost (since it's also on the host). # Support services → IN CONTAINERS (this file): # postgres, node-server, widget-server, -# livekit-bridge, model-init. +# model-init. +# livekit-bridge → NATIVE process only when +# CONTINUUM_LIVEKIT_NATIVE=1. It exposes a Unix +# socket consumed by native continuum-core, and +# Docker Desktop bind mounts cannot host Unix +# socket creation reliably (Linux errno 95). # # This override sets continuum-core's `deploy.replicas: 0` so `docker compose # up` on Mac brings up everything EXCEPT continuum-core. The install.sh @@ -56,6 +61,15 @@ services: deploy: replicas: 0 + # livekit-bridge also stays native on Mac. Containerizing it requires + # binding /root/.continuum/sockets/livekit-bridge.sock on a host-mounted + # ~/.continuum/sockets directory; Docker Desktop returns ENOTSUP for that + # socket bind. npm start launches the native binary only when + # CONTINUUM_LIVEKIT_NATIVE=1. + livekit-bridge: + deploy: + replicas: 0 + # node-server runs in Docker on Mac (containerized per Option B). # It needs to reach the NATIVE continuum-core-server on the host. 
# Unix sockets don't traverse Docker Desktop's VM boundary on Mac, so diff --git a/src/package.json b/src/package.json index 17bbdd6f1..e930fe03e 100644 --- a/src/package.json +++ b/src/package.json @@ -119,12 +119,12 @@ }, "scripts": { "setup:rust": "./scripts/setup-rust.sh", - "worker:build": "jq -c '.workers[] | select(.enabled != false)' workers/workers-config.json | while read -r w; do dir=$(echo $w | jq -r '.binary' | sed 's|/target/.*||'); echo \"Building $(echo $w | jq -r '.name')...\"; (cd $dir && cargo build --release --quiet); done", + "worker:build": "jq -c '.workers[] | select(.enabled != false) | select((.enabledEnv // \"\") as $e | $e == \"\" or ((env[$e] // \"\") | test(\"^(1|true|TRUE|yes|YES|on|ON)$\")))' workers/workers-config.json | while read -r w; do dir=$(echo $w | jq -r '.binary' | sed 's|/target/.*||'); echo \"Building $(echo $w | jq -r '.name')...\"; (cd $dir && cargo build --release --quiet); done", "worker:models": "./scripts/download-voice-models.sh", "worker:start": "./scripts/download-voice-models.sh && ./workers/start-workers.sh", "worker:stop": "./workers/stop-workers.sh", "worker:kill": "jq -r '.workers[].name' workers/workers-config.json | while read n; do pkill -f \"$n-worker\" || true; done; jq -r '.workers[].socket' workers/workers-config.json | xargs rm -f", - "worker:status": "jq -r '.workers[] | select(.enabled != false) | .name' workers/workers-config.json | while read n; do pgrep -f \"$n-worker\" > /dev/null && echo \"✅ $n-worker running\" || echo \"❌ $n-worker NOT running\"; done", + "worker:status": "jq -r '.workers[] | select(.enabled != false) | select((.enabledEnv // \"\") as $e | $e == \"\" or ((env[$e] // \"\") | test(\"^(1|true|TRUE|yes|YES|on|ON)$\"))) | .name' workers/workers-config.json | while read n; do pgrep -f \"$n-worker\" > /dev/null && echo \"✅ $n-worker running\" || echo \"❌ $n-worker NOT running\"; done", "worker:restart": "npm run worker:stop && npm run worker:start", 
"system:ensure": "npm run signal:check || (npm run system:start && npm run signal:wait)", "start": "bash scripts/parallel-start.sh", diff --git a/src/scripts/parallel-start.sh b/src/scripts/parallel-start.sh index 1c46e5a30..d841dd06f 100755 --- a/src/scripts/parallel-start.sh +++ b/src/scripts/parallel-start.sh @@ -234,6 +234,11 @@ build_pkg() { for pkg in archive-worker jtag-mcp; do build_pkg "$pkg" done +case "${CONTINUUM_LIVEKIT_NATIVE:-}" in + 1|true|TRUE|yes|YES|on|ON) + build_pkg livekit-bridge + ;; +esac # continuum-core: all GPU features (metal+accelerate on macOS, cuda on Linux) if [ -n "$GPU_FEAT" ]; then build_pkg continuum-core --features "$GPU_FEAT" diff --git a/src/workers/start-workers.sh b/src/workers/start-workers.sh index 5d9389ac4..cce8db880 100755 --- a/src/workers/start-workers.sh +++ b/src/workers/start-workers.sh @@ -19,6 +19,31 @@ resolve_path() { echo "$1" | sed "s|^\.continuum|$CONTINUUM_ROOT|" } +env_truthy() { + local name="$1" + local value="${!name:-}" + case "$value" in + 1|true|TRUE|yes|YES|on|ON) return 0 ;; + *) return 1 ;; + esac +} + +worker_enabled_for_runtime() { + local worker="$1" + local enabled_env + + if [ "$(echo "$worker" | jq -r '.enabled == false')" = "true" ]; then # jq's '//' also overrides false, so test the flag directly + return 1 + fi + + enabled_env=$(echo "$worker" | jq -r '.enabledEnv // empty') + if [ -n "$enabled_env" ] && ! 
env_truthy "$enabled_env"; then + return 1 + fi + + return 0 +} + # Memory limit helper - converts "8G" to bytes for ulimit parse_memory_limit() { local limit="$1" @@ -95,11 +120,13 @@ mkdir -p "$CONTINUUM_ROOT/jtag/logs/system/modules" mkdir -p "$CONTINUUM_ROOT/jtag/logs/system/daemons" mkdir -p "$CONTINUUM_ROOT/sockets" -# Start LiveKit SFU server (WebRTC media transport) -# Check brew first, then manual install location -LIVEKIT_BIN=$(command -v livekit-server 2>/dev/null || echo "$HOME/.continuum/bin/livekit-server") -LIVEKIT_LOG="$CONTINUUM_ROOT/jtag/logs/system/livekit-server.log" -if [ -x "$LIVEKIT_BIN" ] || command -v livekit-server &>/dev/null; then +# Start native LiveKit only when the native live profile is explicitly enabled. +# Default npm start stays text/chat-light; Docker live mode uses compose instead. +if env_truthy CONTINUUM_LIVEKIT_NATIVE; then + # Check brew first, then manual install location + LIVEKIT_BIN=$(command -v livekit-server 2>/dev/null || echo "$HOME/.continuum/bin/livekit-server") + LIVEKIT_LOG="$CONTINUUM_ROOT/jtag/logs/system/livekit-server.log" + if [ -x "$LIVEKIT_BIN" ] || command -v livekit-server &>/dev/null; then # Kill existing LiveKit server (SIGKILL for clean port release) pkill -9 -f "livekit-server" 2>/dev/null || true # Wait for UDP ports to be fully released (7880 TCP, 7881-7882 UDP) @@ -189,9 +216,12 @@ YAML fi sleep 0.5 done + else + echo -e "${RED}⚠️ LiveKit server not installed — voice/video calls will NOT work${NC}" + echo -e " Install with: ./scripts/install-livekit.sh" + fi else - echo -e "${RED}⚠️ LiveKit server not installed — voice/video calls will NOT work${NC}" - echo -e " Install with: ./scripts/install-livekit.sh" + echo -e "${YELLOW}⏭️ Native LiveKit disabled (set CONTINUUM_LIVEKIT_NATIVE=1 for live media)${NC}" fi # Build Rust workers — let cargo handle incremental compilation (it's smart enough) @@ -255,6 +285,15 @@ elif [ -f "/usr/local/lib/libonnxruntime.so" ]; then fi while read -r worker; do + if ! 
worker_enabled_for_runtime "$worker"; then + name=$(echo "$worker" | jq -r '.name') + enabled_env=$(echo "$worker" | jq -r '.enabledEnv // empty') + if [ -n "$enabled_env" ]; then + echo -e "${YELLOW}⏭️ Skipping ${name} (${enabled_env} not enabled)${NC}" + fi + continue + fi + name=$(echo "$worker" | jq -r '.name') binary=$(echo "$worker" | jq -r '.binary') socket=$(resolve_path "$(echo "$worker" | jq -r '.socket // empty')") @@ -354,13 +393,17 @@ while read -r worker; do done <<< "$preload_models" fi fi -done < <(jq -c '.workers[] | select(.enabled != false)' "$CONFIG_FILE") +done < <(jq -c '.workers[]' "$CONFIG_FILE") # Verify all enabled workers are running sleep 0.5 ALL_RUNNING=true while read -r worker; do + if ! worker_enabled_for_runtime "$worker"; then + continue + fi + name=$(echo "$worker" | jq -r '.name') binary_name=$(basename "$(echo "$worker" | jq -r '.binary')") worker_type=$(echo "$worker" | jq -r '.type // "socket"') @@ -377,13 +420,17 @@ while read -r worker; do ALL_RUNNING=false fi fi -done < <(jq -c '.workers[] | select(.enabled != false)' "$CONFIG_FILE") +done < <(jq -c '.workers[]' "$CONFIG_FILE") if [ "$ALL_RUNNING" = true ]; then echo -e "${GREEN}✅ All workers running successfully${NC}" # Show status while read -r worker; do + if ! 
worker_enabled_for_runtime "$worker"; then + continue + fi + name=$(echo "$worker" | jq -r '.name') binary_name=$(basename "$(echo "$worker" | jq -r '.binary')") socket=$(echo "$worker" | jq -r '.socket // empty') @@ -397,7 +444,7 @@ if [ "$ALL_RUNNING" = true ]; then pid=$(pgrep -f "$binary_name" | head -1) echo -e " ${name}: PID $pid ($socket)" fi - done < <(jq -c '.workers[] | select(.enabled != false)' "$CONFIG_FILE") + done < <(jq -c '.workers[]' "$CONFIG_FILE") exit 0 else echo -e "${RED}❌ One or more workers failed to start${NC}" diff --git a/src/workers/workers-config.json b/src/workers/workers-config.json index 4f0f6f63f..9c6681919 100644 --- a/src/workers/workers-config.json +++ b/src/workers/workers-config.json @@ -33,6 +33,16 @@ "args": [], "description": "Unified Rust runtime: Voice, Data, Embedding, Search, Logger modules", "enabled": true + }, + { + "name": "livekit-bridge", + "binary": "workers/target/release/livekit-bridge", + "socket": ".continuum/sockets/livekit-bridge.sock", + "args": [], + "description": "LiveKit WebRTC bridge for native continuum-core", + "enabled": true, + "enabledEnv": "CONTINUUM_LIVEKIT_NATIVE", + "memoryLimit": "2G" } ], "sharedSockets": [ From 0b52502377f283b1af243e5b38b1da8aadd771aa Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 13 May 2026 13:15:25 -0500 Subject: [PATCH 2/3] ci: preflight Carl smoke images --- .github/workflows/carl-install-smoke.yml | 38 ++++++++++++++++++++++-- install.sh | 7 ++++- scripts/verify-image-revisions.sh | 6 ++-- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/.github/workflows/carl-install-smoke.yml b/.github/workflows/carl-install-smoke.yml index 27c563935..402537fed 100644 --- a/.github/workflows/carl-install-smoke.yml +++ b/.github/workflows/carl-install-smoke.yml @@ -66,8 +66,27 @@ jobs: # githubusercontent.com wouldn't be the one in this PR. Same # rationale as docker-images.yml's ref pattern. 
ref: ${{ github.event.pull_request.head.sha || github.sha }} - # Smoke uses the local script directly; no need for full history. - fetch-depth: 1 + # Full history lets scripts/verify-image-revisions.sh decide whether + # an image revision mismatch is real staleness or a non-image PR + # riding on the existing canary image set. + fetch-depth: 0 + + - name: Detect image-relevant changes + id: image-paths + uses: dorny/paths-filter@v3 + with: + filters: | + image_relevant: + - 'src/**' + - 'src/workers/**/Cargo.toml' + - 'src/workers/**/Cargo.lock' + - 'docker/**' + - 'docker-compose*.yml' + - 'Dockerfile*' + - 'install.sh' + - 'src/scripts/install*.sh' + - 'src/scripts/download-voice-models.sh' + - 'src/scripts/download-models.sh' - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -90,6 +109,16 @@ jobs: - name: Login to ghcr.io (so install.sh can pull pre-built images) run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + - name: Verify Carl image set before install + env: + EXPECTED_SHA: ${{ github.event.pull_request.head.sha || inputs.install_ref || github.sha }} + # Canary PRs without image-relevant changes should exercise the PR's + # install script against the current canary image set. PRs that do + # change image bits must publish :pr-N first. Either way, verify + # before install.sh so CI never falls through to a long local build. + TAG: ${{ steps.image-paths.outputs.image_relevant == 'true' && github.event.pull_request.number && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }} + run: bash scripts/verify-image-revisions.sh + - name: Run carl-install smoke env: # PR HEAD sha so smoke fetches install.sh from THIS PR. @@ -111,11 +140,14 @@ jobs: # 25400718464). The conditional below makes manual triggers # default to the canary tag (the cadence we publish on) and lets # operators override via the image_tag input from the UI. 
- CONTINUUM_IMAGE_TAG: ${{ github.event.pull_request.number && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }} + CONTINUUM_IMAGE_TAG: ${{ steps.image-paths.outputs.image_relevant == 'true' && github.event.pull_request.number && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }} # 25-min cap on the docker-only install. Hybrid (Mac source-build) # path would exceed this — by design, that's the gate firing on # the README/install mismatch. CARL_INSTALL_TIMEOUT_SEC: '1500' + # CI is a verifier, not an image builder. If the selected tag is + # missing, fail with the image-push fix instead of compiling Rust. + CONTINUUM_REQUIRE_PREBUILT_IMAGES: '1' # Generous health wait — model-init can take 3-5min on cold pull. CARL_HEALTH_TIMEOUT_SEC: '300' # Cold persona load on no-GPU CI runner (Linux ubuntu-latest, no diff --git a/install.sh b/install.sh index 4e1e3199d..915a28edd 100644 --- a/install.sh +++ b/install.sh @@ -942,7 +942,12 @@ EFFECTIVE_IMAGE_TAG="${CONTINUUM_IMAGE_TAG:-latest}" } > "$INSTALL_DIR/.env" info "Pulling container images (tag: $EFFECTIVE_IMAGE_TAG)..." -$CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>/dev/null || warn "Some images not published yet — will build locally" +if ! $CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>/dev/null; then + if [[ "${CONTINUUM_REQUIRE_PREBUILT_IMAGES:-0}" == "1" ]]; then + fail "Required prebuilt images for tag '$EFFECTIVE_IMAGE_TAG' are missing or not pullable. Run scripts/push-current-arch.sh on the required dev host, then retry." + fi + warn "Some images not published yet — will build locally" +fi # ── 8. 
Start support services ────────────────────────────── PHASE="start support services" diff --git a/scripts/verify-image-revisions.sh b/scripts/verify-image-revisions.sh index 8e44491f1..527f42dc8 100755 --- a/scripts/verify-image-revisions.sh +++ b/scripts/verify-image-revisions.sh @@ -37,7 +37,7 @@ # # Exit codes: # 0 = no amd64 stale (arm64 stale OK — warning-only until #965 lands) -# 1 = amd64 stale on at least one image +# 1 = amd64 stale/missing on at least one image # 2 = usage / pre-flight error set -uo pipefail @@ -170,7 +170,9 @@ for IMAGE in "${IMAGE_ARRAY[@]}"; do ' 2>/dev/null) if [[ -z "$ARCH_LIST" ]]; then - echo " ⚠️ No manifest entries — image may not exist yet at this tag" + echo " ❌ amd64: MISSING — image does not exist at this tag" + echo "$REF" >> "$STALE_AMD64_OUT" + FAILED=1 continue fi From 9c65d9daddc0cfaf806f91cf5a2e37a64d5f21b1 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 13 May 2026 13:20:26 -0500 Subject: [PATCH 3/3] ci: detect image context in pre-push --- src/scripts/git-prepush.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/scripts/git-prepush.sh b/src/scripts/git-prepush.sh index 441dafaee..f6ea657ee 100755 --- a/src/scripts/git-prepush.sh +++ b/src/scripts/git-prepush.sh @@ -65,6 +65,11 @@ if echo "$CHANGED_FILES" | grep -qE "^(src/workers/|docker/|src/shared/generated RUST_RELEVANT=1 fi +DOCKER_RELEVANT=0 +if echo "$CHANGED_FILES" | grep -qE "^(src/|docker/|docker-compose.*\.yml$|Dockerfile|src/scripts/download-(voice-)?models\.sh$|src/scripts/install.*\.sh$)"; then + DOCKER_RELEVANT=1 +fi + # Phase 1: TypeScript compilation (<15s) echo "" echo "📋 Phase 1: TypeScript compilation" @@ -199,8 +204,6 @@ echo "📋 Phase 4: Native-arch Docker images (if Rust/docker changed)" echo "---------------------------------------------------------------" DOCKER_PUSH_START=$(date +%s) -DOCKER_RELEVANT="$RUST_RELEVANT" - if [ "$DOCKER_RELEVANT" -eq 0 ]; then echo "⏭️ No Rust/docker changes in this push — skipping 
native-arch build." elif [ ! -x "$REPO_ROOT/scripts/push-current-arch.sh" ]; then