From 51165d1492b93b486e4e6bb8559442419bf68536 Mon Sep 17 00:00:00 2001 From: Test Date: Sun, 3 May 2026 12:11:17 -0500 Subject: [PATCH] fix(mac): run livekit bridge natively --- .github/workflows/carl-install-smoke.yml | 4 + docker-compose.mac.yml | 14 ++- docker-compose.yml | 4 +- install.sh | 104 ++++++++++++++++++++++- scripts/ci/carl-install-smoke.sh | 9 ++ src/scripts/parallel-start.sh | 2 +- src/workers/workers-config.json | 9 ++ 7 files changed, 139 insertions(+), 7 deletions(-) diff --git a/.github/workflows/carl-install-smoke.yml b/.github/workflows/carl-install-smoke.yml index d93e0bc76..cbd547697 100644 --- a/.github/workflows/carl-install-smoke.yml +++ b/.github/workflows/carl-install-smoke.yml @@ -77,6 +77,10 @@ jobs: # THIS PR (not main). Falls back to manual workflow_dispatch input # when not in a PR context. CARL_INSTALL_REF: ${{ github.event.pull_request.head.sha || inputs.install_ref || github.sha }} + # Canary is the integration sync point. A canary PR/push must test + # the image set that agents are actually validating from canary, + # not stale :latest images from main. + CONTINUUM_IMAGE_TAG: ${{ (github.event.pull_request.base.ref == 'canary' || github.ref_name == 'canary') && 'canary' || 'latest' }} # 25-min cap on the docker-only install. Hybrid (Mac source-build) # path would exceed this — by design, that's the gate firing on # the README/install mismatch. diff --git a/docker-compose.mac.yml b/docker-compose.mac.yml index 558c78268..37677a286 100644 --- a/docker-compose.mac.yml +++ b/docker-compose.mac.yml @@ -24,7 +24,11 @@ # or localhost (since it's also on the host). # Support services → IN CONTAINERS (this file): # postgres, node-server, widget-server, -# livekit-bridge, model-init. +# model-init. +# livekit-bridge → NATIVE process on host. It exposes a Unix +# socket consumed by native continuum-core, and +# Docker Desktop bind mounts cannot host Unix +# socket creation reliably (Linux errno 95). 
# # This override sets continuum-core's `deploy.replicas: 0` so `docker compose # up` on Mac brings up everything EXCEPT continuum-core. The install.sh @@ -56,6 +60,14 @@ services: deploy: replicas: 0 + # livekit-bridge also stays native on Mac. Containerizing it requires + # binding /root/.continuum/sockets/livekit-bridge.sock on a host-mounted + # ~/.continuum/sockets directory; Docker Desktop returns ENOTSUP for that + # socket bind. npm start launches the native binary via workers-config.json. + livekit-bridge: + deploy: + replicas: 0 + # node-server runs in Docker on Mac (containerized per Option B). # It needs to reach the NATIVE continuum-core-server on the host. # Unix sockets don't traverse Docker Desktop's VM boundary on Mac, so diff --git a/docker-compose.yml b/docker-compose.yml index 2a4a99085..4310ba074 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -78,7 +78,7 @@ services: # --no-default-features excludes livekit-webrtc (handled by livekit-bridge). # load-dynamic-ort loads ONNX Runtime as shared lib (runtime discovery). GPU_FEATURES: "--no-default-features --features load-dynamic-ort" - image: ghcr.io/cambriantech/continuum-core:${CONTINUUM_IMAGE_TAG:-latest} + image: ghcr.io/cambriantech/continuum-core:${CONTINUUM_CORE_IMAGE_TAG:-latest} restart: unless-stopped # Sized for mission: Qwen 4-8B Q4 + KV cache for 5 personas + embeddings # + Bevy render + vision + audio. Auto-calculated by install.sh from host @@ -138,7 +138,7 @@ services: build: context: ./src/workers dockerfile: ../../docker/livekit-bridge.Dockerfile - image: ghcr.io/cambriantech/continuum-livekit-bridge:${CONTINUUM_IMAGE_TAG:-latest} + image: ghcr.io/cambriantech/continuum-livekit-bridge:${CONTINUUM_LIVEKIT_BRIDGE_IMAGE_TAG:-latest} restart: unless-stopped # WebRTC encode/decode buffers + multi-stream. Scales with host RAM — # install.sh sets LIVEKIT_BRIDGE_MEM to max(2, host_gb/8). 
Default 2g diff --git a/install.sh b/install.sh index 2bcf8dd5f..d30c55966 100644 --- a/install.sh +++ b/install.sh @@ -850,9 +850,16 @@ fi # ── 7. Pull support-service images ───────────────────────── PHASE="pull images" -# Image tag resolution: compose files honor ${CONTINUUM_IMAGE_TAG:-latest}. +# Image tag resolution: compose files honor ${CONTINUUM_IMAGE_TAG:-latest} +# for the light TypeScript/downloader services. Heavy Rust services are +# deliberately split: continuum-core honors ${CONTINUUM_CORE_IMAGE_TAG:-latest} +# and livekit-bridge honors ${CONTINUUM_LIVEKIT_BRIDGE_IMAGE_TAG:-latest}. +# Canary publishes light images more frequently than the heavy Rust images; +# splitting the tags lets canary installs test fresh node/widget code without +# falling back to multi-hour local Rust image builds. +# # Main-branch installs (Carl's default) use :latest. Reviewers validating -# a PR before merge can pin the PR's staged image set: +# a PR before merge can pin the PR's staged light-image set: -# CONTINUUM_IMAGE_TAG=pr-891 curl -fsSL install.sh | bash +# curl -fsSL install.sh | CONTINUUM_IMAGE_TAG=pr-891 bash # CI tags every PR build with pr-<N> (see .github/workflows/docker-images.yml). # Merging to main promotes that image set to :latest, so main and :latest @@ -861,7 +868,7 @@ PHASE="pull images" # On Mac: `continuum-core` is not pulled (replicas=0 in docker-compose.mac.yml); -# only support services (postgres, node-server, widget-server, livekit-bridge, -# model-init) are pulled. continuum-core runs natively from `npm start` below. +# only support services (postgres, node-server, widget-server, model-init) are +# pulled. continuum-core and livekit-bridge run natively from `npm start` below. -info "Pulling container images (tag: ${CONTINUUM_IMAGE_TAG:-latest})..." +info "Pulling container images (light=${CONTINUUM_IMAGE_TAG:-latest}, core=${CONTINUUM_CORE_IMAGE_TAG:-latest}, livekit-bridge=${CONTINUUM_LIVEKIT_BRIDGE_IMAGE_TAG:-latest})..." $CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>/dev/null || warn "Some images not published yet — will build locally" # ── 8. 
Start support services ────────────────────────────── @@ -879,6 +886,36 @@ fi info "Starting support services..." $CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS up -d +# Some published continuum-core images may predate the in-binary socket chmod +# fix. On Linux installs the host-side jtag CLI connects to the bind-mounted +# core socket, so widen its permissions from the host until every architecture's heavy core image has been refreshed. +# Returns 1 while the sockets directory or the socket is missing (callers retry); a failed chmod only warns. +fix_core_socket_permissions() { + local socket_dir="$CONTINUUM_DATA/sockets" + local core_socket="$socket_dir/continuum-core.sock" + + [ -d "$socket_dir" ] || return 1 + + chmod 755 "$socket_dir" 2>/dev/null \ + || sudo -n chmod 755 "$socket_dir" 2>/dev/null \ + || warn "Could not chmod $socket_dir; host jtag may get EACCES" + + [ -S "$core_socket" ] || return 1 + + chmod 666 "$core_socket" 2>/dev/null \ + || sudo -n chmod 666 "$core_socket" 2>/dev/null \ + || warn "Could not chmod $core_socket; host jtag may get EACCES" +} + +if [[ "$OS" != "Darwin" ]]; then + for _ in $(seq 1 60); do + if fix_core_socket_permissions; then + break + fi + sleep 1 + done +fi + # ── 8b. Start continuum-core natively on Mac ─────────────── # Mac runs continuum-core as a native host process so it can link Metal # directly. `npm start` drives the full build (cargo build --release @@ -942,11 +979,72 @@ for i in $(seq 1 "$HEALTH_TIMEOUT_SEC"); do || curl -sfk --max-time 2 https://localhost:9003/health >/dev/null 2>&1; then HEALTH_OK=1 ok "widget-server healthy after ${i}s" + if [[ "$OS" != "Darwin" ]]; then + fix_core_socket_permissions || true + fi break fi sleep 1 done +# ── 8c. 
Wait for first-chat seed readiness ─────────────────
+PHASE="chat seed readiness"
+# Probe whether chat commands can resolve the seeded #general room. Returns 0 = ready,
+# 1 = not ready yet (poll again), 2 = no jtag CLI found (unverifiable; polling cannot help).
+wait_for_general_room() {
+    local jtag_bin="" out="" cand
+
+    for cand in \
+        "$INSTALL_DIR/src/jtag" \
+        "$HOME/.local/bin/jtag" \
+        "$(command -v jtag 2>/dev/null)"; do
+        if [ -n "$cand" ] && [ -x "$cand" ]; then
+            jtag_bin="$cand"
+            break
+        fi
+    done
+    if [ -z "$jtag_bin" ]; then
+        warn "jtag CLI not found; cannot verify seeded #general room before browser open"
+        return 2
+    fi
+
+    # Probe the same routing path first chat uses. Raw data/list can prove a
+    # room row exists, but chat/send resolves through RoutingService; the user
+    # is not ready until that resolver accepts "general".
+    out=$("$jtag_bin" collaboration/chat/export --room=general --limit=1 2>&1 || true)
+    if echo "$out" | grep -Eq '"success"[[:space:]]*:[[:space:]]*true'; then
+        return 0
+    fi
+    return 1
+}
+
+if [ "$HEALTH_OK" -eq 1 ]; then
+    info "Waiting for chat seed readiness (timeout ${CHAT_READY_TIMEOUT_SEC:=120}s)..."
+    CHAT_READY_OK=0
+    CHAT_READY_RC=1
+    for i in $(seq 1 "$CHAT_READY_TIMEOUT_SEC"); do
+        wait_for_general_room && CHAT_READY_RC=0 || CHAT_READY_RC=$?
+        # Only "not ready yet" (1) is worth another poll; 0 and 2 are final.
+        [ "$CHAT_READY_RC" -eq 1 ] || break
+        sleep 1
+    done
+
+    if [ "$CHAT_READY_RC" -eq 0 ]; then
+        CHAT_READY_OK=1
+        ok "chat seed ready after ${i}s (#general exists)"
+    elif [ "$CHAT_READY_RC" -eq 1 ]; then
+        warn "Chat seed did not become ready after ${CHAT_READY_TIMEOUT_SEC}s — #general is not resolvable by chat commands."
+        warn " The UI may load, but first chat will fail until auto-seed catches up."
+        echo ""
+        echo " Diagnose:"
+        echo " $CONTAINER_CMD compose $COMPOSE_FILES logs --tail=200 node-server continuum-core"
+        echo " $INSTALL_DIR/src/jtag data/list --collection=rooms --filter='{\"uniqueId\":\"general\"}' --limit=1"
+        echo " $INSTALL_DIR/src/jtag collaboration/chat/export --room=general --limit=1"
+        echo ""
+        fail "Chat seed readiness failed"
+    fi  # rc 2 (no jtag CLI): warned once above; continue without the gate
+fi
+
 # ── 9. 
Determine URL + open browser (only if healthy) ────── PHASE="open browser" if [ -n "$TS_HOSTNAME" ] && [ -f "$CONTINUUM_DATA/$TS_HOSTNAME.crt" ]; then diff --git a/scripts/ci/carl-install-smoke.sh b/scripts/ci/carl-install-smoke.sh index 2233915a3..75ad60d0e 100755 --- a/scripts/ci/carl-install-smoke.sh +++ b/scripts/ci/carl-install-smoke.sh @@ -220,6 +220,15 @@ if [ $SEND_RC -ne 0 ]; then echo "❌ chat probe: chat/send command FAILED (exit $SEND_RC)" echo " Output:" echo "$SEND_OUT" | head -10 | sed 's/^/ /' + { + echo "" + echo "━━ diagnostics after chat/send failure ━━" + echo "$ $JTAG_BIN data/list --collection=rooms --filter='{\"uniqueId\":\"general\"}' --limit=3" + "$JTAG_BIN" data/list --collection=rooms --filter='{"uniqueId":"general"}' --limit=3 2>&1 || true + echo "" + echo "$ $JTAG_BIN collaboration/chat/export --room=general --limit=1" + "$JTAG_BIN" collaboration/chat/export --room=general --limit=1 2>&1 || true + } | sed 's/^/ /' >> "$CHAT_LOG" exit 4 fi diff --git a/src/scripts/parallel-start.sh b/src/scripts/parallel-start.sh index 21da9e57d..828a4fda6 100755 --- a/src/scripts/parallel-start.sh +++ b/src/scripts/parallel-start.sh @@ -231,7 +231,7 @@ build_pkg() { fi } -for pkg in archive-worker jtag-mcp; do +for pkg in archive-worker jtag-mcp livekit-bridge; do build_pkg "$pkg" done # continuum-core: all GPU features (metal+accelerate on macOS, cuda on Linux) diff --git a/src/workers/workers-config.json b/src/workers/workers-config.json index 4f0f6f63f..6b3e67a50 100644 --- a/src/workers/workers-config.json +++ b/src/workers/workers-config.json @@ -33,6 +33,15 @@ "args": [], "description": "Unified Rust runtime: Voice, Data, Embedding, Search, Logger modules", "enabled": true + }, + { + "name": "livekit-bridge", + "binary": "workers/target/release/livekit-bridge", + "socket": ".continuum/sockets/livekit-bridge.sock", + "args": [], + "description": "LiveKit WebRTC bridge for native continuum-core", + "enabled": true, + "memoryLimit": "2G" } ], 
"sharedSockets": [