CambrianTech · joelteply · May 11, 2026 · May 11, 2026 · May 11, 2026 · May 11, 2026
diff --git a/.github/workflows/carl-install-smoke.yml b/.github/workflows/carl-install-smoke.yml
@@ -66,21 +66,18 @@ jobs:
           # githubusercontent.com wouldn't be the one in this PR. Same
           # rationale as docker-images.yml's ref pattern.
           ref: ${{ github.event.pull_request.head.sha || github.sha }}
-          # Smoke uses the local script directly; no need for full history.
-          fetch-depth: 1
+          # verify-image-revisions.sh can compare image-label SHAs against
+          # HEAD and skip byte-identical non-image changes. Full history keeps
+          # that smart stale check honest.
+          fetch-depth: 0
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
       - name: Install mesa-vulkan-drivers (llvmpipe ICD for no-GPU CI runner)
-        # The default continuum-core-vulkan binary calls Vulkan via the loader.
-        # On ubuntu-latest there's no GPU hardware → no real ICD → loader returns
-        # zero devices → binary panics per Joel's "lack of GPU integration is
-        # forbidden" rule. mesa-vulkan-drivers installs the llvmpipe software
-        # ICD so the loader returns a (software) device, the binary sees a real
-        # Vulkan API surface, and the GPU code path is exercised exactly like
-        # it would be on a hardware-GPU host. vulkan-tools provides vulkaninfo
-        # for the slice probes (test-slices.sh).
+        # Host-side vulkaninfo is a diagnostic for the runner. The actual
+        # continuum-core process runs inside Docker, so the smoke also enables
+        # docker-compose.ci.yml below to select the container's lavapipe ICD.
         run: |
           sudo apt-get update -y
           sudo apt-get install -y mesa-vulkan-drivers vulkan-tools
@@ -90,17 +87,27 @@ jobs:
       - name: Login to ghcr.io (so install.sh can pull pre-built images)
         run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
 
+      - name: Validate model registry artifacts
+        run: VALIDATE_ALL_TIERS=1 src/scripts/validate-model-registry-downloads.sh
+
+      - name: Verify Carl smoke images are published for this PR
+        if: github.event_name == 'pull_request'
+        env:
+          EXPECTED_SHA: ${{ github.event.pull_request.head.sha }}
+          TAG: pr-${{ github.event.pull_request.number }}
+          IMAGES: ghcr.io/cambriantech/continuum-core-vulkan:ghcr.io/cambriantech/continuum-livekit-bridge:ghcr.io/cambriantech/continuum-node:ghcr.io/cambriantech/continuum-model-init:ghcr.io/cambriantech/continuum-widgets
+          STALE_AMD64_OUT: ${{ runner.temp }}/carl-stale-amd64.txt
+          STALE_ARM64_OUT: ${{ runner.temp }}/carl-stale-arm64.txt
+        run: bash scripts/verify-image-revisions.sh
+
       - name: Run carl-install smoke
         env:
           # PR HEAD sha so smoke fetches install.sh from THIS PR.
           CARL_INSTALL_REF: ${{ github.event.pull_request.head.sha || inputs.install_ref || github.sha }}
-          # Pin docker images to :pr-N (PR-scoped, mutable per push). Refreshed
-          # by push-image.sh on every dev push, so always reflects this PR's
-          # latest source — but never collides with another PR or canary.
-          # Slices the dev didn't push directly are aliased from :canary by the
-          # dev script (manifest copy, no rebuild). :latest was the prior
-          # default and went 9-14 days stale in April 2026 — never use it for
-          # smoke.
+          # Every PR smoke uses :pr-N. Canary PRs cannot safely use :canary:
+          # install.sh can fetch the PR's scripts and compose files, but Rust
+          # binaries still come from images. Pulling :canary here can test a
+          # stale continuum-core-vulkan binary and hide missing runtime code.
           #
           # Resolution priority: PR# > input.image_tag > 'canary'.
           # On workflow_dispatch (no PR context) the bare `pr-${{ ... }}`
@@ -111,7 +118,7 @@ jobs:
           # 25400718464). The conditional below makes manual triggers
           # default to the canary tag (the cadence we publish on) and lets
           # operators override via the image_tag input from the UI.
-          CONTINUUM_IMAGE_TAG: ${{ github.event.pull_request.number && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }}
+          CONTINUUM_IMAGE_TAG: ${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }}
           # 25-min cap on the docker-only install. Hybrid (Mac source-build)
           # path would exceed this — by design, that's the gate firing on
           # the README/install mismatch.
@@ -124,6 +131,13 @@ jobs:
           CARL_CHAT_TIMEOUT_SEC: '300'
           # CI shouldn't leave docker compose stacks running.
           SKIP_TEARDOWN: '0'
+          # CI validates the Docker-first install path. Missing PR images must
+          # fail loudly instead of silently falling into a 25-minute source
+          # build that times out after proving the wrong thing.
+          CONTINUUM_STRICT_IMAGE_PULL: '1'
+          # Load docker-compose.ci.yml so the no-GPU CI container selects its
+          # own lavapipe ICD. Normal installs do not load this overlay.
+          CONTINUUM_CI_COMPOSE: '1'
         run: bash scripts/ci/carl-install-smoke.sh
 
       - name: Capture docker logs from all containers on failure (continuum-core,
@@ -147,6 +161,9 @@ jobs:
                 > "${dir}.${svc}.ps" 2>&1
             done
             docker compose -f "$dir/docker-compose.yml" ps -a > "${dir}.compose-ps.log" 2>&1
+            if [ -d "$HOME/.continuum" ]; then
+              tar -C "$HOME/.continuum" -czf "${dir}.continuum-logs.tgz" logs personas 2>/dev/null || true
+            fi
           done
       - name: Upload install + page + chat + docker logs + screenshot artifacts on failure
         if: failure()
@@ -164,6 +181,7 @@ jobs:
             /tmp/carl-smoke-*.widget-server.log
             /tmp/carl-smoke-*.livekit-bridge.log
             /tmp/carl-smoke-*.compose-ps.log
+            /tmp/carl-smoke-*.continuum-logs.tgz
             /tmp/carl-smoke-*.*.ps
           retention-days: 7
           if-no-files-found: ignore
diff --git a/docker-compose.ci.yml b/docker-compose.ci.yml
@@ -0,0 +1,21 @@
+# CI-only docker compose overlay.
+#
+# The no-GPU GitHub runner has no physical Vulkan device. The base image
+# installs Mesa, but the Vulkan loader inside the container still needs an
+# explicit ICD choice so continuum-core exercises the Vulkan backend instead
+# of enumerating zero devices and tripping the fail-hard CPU fallback guard.
+#
+# install.sh loads this file only when CONTINUUM_CI_COMPOSE=1, so normal Linux
+# installs keep the default loader behavior and pick hardware ICDs naturally.
+
+services:
+  continuum-core:
+    environment:
+      # The lavapipe manifest file name embeds the CPU architecture
+      # (lvp_icd.<arch>.json). The default targets the x86_64 runner; export
+      # CONTINUUM_CI_VK_ICD (for example .../lvp_icd.aarch64.json) to reuse
+      # this overlay on another architecture instead of pointing the loader
+      # at a manifest that does not exist there.
+      VK_ICD_FILENAMES: "${CONTINUUM_CI_VK_ICD:-/usr/share/vulkan/icd.d/lvp_icd.x86_64.json}"
+      # Quoted so the value reaches the container as the string "1".
+      LIBGL_ALWAYS_SOFTWARE: "1"
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -58,13 +58,31 @@ services:
     # One-time downloader. Fixed budget — doesn't scale with host RAM.
     mem_limit: ${MODEL_INIT_MEM:-2g}
     volumes:
-      - voice-models:/models
+      - ${HOME}/.continuum/genome/models:/models
+      # Keep install-time model metadata/scripts authoritative from the cloned
+      # repo, not whatever an already-published model-init image happened to
+      # bake in. This lets PR/canary install smoke exercise the exact registry
+      # and fail-hard downloader under test.
+      - ./src/shared/models.json:/app/shared/models.json:ro
+      - ./src/scripts/download-models.sh:/app/scripts/download-models.sh:ro
+      - ./src/scripts/download-avatar-models.sh:/app/scripts/download-avatar-models.sh:ro
+      - ./src/scripts/shared:/app/scripts/shared:ro
     environment:
       - MODELS_DIR=/models
-      - HF_TOKEN=${HF_TOKEN:-}
       # base: small (74MB), fast, works with all whisper.cpp versions.
       # large-v3-turbo requires whisper.cpp v1.7+ (different tensor count).
       - WHISPER_MODEL=${WHISPER_MODEL:-base}
+      # Tier passed from install.sh's CONTINUUM_TIER (mba | mid | full),
+      # defaulting to `full` so headed installs without install.sh's
+      # hardware-tier block still pull the multimodal Qwen set. Without
+      # this explicit pass-through, download-models.sh inside the container
+      # reads /proc/meminfo which (cgroups-aware) reflects the model-init
+      # mem_limit (2GB), NOT the host's RAM, and silently downgrades every
+      # install to the `mba` tier — leaving Qwen unseeded and personas
+      # silent at runtime (root cause of the RTX 5090 'no local Qwen
+      # models' install finding 2026-05-11). Canonical tier names live
+      # in src/shared/models.json `auto_download.by_tier` keys.
+      - TIER=${CONTINUUM_TIER:-full}
 
   # ── Continuum Core (Rust) ─────────────────────────────────
   # Default uses the vulkan variant: software rendering via mesa's llvmpipe ICD
@@ -102,12 +120,25 @@ services:
     # cuda / continuum-core-vulkan overlays) it's the actual ceiling.
     mem_limit: ${CONTINUUM_CORE_MEM:-16g}
     working_dir: /app
+    # Model-init is a one-shot prerequisite for first-run local persona
+    # inference. continuum-core registers in-process llama.cpp adapters during
+    # startup by scanning ~/.continuum/genome/models; if it starts while the
+    # downloader is still writing qwen3.5, the adapter is skipped and local
+    # chat routes to the wrong local surface for the rest of the process.
+    depends_on:
+      model-init:
+        condition: service_completed_successfully
     # No depends_on for services behind profiles (postgres, livekit-bridge).
-    # Core starts independently; connections to optional services (postgres
-    # pool, livekit bridge socket) retry on demand. Text chat works without
-    # any profile active — voice/video requires `--profile live`.
+    # Connections to optional services (postgres pool, livekit bridge socket)
+    # retry on demand. Text chat works without any profile active — voice/video
+    # requires `--profile live`.
     volumes:
-      - voice-models:/app/models:ro
+      - ${HOME}/.continuum/genome/models:/app/models:ro
+      # Keep the runtime model/provider registry authoritative from the
+      # checked-out repo. Canary PR smoke pulls prebuilt continuum-core images;
+      # without this mount, the binary can run with stale baked TOML and never
+      # see newly declared local adapters/models.
+      - ./src/workers/continuum-core/config:/app/continuum-core/config:ro
       # Mount the ENTIRE ~/.continuum directory R/W. The Rust core reads config,
       # writes model cache, logs, grid state, sockets, sessions — all under
       # ~/.continuum. Cherry-picking subdirs with :ro caused silent failures
@@ -191,6 +222,12 @@ services:
       - "${NODE_WS_PORT:-9001}:9001"   # WebSocket
     volumes:
       - ~/.continuum:/root/.continuum
+      - ./src/server/seed-in-process.ts:/app/server/seed-in-process.ts:ro
+      - ./src/system/user/server/PersonaLifecycleManager.ts:/app/system/user/server/PersonaLifecycleManager.ts:ro
+      - ./src/system/user/server/PersonaUser.ts:/app/system/user/server/PersonaUser.ts:ro
+      - ./src/system/user/server/modules/PersonaMessageEvaluator.ts:/app/system/user/server/modules/PersonaMessageEvaluator.ts:ro
+      - ./src/system/user/server/modules/PersonaResponseGenerator.ts:/app/system/user/server/modules/PersonaResponseGenerator.ts:ro
+      - ./src/daemons/user-daemon/server/UserDaemonServer.ts:/app/daemons/user-daemon/server/UserDaemonServer.ts:ro
     environment:
       # node-server never directly connects to a database — all data ops
       # go through continuum-core via IPC, using opaque handles ('main' for
@@ -202,6 +239,11 @@ services:
       - NODE_ENV=production
       - JTAG_SKIP_HTTP=1
       - JTAG_NO_TLS=1
+      # Keep persona seed/reconcile on the same tier as model-init.
+      # Without this, node-server reads cgroup-limited container RAM and
+      # resolves local-default to the MBA model while model-init downloads
+      # the full-tier GGUF. That mismatch creates silent no-reply installs.
+      - CONTINUUM_TIER=${CONTINUUM_TIER:-full}
       # Browser connects to LiveKit via host-mapped port, not Docker DNS.
       # 'ws://livekit:7880' only resolves inside the Docker network;
       # the browser runs on the host where 'livekit' doesn't resolve.
@@ -356,5 +398,4 @@ volumes:
   hf-cache:
   forge-output:
   models:
-  voice-models:
   tailscale-state:
diff --git a/install.sh b/install.sh
@@ -206,9 +206,17 @@ case "$OS" in
     #             those code paths still load lazily). Native budget 5GB.
     #   24-31GB → mid tier: still chat-focused but slightly larger model;
     #             Bevy/vision/audio available. Native budget 8GB.
-    #   32GB+   → primary tier: full Qwen 4B code-forged + multimodal +
+    #   32GB+   → full tier: full Qwen 4B code-forged + multimodal +
     #             everything pre-pulled. Native budget 12GB (original).
     #
+    # Tier-name canon: `mba | mid | full`. Source of truth is
+    # src/shared/models.json (`auto_download.by_tier` keys + `tiers`
+    # keys). Both src/scripts/download-models.sh and ModelRegistry.ts
+    # consume that canon. Keep CONTINUUM_TIER in sync — `primary` was
+    # the legacy name and silently breaks the model-init download
+    # because by_tier[primary] doesn't exist (jq returns []), leaving
+    # the install with voice models only and personas with no Qwen.
+    #
     # PERSONA_MODEL also tiers (set later when ic_decide_gpu_path runs;
     # this just sets the byte budget for Docker VM sizing). The tiered
     # PERSONA_MODEL is referenced by the docker model pull section below.
@@ -230,10 +238,10 @@ For 16GB MBA: chat-only OOTB works (smaller model). For 32GB+: full multimodal e
       CONTINUUM_TIER="mid"
       info "Hardware tier: mid (${PHYS_GB}GB) — multimodal available with mid-size persona model"
     else
-      # Primary tier (original behavior)
+      # Full tier (original behavior — formerly named `primary`)
       NATIVE_RESERVE_MIB=$((12 * 1024))
-      CONTINUUM_TIER="primary"
-      info "Hardware tier: primary (${PHYS_GB}GB) — full multimodal + Qwen 4B code-forged"
+      CONTINUUM_TIER="full"
+      info "Hardware tier: full (${PHYS_GB}GB) — full multimodal + Qwen 4B code-forged"
     fi
     export CONTINUUM_TIER
     MACOS_RESERVE_MIB=$((6 * 1024))
@@ -404,9 +412,14 @@ EOF
   #
   # Tiered by CONTINUUM_TIER (set in the Mac RAM-tier block above; Linux
   # paths skip this block since CONTINUUM_TIER isn't set there → defaults
-  # to the primary model). Lets a 16GB MBA install with a model that fits
+  # to the full model). Lets a 16GB MBA install with a model that fits
   # rather than failing the install or OOMing on first chat.
-  case "${CONTINUUM_TIER:-primary}" in
+  #
+  # Tier-name canon: `mba | mid | full`. Matches src/shared/models.json
+  # `auto_download.by_tier` keys + src/scripts/download-models.sh. The
+  # legacy `primary` name silently broke the model-init download because
+  # `by_tier[primary]` doesn't exist — keep this in sync going forward.
+  case "${CONTINUUM_TIER:-full}" in
     mba)
       # 16-23GB: 0.8B general (~500MB GGUF). Chat-functional + leaves
       # headroom for macOS + Docker + native continuum-core working set.
@@ -777,7 +790,12 @@ mod_jtag_bin_link "$INSTALL_DIR/src/jtag"
 
 # ── 4. Configuration ───────────────────────────────────────
 PHASE="configuration"
-mkdir -p "$CONTINUUM_DATA"
+mkdir -p \
+  "$CONTINUUM_DATA" \
+  "$CONTINUUM_DATA/sockets" \
+  "$CONTINUUM_DATA/logs" \
+  "$CONTINUUM_DATA/sessions" \
+  "$CONTINUUM_DATA/hf_cache"
 
 CONFIG_FILE="$CONTINUUM_DATA/config.env"
 if [ ! -f "$CONFIG_FILE" ]; then
@@ -892,6 +910,13 @@ elif [[ "$HAS_GPU" == "true" ]]; then
   fi
   COMPOSE_ARGS="--profile gpu"
 fi
+if [[ "${CONTINUUM_CI_COMPOSE:-}" == "1" ]]; then  # opt-in: layer the CI-only compose overlay
+  if [ ! -f "docker-compose.ci.yml" ]; then
+    fail "CONTINUUM_CI_COMPOSE=1 but docker-compose.ci.yml is missing"
+  else
+    COMPOSE_FILES="$COMPOSE_FILES -f docker-compose.ci.yml"
+  fi
+fi
 # Linux without a CUDA GPU: base docker-compose.yml uses continuum-core-vulkan.
 # On real-driver hosts (Intel/AMD with vulkan) this picks up the hardware ICD;
 # on hosts without a driver, mesa-vulkan-drivers (apt) provides llvmpipe as a
@@ -942,7 +967,18 @@ EFFECTIVE_IMAGE_TAG="${CONTINUUM_IMAGE_TAG:-latest}"
 } > "$INSTALL_DIR/.env"
 
 info "Pulling container images (tag: $EFFECTIVE_IMAGE_TAG)..."
-$CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>/dev/null || warn "Some images not published yet — will build locally"
+# Capture the pull output (stdout + stderr) so a failure can be reported with
+# its real cause. With CONTINUUM_STRICT_IMAGE_PULL=1 a failed pull is reported
+# through fail; otherwise it is downgraded to a warning.
+if ! PULL_OUTPUT=$($CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>&1); then
+  # Print the tail in both modes: without it an auth or network error on the
+  # non-strict path is indistinguishable from "image not published yet".
+  printf '%s\n' "$PULL_OUTPUT" | tail -80 >&2
+  if [[ "${CONTINUUM_STRICT_IMAGE_PULL:-0}" == "1" ]]; then
+    fail "Container image pull failed for tag '$EFFECTIVE_IMAGE_TAG'. Strict image-pull mode is enabled, so install.sh will not build locally. Publish the image tag or choose an existing CONTINUUM_IMAGE_TAG."
+  fi
+  warn "Some images not published yet — will build locally"
+fi
 
 # ── 8. Start support services ──────────────────────────────
 PHASE="start support services"