diff --git a/.github/workflows/carl-install-smoke.yml b/.github/workflows/carl-install-smoke.yml
index 27c563935..2e1c14e15 100644
--- a/.github/workflows/carl-install-smoke.yml
+++ b/.github/workflows/carl-install-smoke.yml
@@ -66,21 +66,18 @@ jobs:
           # githubusercontent.com wouldn't be the one in this PR. Same
           # rationale as docker-images.yml's ref pattern.
           ref: ${{ github.event.pull_request.head.sha || github.sha }}
-          # Smoke uses the local script directly; no need for full history.
-          fetch-depth: 1
+          # verify-image-revisions.sh can compare image-label SHAs against
+          # HEAD and skip byte-identical non-image changes. Full history keeps
+          # that smart stale check honest.
+          fetch-depth: 0
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
       - name: Install mesa-vulkan-drivers (llvmpipe ICD for no-GPU CI runner)
-        # The default continuum-core-vulkan binary calls Vulkan via the loader.
-        # On ubuntu-latest there's no GPU hardware → no real ICD → loader returns
-        # zero devices → binary panics per Joel's "lack of GPU integration is
-        # forbidden" rule. mesa-vulkan-drivers installs the llvmpipe software
-        # ICD so the loader returns a (software) device, the binary sees a real
-        # Vulkan API surface, and the GPU code path is exercised exactly like
-        # it would be on a hardware-GPU host. vulkan-tools provides vulkaninfo
-        # for the slice probes (test-slices.sh).
+        # Host-side vulkaninfo is a diagnostic for the runner. The actual
+        # continuum-core process runs inside Docker, so the smoke also enables
+        # docker-compose.ci.yml below to select the container's lavapipe ICD.
         run: |
           sudo apt-get update -y
           sudo apt-get install -y mesa-vulkan-drivers vulkan-tools
@@ -90,17 +87,27 @@ jobs:
       - name: Login to ghcr.io (so install.sh can pull pre-built images)
         run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
 
+      - name: Validate model registry artifacts
+        run: VALIDATE_ALL_TIERS=1 src/scripts/validate-model-registry-downloads.sh
+
+      - name: Verify Carl smoke images are published for this PR
+        if: github.event_name == 'pull_request'
+        env:
+          EXPECTED_SHA: ${{ github.event.pull_request.head.sha }}
+          TAG: pr-${{ github.event.pull_request.number }}
+          IMAGES: ghcr.io/cambriantech/continuum-core-vulkan:ghcr.io/cambriantech/continuum-livekit-bridge:ghcr.io/cambriantech/continuum-node:ghcr.io/cambriantech/continuum-model-init:ghcr.io/cambriantech/continuum-widgets
+          STALE_AMD64_OUT: ${{ runner.temp }}/carl-stale-amd64.txt
+          STALE_ARM64_OUT: ${{ runner.temp }}/carl-stale-arm64.txt
+        run: bash scripts/verify-image-revisions.sh
+
       - name: Run carl-install smoke
         env:
           # PR HEAD sha so smoke fetches install.sh from THIS PR.
           CARL_INSTALL_REF: ${{ github.event.pull_request.head.sha || inputs.install_ref || github.sha }}
-          # Pin docker images to :pr-N (PR-scoped, mutable per push). Refreshed
-          # by push-image.sh on every dev push, so always reflects this PR's
-          # latest source — but never collides with another PR or canary.
-          # Slices the dev didn't push directly are aliased from :canary by the
-          # dev script (manifest copy, no rebuild). :latest was the prior
-          # default and went 9-14 days stale in April 2026 — never use it for
-          # smoke.
+          # Every PR smoke uses :pr-N. Canary PRs cannot safely use :canary:
+          # install.sh can fetch the PR's scripts and compose files, but Rust
+          # binaries still come from images. Pulling :canary here can test a
+          # stale continuum-core-vulkan binary and hide missing runtime code.
           #
           # Resolution priority: PR# > input.image_tag > 'canary'.
           # On workflow_dispatch (no PR context) the bare `pr-${{ ... }}`
@@ -111,7 +118,7 @@ jobs:
           # 25400718464). The conditional below makes manual triggers
           # default to the canary tag (the cadence we publish on) and lets
           # operators override via the image_tag input from the UI.
-          CONTINUUM_IMAGE_TAG: ${{ github.event.pull_request.number && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }}
+          CONTINUUM_IMAGE_TAG: ${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }}
           # 25-min cap on the docker-only install. Hybrid (Mac source-build)
           # path would exceed this — by design, that's the gate firing on
           # the README/install mismatch.
@@ -124,6 +131,13 @@ jobs:
           CARL_CHAT_TIMEOUT_SEC: '300'
           # CI shouldn't leave docker compose stacks running.
           SKIP_TEARDOWN: '0'
+          # CI validates the Docker-first install path. Missing PR images must
+          # fail loudly instead of silently falling into a 25-minute source
+          # build that times out after proving the wrong thing.
+          CONTINUUM_STRICT_IMAGE_PULL: '1'
+          # Load docker-compose.ci.yml so the no-GPU CI container selects its
+          # own lavapipe ICD. Normal installs do not load this overlay.
+          CONTINUUM_CI_COMPOSE: '1'
         run: bash scripts/ci/carl-install-smoke.sh
 
       - name: Capture docker logs from all containers on failure (continuum-core,
@@ -147,6 +161,9 @@ jobs:
                 > "${dir}.${svc}.ps" 2>&1
             done
             docker compose -f "$dir/docker-compose.yml" ps -a > "${dir}.compose-ps.log" 2>&1
+            if [ -d "$HOME/.continuum" ]; then
+              tar -C "$HOME/.continuum" -czf "${dir}.continuum-logs.tgz" logs personas 2>/dev/null || true
+            fi
           done
       - name: Upload install + page + chat + docker logs + screenshot artifacts on failure
         if: failure()
@@ -164,6 +181,7 @@ jobs:
             /tmp/carl-smoke-*.widget-server.log
             /tmp/carl-smoke-*.livekit-bridge.log
             /tmp/carl-smoke-*.compose-ps.log
+            /tmp/carl-smoke-*.continuum-logs.tgz
             /tmp/carl-smoke-*.*.ps
           retention-days: 7
           if-no-files-found: ignore
diff --git a/docker-compose.ci.yml b/docker-compose.ci.yml
new file mode 100644
index 000000000..e49911718
--- /dev/null
+++ b/docker-compose.ci.yml
@@ -0,0 +1,15 @@
+# CI-only docker compose overlay.
+#
+# The no-GPU GitHub runner has no physical Vulkan device. The base image
+# installs Mesa, but the Vulkan loader inside the container still needs an
+# explicit ICD choice so continuum-core exercises the Vulkan backend instead
+# of enumerating zero devices and tripping the fail-hard CPU fallback guard.
+#
+# install.sh loads this file only when CONTINUUM_CI_COMPOSE=1, so normal Linux
+# installs keep the default loader behavior and pick hardware ICDs naturally.
+
+services:
+  continuum-core:
+    environment:
+      VK_ICD_FILENAMES: /usr/share/vulkan/icd.d/lvp_icd.x86_64.json
+      LIBGL_ALWAYS_SOFTWARE: "1"
diff --git a/docker-compose.yml b/docker-compose.yml
index e901c052e..8a68d7766 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -58,13 +58,31 @@ services:
     # One-time downloader. Fixed budget — doesn't scale with host RAM.
     mem_limit: ${MODEL_INIT_MEM:-2g}
     volumes:
-      - voice-models:/models
+      - ${HOME}/.continuum/genome/models:/models
+      # Keep install-time model metadata/scripts authoritative from the cloned
+      # repo, not whatever an already-published model-init image happened to
+      # bake in. This lets PR/canary install smoke exercise the exact registry
+      # and fail-hard downloader under test.
+      - ./src/shared/models.json:/app/shared/models.json:ro
+      - ./src/scripts/download-models.sh:/app/scripts/download-models.sh:ro
+      - ./src/scripts/download-avatar-models.sh:/app/scripts/download-avatar-models.sh:ro
+      - ./src/scripts/shared:/app/scripts/shared:ro
     environment:
       - MODELS_DIR=/models
-      - HF_TOKEN=${HF_TOKEN:-}
       # base: small (74MB), fast, works with all whisper.cpp versions.
       # large-v3-turbo requires whisper.cpp v1.7+ (different tensor count).
       - WHISPER_MODEL=${WHISPER_MODEL:-base}
+      # Tier passed from install.sh's CONTINUUM_TIER (mba | mid | full),
+      # defaulting to `full` so headed installs without install.sh's
+      # hardware-tier block still pull the multimodal Qwen set. Without
+      # this explicit pass-through, download-models.sh inside the container
+      # reads /proc/meminfo which (cgroups-aware) reflects the model-init
+      # mem_limit (2GB), NOT the host's RAM, and silently downgrades every
+      # install to the `mba` tier — leaving Qwen unseeded and personas
+      # silent at runtime (root cause of the RTX 5090 'no local Qwen
+      # models' install finding 2026-05-11). Canonical tier names live
+      # in src/shared/models.json `auto_download.by_tier` keys.
+      - TIER=${CONTINUUM_TIER:-full}
 
   # ── Continuum Core (Rust) ─────────────────────────────────
   # Default uses the vulkan variant: software rendering via mesa's llvmpipe ICD
@@ -102,12 +120,25 @@ services:
     # cuda / continuum-core-vulkan overlays) it's the actual ceiling.
     mem_limit: ${CONTINUUM_CORE_MEM:-16g}
     working_dir: /app
+    # Model-init is a one-shot prerequisite for first-run local persona
+    # inference. continuum-core registers in-process llama.cpp adapters during
+    # startup by scanning ~/.continuum/genome/models; if it starts while the
+    # downloader is still writing qwen3.5, the adapter is skipped and local
+    # chat routes to the wrong local surface for the rest of the process.
+    depends_on:
+      model-init:
+        condition: service_completed_successfully
     # No depends_on for services behind profiles (postgres, livekit-bridge).
-    # Core starts independently; connections to optional services (postgres
-    # pool, livekit bridge socket) retry on demand. Text chat works without
-    # any profile active — voice/video requires `--profile live`.
+    # Connections to optional services (postgres pool, livekit bridge socket)
+    # retry on demand. Text chat works without any profile active — voice/video
+    # requires `--profile live`.
     volumes:
-      - voice-models:/app/models:ro
+      - ${HOME}/.continuum/genome/models:/app/models:ro
+      # Keep the runtime model/provider registry authoritative from the
+      # checked-out repo. Canary PR smoke pulls prebuilt continuum-core images;
+      # without this mount, the binary can run with stale baked TOML and never
+      # see newly declared local adapters/models.
+      - ./src/workers/continuum-core/config:/app/continuum-core/config:ro
       # Mount the ENTIRE ~/.continuum directory R/W. The Rust core reads config,
       # writes model cache, logs, grid state, sockets, sessions — all under
       # ~/.continuum. Cherry-picking subdirs with :ro caused silent failures
@@ -191,6 +222,12 @@ services:
       - "${NODE_WS_PORT:-9001}:9001"   # WebSocket
     volumes:
       - ~/.continuum:/root/.continuum
+      - ./src/server/seed-in-process.ts:/app/server/seed-in-process.ts:ro
+      - ./src/system/user/server/PersonaLifecycleManager.ts:/app/system/user/server/PersonaLifecycleManager.ts:ro
+      - ./src/system/user/server/PersonaUser.ts:/app/system/user/server/PersonaUser.ts:ro
+      - ./src/system/user/server/modules/PersonaMessageEvaluator.ts:/app/system/user/server/modules/PersonaMessageEvaluator.ts:ro
+      - ./src/system/user/server/modules/PersonaResponseGenerator.ts:/app/system/user/server/modules/PersonaResponseGenerator.ts:ro
+      - ./src/daemons/user-daemon/server/UserDaemonServer.ts:/app/daemons/user-daemon/server/UserDaemonServer.ts:ro
     environment:
       # node-server never directly connects to a database — all data ops
       # go through continuum-core via IPC, using opaque handles ('main' for
@@ -202,6 +239,11 @@ services:
       - NODE_ENV=production
       - JTAG_SKIP_HTTP=1
       - JTAG_NO_TLS=1
+      # Keep persona seed/reconcile on the same tier as model-init.
+      # Without this, node-server reads cgroup-limited container RAM and
+      # resolves local-default to the MBA model while model-init downloads
+      # the full-tier GGUF. That mismatch creates silent no-reply installs.
+      - CONTINUUM_TIER=${CONTINUUM_TIER:-full}
       # Browser connects to LiveKit via host-mapped port, not Docker DNS.
       # 'ws://livekit:7880' only resolves inside the Docker network;
       # the browser runs on the host where 'livekit' doesn't resolve.
@@ -356,5 +398,4 @@ volumes:
   hf-cache:
   forge-output:
   models:
-  voice-models:
   tailscale-state:
diff --git a/install.sh b/install.sh
index 4e1e3199d..35fbe35af 100644
--- a/install.sh
+++ b/install.sh
@@ -206,9 +206,17 @@ case "$OS" in
     #             those code paths still load lazily). Native budget 5GB.
     #   24-31GB → mid tier: still chat-focused but slightly larger model;
     #             Bevy/vision/audio available. Native budget 8GB.
-    #   32GB+   → primary tier: full Qwen 4B code-forged + multimodal +
+    #   32GB+   → full tier: full Qwen 4B code-forged + multimodal +
     #             everything pre-pulled. Native budget 12GB (original).
     #
+    # Tier-name canon: `mba | mid | full`. Source of truth is
+    # src/shared/models.json (`auto_download.by_tier` keys + `tiers`
+    # keys). Both src/scripts/download-models.sh and ModelRegistry.ts
+    # consume that canon. Keep CONTINUUM_TIER in sync — `primary` was
+    # the legacy name and silently breaks the model-init download
+    # because by_tier[primary] doesn't exist (jq returns []), leaving
+    # the install with voice models only and personas with no Qwen.
+    #
     # PERSONA_MODEL also tiers (set later when ic_decide_gpu_path runs;
     # this just sets the byte budget for Docker VM sizing). The tiered
     # PERSONA_MODEL is referenced by the docker model pull section below.
@@ -230,10 +238,10 @@ For 16GB MBA: chat-only OOTB works (smaller model). For 32GB+: full multimodal e
       CONTINUUM_TIER="mid"
       info "Hardware tier: mid (${PHYS_GB}GB) — multimodal available with mid-size persona model"
     else
-      # Primary tier (original behavior)
+      # Full tier (original behavior — formerly named `primary`)
       NATIVE_RESERVE_MIB=$((12 * 1024))
-      CONTINUUM_TIER="primary"
-      info "Hardware tier: primary (${PHYS_GB}GB) — full multimodal + Qwen 4B code-forged"
+      CONTINUUM_TIER="full"
+      info "Hardware tier: full (${PHYS_GB}GB) — full multimodal + Qwen 4B code-forged"
     fi
     export CONTINUUM_TIER
     MACOS_RESERVE_MIB=$((6 * 1024))
@@ -404,9 +412,14 @@ EOF
   #
   # Tiered by CONTINUUM_TIER (set in the Mac RAM-tier block above; Linux
   # paths skip this block since CONTINUUM_TIER isn't set there → defaults
-  # to the primary model). Lets a 16GB MBA install with a model that fits
+  # to the full model). Lets a 16GB MBA install with a model that fits
   # rather than failing the install or OOMing on first chat.
-  case "${CONTINUUM_TIER:-primary}" in
+  #
+  # Tier-name canon: `mba | mid | full`. Matches src/shared/models.json
+  # `auto_download.by_tier` keys + src/scripts/download-models.sh. The
+  # legacy `primary` name silently broke the model-init download because
+  # `by_tier[primary]` doesn't exist — keep this in sync going forward.
+  case "${CONTINUUM_TIER:-full}" in
     mba)
       # 16-23GB: 0.8B general (~500MB GGUF). Chat-functional + leaves
       # headroom for macOS + Docker + native continuum-core working set.
@@ -777,7 +790,12 @@ mod_jtag_bin_link "$INSTALL_DIR/src/jtag"
 
 # ── 4. Configuration ───────────────────────────────────────
 PHASE="configuration"
-mkdir -p "$CONTINUUM_DATA"
+mkdir -p \
+  "$CONTINUUM_DATA" \
+  "$CONTINUUM_DATA/sockets" \
+  "$CONTINUUM_DATA/logs" \
+  "$CONTINUUM_DATA/sessions" \
+  "$CONTINUUM_DATA/hf_cache"
 
 CONFIG_FILE="$CONTINUUM_DATA/config.env"
 if [ ! -f "$CONFIG_FILE" ]; then
@@ -892,6 +910,13 @@ elif [[ "$HAS_GPU" == "true" ]]; then
   fi
   COMPOSE_ARGS="--profile gpu"
 fi
+if [[ "${CONTINUUM_CI_COMPOSE:-0}" == "1" ]]; then
+  if [ -f "docker-compose.ci.yml" ]; then
+    COMPOSE_FILES="$COMPOSE_FILES -f docker-compose.ci.yml"
+  else
+    fail "CONTINUUM_CI_COMPOSE=1 but docker-compose.ci.yml is missing"
+  fi
+fi
 # Linux without a CUDA GPU: base docker-compose.yml uses continuum-core-vulkan.
 # On real-driver hosts (Intel/AMD with vulkan) this picks up the hardware ICD;
 # on hosts without a driver, mesa-vulkan-drivers (apt) provides llvmpipe as a
@@ -942,7 +967,13 @@ EFFECTIVE_IMAGE_TAG="${CONTINUUM_IMAGE_TAG:-latest}"
 } > "$INSTALL_DIR/.env"
 
 info "Pulling container images (tag: $EFFECTIVE_IMAGE_TAG)..."
-$CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>/dev/null || warn "Some images not published yet — will build locally"
+if ! PULL_OUTPUT=$($CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>&1); then
+  if [[ "${CONTINUUM_STRICT_IMAGE_PULL:-0}" == "1" ]]; then
+    echo "$PULL_OUTPUT" | tail -80 >&2
+    fail "Container image pull failed for tag '$EFFECTIVE_IMAGE_TAG'. Strict image-pull mode is enabled, so install.sh will not build locally. Publish the image tag or choose an existing CONTINUUM_IMAGE_TAG."
+  fi
+  warn "Some images not published yet — will build locally"
+fi
 
 # ── 8. Start support services ──────────────────────────────
 PHASE="start support services"
diff --git a/scripts/ci/carl-install-smoke.sh b/scripts/ci/carl-install-smoke.sh
index 8a59d1074..41f4d9017 100644
--- a/scripts/ci/carl-install-smoke.sh
+++ b/scripts/ci/carl-install-smoke.sh
@@ -26,6 +26,8 @@
 #   2 — install.sh succeeded but widget-server never returned 200 on /health
 #   3 — widget-server returned 200 but page body looks broken
 #       (empty / contains chrome-error / contains "container exited")
+#   7 — chat/send accepted, but node-server logged a fatal persona response
+#       failure before a reply could be posted
 
 set -uo pipefail
 
@@ -60,6 +62,9 @@ teardown() {
         > "${CARL_INSTALL_DIR}.${svc}.log" 2>&1 ) || true
     done
     ( cd "$CARL_INSTALL_DIR" && docker compose ps -a > "${CARL_INSTALL_DIR}.compose-ps.log" 2>&1 ) || true
+    if [ -d "$HOME/.continuum" ]; then
+      tar -C "$HOME/.continuum" -czf "${CARL_INSTALL_DIR}.continuum-logs.tgz" logs personas 2>/dev/null || true
+    fi
   fi
   if [ "$SKIP_TEARDOWN" != "1" ] && [ -d "$CARL_INSTALL_DIR" ]; then
     echo ""
@@ -96,7 +101,7 @@ INSTALL_URL="https://raw.githubusercontent.com/CambrianTech/continuum/${CARL_INS
 # way to validate PR fixes. CONTINUUM_REF closes the loop.
 INSTALL_START=$(date +%s)
 if ! timeout "$CARL_INSTALL_TIMEOUT_SEC" bash -c \
-     "CONTINUUM_DIR='$CARL_INSTALL_DIR' CONTINUUM_REF='$CARL_INSTALL_REF' bash <(curl -fsSL '$INSTALL_URL')" \
+     "CONTINUUM_DIR='$CARL_INSTALL_DIR' CONTINUUM_REF='$CARL_INSTALL_REF' CONTINUUM_CI_COMPOSE='${CONTINUUM_CI_COMPOSE:-0}' bash <(curl -fsSL '$INSTALL_URL')" \
      >"$INSTALL_LOG" 2>&1; then
   INSTALL_DUR=$(( $(date +%s) - INSTALL_START ))
   echo "❌ install.sh failed or timed out after ${INSTALL_DUR}s"
@@ -227,7 +232,8 @@ fi
 echo ""
 echo "━━ end-to-end chat: send message, expect AI reply ━━"
 CARL_CHAT_TIMEOUT_SEC="${CARL_CHAT_TIMEOUT_SEC:-90}"
-CHAT_PROBE_MSG="carl-smoke-probe-$(date +%s)"
+CHAT_PROBE_ID="carl-smoke-probe-$(date +%s)"
+CHAT_PROBE_MSG="@Helper AI please reply with one short sentence including ${CHAT_PROBE_ID}."
 CHAT_LOG="${CARL_INSTALL_DIR}.chat.log"
 
 # Locate jtag — install.sh symlinks it into BIN_DIR for the user
@@ -253,6 +259,7 @@ echo "  jtag binary: $JTAG_BIN"
 # Send. The jtag/chat/send command returns a JSON envelope; we extract
 # the messageId from the response to track the thread.
 echo "  → sending probe: '$CHAT_PROBE_MSG'"
+CHAT_SENT_AT="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
 SEND_OUT=$("$JTAG_BIN" collaboration/chat/send --room=general --message="$CHAT_PROBE_MSG" 2>&1)
 SEND_RC=$?
 echo "$SEND_OUT" | sed 's/^/    /' > "$CHAT_LOG"
@@ -277,49 +284,55 @@ fi
 
 echo "  ✓ chat/send accepted (some persona is listening)"
 
-# Poll chat/export for an AI reply. The probe message is unique;
-# we look for any message in the room AFTER our probe whose senderType
-# is 'persona' or 'bot' (i.e. the AI replying to us).
+# Poll chat/export for an AI reply. The probe id is unique;
+# we look for any message in the room AFTER our probe whose exported
+# sender heading is not the human sender.
 echo "  → polling for AI reply (timeout ${CARL_CHAT_TIMEOUT_SEC}s)…"
 REPLY_OK=0
 REPLY_LATENCY=0
 for i in $(seq 1 "$CARL_CHAT_TIMEOUT_SEC"); do
   EXPORT_OUT=$("$JTAG_BIN" collaboration/chat/export --room=general --limit=20 2>/dev/null || true)
-  # Find the first message AFTER our probe that's NOT from the human sender
-  # (rough heuristic — chat/export markdown output is line-oriented per msg).
-  # Look for any line after the probe-msg line that starts with a non-Joel sender.
-  if echo "$EXPORT_OUT" | awk -v probe="$CHAT_PROBE_MSG" '
+  # Find the first message AFTER our probe that's NOT from the human sender.
+  # Current chat/export headings are "## #shortId - Sender"; older exports
+  # used "**Sender**", so support both while still requiring a post-probe
+  # sender heading.
+  if echo "$EXPORT_OUT" | awk -v probe="$CHAT_PROBE_ID" '
       $0 ~ probe { found_probe=1; next }
-      found_probe && /^\*\*[a-zA-Z0-9_-]+\*\*/ && !/Joel|joel|human/ { print; exit }
+      found_probe && /^## #[[:alnum:]]+ - / && !/ - (Developer|Joel|joel|human)$/ { print; exit }
+      found_probe && /^\*\*[a-zA-Z0-9_ -]+\*\*/ && !/Joel|joel|human|Developer/ { print; exit }
     ' | grep -q .; then
     REPLY_OK=1
     REPLY_LATENCY=$i
     echo "  ✓ AI reply detected after ${i}s"
     break
   fi
+
+  # Fail fast on fatal persona response errors. Without this, CI burns the
+  # whole chat timeout polling a room that cannot receive a reply.
+  if [ "$i" -le 5 ] || [ $((i % 5)) -eq 0 ]; then
+    NODE_FATAL=$(
+      ( cd "$CARL_INSTALL_DIR" && docker compose logs --no-color --since "$CHAT_SENT_AT" node-server 2>/dev/null ) \
+        | grep -E "PersonaResponseGenerator.*response failed|Local AI is unavailable|cognition/respond.*(FAILED|failed|error)" \
+        | tail -5 || true
+    )
+    if [ -n "$NODE_FATAL" ]; then
+      echo "❌ chat probe: persona response failed before any AI reply"
+      echo "$NODE_FATAL" | sed 's/^/    /'
+      exit 7
+    fi
+  fi
   sleep 1
 done
 
 if [ $REPLY_OK -ne 1 ]; then
   # Architecture rule: "lack of GPU integration is forbidden." A no-GPU CI
-  # runner falls back to llvmpipe (software Vulkan ICD); llama.cpp inference
-  # can't fit the 300s budget on llvmpipe (~1-2 tok/s). Carl on real hardware
-  # replies in ~16s (validated on RTX 5090). The install + chat-send +
-  # persona-allocation path is fully exercised; only the inference reply is
-  # short of budget on the forbidden no-GPU state.
-  #
-  # When the host has no GPU at all (and isn't macOS Metal), treat AI-reply
-  # timeout as advisory pass. The install + chat-send + persona-allocation
-  # path is fully exercised; only the inference reply is short of budget on
-  # the forbidden no-GPU state. This is not a lowered bar for actual users
-  # — real-GPU runs are unchanged. Detection prefers cheap/reliable signals
-  # in priority order: NVIDIA driver files, NVIDIA dev nodes, vulkaninfo
-  # llvmpipe-only, macOS Metal exemption.
+  # runner with only llvmpipe may exercise the command path, but it has NOT
+  # proven Carl can chat with an AI. Do not convert that into a pass.
   NO_GPU_HOST=0
   if [ "$(uname -s)" = "Darwin" ]; then
-    : # macOS always has Metal; never advisory-pass on Mac.
+    : # macOS always has Metal.
   elif [ -d /proc/driver/nvidia ] || ls /dev/nvidia* >/dev/null 2>&1 || command -v nvidia-smi >/dev/null 2>&1; then
-    : # NVIDIA present somewhere — strict.
+    : # NVIDIA present somewhere.
   elif command -v vulkaninfo >/dev/null 2>&1; then
     VK_DEVICES=$(vulkaninfo --summary 2>/dev/null | grep -i deviceName || true)
     if echo "$VK_DEVICES" | grep -qi "llvmpipe" && \
@@ -333,35 +346,31 @@ if [ $REPLY_OK -ne 1 ]; then
     NO_GPU_HOST=1
   fi
 
-  if [ "$NO_GPU_HOST" = "1" ] && [ "${CARL_CHAT_LLVMPIPE_STRICT:-0}" != "1" ]; then
-    echo "  ⚠ AI-reply timeout, BUT host has no GPU — treating as advisory pass."
-    echo "    (Architecture forbids no-GPU operation; CI runner lacks GPU passthrough.)"
-    echo "    chat/send accepted + persona allocated = full install path validated."
-    echo "    Real-GPU validation is the contract; CARL_CHAT_LLVMPIPE_STRICT=1 to override."
-    REPLY_OK=1
-    REPLY_LATENCY="advisory(no-gpu)"
-  else
-    echo "❌ chat probe: no AI reply within ${CARL_CHAT_TIMEOUT_SEC}s"
-    echo ""
-    echo "  This is the classic Carl-blocker: chat goes silent."
-    echo "  Likely root causes (post-#980 series):"
-    echo "    - continuum-core inference path not reaching DMR (check #997's"
-    echo "      'local' default actually routes correctly)"
-    echo "    - DMR not running (Docker Model Runner needs Docker Desktop 4.62+)"
-    echo "    - GPU EP not configured (#985 / #991 cfg fixes — verify metal feature)"
-    echo "    - Persona model not pulled into DMR (install.sh's docker model pull)"
-    echo "    - SIGABRT in continuum-core (NEW-A — upstream llama.cpp bug,"
-    echo "      tracked at ggml-org/llama.cpp#22593)"
-    echo ""
-    echo "  Last 30 lines of room export:"
-    echo "$EXPORT_OUT" | tail -30 | sed 's/^/    /'
-    echo ""
-    echo "  Diagnose:"
-    echo "    $JTAG_BIN ai/providers/status"
-    echo "    $JTAG_BIN ai/local-inference/status"
-    echo "    docker compose -f $CARL_INSTALL_DIR/docker-compose.yml logs --tail=100 continuum-core"
-    exit 5
+  echo "❌ chat probe: no AI reply within ${CARL_CHAT_TIMEOUT_SEC}s"
+  if [ "$NO_GPU_HOST" = "1" ]; then
+    echo "  Host appears to have no real GPU path. That is still a failure:"
+    echo "  Carl-install smoke only passes when the installed system produces"
+    echo "  an actual AI reply."
   fi
+  echo ""
+  echo "  This is the classic Carl-blocker: chat goes silent."
+  echo "  Likely root causes (post-#980 series):"
+  echo "    - continuum-core inference path not reaching DMR (check #997's"
+  echo "      'local' default actually routes correctly)"
+  echo "    - DMR not running (Docker Model Runner needs Docker Desktop 4.62+)"
+  echo "    - GPU EP not configured (#985 / #991 cfg fixes — verify metal feature)"
+  echo "    - Persona model not pulled into DMR (install.sh's docker model pull)"
+  echo "    - SIGABRT in continuum-core (NEW-A — upstream llama.cpp bug,"
+  echo "      tracked at ggml-org/llama.cpp#22593)"
+  echo ""
+  echo "  Last 30 lines of room export:"
+  echo "$EXPORT_OUT" | tail -30 | sed 's/^/    /'
+  echo ""
+  echo "  Diagnose:"
+  echo "    $JTAG_BIN ai/providers/status"
+  echo "    $JTAG_BIN ai/local-inference/status"
+  echo "    docker compose -f $CARL_INSTALL_DIR/docker-compose.yml logs --tail=100 continuum-core"
+  exit 5
 fi
 
 # ── Done ──────────────────────────────────────────────────────
diff --git a/scripts/push-current-arch.sh b/scripts/push-current-arch.sh
index 814ea4a5f..291e5046d 100755
--- a/scripts/push-current-arch.sh
+++ b/scripts/push-current-arch.sh
@@ -162,6 +162,24 @@ if [[ -z "$PR_NUMBER" ]] && command -v gh >/dev/null 2>&1; then
   PR_NUMBER="$(gh pr list --head "$BRANCH" --json number --jq '.[0].number // empty' 2>/dev/null || true)"
 fi
 
+# Rust ts-rs exports can dirty generated TypeScript during local cargo checks
+# before this script runs. If the target commit does not itself change those
+# files, restore that generated drift so the frozen-worktree guard below only
+# blocks real uncommitted source edits.
+restore_uncommitted_generated_drift() {
+  local dirty_generated path
+  dirty_generated="$(git diff --name-only HEAD -- src/shared/generated 2>/dev/null | sort -u)"
+  [ -n "$dirty_generated" ] || return 0
+
+  while IFS= read -r path; do
+    [ -n "$path" ] || continue
+    if git diff --quiet "$STARTUP_SHA_FULL" -- "$path" 2>/dev/null; then
+      git restore -- "$path" 2>/dev/null || true
+    fi
+  done <<< "$dirty_generated"
+}
+restore_uncommitted_generated_drift
+
 # ── Working-tree cleanliness guard ───────────────────────────────────
 # git worktree add checks out the committed tree at $STARTUP_SHA_FULL, so
 # ANY uncommitted modifications to tracked files would silently NOT make
diff --git a/scripts/verify-image-revisions.sh b/scripts/verify-image-revisions.sh
index 8e44491f1..d4c10cfd3 100755
--- a/scripts/verify-image-revisions.sh
+++ b/scripts/verify-image-revisions.sh
@@ -96,7 +96,7 @@ image_relevant_paths() {
       echo "src/widgets src/browser src/shared docker/widget-server.Dockerfile"
       ;;
     *continuum-model-init*)
-      echo "src/scripts/install-livekit.sh src/scripts/download-voice-models.sh docker/model-init.Dockerfile"
+      echo "src/shared/models.json src/scripts/download-models.sh src/scripts/download-avatar-models.sh src/scripts/shared docker/model-init.Dockerfile"
       ;;
     *)
       # Unknown image — be safe, treat any change as relevant.
@@ -150,7 +150,13 @@ for IMAGE in "${IMAGE_ARRAY[@]}"; do
   REF="$IMAGE:$TAG"
   echo "━━━ $REF ━━━"
 
-  RAW=$(docker buildx imagetools inspect --raw "$REF" 2>/dev/null || echo '{}')
+  if ! RAW=$(docker buildx imagetools inspect --raw "$REF" 2>&1); then
+    echo "  ❌ MISSING in registry"
+    echo "     $RAW"
+    echo "$REF" >> "$STALE_AMD64_OUT"
+    FAILED=1
+    continue
+  fi
 
   # For multi-arch indexes: enumerate per-platform manifests. Skip the
   # `unknown/unknown` attestation manifests buildx adds alongside real
@@ -170,7 +176,9 @@ for IMAGE in "${IMAGE_ARRAY[@]}"; do
   ' 2>/dev/null)
 
   if [[ -z "$ARCH_LIST" ]]; then
-    echo "  ⚠️  No manifest entries — image may not exist yet at this tag"
+    echo "  ❌ No linux manifest entries — image tag is unusable for install smoke"
+    echo "$REF" >> "$STALE_AMD64_OUT"
+    FAILED=1
     continue
   fi
 
@@ -267,7 +275,7 @@ fi
 
 if [ "$FAILED" -ne 0 ]; then
   echo ""
-  echo "❌ STALE-IMAGE GATE FAILED — amd64 image(s) at :$TAG built from a different commit."
+  echo "❌ IMAGE GATE FAILED — amd64 image(s) at :$TAG are missing or stale."
   echo "   The user-facing target must always be current."
   echo ""
   echo "   Fix:"
diff --git a/src/daemons/user-daemon/server/UserDaemonServer.ts b/src/daemons/user-daemon/server/UserDaemonServer.ts
index b323ea6e5..959a2352d 100644
--- a/src/daemons/user-daemon/server/UserDaemonServer.ts
+++ b/src/daemons/user-daemon/server/UserDaemonServer.ts
@@ -92,6 +92,7 @@ export class UserDaemonServer extends UserDaemon {
     });
 
     // Start PersonaLifecycleManager — listens for API key add/remove events
+    PersonaLifecycleManager.instance.setRuntimeActivator((user, reason) => this.ensurePersonaRuntimeClient(user, reason));
     PersonaLifecycleManager.instance.subscribe();
 
     const deferredMs = Date.now() - deferredStart;
@@ -167,6 +168,17 @@ export class UserDaemonServer extends UserDaemon {
 
   }
 
+  public async ensurePersonaRuntimeClient(userEntity: UserEntity, reason: string): Promise<void> {
+    if (userEntity.type !== 'persona') {
+      throw new Error(`UserDaemon refused runtime activation for non-persona user ${userEntity.id}`);
+    }
+    await this.ensurePersonaCorrectState(userEntity);
+    if (!this.personaClients.has(userEntity.id)) {
+      throw new Error(`Persona client did not activate for ${userEntity.displayName} (${userEntity.id}) from ${reason}`);
+    }
+    this.log.info(`✅ UserDaemon: Runtime persona client ensured for ${userEntity.displayName} (${reason})`);
+  }
+
   /**
    * Handle user created event
    * Note: Room membership handled by RoomMembershipDaemon (Discord-style auto-join)
diff --git a/src/eslint.config.js b/src/eslint.config.js
index b726ea8d2..608070d36 100644
--- a/src/eslint.config.js
+++ b/src/eslint.config.js
@@ -43,6 +43,7 @@ export default tseslint.config(
       'node_modules/**',
       'shared/config.ts',
       'shared/generated/**',
+      'src/**',
       'workers/target/**',
       'workers/vendor/**',
       '**/*.d.ts',
diff --git a/src/scripts/download-avatar-models.sh b/src/scripts/download-avatar-models.sh
index 58ce926b3..7ca22ff46 100755
--- a/src/scripts/download-avatar-models.sh
+++ b/src/scripts/download-avatar-models.sh
@@ -121,8 +121,18 @@ download_vroid_zip() {
     return
   fi
 
-  # Extract zip — use python3 (always available) so we don't need unzip installed
-  if ! python3 -c "
+  # Extract zip. model-init images include unzip; local dev machines often
+  # have python3. Require one explicit extractor and report which path failed.
+  if command -v unzip >/dev/null 2>&1; then
+    if ! unzip -q "$tmpzip" -d "$tmpdir"; then
+      echo -e "  ${RED}⚠ Failed to extract ${name}: unzip rejected archive${NC}" >&2
+      rm -rf "$tmpzip" "$tmpdir"
+      FAILED=$((FAILED + 1))
+      FAILED_NAMES+=("$name")
+      return
+    fi
+  elif command -v python3 >/dev/null 2>&1; then
+    if ! python3 -c "
 import zipfile, sys
 try:
     with zipfile.ZipFile('$tmpzip', 'r') as z:
@@ -131,7 +141,14 @@ except (zipfile.BadZipFile, Exception) as e:
     print(f'Extract failed: {e}', file=sys.stderr)
     sys.exit(1)
 "; then
-    echo -e "  ${RED}⚠ Failed to extract ${name}: file may be corrupt or not a zip${NC}" >&2
+      echo -e "  ${RED}⚠ Failed to extract ${name}: python3 rejected archive${NC}" >&2
+      rm -rf "$tmpzip" "$tmpdir"
+      FAILED=$((FAILED + 1))
+      FAILED_NAMES+=("$name")
+      return
+    fi
+  else
+    echo -e "  ${RED}⚠ Failed to extract ${name}: no unzip or python3 available${NC}" >&2
     rm -rf "$tmpzip" "$tmpdir"
     FAILED=$((FAILED + 1))
     FAILED_NAMES+=("$name")
diff --git a/src/scripts/download-models.sh b/src/scripts/download-models.sh
index 53d343dba..70b59c835 100755
--- a/src/scripts/download-models.sh
+++ b/src/scripts/download-models.sh
@@ -62,6 +62,22 @@ if ! command -v jq >/dev/null 2>&1; then
   exit 1
 fi
 
+# Validate TIER against the canonical set BEFORE the jq lookup. Without
+# this, an unknown tier (e.g. legacy `primary` from older install.sh)
+# would silently produce an empty `by_tier` set — install ships only
+# voice models and personas have no local Qwen at runtime. That was the
+# 2026-05-11 RTX 5090 silent-no-replies root cause. Fail loud per Joel's
+# 'no silent fallback to placeholder models' rule.
+case "$TIER" in
+  mba|mid|full) ;;
+  *)
+    echo -e "${RED}ERROR: TIER='${TIER}' is not a canonical tier name.${NC}" >&2
+    echo "  Valid: mba | mid | full (canon: src/shared/models.json auto_download.by_tier keys)." >&2
+    echo "  Likely cause: install.sh CONTINUUM_TIER (e.g. legacy 'primary') diverged from registry. Align both ends." >&2
+    exit 1
+    ;;
+esac
+
 # Compute the download set: always[] + by_tier[$TIER][]
 mapfile -t MODEL_KEYS < <(jq -r --arg tier "$TIER" '
   [
@@ -75,11 +91,16 @@ echo ""
 
 # Download via huggingface direct-URL pattern: each model has files[].
 # We resolve to https://huggingface.co/<repo>/resolve/main/<file> and curl.
-# The huggingface-cli would be cleaner but adds Python+pip to model-init
-# (currently a tiny node:slim image, ~120MB). Direct curl keeps it lean.
+# The standard install path must work without a HuggingFace account. Do not
+# pass HF_TOKEN here: a token can mask private/gated default models during dev
+# or CI. If any auto_download artifact requires auth, this script must fail.
+FAILED=0
+FAILED_ITEMS=()
+
 for KEY in "${MODEL_KEYS[@]}"; do
   KIND=$(jq -r --arg k "$KEY" '.models[$k].kind // "unknown"' "$REGISTRY")
   REPO=$(jq -r --arg k "$KEY" '.models[$k].hf_repo // ""' "$REGISTRY")
+  REVISION=$(jq -r --arg k "$KEY" '.models[$k].hf_revision // "main"' "$REGISTRY")
   FORMAT=$(jq -r --arg k "$KEY" '.models[$k].format // ""' "$REGISTRY")
   SIZE=$(jq -r --arg k "$KEY" '.models[$k].size_gb // "?"' "$REGISTRY")
 
@@ -96,34 +117,46 @@ for KEY in "${MODEL_KEYS[@]}"; do
   TARGET_DIR="$MODELS_DIR/$KEY"
   mkdir -p "$TARGET_DIR"
 
-  # Get files list. Some entries omit files (huggingface-cli style); skip those.
+  # Get files list. Downloadable auto_download entries must name every required
+  # artifact. An empty files[] for a non-builtin model is a broken registry row,
+  # not a runtime fallback opportunity.
   mapfile -t FILES < <(jq -r --arg k "$KEY" '.models[$k].files // [] | .[]' "$REGISTRY")
   if [[ ${#FILES[@]} -eq 0 ]]; then
-    echo -e "${YELLOW}  SKIP $KEY — no files[] specified (huggingface-cli pull required)${NC}"
+    echo -e "${RED}  ✗ $KEY — no files[] specified for downloadable model${NC}" >&2
+    FAILED=$((FAILED + 1))
+    FAILED_ITEMS+=("$KEY:<missing files[]>")
     continue
   fi
 
   echo -e "${YELLOW}━━ $KEY (kind=$KIND, ~${SIZE}GB) ━━${NC}"
   for FILE in "${FILES[@]}"; do
-    DEST="$TARGET_DIR/$(basename "$FILE")"
+    DEST="$TARGET_DIR/$FILE"
+    mkdir -p "$(dirname "$DEST")"
     if [[ -f "$DEST" ]]; then
-      echo -e "${GREEN}  ✓ already cached: $(basename "$FILE")${NC}"
+      echo -e "${GREEN}  ✓ already cached: $FILE${NC}"
       continue
     fi
-    URL="https://huggingface.co/${REPO}/resolve/main/${FILE}"
+    URL="https://huggingface.co/${REPO}/resolve/${REVISION}/${FILE}"
     echo "  ↓ $URL"
-    if curl -fsSL --retry 3 --retry-delay 2 -o "$DEST.partial" "$URL"; then
+    CURL_ARGS=(-fsSL --retry 3 --retry-delay 2 --retry-all-errors)
+    if curl "${CURL_ARGS[@]}" -o "$DEST.partial" "$URL"; then
       mv "$DEST.partial" "$DEST"
-      echo -e "${GREEN}  ✓ $(basename "$FILE") ($(du -h "$DEST" | cut -f1))${NC}"
+      echo -e "${GREEN}  ✓ $FILE ($(du -h "$DEST" | cut -f1))${NC}"
     else
       rm -f "$DEST.partial"
       echo -e "${RED}  ✗ FAILED to download $FILE${NC}" >&2
-      # Continue rather than fail-the-container — partial models is better
-      # than no models. continuum-core will report missing-file at load time.
+      FAILED=$((FAILED + 1))
+      FAILED_ITEMS+=("$KEY:$FILE")
     fi
   done
 done
 
 echo ""
+if [[ "$FAILED" -gt 0 ]]; then
+  echo -e "${RED}━━ download-models.sh FAILED — ${FAILED} required artifact(s) missing ━━${NC}" >&2
+  printf '  %s\n' "${FAILED_ITEMS[@]}" >&2
+  exit 1
+fi
+
 echo -e "${GREEN}━━ download-models.sh complete (TIER=$TIER) ━━${NC}"
 echo "  Total in $MODELS_DIR: $(du -sh "$MODELS_DIR" 2>/dev/null | cut -f1)"
diff --git a/src/scripts/git-prepush.sh b/src/scripts/git-prepush.sh
index 8d9e58eca..1e24b8bec 100755
--- a/src/scripts/git-prepush.sh
+++ b/src/scripts/git-prepush.sh
@@ -64,6 +64,33 @@ RUST_RELEVANT=0
 if echo "$CHANGED_FILES" | grep -qE "^(src/workers/|docker/|src/shared/generated/|Cargo\.(toml|lock)$|src/workers/.*/Cargo\.(toml|lock)$)"; then
     RUST_RELEVANT=1
 fi
+TS_RELEVANT=0
+if echo "$CHANGED_FILES" | grep -qE "^src/.*\.tsx?$"; then
+    TS_RELEVANT=1
+fi
+PUSH_GENERATED_FILES="$(printf '%s\n' "$CHANGED_FILES" | grep -E "^src/shared/generated/.*\.tsx?$" || true)"
+
+restore_generated_type_drift() {
+    local current_dirty generated_to_restore path
+    current_dirty="$(git diff --name-only HEAD -- src/shared/generated 2>/dev/null | sort -u)"
+    [ -n "$current_dirty" ] || return 0
+
+    generated_to_restore=""
+    while IFS= read -r path; do
+        [ -n "$path" ] || continue
+        if ! printf '%s\n' "$PUSH_GENERATED_FILES" | grep -Fxq "$path"; then
+            generated_to_restore="${generated_to_restore}${path}"$'\n'
+        fi
+    done <<< "$current_dirty"
+
+    [ -n "$generated_to_restore" ] || return 0
+
+    echo "🔄 Restoring ts-rs generated type drift from Rust checks..."
+    while IFS= read -r path; do
+        [ -n "$path" ] || continue
+        git restore -- "$path" 2>/dev/null || true
+    done <<< "$generated_to_restore"
+}
 
 # Phase 1: TypeScript compilation (<15s)
 echo ""
@@ -78,6 +105,7 @@ else
     echo "   Run: cd src && npm run build:ts"
     FAILED=1
 fi
+restore_generated_type_drift
 
 # Phase 1b: ESLint — baseline-tolerant.
 #
@@ -127,10 +155,15 @@ else
         fi
     else
         DELTA=$(( CURRENT - BASELINE ))
-        echo "❌ ESLint: $CURRENT errors — baseline is $BASELINE, this push added $DELTA new violation(s)."
-        echo "   Run to see what's new:"
-        echo "   cd src && npx eslint './**/*.ts' --max-warnings 0 --quiet"
-        FAILED=1
+        if [ "$TS_RELEVANT" -eq 0 ]; then
+            echo "⚠️  ESLint: $CURRENT errors — baseline is $BASELINE (+$DELTA), but this push has no TypeScript changes."
+            echo "   Not blocking this non-TS push. Refresh eslint-baseline.txt or fix the drift in a dedicated TS cleanup."
+        else
+            echo "❌ ESLint: $CURRENT errors — baseline is $BASELINE, this push added $DELTA new violation(s)."
+            echo "   Run to see what's new:"
+            echo "   cd src && npx eslint './**/*.ts' --max-warnings 0 --quiet"
+            FAILED=1
+        fi
     fi
 fi
 
@@ -191,6 +224,8 @@ else
     echo "⚠️  Rust directory not found (skipping)"
 fi
 
+restore_generated_type_drift
+
 # Phase 4: Native-arch Docker images (conditional)
 # Fires only when the push touches Rust or Docker files. TS/docs/widget-
 # only pushes skip — they don't affect the continuum-core/vulkan/cuda
diff --git a/src/scripts/validate-model-registry-downloads.sh b/src/scripts/validate-model-registry-downloads.sh
new file mode 100755
index 000000000..f9a9f6e98
--- /dev/null
+++ b/src/scripts/validate-model-registry-downloads.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+# Validate that registry auto_download artifacts resolve on HuggingFace.
+# This is a fast preflight for model-init: it catches stale repos/filenames
+# before the install smoke spends minutes booting a system with no persona LLM.
+# Validation is intentionally anonymous: default install models must be public.
+
+set -euo pipefail
+
+REGISTRY="${REGISTRY:-src/shared/models.json}"
+TIER="${TIER:-full}"
+VALIDATE_ALL_TIERS="${VALIDATE_ALL_TIERS:-0}"
+
+if [[ ! -f "$REGISTRY" ]]; then
+  echo "ERROR: registry file not found: $REGISTRY" >&2
+  exit 1
+fi
+
+if ! command -v jq >/dev/null 2>&1; then
+  echo "ERROR: jq is required" >&2
+  exit 1
+fi
+
+MODEL_KEYS=()
+if [[ "$VALIDATE_ALL_TIERS" == "1" ]]; then
+  while IFS= read -r key; do
+    MODEL_KEYS+=("$key")
+  done < <(jq -r '
+    [
+      .auto_download.always[],
+      (.auto_download.by_tier[] // [])[]
+    ] | unique | .[]
+  ' "$REGISTRY")
+else
+  case "$TIER" in
+    mba|mid|full) ;;
+    *)
+      echo "ERROR: TIER='$TIER' is not valid; use mba, mid, or full" >&2
+      exit 1
+      ;;
+  esac
+  while IFS= read -r key; do
+    MODEL_KEYS+=("$key")
+  done < <(jq -r --arg tier "$TIER" '
+    [
+      .auto_download.always[],
+      (.auto_download.by_tier[$tier] // [])[]
+    ] | unique | .[]
+  ' "$REGISTRY")
+fi
+
+FAILED=0
+
+for KEY in "${MODEL_KEYS[@]}"; do
+  FORMAT=$(jq -r --arg k "$KEY" '.models[$k].format // ""' "$REGISTRY")
+  REPO=$(jq -r --arg k "$KEY" '.models[$k].hf_repo // ""' "$REGISTRY")
+  REVISION=$(jq -r --arg k "$KEY" '.models[$k].hf_revision // "main"' "$REGISTRY")
+  FILES=()
+  while IFS= read -r file; do
+    FILES+=("$file")
+  done < <(jq -r --arg k "$KEY" '.models[$k].files // [] | .[]' "$REGISTRY")
+
+  if [[ "$FORMAT" == "candle-builtin" ]]; then
+    continue
+  fi
+
+  if [[ -z "$REPO" ]]; then
+    echo "ERROR: $KEY has no hf_repo" >&2
+    FAILED=$((FAILED + 1))
+    continue
+  fi
+
+  if [[ ${#FILES[@]} -eq 0 ]]; then
+    echo "ERROR: $KEY has no files[]" >&2
+    FAILED=$((FAILED + 1))
+    continue
+  fi
+
+  for FILE in "${FILES[@]}"; do
+    URL="https://huggingface.co/${REPO}/resolve/${REVISION}/${FILE}"
+    CURL_ARGS=(-fsSIL --retry 2 --retry-delay 1 --retry-all-errors)
+    if curl "${CURL_ARGS[@]}" "$URL" >/dev/null; then
+      echo "OK $KEY $FILE"
+    else
+      echo "ERROR: missing artifact: $URL" >&2
+      FAILED=$((FAILED + 1))
+    fi
+  done
+done
+
+if [[ "$FAILED" -gt 0 ]]; then
+  echo "model registry validation failed: $FAILED missing/broken artifact(s)" >&2
+  exit 1
+fi
+
+echo "model registry validation passed (${#MODEL_KEYS[@]} model keys, tier=${TIER}, all_tiers=${VALIDATE_ALL_TIERS})"
diff --git a/src/server/seed-in-process.ts b/src/server/seed-in-process.ts
index 6dfdaba9d..c760ca9c4 100644
--- a/src/server/seed-in-process.ts
+++ b/src/server/seed-in-process.ts
@@ -222,6 +222,41 @@ class DatabaseSeeder {
     }
   }
 
+  async ensureUsersInRooms(users: readonly UserEntity[], rooms: readonly RoomEntity[]): Promise<number> {
+    let updates = 0;
+    for (const room of rooms) {
+      const existingMembers = room.members ?? [];
+      const existingIds = new Set(existingMembers.map(member => member.userId));
+      const missingUsers = users.filter(user => !existingIds.has(user.id));
+      if (missingUsers.length === 0) {
+        continue;
+      }
+
+      const members = [
+        ...existingMembers,
+        ...missingUsers.map(user => ({
+          userId: user.id,
+          role: 'member' as const,
+          joinedAt: new Date(),
+        })),
+      ];
+
+      const result = await DataUpdate.execute<RoomEntity>({
+        collection: RoomEntity.collection,
+        dbHandle: 'default',
+        id: room.id,
+        data: { members },
+        suppressEvents: false,
+      });
+      if (!result.success || !result.data) {
+        throw new Error(`Seed FATAL: failed to add seeded personas to room "${room.uniqueId}": ${result.error ?? 'unknown error'}`);
+      }
+      room.members = members;
+      updates += missingUsers.length;
+    }
+    return updates;
+  }
+
   /** Generate avatar PNGs for all personas */
   async generateAvatars(personas: { uniqueId: string; displayName: string; accentColor: string }[]): Promise<number> {
     try {
@@ -309,10 +344,8 @@ async function syncPersonaProviders(_seeder: DatabaseSeeder): Promise<void> {
         : undefined;
       let desiredModelId = config.modelId;
       if (!desiredModelId && config.modelRef) {
-        const { resolveModel, tierFromRamGB } = await import('../shared/ModelRegistry');
-        const ramGB = Math.round((require('os').totalmem() / 1024 / 1024 / 1024));
-        const tier = tierFromRamGB(ramGB);
-        const spec = resolveModel(config.modelRef, tier);
+        const { resolveModel } = await import('../shared/ModelRegistry');
+        const spec = resolveModel(config.modelRef, resolveInstallTier());
         desiredModelId = spec.hf_repo;
       }
       const providerChanged = currentProvider !== config.provider;
@@ -337,6 +370,20 @@ async function syncPersonaProviders(_seeder: DatabaseSeeder): Promise<void> {
   }
 }
 
+export function resolveInstallTier(): import('../shared/ModelRegistry').Tier {
+  const envTier = process.env.CONTINUUM_TIER ?? process.env.TIER;
+  if (envTier) {
+    if (envTier === 'mba' || envTier === 'mid' || envTier === 'full') {
+      return envTier;
+    }
+    throw new Error(`Seed FATAL: invalid CONTINUUM_TIER/TIER '${envTier}'. Valid tiers: mba, mid, full`);
+  }
+
+  const { tierFromRamGB } = require('../shared/ModelRegistry') as typeof import('../shared/ModelRegistry');
+  const ramGB = Math.round(require('os').totalmem() / 1024 / 1024 / 1024);
+  return tierFromRamGB(ramGB);
+}
+
 /**
  * Seed the database if empty. Returns true if seeding was performed.
  */
@@ -402,9 +449,8 @@ export async function seedDatabase(): Promise<boolean> {
   // changing the registry value updates seeded personas on next startup
   // (Joel 2026-05-04: "personas PICK UP THE MODEL change and arent stuck
   // in the past").
-  const { resolveModel, tierFromRamGB } = await import('../shared/ModelRegistry');
-  const seedRamGB = Math.round(require('os').totalmem() / 1024 / 1024 / 1024);
-  const seedTier = tierFromRamGB(seedRamGB);
+  const { resolveModel } = await import('../shared/ModelRegistry');
+  const seedTier = resolveInstallTier();
 
   for (const config of personas) {
     try {
@@ -430,6 +476,9 @@ export async function seedDatabase(): Promise<boolean> {
   }
   console.log(`  ✅ ${created.size} personas`);
 
+  const membershipAdds = await seeder.ensureUsersInRooms([...created.values()], roomEntities);
+  console.log(`  ✅ Persona room memberships (${membershipAdds} added)`);
+
   // Profiles
   for (const [uniqueId, profile] of Object.entries(PROFILES)) {
     const user = created.get(uniqueId);
diff --git a/src/shared/generated/cognition/ResolutionError.ts b/src/shared/generated/cognition/ResolutionError.ts
index 42bfd5cd7..380a18c96 100644
--- a/src/shared/generated/cognition/ResolutionError.ts
+++ b/src/shared/generated/cognition/ResolutionError.ts
@@ -10,4 +10,4 @@ import type { TargetSilicon } from "./TargetSilicon";
  * a soft retry on a default. Callers that want graceful degradation must
  * EXPLICITLY relax their requirement and re-invoke.
  */
-export type ResolutionError = { "kind": "noModelMatchesRequirement", registry_count: number, candidates_after_filter: number, unmet_filters: Array<string>, } | { "kind": "noMultimodalBase", registry_count: number, required_sensory_capabilities: Array<string>, } | { "kind": "siliconResidencyViolated", rejected_model_id: string, actual_silicon: TargetSilicon, };
+export type ResolutionError = { "kind": "noModelMatchesRequirement", registry_count: number, candidates_after_filter: number, unmet_filters: Array<string>, } | { "kind": "noMultimodalBase", registry_count: number, required_sensory_capabilities: Array<string>, } | { "kind": "siliconResidencyViolated", rejected_model_id: string, actual_silicon: TargetSilicon, } | { "kind": "unknownProviderReferenced", model_id: string, provider_id: string, };
diff --git a/src/shared/models.json b/src/shared/models.json
index 5bcd6aa21..3bb78e3e6 100644
--- a/src/shared/models.json
+++ b/src/shared/models.json
@@ -12,9 +12,10 @@
     "qwen3.5-0.8b-general": {
       "kind": "chat-llm",
       "hf_repo": "continuum-ai/qwen3.5-0.8b-general-forged",
-      "format": "gguf",
+      "hf_revision": "47980026ffe598c1981507aa06c2de3228fe95d5",
+      "format": "safetensors",
       "architecture": "qwen3",
-      "files": ["qwen3.5-0.8b-general-forged-q4_k_m.gguf"],
+      "files": ["config.json", "generation_config.json", "model.safetensors", "tokenizer.json", "tokenizer_config.json"],
       "size_gb": 0.5,
       "min_ram_gb": 16,
       "chat_template": "qwen2",
@@ -23,9 +24,10 @@
     "qwen3.5-2b-general": {
       "kind": "chat-llm",
       "hf_repo": "continuum-ai/qwen3.5-2b-general-forged",
-      "format": "gguf",
+      "hf_revision": "ad73e8567db57bb016d2721b3387268e0e5533f7",
+      "format": "safetensors",
       "architecture": "qwen3",
-      "files": ["qwen3.5-2b-general-forged-q4_k_m.gguf"],
+      "files": ["config.json", "generation_config.json", "model.safetensors", "tokenizer.json", "tokenizer_config.json"],
       "size_gb": 1.4,
       "min_ram_gb": 24,
       "chat_template": "qwen2",
@@ -34,9 +36,10 @@
     "qwen3.5-4b-code-forged": {
       "kind": "chat-llm",
       "hf_repo": "continuum-ai/qwen3.5-4b-code-forged-GGUF",
+      "hf_revision": "6cfe43981913730b1abc4ad520510a24b3f05922",
       "format": "gguf",
       "architecture": "qwen3",
-      "files": ["qwen3.5-4b-code-forged-q4_k_m.gguf"],
+      "files": ["qwen3.5-4b-code-forged-Q4_K_M.gguf"],
       "size_gb": 2.7,
       "min_ram_gb": 32,
       "chat_template": "qwen2",
@@ -44,10 +47,11 @@
     },
     "qwen2-vl-7b": {
       "kind": "vision-llm",
-      "hf_repo": "Qwen/Qwen2-VL-7B-Instruct-GGUF",
+      "hf_repo": "bartowski/Qwen2-VL-7B-Instruct-GGUF",
+      "hf_revision": "3088669af444bb2b86da6272694edd905f9c5a5b",
       "format": "gguf",
       "architecture": "qwen2-vl",
-      "files": ["qwen2-vl-7b-instruct-q4_k_m.gguf", "mmproj-Qwen2-VL-7B-Instruct-f16.gguf"],
+      "files": ["Qwen2-VL-7B-Instruct-Q4_K_M.gguf", "mmproj-Qwen2-VL-7B-Instruct-f16.gguf"],
       "size_gb": 5.0,
       "min_ram_gb": 16,
       "chat_template": "qwen2",
@@ -64,6 +68,7 @@
     "whisper-base-en": {
       "kind": "stt",
       "hf_repo": "ggerganov/whisper.cpp",
+      "hf_revision": "5359861c739e955e79d9a303bcbc70fb988958b1",
       "format": "ggml",
       "files": ["ggml-base.en.bin"],
       "size_gb": 0.075,
@@ -72,6 +77,7 @@
     "piper-libritts-r-medium": {
       "kind": "tts",
       "hf_repo": "rhasspy/piper-voices",
+      "hf_revision": "7a6c333ec560f0e688371adc2fbb7bbe105028c6",
       "format": "onnx",
       "files": ["en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx", "en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx.json"],
       "size_gb": 0.063,
@@ -80,14 +86,16 @@
     "kokoro-82m": {
       "kind": "tts",
       "hf_repo": "onnx-community/Kokoro-82M-v1.0-ONNX",
+      "hf_revision": "1939ad2a8e416c0acfeecc08a694d14ef25f2231",
       "format": "onnx",
-      "files": ["onnx/model_q8f16.onnx", "voices.bin"],
+      "files": ["onnx/model_q8f16.onnx", "voices/af.bin"],
       "size_gb": 0.08,
       "description": "Kokoro 82M ONNX TTS — high quality, lightweight."
     },
     "silero-vad": {
       "kind": "vad",
       "hf_repo": "onnx-community/silero-vad",
+      "hf_revision": "e71cae966052b992a7eca6b17738916ce0eca4ec",
       "format": "onnx",
       "files": ["onnx/model.onnx"],
       "size_gb": 0.002,
diff --git a/src/system/user/server/PersonaLifecycleManager.ts b/src/system/user/server/PersonaLifecycleManager.ts
index 1963c11f2..fb6578c9c 100644
--- a/src/system/user/server/PersonaLifecycleManager.ts
+++ b/src/system/user/server/PersonaLifecycleManager.ts
@@ -1,18 +1,9 @@
-/**
- * PersonaLifecycleManager — runtime persona creation/removal based on API key changes.
- *
- * Subscribes to:
- * - system:config:key-added  → calls persona/allocate IPC, creates new personas
- * - system:config:key-removed → gracefully shuts down that provider's personas
- *
- * This enables the adaptive self-installing system: add an API key in Settings,
- * and the persona appears in chat within seconds — no restart needed.
- */
-
 import { Events } from '../../core/shared/Events';
 import { Commands } from '../../core/shared/Commands';
 import type { CommandParams } from '../../core/types/JTAGTypes';
 import { SecretManager } from '../../secrets/SecretManager';
+import { COLLECTIONS } from '../../data/config/DatabaseConfig';
+import type { UserEntity } from '../../data/entities/UserEntity';
 
 interface KeyChangeEvent {
   provider: string;
@@ -46,9 +37,13 @@ interface AllocationResult {
   localModel: string;
 }
 
+interface UserListResult { success: boolean; items?: readonly UserEntity[]; error?: string; }
+interface UserCreateResult { success: boolean; user?: UserEntity; error?: string; }
+
 export class PersonaLifecycleManager {
   private static _instance: PersonaLifecycleManager | null = null;
   private _subscribed = false;
+  private runtimeActivator?: (user: UserEntity, reason: string) => Promise<void>;
 
   static get instance(): PersonaLifecycleManager {
     if (!this._instance) {
@@ -57,10 +52,6 @@ export class PersonaLifecycleManager {
     return this._instance;
   }
 
-  /**
-   * Start listening for key change events.
-   * Call once during server startup (after commands are registered).
-   */
   subscribe(): void {
     if (this._subscribed) return;
     this._subscribed = true;
@@ -79,17 +70,15 @@ export class PersonaLifecycleManager {
 
     console.log('🔄 PersonaLifecycleManager: Subscribed to config change events');
 
-    // Run initial allocation on startup — config.env keys are already loaded
-    // by SecretManager but no key-added event fires for pre-existing keys.
     setTimeout(() => this.runInitialAllocation().catch(err => {
       console.error('❌ PersonaLifecycleManager: Initial allocation failed:', err);
     }), 2000);
   }
 
-  /**
-   * Run allocation on startup with all currently available API keys.
-   * Creates any personas that should exist based on the current hardware + keys.
-   */
+  setRuntimeActivator(activate: (user: UserEntity, reason: string) => Promise<void>): void {
+    this.runtimeActivator = activate;
+  }
+
   private async runInitialAllocation(): Promise<void> {
     const availableApiKeys = this.collectAvailableApiKeys();
     console.log(`🎭 PersonaLifecycleManager: Initial allocation with ${availableApiKeys.length} API keys: [${availableApiKeys.join(', ')}]`);
@@ -100,8 +89,15 @@ export class PersonaLifecycleManager {
     ) as unknown as AllocationResult;
 
     if (!allocation?.allocations?.length) {
-      console.warn('⚠️ PersonaLifecycleManager: No allocations from initial run');
-      return;
+      const activated = await this.activatePersistedLocalPersonas(allocation);
+      if (activated > 0) {
+        console.log(`✅ PersonaLifecycleManager: ${activated} persisted persona(s) activated on startup`);
+        return;
+      }
+
+      const summary = allocation?.summary?.length ? allocation.summary.join('; ') : 'no allocator summary';
+      const skipped = allocation?.skipped?.length ? ` skipped=${allocation.skipped.length}` : '';
+      throw new Error(`persona/allocate returned zero startup allocations and no persisted local personas were available;${skipped} summary=${summary}`);
     }
 
     console.log(`🎭 PersonaLifecycleManager: Allocator returned ${allocation.allocations.length} persona(s)`);
@@ -114,11 +110,6 @@ export class PersonaLifecycleManager {
 
     console.log(`✅ PersonaLifecycleManager: ${created} persona(s) activated on startup`);
 
-    // Local model prewarm allocates the full model/KV context. Doing that at
-    // boot competes with seed, browser reconnect, and first room hydration, and
-    // on unified-memory Macs can push continuum-core into OS pressure before
-    // the system is actually ready. Keep it as an explicit performance knob,
-    // not default startup behavior.
     if (process.env.CONTINUUM_PREWARM_PERSONAS === '1' || process.env.CONTINUUM_PREWARM_PERSONAS === 'true') {
       void this.prewarmAllPersonas(allocation.allocations);
     } else {
@@ -126,16 +117,10 @@ export class PersonaLifecycleManager {
     }
   }
 
-  /**
-   * Fire prewarm requests in parallel for local personas. Each is bounded
-   * by short timeouts so a stuck DMR can never hang boot.
-   */
   private async prewarmAllPersonas(allocations: PersonaAllocation[]): Promise<void> {
     const local = allocations.filter(a => this.isLocalProvider(a.provider));
     if (local.length === 0) return;
 
-    // Probe DMR availability ONCE before firing all prewarms — saves N
-    // failed connection attempts when DMR isn't up yet (Docker still booting).
     const dmrUp = await this.checkDmrAvailable();
     if (!dmrUp) {
       console.log(`⏭️ PersonaLifecycleManager: DMR not reachable yet — skipping prewarm for ${local.length} local persona(s)`);
@@ -148,13 +133,6 @@ export class PersonaLifecycleManager {
     console.log(`🔥 PersonaLifecycleManager: Prewarm batch finished in ${Date.now() - startedAt}ms`);
   }
 
-  /**
-   * Quick DMR availability probe with a hard 2s timeout. Returns false on
-   * any failure (network, timeout, non-200) — never throws. Docker concern:
-   * DMR runs in Docker Desktop's container; on cold Docker start it may
-   * take a few seconds beyond our system boot to be reachable. We'd rather
-   * skip prewarm than hang.
-   */
   private async checkDmrAvailable(): Promise<boolean> {
     try {
       const ctrl = new AbortController();
@@ -167,11 +145,6 @@ export class PersonaLifecycleManager {
     }
   }
 
-  /**
-   * Fire a single tiny generation to warm the model + DMR slot for one persona.
-   * max_tokens=1 keeps it nearly free; the cost we want is the model load,
-   * not the generation. Errors are swallowed — prewarm failure is non-fatal.
-   */
   private async prewarmPersona(allocation: PersonaAllocation): Promise<void> {
     const model = allocation.resolvedModel || allocation.modelId;
     if (!model) return;
@@ -190,25 +163,15 @@ export class PersonaLifecycleManager {
     }
   }
 
-  /**
-   * Provider classes that route to the local DMR/llama-server pool — these
-   * benefit from prewarm because they pay model-load cold start. Cloud
-   * providers maintain their own warm state via API connection pooling.
-   */
   private isLocalProvider(provider: string): boolean {
     return provider === 'local' || provider === 'sentinel';
   }
 
-  /**
-   * When an API key is added, re-run allocation and create any new personas.
-   */
   private async handleKeyAdded(event: KeyChangeEvent): Promise<void> {
     console.log(`🔑 PersonaLifecycleManager: Key added — ${event.provider}`);
 
-    // Collect all currently set API keys from process.env
     const availableApiKeys = this.collectAvailableApiKeys();
 
-    // Call Rust allocator for optimal persona assignments
     const allocation = await Commands.execute(
       'persona/allocate',
       { availableApiKeys } as Partial<CommandParams>
@@ -219,7 +182,6 @@ export class PersonaLifecycleManager {
       return;
     }
 
-    // Find personas that need this specific API key
     const newPersonas = allocation.allocations.filter(
       a => a.apiKeyEnv === event.provider
     );
@@ -229,7 +191,6 @@ export class PersonaLifecycleManager {
       return;
     }
 
-    // Create each new persona
     for (const persona of newPersonas) {
       await this.createPersona(persona);
     }
@@ -237,13 +198,9 @@ export class PersonaLifecycleManager {
     console.log(`✅ PersonaLifecycleManager: Created ${newPersonas.length} persona(s) for ${event.provider}`);
   }
 
-  /**
-   * When an API key is removed, deactivate that provider's personas.
-   */
   private async handleKeyRemoved(event: KeyChangeEvent): Promise<void> {
     console.log(`🔑 PersonaLifecycleManager: Key removed — ${event.provider}`);
 
-    // Emit a deactivation event that PersonaUser instances can listen for
     await Events.emit('persona:provider-deactivated', {
       provider: event.provider,
       timestamp: Date.now(),
@@ -252,33 +209,64 @@ export class PersonaLifecycleManager {
     console.log(`⚠️ PersonaLifecycleManager: Deactivation event emitted for ${event.provider} personas`);
   }
 
-  /**
-   * Create a persona user via the user/create command.
-   * The command already handles duplicate checking (idempotent).
-   */
   private async createPersona(allocation: PersonaAllocation): Promise<void> {
-    try {
-      const result = await Commands.execute('user/create', {
-        type: allocation.personaType,
-        displayName: allocation.displayName,
-        uniqueId: allocation.uniqueId,
-        provider: allocation.provider,
-      } as Partial<CommandParams>) as unknown as { success: boolean; error?: string };
+    const result = await Commands.execute('user/create', {
+      type: allocation.personaType,
+      displayName: allocation.displayName,
+      uniqueId: allocation.uniqueId,
+      provider: allocation.provider,
+    } as Partial<CommandParams>) as unknown as UserCreateResult;
+
+    if (!result?.success || !result.user) {
+      throw new Error(`user/create failed for persona ${allocation.displayName} (${allocation.uniqueId}): ${result?.error ?? 'missing user in result'}`);
+    }
 
-      if (result?.success) {
-        console.log(`  ✅ Created persona: ${allocation.displayName} (${allocation.uniqueId})`);
-      } else {
-        console.warn(`  ⚠️ Persona creation returned: ${JSON.stringify(result)}`);
-      }
-    } catch (error) {
-      console.error(`  ❌ Failed to create persona ${allocation.displayName}:`, error);
+    await this.ensurePersonaRuntimeClient(result.user, 'allocator');
+    console.log(`  ✅ Activated persona: ${allocation.displayName} (${allocation.uniqueId})`);
+  }
+
+  private async activatePersistedLocalPersonas(allocation?: AllocationResult): Promise<number> {
+    const result = await Commands.execute('data/list', {
+      dbHandle: 'default',
+      collection: COLLECTIONS.USERS,
+      filter: { type: 'persona' },
+      limit: 100,
+      skipCount: true,
+    } as Partial<CommandParams>) as unknown as UserListResult;
+
+    if (!result?.success) {
+      throw new Error(`data/list failed while checking persisted personas: ${result?.error ?? 'unknown error'}`);
+    }
+
+    const personas = result.items ?? [];
+    if (personas.length === 0) {
+      return 0;
+    }
+
+    console.error(
+      `❌ PersonaLifecycleManager: persona/allocate returned zero allocations with ${personas.length} persisted persona(s); activating persisted local personas and preserving the allocator defect for CI.`
+    );
+    if (allocation?.summary?.length) {
+      console.error(`❌ PersonaLifecycleManager: allocator summary: ${allocation.summary.join('; ')}`);
+    }
+
+    for (const persona of personas) {
+      await this.ensurePersonaRuntimeClient(persona, 'persisted-local');
+    }
+    return personas.length;
+  }
+
+  private async ensurePersonaRuntimeClient(user: UserEntity, reason: string): Promise<void> {
+    if (user.type !== 'persona') {
+      throw new Error(`Refusing to activate non-persona user ${user.displayName} (${user.id}) from ${reason}`);
+    }
+
+    if (!this.runtimeActivator) {
+      throw new Error(`Persona runtime activator is not registered; cannot activate persona ${user.displayName} (${user.id}) from ${reason}`);
     }
+    await this.runtimeActivator(user, reason);
   }
 
-  /**
-   * Collect all API key env vars that are currently set in process.env.
-   * These are the keys the Rust allocator needs to make decisions.
-   */
   private collectAvailableApiKeys(): string[] {
     const knownKeyVars = [
       'ANTHROPIC_API_KEY',
diff --git a/src/system/user/server/PersonaUser.ts b/src/system/user/server/PersonaUser.ts
index 9eb665c01..ffc5abd15 100644
--- a/src/system/user/server/PersonaUser.ts
+++ b/src/system/user/server/PersonaUser.ts
@@ -1725,9 +1725,13 @@ export class PersonaUser extends AIUser {
     }
 
     const result = await this.responseGenerator.generateAndPostResponse(originalMessage, decisionContext, preBuiltRagContext, socialSignals);
+    if (!result.success) {
+      const error = result.error ?? 'unknown response generation failure';
+      throw new Error(`${this.displayName}: response generation failed for message ${originalMessage.id}: ${error}`);
+    }
 
     // Mark tool results as processed to prevent infinite loops
-    if (result.success && result.storedToolResultIds.length > 0) {
+    if (result.storedToolResultIds.length > 0) {
       this.taskTracker.markMultipleProcessed(result.storedToolResultIds);
     }
   }
diff --git a/src/system/user/server/modules/PersonaMessageEvaluator.ts b/src/system/user/server/modules/PersonaMessageEvaluator.ts
index 118d2bb3a..c3cec8e5b 100644
--- a/src/system/user/server/modules/PersonaMessageEvaluator.ts
+++ b/src/system/user/server/modules/PersonaMessageEvaluator.ts
@@ -126,6 +126,10 @@ export class PersonaMessageEvaluator {
     this.personaUser.logger.enqueueLog('cognition.log', `[${timestamp}] ${message}${formattedArgs}\n`);
   }
 
+  private phase(message: string): void {
+    console.error(`[persona-phase] ${this.personaUser.displayName}: ${message}`);
+  }
+
   /**
    * Evaluate message with full cognition system (planning, focus, working memory)
    *
@@ -165,6 +169,7 @@ export class PersonaMessageEvaluator {
     evalTiming['early_gate'] = Date.now() - earlyGateStart;
 
     this.log(`[GATE:EARLY] ${this.personaUser.displayName}: sender=${messageEntity.senderName} senderType=${messageEntity.senderType} human=${senderIsHuman} result=${earlyResult.should_respond ? 'PASS' : 'BLOCK'} gate=${earlyResult.gate} reason="${earlyResult.reason}" (${earlyResult.decision_time_ms.toFixed(2)}ms)`);
+    this.phase(`early gate ${earlyResult.should_respond ? 'PASS' : 'BLOCK'} gate=${earlyResult.gate} ${evalTiming['early_gate']}ms message=${messageEntity.id}`);
 
     if (!earlyResult.should_respond) {
       this.personaUser.logAIDecision('SILENT', `${earlyResult.gate}: ${earlyResult.reason}`, {
@@ -178,6 +183,7 @@ export class PersonaMessageEvaluator {
     const coordinationStart = Date.now();
     const claimGranted = await this.coordinateResponseClaim(messageEntity, earlyResult);
     evalTiming['coordination_claim'] = Date.now() - coordinationStart;
+    this.phase(`coordination ${claimGranted ? 'granted' : 'deferred'} ${evalTiming['coordination_claim']}ms message=${messageEntity.id}`);
     if (!claimGranted) {
       this.personaUser.logAIDecision('SILENT', 'coordination: another persona owns this turn', {
         message: safeMessageText.slice(0, 100),
@@ -447,6 +453,7 @@ export class PersonaMessageEvaluator {
     const gatingStart = Date.now();
     const gatingResult = await this.evaluateShouldRespond(messageEntity, senderIsHuman, isMentioned, preComputedDecision, socialSignals);
     this.log(`⏱️ ${this.personaUser.displayName}: [INNER] evaluateShouldRespond=${Date.now() - gatingStart}ms`);
+    this.phase(`respond gate ${gatingResult.shouldRespond ? 'RESPOND' : 'SILENT'} ${Date.now() - gatingStart}ms message=${messageEntity.id}`);
 
     // FULL TRANSPARENCY LOGGING
     this.log(`\n${'='.repeat(80)}`);
@@ -608,6 +615,7 @@ export class PersonaMessageEvaluator {
       messageEntity,
       this.personaUser.rustCognition,
     );
+    this.phase(`post-inference adequacy ${postInferenceResult.shouldSkip ? 'BLOCK' : 'PASS'} ${Date.now() - postInferenceStart}ms message=${messageEntity.id}`);
 
     if (postInferenceResult.shouldSkip) {
       this.log(`[GATE:POST_INFERENCE] ${this.personaUser.displayName}: BLOCK — ${postInferenceResult.reason}`);
@@ -698,7 +706,9 @@ export class PersonaMessageEvaluator {
     // 🔧 PHASE: Generate and post response
     this.log(`🔧 TRACE-POINT-B: Before respondToMessage call (timestamp=${Date.now()})`);
     this.log(`🔧 ${this.personaUser.displayName}: [PHASE 3/3] Calling respondToMessage...`);
+    this.phase(`respondToMessage start message=${messageEntity.id}`);
     await this.personaUser.respondToMessage(messageEntity, decisionContext, gatingResult.filteredRagContext, gatingResult.socialSignals);
+    this.phase(`respondToMessage done message=${messageEntity.id}`);
     this.log(`🔧 TRACE-POINT-C: After respondToMessage returned (timestamp=${Date.now()})`);
     this.log(`✅ ${this.personaUser.displayName}: [PHASE 3/3] Response posted successfully`);
 
diff --git a/src/system/user/server/modules/PersonaResponseGenerator.ts b/src/system/user/server/modules/PersonaResponseGenerator.ts
index 94598c2a2..78d903d40 100644
--- a/src/system/user/server/modules/PersonaResponseGenerator.ts
+++ b/src/system/user/server/modules/PersonaResponseGenerator.ts
@@ -1,26 +1,5 @@
 /* eslint-disable max-lines -- pre-existing 720-line file; scheduled for split into PRG.ts (orchestration) + PRG-postResponse.ts + PRG-pipeline.ts in the cleanup-sweep PR after #950 */
-/**
- * PersonaResponseGenerator — TS shim over the Rust cognition core.
- *
- * The cognitive verb ("this persona, given this message, produces this
- * response") now lives in Rust (continuum-core::persona::response::respond).
- * This shim is the TS-side contract that:
- *
- *   1. Applies dormancy / engagement gate (pre-flight, TS-only concern).
- *   2. Routes sentinel dispatch (complex multi-step tasks become sentinels
- *      instead of tool loops — orthogonal to cognition, stays TS).
- *   3. Builds the minimal RAG slice Rust needs (system prompt + recent
- *      history + known specialties) and calls cognitionPersonaRespond.
- *   4. Handles Silent|Spoke: Silent is logged + returned; Spoke runs the
- *      tool agent loop on the returned text and posts to chat.
- *   5. Emits UI events (POSTED / ERROR / typing / voice / stage) and
- *      captures training-data + fitness telemetry off the critical path.
- *
- * Out of scope for this PR (anvil's next rungs):
- *   - Tool agent loop migration to Rust.
- *   - Sentinel dispatch relocation.
- *   - Cloud-provider routing through Rust ai_provider.
- */
+/** PersonaResponseGenerator — TS shim over Rust persona cognition. */
 
 import type { UUID } from '../../../core/types/CrossPlatformUUID';
 import { ChatMessageEntity } from '../../../data/entities/ChatMessageEntity';
@@ -54,14 +33,6 @@ import { FitnessTracker } from '../../../genome/server/FitnessTracker';
 import { getAIAudioBridge } from '../../../voice/server/AIAudioBridge';
 import { PRESENCE_EVENTS } from '../../../core/shared/EventConstants';
 import { PersonaEngagementDecider, type DormancyState } from './PersonaEngagementDecider';
-// PersonaAgentLoop / PersonaResponseValidator / PersonaPromptAssembler
-// were the TS-side second-pass inference + retry loop on Rust
-// personaRespond's output — duplicated work the Rust cognition crate
-// already owns and bypassed the model's full context window via a TS
-// maxTokens cap. Removed from this file's call path 2026-04-20; deleted
-// entirely in the 0.5.1/0.5.2/0.5.4 cleanup sweep once the subgraph
-// was confirmed closed (no live importers, no test refs). Tool calling
-// continues through Rust cognition::tool_executor (0.5.3).
 import { SentinelDispatchDecider } from '../../../sentinel/SentinelDispatchDecider';
 import { SentinelDispatchCoordinator } from '../../../sentinel/SentinelDispatchCoordinator';
 import { Commands } from '../../../core/shared/Commands';
@@ -216,6 +187,10 @@ export class PersonaResponseGenerator {
     this.logger.enqueueLog('cognition.log', `[${timestamp}] ${message}${formattedArgs}\n`);
   }
 
+  private phase(message: string): void {
+    console.error(`[persona-phase] ${this.personaName}: ${message}`);
+  }
+
   shouldRespondToMessage(
     message: ProcessableMessage,
     dormancyState?: DormancyState,
@@ -308,6 +283,7 @@ export class PersonaResponseGenerator {
     const pipelineTiming: Record<string, number> = {};
 
     try {
+      this.phase(`generate start message=${originalMessage.id} room=${originalMessage.roomId}`);
       // Sentinel short-circuit.
       const dispatchResult = await this.checkSentinelDispatch(originalMessage);
       if (dispatchResult) return dispatchResult;
@@ -322,6 +298,7 @@ export class PersonaResponseGenerator {
       const phase31Start = Date.now();
       const ragContext = preBuiltRagContext ?? await this.buildRagContext(originalMessage);
       pipelineTiming['3.1_rag'] = Date.now() - phase31Start;
+      this.phase(`rag ready ${pipelineTiming['3.1_rag']}ms history=${ragContext.conversationHistory.length} memories=${ragContext.privateMemories.length} artifacts=${ragContext.artifacts.length}`);
 
       const knownSpecialties = this.buildKnownSpecialties(ragContext);
       const recentHistory = this.buildRecentHistory(ragContext);
@@ -345,14 +322,18 @@ export class PersonaResponseGenerator {
       // refs only. Resolve back to bytes here, on the request path —
       // chat-send already wrote the file to disk via
       // MediaBlobService.externalize (synchronously, before data/create).
-      // Description (from VisionDescriptionService cache) gets pulled
-      // alongside so text-only personas downstream get the bridge text
-      // instead of hallucinating from prompt context.
+      // Resolve THIS persona's model capabilities before media projection.
+      // Native sensory models must receive source bytes directly; text
+      // description lookup is only for non-native models and must not sit
+      // on the native path.
+      const capabilities = await this.resolveModelCapabilities();
+      const hasNativeVision = capabilities.includes('vision');
+      this.phase(`capabilities ready model=${this.modelConfig.model} caps=${capabilities.join(',') || 'none'}`);
+
       const { MediaBlobService } = await import('../../../storage/MediaBlobService');
-      const { VisionDescriptionService } = await import('../../../vision/VisionDescriptionService');
       const fs = await import('fs');
 
-      const messageMediaResolved = await Promise.all(
+      const messageMedia = await Promise.all(
         (originalMessage.content.media ?? []).map(async (m) => {
           // Prefer inline base64 if it's still around (browser pre-encode
           // path or an item smaller than the externalize threshold), else
@@ -360,38 +341,30 @@ export class PersonaResponseGenerator {
           let base64: string | undefined = m.base64;
           if (!base64 && m.blobHash) {
             const path = MediaBlobService.getPath(m.blobHash);
-            if (path) {
-              try {
-                const buf = await fs.promises.readFile(path);
-                base64 = buf.toString('base64');
-              } catch {
-                // File missing despite hash — drop this item, log later.
-                return null;
-              }
+            if (!path) {
+              throw new Error(`Media blob ${m.blobHash} has no resolved path`);
+            }
+            try {
+              const buf = await fs.promises.readFile(path);
+              base64 = buf.toString('base64');
+            } catch (error) {
+              const message = error instanceof Error ? error.message : String(error);
+              throw new Error(`Failed to read media blob ${m.blobHash} at ${path}: ${message}`);
             }
           }
           if (!base64) {
-            return null; // Nothing to send to the model
+            throw new Error(`Media item ${m.type} has neither inline base64 nor blobHash`);
           }
-          // Pull description from VDS — populated by prewarmVisionDescriptions
-          // at chat-send time. Two states are valid waits:
-          //   'cached'   → ~0ms instant lookup (pre-warm finished).
-          //   'inflight' → bounded wait. Pre-warm started but hasn't
-          //                resolved yet; we'd rather wait up to 8s than
-          //                hand the persona an empty description and
-          //                let it hallucinate "I don't see any image."
-          //                VDS already deduplicates inflight requests, so
-          //                this await piggybacks on the existing call —
-          //                no extra inference cost.
-          // Status `none` / `error` → don't trigger a blocking describe
-          // here; the chat-send path is responsible for prewarming. Stage
-          // 2 (Rust-side) is responsible for emitting an [Attached image:
-          // unavailable] marker when description ends up undefined, so a
-          // text-only persona at least KNOWS an image was attached
-          // instead of fabricating absence. Tracked in #970.
+
+          // Description lookup is NOT on the native vision path. Vision-
+          // capable personas get bytes; only text-only image recipients
+          // may use a prewarmed description bridge. Errors are surfaced
+          // because swallowing them makes sensory failures look like
+          // normal text-only cognition.
           let description: string | undefined;
-          if (m.type === 'image') {
+          if (m.type === 'image' && !hasNativeVision) {
             try {
+              const { VisionDescriptionService } = await import('../../../vision/VisionDescriptionService');
               const visionSvc = VisionDescriptionService.getInstance();
               const status = visionSvc.descriptionStatus(base64);
               if (status === 'cached' || status === 'inflight') {
@@ -402,8 +375,9 @@ export class PersonaResponseGenerator {
                 ]);
                 description = desc?.description;
               }
-            } catch {
-              // Best-effort; drop to undefined on any cache error
+            } catch (error) {
+              const message = error instanceof Error ? error.message : String(error);
+              throw new Error(`Vision description lookup failed for text-only image bridge: ${message}`);
             }
           }
           return {
@@ -414,14 +388,6 @@ export class PersonaResponseGenerator {
           };
         })
       );
-      const messageMedia = messageMediaResolved.filter((x): x is NonNullable<typeof x> => x !== null);
-
-      // Resolve THIS persona's model capabilities (cached). Required by
-      // the IPC contract — Rust no longer does a registry lookup on its
-      // side, so the answer to "is this model vision-capable?" must
-      // travel WITH the request. Hard error if the model isn't in the
-      // registry (broken persona configuration, fail loudly here).
-      const capabilities = await this.resolveModelCapabilities();
 
       // IPC shape: { signal, personaContext }. Rust projects (signal,
       // ctx) → RespondInput via cognition_io::build_respond_input,
@@ -509,10 +475,12 @@ export class PersonaResponseGenerator {
       };
 
       const ipcStart = Date.now();
+      this.phase(`cognition/respond start media=${messageMedia.length} history=${recentHistory.length}`);
       const response = await this._rustBridge.personaRespond(rustRequest);
       const ipcDurationMs = Date.now() - ipcStart;
       pipelineTiming['3.2_cognition'] = Date.now() - phase32Start;
       pipelineTiming['3.2_ipc'] = ipcDurationMs;
+      this.phase(`cognition/respond done kind=${response.kind} ipc=${ipcDurationMs}ms total=${response.kind === 'spoke' ? response.total_ms : 'n/a'}ms`);
 
       if (response.kind === 'silent') {
         return this.handleSilent(originalMessage, response, pipelineTiming, generateStartTime);
@@ -529,11 +497,11 @@ export class PersonaResponseGenerator {
       // is exactly what Rust returned.
       const finalText = response.text.trim();
       if (!finalText) {
-        this.log(`⚠️ ${this.personaName}: Rust returned empty text — skipping post`);
-        return { success: false, error: 'Empty response from Rust', storedToolResultIds: allStoredResultIds };
+        throw new Error(`${this.personaName}: Rust cognition returned empty visible text for message ${originalMessage.id}`);
       }
 
       const phase35Start = Date.now();
+      this.phase(`post start chars=${finalText.length}`);
       const postedMessageId = await this.postResponse(
         originalMessage,
         finalText,
@@ -542,6 +510,7 @@ export class PersonaResponseGenerator {
         generateStartTime,
       );
       pipelineTiming['3.5_post'] = Date.now() - phase35Start;
+      this.phase(`post done id=${postedMessageId ?? 'missing'} post=${pipelineTiming['3.5_post']}ms`);
 
       if (decisionContext) {
         CoordinationDecisionLogger.logDecision({
@@ -865,6 +834,7 @@ export class PersonaResponseGenerator {
       this.log(`⏭️ ${this.personaName}: Provider not configured, staying quiet`);
     } else {
       this.log(`❌ ${this.personaName}: ${errorMsg}`);
+      console.error(`[PersonaResponseGenerator] ${this.personaName} response failed for message ${originalMessage.id}: ${errorMsg}`);
       AIDecisionLogger.logError(this.personaName, 'Response generation/posting', errorMsg);
     }
 
diff --git a/src/tests/unit/seed-install-tier.spec.ts b/src/tests/unit/seed-install-tier.spec.ts
new file mode 100644
index 000000000..1b26454df
--- /dev/null
+++ b/src/tests/unit/seed-install-tier.spec.ts
@@ -0,0 +1,33 @@
+import { describe, expect, it, afterEach } from 'vitest';
+import { resolveInstallTier } from '../../server/seed-in-process';
+
+const ORIGINAL_CONTINUUM_TIER = process.env.CONTINUUM_TIER;
+const ORIGINAL_TIER = process.env.TIER;
+
+afterEach(() => {
+  process.env.CONTINUUM_TIER = ORIGINAL_CONTINUUM_TIER;
+  process.env.TIER = ORIGINAL_TIER;
+});
+
+describe('seed install tier resolution', () => {
+  it('uses CONTINUUM_TIER before host/container memory inference', () => {
+    process.env.CONTINUUM_TIER = 'full';
+    delete process.env.TIER;
+
+    expect(resolveInstallTier()).toBe('full');
+  });
+
+  it('uses TIER when CONTINUUM_TIER is absent', () => {
+    delete process.env.CONTINUUM_TIER;
+    process.env.TIER = 'mid';
+
+    expect(resolveInstallTier()).toBe('mid');
+  });
+
+  it('fails on invalid explicit tiers', () => {
+    process.env.CONTINUUM_TIER = 'primary';
+    delete process.env.TIER;
+
+    expect(() => resolveInstallTier()).toThrow(/invalid CONTINUUM_TIER\/TIER 'primary'/);
+  });
+});
diff --git a/src/tsconfig.eslint.json b/src/tsconfig.eslint.json
index 95cf75fc1..d0968fc6b 100644
--- a/src/tsconfig.eslint.json
+++ b/src/tsconfig.eslint.json
@@ -18,6 +18,7 @@
     "generator/generate-command-schemas.ts",
     "widgets/**/*.ts",
     "tests/workers/**/*.ts",
+    "tests/unit/seed-install-tier.spec.ts",
     "tests/unit/url-card-adapter-xss.spec.ts",
     "test-path-aliases.ts",
     "test-path-aliases-runtime.ts"
diff --git a/src/workers/continuum-core/src/cognition/model_resolver/mod.rs b/src/workers/continuum-core/src/cognition/model_resolver/mod.rs
index cc52ed93d..1cdfa72ac 100644
--- a/src/workers/continuum-core/src/cognition/model_resolver/mod.rs
+++ b/src/workers/continuum-core/src/cognition/model_resolver/mod.rs
@@ -43,17 +43,8 @@ use crate::cognition::adaptive_throughput::TargetSilicon;
 use crate::model_registry::types::{Capability, Model, Provider, ProviderKind};
 use std::collections::HashMap;
 
-
-fn derive_target_silicon(
-    model: &Model,
-    provider_kinds: &HashMap<&str, ProviderKind>,
-    host: &HostCapability,
-) -> TargetSilicon {
-    let kind = provider_kinds
-        .get(model.provider.as_str())
-        .copied()
-        .unwrap_or_default(); // ProviderKind::Cloud — unknown provider treated as cloud
-    match kind {
+fn derive_target_silicon(provider_kind: ProviderKind, host: &HostCapability) -> TargetSilicon {
+    match provider_kind {
         ProviderKind::Local => host.primary_target_silicon,
         ProviderKind::Cloud => TargetSilicon::Cloud,
     }
@@ -95,9 +86,6 @@ where
         .into_iter()
         .map(|p| (p.id.as_str(), p.kind))
         .collect();
-    let is_local = |provider_id: &str| {
-        provider_kinds.get(provider_id).copied().unwrap_or_default() == ProviderKind::Local
-    };
 
     let registry: Vec<&Model> = models.into_iter().collect();
     let registry_count = registry.len();
@@ -187,6 +175,22 @@ where
         }
     }
 
+    for model in &candidates {
+        if !provider_kinds.contains_key(model.provider.as_str()) {
+            return Err(ResolutionError::UnknownProviderReferenced {
+                model_id: model.id.clone(),
+                provider_id: model.provider.clone(),
+            });
+        }
+    }
+
+    let provider_kind = |provider_id: &str| {
+        *provider_kinds
+            .get(provider_id)
+            .expect("provider existence validated before provider policy")
+    };
+    let is_local = |provider_id: &str| provider_kind(provider_id) == ProviderKind::Local;
+
     // Filter 4: provider policy.
     let before_provider = candidates.len();
     candidates.retain(|m| match requirement.provider_policy {
@@ -223,7 +227,7 @@ where
     }
 
     let best = candidates.first().expect("non-empty after filters");
-    let target_silicon = derive_target_silicon(best, &provider_kinds, &requirement.host);
+    let target_silicon = derive_target_silicon(provider_kind(&best.provider), &requirement.host);
 
     // Silicon-residency gate. No silent CPU fallback. No silent Cloud
     // fallback under GpuOrUnifiedMemoryOnly. The check happens AFTER all
@@ -688,11 +692,10 @@ mod tests {
     }
 
     #[test]
-    fn unknown_provider_defaults_to_cloud_for_safety() {
+    fn unknown_provider_errors_before_policy_ranking() {
         // If a model references a provider id that isn't in the providers
-        // table at all, the resolver treats it as Cloud (default kind).
-        // This is loud: a LocalOnly query will reject the model rather
-        // than silently routing unknown-residency work to local hardware.
+        // table at all, the resolver must not classify it as either Cloud
+        // or Local. Registry integrity is a hard precondition.
         let models = vec![make_model(
             "orphan-model",
             "orphan-provider",
@@ -703,9 +706,36 @@ mod tests {
         let providers: Vec<Provider> = vec![];
         let req = req_chat_local(host_m1_8gb());
         let err = resolve_model(&req, models.iter(), providers.iter()).unwrap_err();
+        match err {
+            ResolutionError::UnknownProviderReferenced {
+                model_id,
+                provider_id,
+            } => {
+                assert_eq!(model_id, "orphan-model");
+                assert_eq!(provider_id, "orphan-provider");
+            }
+            other => panic!("expected unknown provider error, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn unknown_provider_does_not_pass_cloud_only() {
+        let models = vec![make_model(
+            "orphan-model",
+            "orphan-provider",
+            Arch::Llama,
+            8192,
+            &[Capability::Chat],
+        )];
+        let providers: Vec<Provider> = vec![];
+        let mut req = req_chat_local(host_m1_8gb());
+        req.provider_policy = LocalOrCloudPolicy::CloudOnly;
+
+        let err = resolve_model(&req, models.iter(), providers.iter()).unwrap_err();
+
         assert!(
-            matches!(err, ResolutionError::NoModelMatchesRequirement { .. }),
-            "LocalOnly with unknown provider must error, not silently treat as local"
+            matches!(err, ResolutionError::UnknownProviderReferenced { .. }),
+            "CloudOnly must not accept models with unknown provider residency: {err:?}"
         );
     }
 
diff --git a/src/workers/continuum-core/src/cognition/model_resolver/types.rs b/src/workers/continuum-core/src/cognition/model_resolver/types.rs
index 00d4a857f..123e1fd3f 100644
--- a/src/workers/continuum-core/src/cognition/model_resolver/types.rs
+++ b/src/workers/continuum-core/src/cognition/model_resolver/types.rs
@@ -318,4 +318,12 @@ pub enum ResolutionError {
         rejected_model_id: String,
         actual_silicon: TargetSilicon,
     },
+    #[error(
+        "model '{model_id}' references unknown provider '{provider_id}'. \
+         Add the provider to the registry or remove the model row."
+    )]
+    UnknownProviderReferenced {
+        model_id: String,
+        provider_id: String,
+    },
 }
diff --git a/src/workers/continuum-core/src/gpu/memory_manager.rs b/src/workers/continuum-core/src/gpu/memory_manager.rs
index f184afee6..93d3d075d 100644
--- a/src/workers/continuum-core/src/gpu/memory_manager.rs
+++ b/src/workers/continuum-core/src/gpu/memory_manager.rs
@@ -855,12 +855,9 @@ fn detect_vulkan() -> Option<(u64, String)> {
     use std::process::Command;
 
     let output = Command::new("vulkaninfo").arg("--summary").output().ok()?;
-
-    if !output.status.success() {
-        return None;
-    }
-
-    let stdout = String::from_utf8(output.stdout).ok()?;
+    let mut text = String::new();
+    text.push_str(&String::from_utf8_lossy(&output.stdout));
+    text.push_str(&String::from_utf8_lossy(&output.stderr));
 
     // vulkaninfo --summary format (excerpt):
     //   Devices:
@@ -875,12 +872,7 @@ fn detect_vulkan() -> Option<(u64, String)> {
     //
     // Take the FIRST deviceName (vulkaninfo orders discrete > integrated > CPU
     // by default on most loaders). If absent, no usable ICD.
-    let device_name = stdout
-        .lines()
-        .find(|l| l.trim_start().starts_with("deviceName"))
-        .and_then(|l| l.split('=').nth(1))
-        .map(|s| s.trim().to_string())
-        .filter(|s| !s.is_empty())?;
+    let device_name = parse_vulkan_device_name(&text)?;
 
     // Conservative VRAM budget: 4 GiB. Real allocations go through the
     // Vulkan loader at runtime; this only seeds the GpuMemoryManager
@@ -892,6 +884,16 @@ fn detect_vulkan() -> Option<(u64, String)> {
     Some((total_bytes, device_name))
 }
 
+#[cfg(feature = "vulkan")]
+fn parse_vulkan_device_name(vulkaninfo_output: &str) -> Option<String> {
+    vulkaninfo_output
+        .lines()
+        .find(|l| l.trim_start().starts_with("deviceName"))
+        .and_then(|l| l.split('=').nth(1))
+        .map(|s| s.trim().to_string())
+        .filter(|s| !s.is_empty())
+}
+
 // detect_cpu_fallback() removed — see detect_gpu()'s panic for rationale.
 // CPU fallback is forbidden architecturally; absent GPU = absent system.
 
@@ -981,6 +983,30 @@ mod tests {
         assert!(!name.is_empty(), "GPU name should not be empty");
     }
 
+    #[cfg(feature = "vulkan")]
+    #[test]
+    fn test_parse_vulkan_device_name_with_headless_warnings() {
+        let output = r#"
+error: XDG_RUNTIME_DIR is invalid or not set in the environment.
+Vulkan Instance Version: 1.3.275
+
+Devices:
+========
+GPU0:
+        apiVersion         = 1.3.275
+        driverVersion      = 0x1
+        vendorID           = 0x10005
+        deviceID           = 0x0000
+        deviceType         = PHYSICAL_DEVICE_TYPE_CPU
+        deviceName         = llvmpipe (LLVM 20.1.2, 256 bits)
+"#;
+
+        assert_eq!(
+            parse_vulkan_device_name(output).as_deref(),
+            Some("llvmpipe (LLVM 20.1.2, 256 bits)")
+        );
+    }
+
     #[test]
     fn test_initial_pressure_zero() {
         let mgr = test_manager(36_864); // 36GB like M3 Max
diff --git a/src/workers/continuum-core/src/inference/candle_adapter.rs b/src/workers/continuum-core/src/inference/candle_adapter.rs
index 01ed0e934..3b375d1b6 100644
--- a/src/workers/continuum-core/src/inference/candle_adapter.rs
+++ b/src/workers/continuum-core/src/inference/candle_adapter.rs
@@ -1466,7 +1466,7 @@ mod tests {
         );
 
         let vision = resolve_model_id("vision-default");
-        assert_eq!(vision, "Qwen/Qwen2-VL-7B-Instruct-GGUF");
+        assert_eq!(vision, "bartowski/Qwen2-VL-7B-Instruct-GGUF");
 
         let gating = resolve_model_id("gating");
         assert_eq!(gating, "Qwen/Qwen2-0.5B-Instruct");
diff --git a/src/workers/continuum-core/src/ipc/mod.rs b/src/workers/continuum-core/src/ipc/mod.rs
index ee7c6202a..38d541224 100644
--- a/src/workers/continuum-core/src/ipc/mod.rs
+++ b/src/workers/continuum-core/src/ipc/mod.rs
@@ -47,6 +47,7 @@ use dashmap::DashMap;
 use serde::{Deserialize, Serialize};
 use std::io::{BufRead, BufReader, Read, Write};
 use std::net::{TcpListener, TcpStream};
+use std::os::unix::fs::FileTypeExt;
 use std::os::unix::net::{UnixListener, UnixStream};
 use std::path::Path;
 use std::sync::Arc;
@@ -63,6 +64,33 @@ trait IpcStream: Read + Write + Send + Sized + 'static {
     fn peer_addr_str(&self) -> String;
 }
 
+fn prepare_socket_path(socket_path: impl AsRef<Path>) -> std::io::Result<()> {
+    let socket_path = socket_path.as_ref();
+    if let Some(parent) = socket_path.parent() {
+        std::fs::create_dir_all(parent)?;
+    }
+
+    match std::fs::symlink_metadata(socket_path) {
+        Ok(metadata) => {
+            let file_type = metadata.file_type();
+            if file_type.is_socket() {
+                std::fs::remove_file(socket_path)?;
+                Ok(())
+            } else {
+                Err(std::io::Error::new(
+                    std::io::ErrorKind::AlreadyExists,
+                    format!(
+                        "IPC socket path exists but is not a socket: {}",
+                        socket_path.display()
+                    ),
+                ))
+            }
+        }
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
+        Err(e) => Err(e),
+    }
+}
+
 impl IpcStream for UnixStream {
     fn try_clone_stream(&self) -> std::io::Result<Self> {
         self.try_clone()
@@ -538,6 +566,59 @@ mod tests {
     // Binary Framing Unit Tests
     // ========================================================================
 
+    #[test]
+    fn prepare_socket_path_creates_parent_directory() {
+        let dir = tempfile::tempdir().unwrap();
+        let socket = dir.path().join(".continuum/sockets/continuum-core.sock");
+
+        prepare_socket_path(&socket).unwrap();
+
+        assert!(
+            socket.parent().unwrap().is_dir(),
+            "socket parent should be created under .continuum"
+        );
+        assert!(
+            !socket.exists(),
+            "prepare should not create the socket before UnixListener::bind"
+        );
+    }
+
+    #[test]
+    fn prepare_socket_path_removes_stale_socket() {
+        let dir = tempfile::tempdir().unwrap();
+        let socket = dir.path().join(".continuum/sockets/continuum-core.sock");
+        std::fs::create_dir_all(socket.parent().unwrap()).unwrap();
+        let listener = UnixListener::bind(&socket).unwrap();
+        drop(listener);
+        assert!(std::fs::symlink_metadata(&socket)
+            .unwrap()
+            .file_type()
+            .is_socket());
+
+        prepare_socket_path(&socket).unwrap();
+
+        assert!(
+            !socket.exists(),
+            "stale socket should be removed before rebinding"
+        );
+    }
+
+    #[test]
+    fn prepare_socket_path_rejects_non_socket_existing_path() {
+        let dir = tempfile::tempdir().unwrap();
+        let socket = dir.path().join(".continuum/sockets/continuum-core.sock");
+        std::fs::create_dir_all(socket.parent().unwrap()).unwrap();
+        std::fs::write(&socket, b"not a socket").unwrap();
+
+        let err = prepare_socket_path(&socket).unwrap_err();
+
+        assert_eq!(err.kind(), std::io::ErrorKind::AlreadyExists);
+        assert!(
+            err.to_string().contains("not a socket"),
+            "unexpected error: {err}"
+        );
+    }
+
     #[test]
     fn test_json_frame_roundtrip() {
         // Create a response, write to buffer, verify framing
@@ -792,10 +873,7 @@ pub fn start_server(
     memory_manager: Arc<crate::memory::PersonaMemoryManager>,
     pressure_monitor: Arc<crate::system_resources::MemoryPressureMonitor>,
 ) -> std::io::Result<()> {
-    // Remove socket file if it exists
-    if Path::new(socket_path).exists() {
-        std::fs::remove_file(socket_path)?;
-    }
+    prepare_socket_path(socket_path)?;
 
     log_info!("ipc", "server", "Starting IPC server on {}", socket_path);
 
diff --git a/src/workers/continuum-core/src/model_registry/artifacts.rs b/src/workers/continuum-core/src/model_registry/artifacts.rs
index fdc629adf..fcf062461 100644
--- a/src/workers/continuum-core/src/model_registry/artifacts.rs
+++ b/src/workers/continuum-core/src/model_registry/artifacts.rs
@@ -33,7 +33,7 @@ pub fn resolve_gguf_for_model_id(model_id: &str) -> Option<PathBuf> {
 }
 
 pub fn resolve_local_model_dir_for_model_id(model_id: &str) -> Option<PathBuf> {
-    resolve_from_local_model_roots(model_id).and_then(|gguf| gguf.parent().map(Path::to_path_buf))
+    resolve_dir_from_local_model_roots(model_id)
 }
 
 pub fn find_first_local_gguf() -> Option<PathBuf> {
@@ -102,6 +102,15 @@ fn resolve_from_local_model_roots(model_id: &str) -> Option<PathBuf> {
     None
 }
 
+fn resolve_dir_from_local_model_roots(model_id: &str) -> Option<PathBuf> {
+    for root in local_model_roots() {
+        if let Some(dir) = find_model_dir_in_root(model_id, &root) {
+            return Some(dir);
+        }
+    }
+    None
+}
+
 fn local_model_roots() -> Vec<PathBuf> {
     let mut roots = Vec::new();
     if let Some(home) = home_dir_string() {
@@ -147,13 +156,22 @@ fn find_model_dir_in_root(model_id: &str, root: &Path) -> Option<PathBuf> {
         return None;
     }
 
+    let model_lower = model_id.to_lowercase();
+    let model_normalized = normalize_model_dir_token(model_id);
     for entry in fs::read_dir(root).ok()?.flatten() {
         let path = entry.path();
-        if !path.is_dir() || first_gguf_in_dir(&path).is_none() {
+        if !path.is_dir() || !is_model_artifact_dir(&path) {
             continue;
         }
-        let dir_name = path.file_name()?.to_str()?.to_lowercase();
-        let model_lower = model_id.to_lowercase();
+        let dir_raw = path.file_name()?.to_str()?;
+        let dir_name = dir_raw.to_lowercase();
+        let dir_normalized = normalize_model_dir_token(dir_raw);
+        if !dir_normalized.is_empty()
+            && (model_normalized.contains(&dir_normalized)
+                || dir_normalized.contains(&model_normalized))
+        {
+            return Some(path);
+        }
         if model_lower.contains("qwen")
             && model_lower.contains("compacted")
             && dir_name.contains("qwen")
@@ -180,6 +198,29 @@ fn find_model_dir_in_root(model_id: &str, root: &Path) -> Option<PathBuf> {
     None
 }
 
+fn is_model_artifact_dir(dir: &Path) -> bool {
+    first_gguf_in_dir(dir).is_some()
+        || dir.join("model.safetensors").exists()
+        || fs::read_dir(dir).ok().is_some_and(|entries| {
+            entries.flatten().any(|entry| {
+                entry
+                    .file_name()
+                    .to_str()
+                    .is_some_and(|name| name.starts_with("model-") && name.ends_with(".safetensors"))
+            })
+        })
+}
+
+fn normalize_model_dir_token(value: &str) -> String {
+    value
+        .trim_end_matches("-GGUF")
+        .trim_end_matches("-gguf")
+        .chars()
+        .filter(|c| c.is_ascii_alphanumeric())
+        .flat_map(|c| c.to_lowercase())
+        .collect()
+}
+
 fn resolve_from_huggingface_hint(hint: &str) -> Option<PathBuf> {
     let repo_slug = hf_repo_slug(hint)?;
     let cache = huggingface_cache_root()?;
@@ -409,4 +450,60 @@ mod tests {
             assert_eq!(resolved.as_deref(), Some(explicit.as_path()));
         });
     }
+
+    #[test]
+    fn resolves_model_init_registry_key_dir_for_qwen35_gguf() {
+        let home = tempfile::tempdir().unwrap();
+        with_test_home(home.path(), || {
+            let model_dir = home
+                .path()
+                .join(".continuum/genome/models/qwen3.5-4b-code-forged");
+            fs::create_dir_all(&model_dir).unwrap();
+            let gguf = model_dir.join("qwen3.5-4b-code-forged-Q4_K_M.gguf");
+            fs::write(&gguf, b"gguf").unwrap();
+
+            let resolved = resolve_gguf(
+                "continuum-ai/qwen3.5-4b-code-forged-GGUF",
+                None,
+                None,
+            );
+
+            assert_eq!(resolved.as_deref(), Some(gguf.as_path()));
+        });
+    }
+
+    #[test]
+    fn resolves_short_qwen2_vl_model_init_dir_for_instruct_model() {
+        let home = tempfile::tempdir().unwrap();
+        with_test_home(home.path(), || {
+            let model_dir = home.path().join(".continuum/genome/models/qwen2-vl-7b");
+            fs::create_dir_all(&model_dir).unwrap();
+            let gguf = model_dir.join("Qwen2-VL-7B-Instruct-Q4_K_M.gguf");
+            fs::write(&gguf, b"gguf").unwrap();
+
+            let resolved = resolve_gguf("qwen2-vl-7b-instruct", None, None);
+
+            assert_eq!(resolved.as_deref(), Some(gguf.as_path()));
+        });
+    }
+
+    #[test]
+    fn resolves_safetensors_model_init_dir_for_mid_tier_model() {
+        let home = tempfile::tempdir().unwrap();
+        with_test_home(home.path(), || {
+            let model_dir = home
+                .path()
+                .join(".continuum/genome/models/qwen3.5-2b-general");
+            fs::create_dir_all(&model_dir).unwrap();
+            fs::write(model_dir.join("model.safetensors"), b"weights").unwrap();
+            fs::write(model_dir.join("config.json"), b"{}").unwrap();
+            fs::write(model_dir.join("tokenizer.json"), b"{}").unwrap();
+
+            let resolved = resolve_local_model_dir_for_model_id(
+                "continuum-ai/qwen3.5-2b-general-forged",
+            );
+
+            assert_eq!(resolved.as_deref(), Some(model_dir.as_path()));
+        });
+    }
 }
diff --git a/src/workers/continuum-core/src/persona/cognition_io.rs b/src/workers/continuum-core/src/persona/cognition_io.rs
index b39414c68..78c7f3111 100644
--- a/src/workers/continuum-core/src/persona/cognition_io.rs
+++ b/src/workers/continuum-core/src/persona/cognition_io.rs
@@ -225,8 +225,16 @@ pub fn build_respond_input(
         }
     }
 
-    let message_id = signal.message_id.unwrap_or(Uuid::nil());
-    let room_id = ctx.room_id.unwrap_or(Uuid::nil());
+    let message_id = signal.message_id.ok_or_else(|| {
+        "build_respond_input: chat-shaped cognition requires signal.messageId; \
+         missing ids would collapse shared-analysis cache keys"
+            .to_string()
+    })?;
+    let room_id = ctx.room_id.ok_or_else(|| {
+        "build_respond_input: chat-shaped cognition requires personaContext.roomId; \
+         route roomless hosts through a non-chat projection"
+            .to_string()
+    })?;
 
     // Per-turn shared context. Hoisting the room-level fields
     // (room_id + recent_history + known_specialties) into an
@@ -349,7 +357,7 @@ mod tests {
 
     fn empty_ctx() -> PersonaContext {
         PersonaContext {
-            persona_id: Uuid::nil(),
+            persona_id: uuid("00000000-0000-4000-8000-000000000001"),
             display_name: String::new(),
             specialty: String::new(),
             model: String::new(),
@@ -358,19 +366,25 @@ mod tests {
             recent_history: vec![],
             known_specialties: vec![],
             other_persona_names: vec![],
-            room_id: None,
+            room_id: Some(uuid("00000000-0000-4000-8000-000000000002")),
             is_voice: false,
         }
     }
 
+    fn uuid(value: &str) -> Uuid {
+        Uuid::parse_str(value).unwrap()
+    }
+
     fn chat_signal(text: &str) -> Signal {
         Signal {
             kind: SignalKind::ChatMessage,
             text: text.to_string(),
             media: vec![],
-            originator: SignalOriginator::User { user_id: Uuid::nil() },
+            originator: SignalOriginator::User {
+                user_id: uuid("00000000-0000-4000-8000-000000000003"),
+            },
             timestamp_ms: 0,
-            message_id: Some(Uuid::nil()),
+            message_id: Some(uuid("00000000-0000-4000-8000-000000000004")),
         }
     }
 
@@ -384,9 +398,11 @@ mod tests {
             kind: SignalKind::ChatMessage,
             text: "hello".to_string(),
             media: vec![],
-            originator: SignalOriginator::User { user_id: Uuid::nil() },
+            originator: SignalOriginator::User {
+                user_id: uuid("00000000-0000-4000-8000-000000000003"),
+            },
             timestamp_ms: 1234,
-            message_id: Some(Uuid::nil()),
+            message_id: Some(uuid("00000000-0000-4000-8000-000000000004")),
         };
         let json = serde_json::to_string(&signal).expect("serializes");
         let back: Signal = serde_json::from_str(&json).expect("round-trips");
@@ -402,7 +418,7 @@ mod tests {
     #[test]
     fn persona_context_slot_mirrors_fields() {
         let mut ctx = empty_ctx();
-        ctx.persona_id = Uuid::nil();
+        ctx.persona_id = uuid("00000000-0000-4000-8000-000000000001");
         ctx.specialty = "vision".to_string();
         ctx.display_name = "Vision AI".to_string();
         let slot = ctx.slot();
@@ -456,6 +472,31 @@ mod tests {
         assert!(input.message_text.is_empty());
     }
 
+    /// What this catches: missing message ids used to become
+    /// `Uuid::nil()`, collapsing unrelated turns into the same shared
+    /// analysis cache key. That is a host contract bug and must be
+    /// loud at the projection boundary.
+    #[test]
+    fn projection_rejects_missing_message_id() {
+        let mut signal = chat_signal("hello");
+        signal.message_id = None;
+        let err = build_respond_input(&signal, &empty_ctx())
+            .expect_err("missing message id should be rejected");
+        assert!(err.contains("signal.messageId"));
+    }
+
+    /// What this catches: roomless chat-shaped cognition used to run
+    /// under `Uuid::nil()`, mixing cache/recorder state across hosts.
+    /// Non-chat hosts need a different projection instead of a fake room.
+    #[test]
+    fn projection_rejects_missing_room_id() {
+        let mut ctx = empty_ctx();
+        ctx.room_id = None;
+        let err = build_respond_input(&chat_signal("hello"), &ctx)
+            .expect_err("missing room id should be rejected");
+        assert!(err.contains("personaContext.roomId"));
+    }
+
     /// What this catches: media on the signal passes through to
     /// `RespondInput::message_media` unchanged. Downstream
     /// `MediaPolicy` decides byte-vs-marker; the projection stays
diff --git a/src/workers/continuum-core/src/persona/response.rs b/src/workers/continuum-core/src/persona/response.rs
index b926ce16d..4e18fea65 100644
--- a/src/workers/continuum-core/src/persona/response.rs
+++ b/src/workers/continuum-core/src/persona/response.rs
@@ -322,6 +322,14 @@ async fn respond_inner(
             "visible_chars": visible_text.len(),
         }),
     );
+    if visible_text.trim().is_empty() {
+        return Err(format!(
+            "persona '{}' produced empty visible text after post-processing (raw_chars={}, think_blocks={})",
+            input.persona.display_name,
+            raw_response.text.len(),
+            think_count
+        ));
+    }
 
     Ok(PersonaResponse::Spoke {
         persona_id: input.persona.persona_id,
@@ -454,7 +462,7 @@ async fn run_render(
         assembled.messages,
         &input.message_media,
         &input.capabilities,
-    );
+    )?;
 
     let request = TextGenerationRequest {
         messages,
@@ -517,32 +525,33 @@ async fn run_render(
 /// the FINAL user-role message — but only when the persona's resolved
 /// model declares the matching capability (`Vision` for image,
 /// `AudioInput` for audio). Native-multimodal models receive the source
-/// bytes directly; text-only models fall back to the simple text path
-/// (the sensory bridge would inject a description upstream — its job,
-/// not ours).
+/// bytes directly; text-only models use the simple text path plus
+/// explicit description markers from the sensory bridge.
 ///
 /// Behavior contract:
 ///   - empty `media` → identical to the legacy text-only path.
 ///   - non-empty `media` + model has Vision/AudioInput → last user
 ///     message becomes `MessageContent::Parts(text + media)`.
 ///   - non-empty `media` + model lacks the capability → text-only
-///     path; the bridge layer (VisionDescriptionService etc.) is
-///     expected to have already converted media → text upstream.
+///     path with description markers; the bridge layer
+///     (VisionDescriptionService etc.) is expected to have already
+///     converted media → text upstream.
 ///   - `media` items whose `item_type` doesn't match a capability the
-///     model has are dropped (e.g. audio sent to a vision-only model).
-///   - no user-role messages found → media silently dropped (rare —
-///     would mean the assembler produced an unusual shape).
+///     model has are emitted as description markers only.
+///   - no user-role messages found or target user message already uses
+///     `MessageContent::Parts` → hard error. These shapes mean the
+///     assembler contract changed and media cannot be attached safely.
 pub fn build_messages_with_media(
     prompt_messages: Vec<crate::persona::prompt_assembly::PromptMessage>,
     media: &[MediaItemLite],
     model_caps: &std::collections::HashSet<crate::model_registry::Capability>,
-) -> Vec<crate::ai::types::ChatMessage> {
+) -> Result<Vec<crate::ai::types::ChatMessage>, String> {
     use crate::ai::types::{AudioInput, ChatMessage, ContentPart, ImageInput, MessageContent};
     use crate::persona::media_policy::MediaPolicy;
 
     // Default text-only path. Always start here; we may rewrite the
     // last user message below if the policy chose an attachable item.
-    let mut messages: Vec<ChatMessage> = prompt_messages
+    let messages: Vec<ChatMessage> = prompt_messages
         .into_iter()
         .map(|m| ChatMessage {
             role: m.role,
@@ -552,7 +561,7 @@ pub fn build_messages_with_media(
         .collect();
 
     if media.is_empty() {
-        return messages;
+        return Ok(messages);
     }
 
     // Apply the AT-MOST-ONE-LATEST policy. The byte-attachment slot
@@ -625,8 +634,17 @@ pub fn build_messages_with_media(
         emitted_parts.push(ContentPart::Text { text });
     }
 
+    attach_media_parts_to_last_user(messages, emitted_parts)
+}
+
+fn attach_media_parts_to_last_user(
+    mut messages: Vec<crate::ai::types::ChatMessage>,
+    emitted_parts: Vec<crate::ai::types::ContentPart>,
+) -> Result<Vec<crate::ai::types::ChatMessage>, String> {
+    use crate::ai::types::{ContentPart, MessageContent};
+
     if emitted_parts.is_empty() {
-        return messages;
+        return Ok(messages);
     }
 
     // Find the LAST user-role message and convert it to Parts (text +
@@ -634,17 +652,20 @@ pub fn build_messages_with_media(
     // turn after assemble().
     let last_user_idx = messages.iter().rposition(|m| m.role == "user");
     let Some(idx) = last_user_idx else {
-        // No user message to attach to. Drop media silently — caller
-        // shape was unusual; assembling new user messages here would
-        // hide the actual bug.
-        return messages;
+        return Err(
+            "build_messages_with_media: media was provided but prompt has no user message to attach it to"
+                .to_string(),
+        );
     };
 
     let existing_text = match &messages[idx].content {
         MessageContent::Text(t) => t.clone(),
-        // Defensive: if the assembler somehow already produced Parts,
-        // we don't try to merge — leave it alone.
-        MessageContent::Parts(_) => return messages,
+        MessageContent::Parts(_) => {
+            return Err(
+                "build_messages_with_media: media was provided but target user message already has parts"
+                    .to_string(),
+            );
+        }
     };
 
     let mut parts: Vec<ContentPart> = Vec::with_capacity(emitted_parts.len() + 1);
@@ -655,7 +676,7 @@ pub fn build_messages_with_media(
     }
     parts.extend(emitted_parts);
     messages[idx].content = MessageContent::Parts(parts);
-    messages
+    Ok(messages)
 }
 
 /// each as a `cognition:think-block` event for the (future) hippocampus
@@ -872,6 +893,18 @@ mod tests {
         assert_eq!(count, 2);
     }
 
+    #[test]
+    fn all_think_output_leaves_no_postable_text() {
+        let raw = "<think>plan only</think>";
+        let (think_stripped, count) = strip_thinks_emit_events(raw, Uuid::nil(), Uuid::nil());
+        let visible = strip_leaked_tool_markup(&think_stripped);
+        assert_eq!(count, 1);
+        assert!(
+            visible.trim().is_empty(),
+            "all-think model output must trip the hard empty-visible-text guard"
+        );
+    }
+
     #[test]
     fn strip_thinks_handles_multiline_thinks() {
         let raw = "<think>\nLine one\nLine two\n</think>\nVisible response.";
@@ -1009,7 +1042,7 @@ mod tests {
     // attached to the LAST user message; media without capability →
     // text path (the bridge is upstream's job, not ours).
 
-    use crate::ai::types::{ContentPart, MessageContent};
+    use crate::ai::types::{ChatMessage, ContentPart, MessageContent};
     use crate::cognition::tool_executor::types::MediaItemLite;
     use crate::model_registry::Capability;
     use crate::persona::prompt_assembly::PromptMessage;
@@ -1049,7 +1082,7 @@ mod tests {
     fn no_media_returns_text_only_messages() {
         let prompt = vec![pm("system", "you are helpful"), pm("user", "hello")];
         let caps = HashSet::new();
-        let out = build_messages_with_media(prompt, &[], &caps);
+        let out = build_messages_with_media(prompt, &[], &caps).unwrap();
         assert_eq!(out.len(), 2);
         assert!(matches!(out[0].content, MessageContent::Text(_)));
         assert!(matches!(out[1].content, MessageContent::Text(_)));
@@ -1072,7 +1105,7 @@ mod tests {
         let prompt = vec![pm("user", "describe this")];
         let media = vec![img_b64("AAAA")];
         let caps = HashSet::new(); // model has NO Vision capability
-        let out = build_messages_with_media(prompt, &media, &caps);
+        let out = build_messages_with_media(prompt, &media, &caps).unwrap();
         assert_eq!(out.len(), 1);
         // New contract (2026-04-22): when model lacks the matching
         // capability, ContentPart::Image bytes MUST NOT attach. The
@@ -1114,7 +1147,7 @@ mod tests {
         let media = vec![img_b64("PNG_BASE64_DATA")];
         let mut caps = HashSet::new();
         caps.insert(Capability::Vision);
-        let out = build_messages_with_media(prompt, &media, &caps);
+        let out = build_messages_with_media(prompt, &media, &caps).unwrap();
         assert_eq!(out.len(), 2);
         // System message untouched.
         assert!(matches!(out[0].content, MessageContent::Text(_)));
@@ -1156,7 +1189,7 @@ mod tests {
         let media = vec![img_b64("X")];
         let mut caps = HashSet::new();
         caps.insert(Capability::Vision);
-        let out = build_messages_with_media(prompt, &media, &caps);
+        let out = build_messages_with_media(prompt, &media, &caps).unwrap();
         // First user message stays text.
         match &out[0].content {
             MessageContent::Text(t) => assert_eq!(t, "earlier turn"),
@@ -1192,7 +1225,8 @@ mod tests {
         };
         let mut vision_only = HashSet::new();
         vision_only.insert(Capability::Vision);
-        let out = build_messages_with_media(prompt.clone(), &[audio.clone()], &vision_only);
+        let out =
+            build_messages_with_media(prompt.clone(), &[audio.clone()], &vision_only).unwrap();
         // Vision-only model: audio bytes MUST NOT attach. Wrapper MAY
         // be Parts(Text-marker) per the new policy contract — what
         // matters is no ContentPart::Audio carrying real bytes.
@@ -1210,7 +1244,7 @@ mod tests {
 
         let mut audio_capable = HashSet::new();
         audio_capable.insert(Capability::AudioInput);
-        let out = build_messages_with_media(prompt, &[audio], &audio_capable);
+        let out = build_messages_with_media(prompt, &[audio], &audio_capable).unwrap();
         // Audio-capable model: audio attaches.
         match &out[0].content {
             MessageContent::Parts(p) => {
@@ -1219,4 +1253,44 @@ mod tests {
             _ => panic!("audio-capable model should receive Parts"),
         }
     }
+
+    #[test]
+    fn media_with_no_user_message_fails_loud() {
+        let prompt = vec![pm("system", "you describe images")];
+        let media = vec![img_b64("PNG_BASE64_DATA")];
+        let mut caps = HashSet::new();
+        caps.insert(Capability::Vision);
+
+        let err = build_messages_with_media(prompt, &media, &caps).unwrap_err();
+
+        assert!(
+            err.contains("no user message"),
+            "unexpected error for impossible media attachment shape: {err}"
+        );
+    }
+
+    #[test]
+    fn media_with_existing_parts_fails_loud() {
+        let messages = vec![ChatMessage {
+            role: "user".to_string(),
+            content: MessageContent::Parts(vec![ContentPart::Text {
+                text: "already structured".to_string(),
+            }]),
+            name: None,
+        }];
+        let media_parts = vec![ContentPart::Image {
+            image: crate::ai::types::ImageInput {
+                url: None,
+                base64: Some("PNG_BASE64_DATA".to_string()),
+                mime_type: Some("image/png".to_string()),
+            },
+        }];
+
+        let err = attach_media_parts_to_last_user(messages, media_parts).unwrap_err();
+
+        assert!(
+            err.contains("already has parts"),
+            "unexpected error for pre-structured user media target: {err}"
+        );
+    }
 }
diff --git a/src/workers/continuum-core/tests/fixture_assembly_replay.rs b/src/workers/continuum-core/tests/fixture_assembly_replay.rs
index c4edc7eda..8df0cf4bc 100644
--- a/src/workers/continuum-core/tests/fixture_assembly_replay.rs
+++ b/src/workers/continuum-core/tests/fixture_assembly_replay.rs
@@ -65,10 +65,10 @@
 use continuum_core::ai::types::{ContentPart, MessageContent};
 use continuum_core::cognition::tool_executor::types::MediaItemLite;
 use continuum_core::model_registry::Capability;
-use continuum_core::persona::prompt_assembly::PromptMessage;
 use continuum_core::persona::cognition_io::{
     build_respond_input, PersonaContext, Signal, SignalKind, SignalOriginator,
 };
+use continuum_core::persona::prompt_assembly::PromptMessage;
 use continuum_core::persona::response::build_messages_with_media;
 use serde_json::Value;
 use std::collections::HashSet;
@@ -215,9 +215,10 @@ fn signal_and_ctx_from_legacy_fixture(
     // New shape (post-IPC-reshape commit 983d30102): rust_request already
     // has `signal` + `personaContext` as nested objects matching the wire
     // shape exactly. Deserialize directly. No reconstruction needed.
-    if let (Some(signal_json), Some(ctx_json)) =
-        (rust_request.get("signal"), rust_request.get("personaContext"))
-    {
+    if let (Some(signal_json), Some(ctx_json)) = (
+        rust_request.get("signal"),
+        rust_request.get("personaContext"),
+    ) {
         let signal: Signal = serde_json::from_value(signal_json.clone())
             .map_err(|e| format!("new-shape signal deserialize failed: {e}"))?;
         let ctx: PersonaContext = serde_json::from_value(ctx_json.clone())
@@ -286,7 +287,9 @@ fn signal_and_ctx_from_legacy_fixture(
         kind: SignalKind::ChatMessage,
         text: message_text,
         media,
-        originator: SignalOriginator::User { user_id: Uuid::nil() },
+        originator: SignalOriginator::User {
+            user_id: Uuid::nil(),
+        },
         timestamp_ms: 0,
         message_id: Some(message_id),
     };
@@ -332,9 +335,21 @@ fn fixtures_replay_through_message_builder() {
         let media = extract_media(rust_request);
         let caps = extract_capabilities(rust_request);
         let prompt = synth_prompt_messages(rust_request);
-        let out = build_messages_with_media(prompt, &media, &caps);
+        let out = match build_messages_with_media(prompt, &media, &caps) {
+            Ok(out) => out,
+            Err(e) => {
+                failures.push(format!(
+                    "[{}] build_messages_with_media failed: {}",
+                    path.file_name().unwrap().to_string_lossy(),
+                    e
+                ));
+                continue;
+            }
+        };
 
-        let last = out.last().expect("builder always returns at least one message");
+        let last = out
+            .last()
+            .expect("builder always returns at least one message");
         let image_parts: Vec<&ContentPart> = match &last.content {
             MessageContent::Text(_) => Vec::new(),
             MessageContent::Parts(parts) => parts
@@ -493,8 +508,10 @@ async fn ensure_llamacpp_qwen2vl_registered() -> Option<()> {
         if !gguf_path.exists() {
             continue;
         }
-        let mut adapter: Box<dyn AIProviderAdapter> =
-            Box::new(LlamaCppAdapter::with_model_id(gguf_path.clone(), m.id.clone()));
+        let mut adapter: Box<dyn AIProviderAdapter> = Box::new(LlamaCppAdapter::with_model_id(
+            gguf_path.clone(),
+            m.id.clone(),
+        ));
         adapter
             .initialize()
             .await
@@ -537,10 +554,7 @@ async fn vision_fixture_describes_image_via_real_model() {
             let caps = extract_capabilities(rust_request);
             let has_real_image = media.iter().any(|m| {
                 m.item_type == "image"
-                    && m.base64
-                        .as_deref()
-                        .map(|b| !b.is_empty())
-                        .unwrap_or(false)
+                    && m.base64.as_deref().map(|b| !b.is_empty()).unwrap_or(false)
             });
             has_real_image && caps.contains(&Capability::Vision)
         })
@@ -602,7 +616,9 @@ async fn vision_fixture_describes_image_via_real_model() {
         let (signal, ctx) = match signal_and_ctx_from_legacy_fixture(rust_request) {
             Ok(pair) => pair,
             Err(e) => {
-                failures.push(format!("[{fname}] could not build Signal+PersonaContext: {e}"));
+                failures.push(format!(
+                    "[{fname}] could not build Signal+PersonaContext: {e}"
+                ));
                 continue;
             }
         };
@@ -647,7 +663,9 @@ async fn vision_fixture_describes_image_via_real_model() {
                      a response. reason: {reason}"
                 ));
             }
-            PersonaResponse::Spoke { text, model_used, .. } => {
+            PersonaResponse::Spoke {
+                text, model_used, ..
+            } => {
                 let trimmed = text.trim();
                 if trimmed.len() < 30 {
                     failures.push(format!(