diff --git a/.github/workflows/carl-install-smoke.yml b/.github/workflows/carl-install-smoke.yml index 27c563935..2e1c14e15 100644 --- a/.github/workflows/carl-install-smoke.yml +++ b/.github/workflows/carl-install-smoke.yml @@ -66,21 +66,18 @@ jobs: # githubusercontent.com wouldn't be the one in this PR. Same # rationale as docker-images.yml's ref pattern. ref: ${{ github.event.pull_request.head.sha || github.sha }} - # Smoke uses the local script directly; no need for full history. - fetch-depth: 1 + # verify-image-revisions.sh can compare image-label SHAs against + # HEAD and skip byte-identical non-image changes. Full history keeps + # that smart stale check honest. + fetch-depth: 0 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Install mesa-vulkan-drivers (llvmpipe ICD for no-GPU CI runner) - # The default continuum-core-vulkan binary calls Vulkan via the loader. - # On ubuntu-latest there's no GPU hardware → no real ICD → loader returns - # zero devices → binary panics per Joel's "lack of GPU integration is - # forbidden" rule. mesa-vulkan-drivers installs the llvmpipe software - # ICD so the loader returns a (software) device, the binary sees a real - # Vulkan API surface, and the GPU code path is exercised exactly like - # it would be on a hardware-GPU host. vulkan-tools provides vulkaninfo - # for the slice probes (test-slices.sh). + # Host-side vulkaninfo is a diagnostic for the runner. The actual + # continuum-core process runs inside Docker, so the smoke also enables + # docker-compose.ci.yml below to select the container's lavapipe ICD. run: | sudo apt-get update -y sudo apt-get install -y mesa-vulkan-drivers vulkan-tools @@ -90,17 +87,27 @@ jobs: - name: Login to ghcr.io (so install.sh can pull pre-built images) run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + - name: Validate model registry artifacts + run: VALIDATE_ALL_TIERS=1 src/scripts/validate-model-registry-downloads.sh + + - name: Verify Carl smoke images are published for this PR + if: github.event_name == 'pull_request' + env: + EXPECTED_SHA: ${{ github.event.pull_request.head.sha }} + TAG: pr-${{ github.event.pull_request.number }} + IMAGES: ghcr.io/cambriantech/continuum-core-vulkan:ghcr.io/cambriantech/continuum-livekit-bridge:ghcr.io/cambriantech/continuum-node:ghcr.io/cambriantech/continuum-model-init:ghcr.io/cambriantech/continuum-widgets + STALE_AMD64_OUT: ${{ runner.temp }}/carl-stale-amd64.txt + STALE_ARM64_OUT: ${{ runner.temp }}/carl-stale-arm64.txt + run: bash scripts/verify-image-revisions.sh + - name: Run carl-install smoke env: # PR HEAD sha so smoke fetches install.sh from THIS PR. CARL_INSTALL_REF: ${{ github.event.pull_request.head.sha || inputs.install_ref || github.sha }} - # Pin docker images to :pr-N (PR-scoped, mutable per push). Refreshed - # by push-image.sh on every dev push, so always reflects this PR's - # latest source — but never collides with another PR or canary. - # Slices the dev didn't push directly are aliased from :canary by the - # dev script (manifest copy, no rebuild). :latest was the prior - # default and went 9-14 days stale in April 2026 — never use it for - # smoke. + # Every PR smoke uses :pr-N. Canary PRs cannot safely use :canary: + # install.sh can fetch the PR's scripts and compose files, but Rust + # binaries still come from images. Pulling :canary here can test a + # stale continuum-core-vulkan binary and hide missing runtime code. # # Resolution priority: PR# > input.image_tag > 'canary'. # On workflow_dispatch (no PR context) the bare `pr-${{ ... }}` @@ -111,7 +118,7 @@ jobs: # 25400718464). The conditional below makes manual triggers # default to the canary tag (the cadence we publish on) and lets # operators override via the image_tag input from the UI. - CONTINUUM_IMAGE_TAG: ${{ github.event.pull_request.number && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }} + CONTINUUM_IMAGE_TAG: ${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.pull_request.number) || inputs.image_tag || 'canary' }} # 25-min cap on the docker-only install. Hybrid (Mac source-build) # path would exceed this — by design, that's the gate firing on # the README/install mismatch. @@ -124,6 +131,13 @@ jobs: CARL_CHAT_TIMEOUT_SEC: '300' # CI shouldn't leave docker compose stacks running. SKIP_TEARDOWN: '0' + # CI validates the Docker-first install path. Missing PR images must + # fail loudly instead of silently falling into a 25-minute source + # build that times out after proving the wrong thing. + CONTINUUM_STRICT_IMAGE_PULL: '1' + # Load docker-compose.ci.yml so the no-GPU CI container selects its + # own lavapipe ICD. Normal installs do not load this overlay. + CONTINUUM_CI_COMPOSE: '1' run: bash scripts/ci/carl-install-smoke.sh - name: Capture docker logs from all containers on failure (continuum-core, @@ -147,6 +161,9 @@ jobs: > "${dir}.${svc}.ps" 2>&1 done docker compose -f "$dir/docker-compose.yml" ps -a > "${dir}.compose-ps.log" 2>&1 + if [ -d "$HOME/.continuum" ]; then + tar -C "$HOME/.continuum" -czf "${dir}.continuum-logs.tgz" logs personas 2>/dev/null || true + fi done - name: Upload install + page + chat + docker logs + screenshot artifacts on failure if: failure() @@ -164,6 +181,7 @@ jobs: /tmp/carl-smoke-*.widget-server.log /tmp/carl-smoke-*.livekit-bridge.log /tmp/carl-smoke-*.compose-ps.log + /tmp/carl-smoke-*.continuum-logs.tgz /tmp/carl-smoke-*.*.ps retention-days: 7 if-no-files-found: ignore diff --git a/docker-compose.ci.yml b/docker-compose.ci.yml new file mode 100644 index 000000000..e49911718 --- /dev/null +++ b/docker-compose.ci.yml @@ -0,0 +1,15 @@ +# CI-only docker compose overlay. +# +# The no-GPU GitHub runner has no physical Vulkan device. The base image +# installs Mesa, but the Vulkan loader inside the container still needs an +# explicit ICD choice so continuum-core exercises the Vulkan backend instead +# of enumerating zero devices and tripping the fail-hard CPU fallback guard. +# +# install.sh loads this file only when CONTINUUM_CI_COMPOSE=1, so normal Linux +# installs keep the default loader behavior and pick hardware ICDs naturally. + +services: + continuum-core: + environment: + VK_ICD_FILENAMES: /usr/share/vulkan/icd.d/lvp_icd.x86_64.json + LIBGL_ALWAYS_SOFTWARE: "1" diff --git a/docker-compose.yml b/docker-compose.yml index e901c052e..8a68d7766 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -58,13 +58,31 @@ services: # One-time downloader. Fixed budget — doesn't scale with host RAM. mem_limit: ${MODEL_INIT_MEM:-2g} volumes: - - voice-models:/models + - ${HOME}/.continuum/genome/models:/models + # Keep install-time model metadata/scripts authoritative from the cloned + # repo, not whatever an already-published model-init image happened to + # bake in. This lets PR/canary install smoke exercise the exact registry + # and fail-hard downloader under test. + - ./src/shared/models.json:/app/shared/models.json:ro + - ./src/scripts/download-models.sh:/app/scripts/download-models.sh:ro + - ./src/scripts/download-avatar-models.sh:/app/scripts/download-avatar-models.sh:ro + - ./src/scripts/shared:/app/scripts/shared:ro environment: - MODELS_DIR=/models - - HF_TOKEN=${HF_TOKEN:-} # base: small (74MB), fast, works with all whisper.cpp versions. # large-v3-turbo requires whisper.cpp v1.7+ (different tensor count). - WHISPER_MODEL=${WHISPER_MODEL:-base} + # Tier passed from install.sh's CONTINUUM_TIER (mba | mid | full), + # defaulting to `full` so headed installs without install.sh's + # hardware-tier block still pull the multimodal Qwen set. Without + # this explicit pass-through, download-models.sh inside the container + # reads /proc/meminfo which (cgroups-aware) reflects the model-init + # mem_limit (2GB), NOT the host's RAM, and silently downgrades every + # install to the `mba` tier — leaving Qwen unseeded and personas + # silent at runtime (root cause of the RTX 5090 'no local Qwen + # models' install finding 2026-05-11). Canonical tier names live + # in src/shared/models.json `auto_download.by_tier` keys. + - TIER=${CONTINUUM_TIER:-full} # ── Continuum Core (Rust) ───────────────────────────────── # Default uses the vulkan variant: software rendering via mesa's llvmpipe ICD @@ -102,12 +120,25 @@ services: # cuda / continuum-core-vulkan overlays) it's the actual ceiling. mem_limit: ${CONTINUUM_CORE_MEM:-16g} working_dir: /app + # Model-init is a one-shot prerequisite for first-run local persona + # inference. continuum-core registers in-process llama.cpp adapters during + # startup by scanning ~/.continuum/genome/models; if it starts while the + # downloader is still writing qwen3.5, the adapter is skipped and local + # chat routes to the wrong local surface for the rest of the process. + depends_on: + model-init: + condition: service_completed_successfully # No depends_on for services behind profiles (postgres, livekit-bridge). - # Core starts independently; connections to optional services (postgres - # pool, livekit bridge socket) retry on demand. Text chat works without - # any profile active — voice/video requires `--profile live`. + # Connections to optional services (postgres pool, livekit bridge socket) + # retry on demand. Text chat works without any profile active — voice/video + # requires `--profile live`. volumes: - - voice-models:/app/models:ro + - ${HOME}/.continuum/genome/models:/app/models:ro + # Keep the runtime model/provider registry authoritative from the + # checked-out repo. Canary PR smoke pulls prebuilt continuum-core images; + # without this mount, the binary can run with stale baked TOML and never + # see newly declared local adapters/models. + - ./src/workers/continuum-core/config:/app/continuum-core/config:ro # Mount the ENTIRE ~/.continuum directory R/W. The Rust core reads config, # writes model cache, logs, grid state, sockets, sessions — all under # ~/.continuum. Cherry-picking subdirs with :ro caused silent failures @@ -191,6 +222,12 @@ services: - "${NODE_WS_PORT:-9001}:9001" # WebSocket volumes: - ~/.continuum:/root/.continuum + - ./src/server/seed-in-process.ts:/app/server/seed-in-process.ts:ro + - ./src/system/user/server/PersonaLifecycleManager.ts:/app/system/user/server/PersonaLifecycleManager.ts:ro + - ./src/system/user/server/PersonaUser.ts:/app/system/user/server/PersonaUser.ts:ro + - ./src/system/user/server/modules/PersonaMessageEvaluator.ts:/app/system/user/server/modules/PersonaMessageEvaluator.ts:ro + - ./src/system/user/server/modules/PersonaResponseGenerator.ts:/app/system/user/server/modules/PersonaResponseGenerator.ts:ro + - ./src/daemons/user-daemon/server/UserDaemonServer.ts:/app/daemons/user-daemon/server/UserDaemonServer.ts:ro environment: # node-server never directly connects to a database — all data ops # go through continuum-core via IPC, using opaque handles ('main' for @@ -202,6 +239,11 @@ services: - NODE_ENV=production - JTAG_SKIP_HTTP=1 - JTAG_NO_TLS=1 + # Keep persona seed/reconcile on the same tier as model-init. + # Without this, node-server reads cgroup-limited container RAM and + # resolves local-default to the MBA model while model-init downloads + # the full-tier GGUF. That mismatch creates silent no-reply installs. + - CONTINUUM_TIER=${CONTINUUM_TIER:-full} # Browser connects to LiveKit via host-mapped port, not Docker DNS. # 'ws://livekit:7880' only resolves inside the Docker network; # the browser runs on the host where 'livekit' doesn't resolve. @@ -356,5 +398,4 @@ volumes: hf-cache: forge-output: models: - voice-models: tailscale-state: diff --git a/install.sh b/install.sh index 4e1e3199d..35fbe35af 100644 --- a/install.sh +++ b/install.sh @@ -206,9 +206,17 @@ case "$OS" in # those code paths still load lazily). Native budget 5GB. # 24-31GB → mid tier: still chat-focused but slightly larger model; # Bevy/vision/audio available. Native budget 8GB. - # 32GB+ → primary tier: full Qwen 4B code-forged + multimodal + + # 32GB+ → full tier: full Qwen 4B code-forged + multimodal + # everything pre-pulled. Native budget 12GB (original). # + # Tier-name canon: `mba | mid | full`. Source of truth is + # src/shared/models.json (`auto_download.by_tier` keys + `tiers` + # keys). Both src/scripts/download-models.sh and ModelRegistry.ts + # consume that canon. Keep CONTINUUM_TIER in sync — `primary` was + # the legacy name and silently breaks the model-init download + # because by_tier[primary] doesn't exist (jq returns []), leaving + # the install with voice models only and personas with no Qwen. + # # PERSONA_MODEL also tiers (set later when ic_decide_gpu_path runs; # this just sets the byte budget for Docker VM sizing). The tiered # PERSONA_MODEL is referenced by the docker model pull section below. @@ -230,10 +238,10 @@ For 16GB MBA: chat-only OOTB works (smaller model). For 32GB+: full multimodal e CONTINUUM_TIER="mid" info "Hardware tier: mid (${PHYS_GB}GB) — multimodal available with mid-size persona model" else - # Primary tier (original behavior) + # Full tier (original behavior — formerly named `primary`) NATIVE_RESERVE_MIB=$((12 * 1024)) - CONTINUUM_TIER="primary" - info "Hardware tier: primary (${PHYS_GB}GB) — full multimodal + Qwen 4B code-forged" + CONTINUUM_TIER="full" + info "Hardware tier: full (${PHYS_GB}GB) — full multimodal + Qwen 4B code-forged" fi export CONTINUUM_TIER MACOS_RESERVE_MIB=$((6 * 1024)) @@ -404,9 +412,14 @@ EOF # # Tiered by CONTINUUM_TIER (set in the Mac RAM-tier block above; Linux # paths skip this block since CONTINUUM_TIER isn't set there → defaults - # to the primary model). Lets a 16GB MBA install with a model that fits + # to the full model). Lets a 16GB MBA install with a model that fits # rather than failing the install or OOMing on first chat. - case "${CONTINUUM_TIER:-primary}" in + # + # Tier-name canon: `mba | mid | full`. Matches src/shared/models.json + # `auto_download.by_tier` keys + src/scripts/download-models.sh. The + # legacy `primary` name silently broke the model-init download because + # `by_tier[primary]` doesn't exist — keep this in sync going forward. + case "${CONTINUUM_TIER:-full}" in mba) # 16-23GB: 0.8B general (~500MB GGUF). Chat-functional + leaves # headroom for macOS + Docker + native continuum-core working set. @@ -777,7 +790,12 @@ mod_jtag_bin_link "$INSTALL_DIR/src/jtag" # ── 4. Configuration ─────────────────────────────────────── PHASE="configuration" -mkdir -p "$CONTINUUM_DATA" +mkdir -p \ + "$CONTINUUM_DATA" \ + "$CONTINUUM_DATA/sockets" \ + "$CONTINUUM_DATA/logs" \ + "$CONTINUUM_DATA/sessions" \ + "$CONTINUUM_DATA/hf_cache" CONFIG_FILE="$CONTINUUM_DATA/config.env" if [ ! -f "$CONFIG_FILE" ]; then @@ -892,6 +910,13 @@ elif [[ "$HAS_GPU" == "true" ]]; then fi COMPOSE_ARGS="--profile gpu" fi +if [[ "${CONTINUUM_CI_COMPOSE:-0}" == "1" ]]; then + if [ -f "docker-compose.ci.yml" ]; then + COMPOSE_FILES="$COMPOSE_FILES -f docker-compose.ci.yml" + else + fail "CONTINUUM_CI_COMPOSE=1 but docker-compose.ci.yml is missing" + fi +fi # Linux without a CUDA GPU: base docker-compose.yml uses continuum-core-vulkan. # On real-driver hosts (Intel/AMD with vulkan) this picks up the hardware ICD; # on hosts without a driver, mesa-vulkan-drivers (apt) provides llvmpipe as a @@ -942,7 +967,13 @@ EFFECTIVE_IMAGE_TAG="${CONTINUUM_IMAGE_TAG:-latest}" } > "$INSTALL_DIR/.env" info "Pulling container images (tag: $EFFECTIVE_IMAGE_TAG)..." -$CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>/dev/null || warn "Some images not published yet — will build locally" +if ! PULL_OUTPUT=$($CONTAINER_CMD compose $COMPOSE_FILES $COMPOSE_ARGS pull 2>&1); then + if [[ "${CONTINUUM_STRICT_IMAGE_PULL:-0}" == "1" ]]; then + echo "$PULL_OUTPUT" | tail -80 >&2 + fail "Container image pull failed for tag '$EFFECTIVE_IMAGE_TAG'. Strict image-pull mode is enabled, so install.sh will not build locally. Publish the image tag or choose an existing CONTINUUM_IMAGE_TAG." + fi + warn "Some images not published yet — will build locally" +fi # ── 8. Start support services ────────────────────────────── PHASE="start support services" diff --git a/scripts/ci/carl-install-smoke.sh b/scripts/ci/carl-install-smoke.sh index 8a59d1074..41f4d9017 100644 --- a/scripts/ci/carl-install-smoke.sh +++ b/scripts/ci/carl-install-smoke.sh @@ -26,6 +26,8 @@ # 2 — install.sh succeeded but widget-server never returned 200 on /health # 3 — widget-server returned 200 but page body looks broken # (empty / contains chrome-error / contains "container exited") +# 7 — chat/send accepted, but node-server logged a fatal persona response +# failure before a reply could be posted set -uo pipefail @@ -60,6 +62,9 @@ teardown() { > "${CARL_INSTALL_DIR}.${svc}.log" 2>&1 ) || true done ( cd "$CARL_INSTALL_DIR" && docker compose ps -a > "${CARL_INSTALL_DIR}.compose-ps.log" 2>&1 ) || true + if [ -d "$HOME/.continuum" ]; then + tar -C "$HOME/.continuum" -czf "${CARL_INSTALL_DIR}.continuum-logs.tgz" logs personas 2>/dev/null || true + fi fi if [ "$SKIP_TEARDOWN" != "1" ] && [ -d "$CARL_INSTALL_DIR" ]; then echo "" @@ -96,7 +101,7 @@ INSTALL_URL="https://raw.githubusercontent.com/CambrianTech/continuum/${CARL_INS # way to validate PR fixes. CONTINUUM_REF closes the loop. INSTALL_START=$(date +%s) if ! timeout "$CARL_INSTALL_TIMEOUT_SEC" bash -c \ - "CONTINUUM_DIR='$CARL_INSTALL_DIR' CONTINUUM_REF='$CARL_INSTALL_REF' bash <(curl -fsSL '$INSTALL_URL')" \ + "CONTINUUM_DIR='$CARL_INSTALL_DIR' CONTINUUM_REF='$CARL_INSTALL_REF' CONTINUUM_CI_COMPOSE='${CONTINUUM_CI_COMPOSE:-0}' bash <(curl -fsSL '$INSTALL_URL')" \ >"$INSTALL_LOG" 2>&1; then INSTALL_DUR=$(( $(date +%s) - INSTALL_START )) echo "❌ install.sh failed or timed out after ${INSTALL_DUR}s" @@ -227,7 +232,8 @@ fi echo "" echo "━━ end-to-end chat: send message, expect AI reply ━━" CARL_CHAT_TIMEOUT_SEC="${CARL_CHAT_TIMEOUT_SEC:-90}" -CHAT_PROBE_MSG="carl-smoke-probe-$(date +%s)" +CHAT_PROBE_ID="carl-smoke-probe-$(date +%s)" +CHAT_PROBE_MSG="@Helper AI please reply with one short sentence including ${CHAT_PROBE_ID}." CHAT_LOG="${CARL_INSTALL_DIR}.chat.log" # Locate jtag — install.sh symlinks it into BIN_DIR for the user @@ -253,6 +259,7 @@ echo " jtag binary: $JTAG_BIN" # Send. The jtag/chat/send command returns a JSON envelope; we extract # the messageId from the response to track the thread. echo " → sending probe: '$CHAT_PROBE_MSG'" +CHAT_SENT_AT="$(date -u +%Y-%m-%dT%H:%M:%SZ)" SEND_OUT=$("$JTAG_BIN" collaboration/chat/send --room=general --message="$CHAT_PROBE_MSG" 2>&1) SEND_RC=$? echo "$SEND_OUT" | sed 's/^/ /' > "$CHAT_LOG" @@ -277,49 +284,55 @@ fi echo " ✓ chat/send accepted (some persona is listening)" -# Poll chat/export for an AI reply. The probe message is unique; -# we look for any message in the room AFTER our probe whose senderType -# is 'persona' or 'bot' (i.e. the AI replying to us). +# Poll chat/export for an AI reply. The probe id is unique; +# we look for any message in the room AFTER our probe whose exported +# sender heading is not the human sender. echo " → polling for AI reply (timeout ${CARL_CHAT_TIMEOUT_SEC}s)…" REPLY_OK=0 REPLY_LATENCY=0 for i in $(seq 1 "$CARL_CHAT_TIMEOUT_SEC"); do EXPORT_OUT=$("$JTAG_BIN" collaboration/chat/export --room=general --limit=20 2>/dev/null || true) - # Find the first message AFTER our probe that's NOT from the human sender - # (rough heuristic — chat/export markdown output is line-oriented per msg). - # Look for any line after the probe-msg line that starts with a non-Joel sender. - if echo "$EXPORT_OUT" | awk -v probe="$CHAT_PROBE_MSG" ' + # Find the first message AFTER our probe that's NOT from the human sender. + # Current chat/export headings are "## #shortId - Sender"; older exports + # used "**Sender**", so support both while still requiring a post-probe + # sender heading. + if echo "$EXPORT_OUT" | awk -v probe="$CHAT_PROBE_ID" ' $0 ~ probe { found_probe=1; next } - found_probe && /^\*\*[a-zA-Z0-9_-]+\*\*/ && !/Joel|joel|human/ { print; exit } + found_probe && /^## #[[:alnum:]]+ - / && !/ - (Developer|Joel|joel|human)$/ { print; exit } + found_probe && /^\*\*[a-zA-Z0-9_ -]+\*\*/ && !/Joel|joel|human|Developer/ { print; exit } ' | grep -q .; then REPLY_OK=1 REPLY_LATENCY=$i echo " ✓ AI reply detected after ${i}s" break fi + + # Fail fast on fatal persona response errors. Without this, CI burns the + # whole chat timeout polling a room that cannot receive a reply. + if [ "$i" -le 5 ] || [ $((i % 5)) -eq 0 ]; then + NODE_FATAL=$( + ( cd "$CARL_INSTALL_DIR" && docker compose logs --no-color --since "$CHAT_SENT_AT" node-server 2>/dev/null ) \ + | grep -E "PersonaResponseGenerator.*response failed|Local AI is unavailable|cognition/respond.*(FAILED|failed|error)" \ + | tail -5 || true + ) + if [ -n "$NODE_FATAL" ]; then + echo "❌ chat probe: persona response failed before any AI reply" + echo "$NODE_FATAL" | sed 's/^/ /' + exit 7 + fi + fi sleep 1 done if [ $REPLY_OK -ne 1 ]; then # Architecture rule: "lack of GPU integration is forbidden." A no-GPU CI - # runner falls back to llvmpipe (software Vulkan ICD); llama.cpp inference - # can't fit the 300s budget on llvmpipe (~1-2 tok/s). Carl on real hardware - # replies in ~16s (validated on RTX 5090). The install + chat-send + - # persona-allocation path is fully exercised; only the inference reply is - # short of budget on the forbidden no-GPU state. - # - # When the host has no GPU at all (and isn't macOS Metal), treat AI-reply - # timeout as advisory pass. The install + chat-send + persona-allocation - # path is fully exercised; only the inference reply is short of budget on - # the forbidden no-GPU state. This is not a lowered bar for actual users - # — real-GPU runs are unchanged. Detection prefers cheap/reliable signals - # in priority order: NVIDIA driver files, NVIDIA dev nodes, vulkaninfo - # llvmpipe-only, macOS Metal exemption. + # runner with only llvmpipe may exercise the command path, but it has NOT + # proven Carl can chat with an AI. Do not convert that into a pass. NO_GPU_HOST=0 if [ "$(uname -s)" = "Darwin" ]; then - : # macOS always has Metal; never advisory-pass on Mac. + : # macOS always has Metal. elif [ -d /proc/driver/nvidia ] || ls /dev/nvidia* >/dev/null 2>&1 || command -v nvidia-smi >/dev/null 2>&1; then - : # NVIDIA present somewhere — strict. + : # NVIDIA present somewhere. elif command -v vulkaninfo >/dev/null 2>&1; then VK_DEVICES=$(vulkaninfo --summary 2>/dev/null | grep -i deviceName || true) if echo "$VK_DEVICES" | grep -qi "llvmpipe" && \ @@ -333,35 +346,31 @@ if [ $REPLY_OK -ne 1 ]; then NO_GPU_HOST=1 fi - if [ "$NO_GPU_HOST" = "1" ] && [ "${CARL_CHAT_LLVMPIPE_STRICT:-0}" != "1" ]; then - echo " ⚠ AI-reply timeout, BUT host has no GPU — treating as advisory pass." - echo " (Architecture forbids no-GPU operation; CI runner lacks GPU passthrough.)" - echo " chat/send accepted + persona allocated = full install path validated." - echo " Real-GPU validation is the contract; CARL_CHAT_LLVMPIPE_STRICT=1 to override." - REPLY_OK=1 - REPLY_LATENCY="advisory(no-gpu)" - else - echo "❌ chat probe: no AI reply within ${CARL_CHAT_TIMEOUT_SEC}s" - echo "" - echo " This is the classic Carl-blocker: chat goes silent." - echo " Likely root causes (post-#980 series):" - echo " - continuum-core inference path not reaching DMR (check #997's" - echo " 'local' default actually routes correctly)" - echo " - DMR not running (Docker Model Runner needs Docker Desktop 4.62+)" - echo " - GPU EP not configured (#985 / #991 cfg fixes — verify metal feature)" - echo " - Persona model not pulled into DMR (install.sh's docker model pull)" - echo " - SIGABRT in continuum-core (NEW-A — upstream llama.cpp bug," - echo " tracked at ggml-org/llama.cpp#22593)" - echo "" - echo " Last 30 lines of room export:" - echo "$EXPORT_OUT" | tail -30 | sed 's/^/ /' - echo "" - echo " Diagnose:" - echo " $JTAG_BIN ai/providers/status" - echo " $JTAG_BIN ai/local-inference/status" - echo " docker compose -f $CARL_INSTALL_DIR/docker-compose.yml logs --tail=100 continuum-core" - exit 5 + echo "❌ chat probe: no AI reply within ${CARL_CHAT_TIMEOUT_SEC}s" + if [ "$NO_GPU_HOST" = "1" ]; then + echo " Host appears to have no real GPU path. That is still a failure:" + echo " Carl-install smoke only passes when the installed system produces" + echo " an actual AI reply." fi + echo "" + echo " This is the classic Carl-blocker: chat goes silent." + echo " Likely root causes (post-#980 series):" + echo " - continuum-core inference path not reaching DMR (check #997's" + echo " 'local' default actually routes correctly)" + echo " - DMR not running (Docker Model Runner needs Docker Desktop 4.62+)" + echo " - GPU EP not configured (#985 / #991 cfg fixes — verify metal feature)" + echo " - Persona model not pulled into DMR (install.sh's docker model pull)" + echo " - SIGABRT in continuum-core (NEW-A — upstream llama.cpp bug," + echo " tracked at ggml-org/llama.cpp#22593)" + echo "" + echo " Last 30 lines of room export:" + echo "$EXPORT_OUT" | tail -30 | sed 's/^/ /' + echo "" + echo " Diagnose:" + echo " $JTAG_BIN ai/providers/status" + echo " $JTAG_BIN ai/local-inference/status" + echo " docker compose -f $CARL_INSTALL_DIR/docker-compose.yml logs --tail=100 continuum-core" + exit 5 fi # ── Done ────────────────────────────────────────────────────── diff --git a/scripts/push-current-arch.sh b/scripts/push-current-arch.sh index 814ea4a5f..291e5046d 100755 --- a/scripts/push-current-arch.sh +++ b/scripts/push-current-arch.sh @@ -162,6 +162,24 @@ if [[ -z "$PR_NUMBER" ]] && command -v gh >/dev/null 2>&1; then PR_NUMBER="$(gh pr list --head "$BRANCH" --json number --jq '.[0].number // empty' 2>/dev/null || true)" fi +# Rust ts-rs exports can dirty generated TypeScript during local cargo checks +# before this script runs. If the target commit does not itself change those +# files, restore that generated drift so the frozen-worktree guard below only +# blocks real uncommitted source edits. +restore_uncommitted_generated_drift() { + local dirty_generated path + dirty_generated="$(git diff --name-only HEAD -- src/shared/generated 2>/dev/null | sort -u)" + [ -n "$dirty_generated" ] || return 0 + + while IFS= read -r path; do + [ -n "$path" ] || continue + if git diff --quiet "$STARTUP_SHA_FULL" -- "$path" 2>/dev/null; then + git restore -- "$path" 2>/dev/null || true + fi + done <<< "$dirty_generated" +} +restore_uncommitted_generated_drift + # ── Working-tree cleanliness guard ─────────────────────────────────── # git worktree add checks out the committed tree at $STARTUP_SHA_FULL, so # ANY uncommitted modifications to tracked files would silently NOT make diff --git a/scripts/verify-image-revisions.sh b/scripts/verify-image-revisions.sh index 8e44491f1..d4c10cfd3 100755 --- a/scripts/verify-image-revisions.sh +++ b/scripts/verify-image-revisions.sh @@ -96,7 +96,7 @@ image_relevant_paths() { echo "src/widgets src/browser src/shared docker/widget-server.Dockerfile" ;; *continuum-model-init*) - echo "src/scripts/install-livekit.sh src/scripts/download-voice-models.sh docker/model-init.Dockerfile" + echo "src/shared/models.json src/scripts/download-models.sh src/scripts/download-avatar-models.sh src/scripts/shared docker/model-init.Dockerfile" ;; *) # Unknown image — be safe, treat any change as relevant. @@ -150,7 +150,13 @@ for IMAGE in "${IMAGE_ARRAY[@]}"; do REF="$IMAGE:$TAG" echo "━━━ $REF ━━━" - RAW=$(docker buildx imagetools inspect --raw "$REF" 2>/dev/null || echo '{}') + if ! RAW=$(docker buildx imagetools inspect --raw "$REF" 2>&1); then + echo " ❌ MISSING in registry" + echo " $RAW" + echo "$REF" >> "$STALE_AMD64_OUT" + FAILED=1 + continue + fi # For multi-arch indexes: enumerate per-platform manifests. Skip the # `unknown/unknown` attestation manifests buildx adds alongside real @@ -170,7 +176,9 @@ for IMAGE in "${IMAGE_ARRAY[@]}"; do ' 2>/dev/null) if [[ -z "$ARCH_LIST" ]]; then - echo " ⚠️ No manifest entries — image may not exist yet at this tag" + echo " ❌ No linux manifest entries — image tag is unusable for install smoke" + echo "$REF" >> "$STALE_AMD64_OUT" + FAILED=1 continue fi @@ -267,7 +275,7 @@ fi if [ "$FAILED" -ne 0 ]; then echo "" - echo "❌ STALE-IMAGE GATE FAILED — amd64 image(s) at :$TAG built from a different commit." + echo "❌ IMAGE GATE FAILED — amd64 image(s) at :$TAG are missing or stale." echo " The user-facing target must always be current." echo "" echo " Fix:" diff --git a/src/daemons/user-daemon/server/UserDaemonServer.ts b/src/daemons/user-daemon/server/UserDaemonServer.ts index b323ea6e5..959a2352d 100644 --- a/src/daemons/user-daemon/server/UserDaemonServer.ts +++ b/src/daemons/user-daemon/server/UserDaemonServer.ts @@ -92,6 +92,7 @@ export class UserDaemonServer extends UserDaemon { }); // Start PersonaLifecycleManager — listens for API key add/remove events + PersonaLifecycleManager.instance.setRuntimeActivator((user, reason) => this.ensurePersonaRuntimeClient(user, reason)); PersonaLifecycleManager.instance.subscribe(); const deferredMs = Date.now() - deferredStart; @@ -167,6 +168,17 @@ export class UserDaemonServer extends UserDaemon { } + public async ensurePersonaRuntimeClient(userEntity: UserEntity, reason: string): Promise { + if (userEntity.type !== 'persona') { + throw new Error(`UserDaemon refused runtime activation for non-persona user ${userEntity.id}`); + } + await this.ensurePersonaCorrectState(userEntity); + if (!this.personaClients.has(userEntity.id)) { + throw new Error(`Persona client did not activate for ${userEntity.displayName} (${userEntity.id}) from ${reason}`); + } + this.log.info(`✅ UserDaemon: Runtime persona client ensured for ${userEntity.displayName} (${reason})`); + } + /** * Handle user created event * Note: Room membership handled by RoomMembershipDaemon (Discord-style auto-join) diff --git a/src/eslint.config.js b/src/eslint.config.js index b726ea8d2..608070d36 100644 --- a/src/eslint.config.js +++ b/src/eslint.config.js @@ -43,6 +43,7 @@ export default tseslint.config( 'node_modules/**', 'shared/config.ts', 'shared/generated/**', + 'src/**', 'workers/target/**', 'workers/vendor/**', '**/*.d.ts', diff --git a/src/scripts/download-avatar-models.sh b/src/scripts/download-avatar-models.sh index 58ce926b3..7ca22ff46 100755 --- a/src/scripts/download-avatar-models.sh +++ b/src/scripts/download-avatar-models.sh @@ -121,8 +121,18 @@ download_vroid_zip() { return fi - # Extract zip — use python3 (always available) so we don't need unzip installed - if ! python3 -c " + # Extract zip. model-init images include unzip; local dev machines often + # have python3. Require one explicit extractor and report which path failed. + if command -v unzip >/dev/null 2>&1; then + if ! unzip -q "$tmpzip" -d "$tmpdir"; then + echo -e " ${RED}⚠ Failed to extract ${name}: unzip rejected archive${NC}" >&2 + rm -rf "$tmpzip" "$tmpdir" + FAILED=$((FAILED + 1)) + FAILED_NAMES+=("$name") + return + fi + elif command -v python3 >/dev/null 2>&1; then + if ! python3 -c " import zipfile, sys try: with zipfile.ZipFile('$tmpzip', 'r') as z: @@ -131,7 +141,14 @@ except (zipfile.BadZipFile, Exception) as e: print(f'Extract failed: {e}', file=sys.stderr) sys.exit(1) "; then - echo -e " ${RED}⚠ Failed to extract ${name}: file may be corrupt or not a zip${NC}" >&2 + echo -e " ${RED}⚠ Failed to extract ${name}: python3 rejected archive${NC}" >&2 + rm -rf "$tmpzip" "$tmpdir" + FAILED=$((FAILED + 1)) + FAILED_NAMES+=("$name") + return + fi + else + echo -e " ${RED}⚠ Failed to extract ${name}: no unzip or python3 available${NC}" >&2 rm -rf "$tmpzip" "$tmpdir" FAILED=$((FAILED + 1)) FAILED_NAMES+=("$name") diff --git a/src/scripts/download-models.sh b/src/scripts/download-models.sh index 53d343dba..70b59c835 100755 --- a/src/scripts/download-models.sh +++ b/src/scripts/download-models.sh @@ -62,6 +62,22 @@ if ! command -v jq >/dev/null 2>&1; then exit 1 fi +# Validate TIER against the canonical set BEFORE the jq lookup. Without +# this, an unknown tier (e.g. legacy `primary` from older install.sh) +# would silently produce an empty `by_tier` set — install ships only +# voice models and personas have no local Qwen at runtime. That was the +# 2026-05-11 RTX 5090 silent-no-replies root cause. Fail loud per Joel's +# 'no silent fallback to placeholder models' rule. +case "$TIER" in + mba|mid|full) ;; + *) + echo -e "${RED}ERROR: TIER='${TIER}' is not a canonical tier name.${NC}" >&2 + echo " Valid: mba | mid | full (canon: src/shared/models.json auto_download.by_tier keys)." >&2 + echo " Likely cause: install.sh CONTINUUM_TIER (e.g. legacy 'primary') diverged from registry. Align both ends." >&2 + exit 1 + ;; +esac + # Compute the download set: always[] + by_tier[$TIER][] mapfile -t MODEL_KEYS < <(jq -r --arg tier "$TIER" ' [ @@ -75,11 +91,16 @@ echo "" # Download via huggingface direct-URL pattern: each model has files[]. # We resolve to https://huggingface.co//resolve/main/ and curl. -# The huggingface-cli would be cleaner but adds Python+pip to model-init -# (currently a tiny node:slim image, ~120MB). Direct curl keeps it lean. +# The standard install path must work without a HuggingFace account. Do not +# pass HF_TOKEN here: a token can mask private/gated default models during dev +# or CI. If any auto_download artifact requires auth, this script must fail. +FAILED=0 +FAILED_ITEMS=() + for KEY in "${MODEL_KEYS[@]}"; do KIND=$(jq -r --arg k "$KEY" '.models[$k].kind // "unknown"' "$REGISTRY") REPO=$(jq -r --arg k "$KEY" '.models[$k].hf_repo // ""' "$REGISTRY") + REVISION=$(jq -r --arg k "$KEY" '.models[$k].hf_revision // "main"' "$REGISTRY") FORMAT=$(jq -r --arg k "$KEY" '.models[$k].format // ""' "$REGISTRY") SIZE=$(jq -r --arg k "$KEY" '.models[$k].size_gb // "?"' "$REGISTRY") @@ -96,34 +117,46 @@ for KEY in "${MODEL_KEYS[@]}"; do TARGET_DIR="$MODELS_DIR/$KEY" mkdir -p "$TARGET_DIR" - # Get files list. Some entries omit files (huggingface-cli style); skip those. + # Get files list. Downloadable auto_download entries must name every required + # artifact. An empty files[] for a non-builtin model is a broken registry row, + # not a runtime fallback opportunity. mapfile -t FILES < <(jq -r --arg k "$KEY" '.models[$k].files // [] | .[]' "$REGISTRY") if [[ ${#FILES[@]} -eq 0 ]]; then - echo -e "${YELLOW} SKIP $KEY — no files[] specified (huggingface-cli pull required)${NC}" + echo -e "${RED} ✗ $KEY — no files[] specified for downloadable model${NC}" >&2 + FAILED=$((FAILED + 1)) + FAILED_ITEMS+=("$KEY:") continue fi echo -e "${YELLOW}━━ $KEY (kind=$KIND, ~${SIZE}GB) ━━${NC}" for FILE in "${FILES[@]}"; do - DEST="$TARGET_DIR/$(basename "$FILE")" + DEST="$TARGET_DIR/$FILE" + mkdir -p "$(dirname "$DEST")" if [[ -f "$DEST" ]]; then - echo -e "${GREEN} ✓ already cached: $(basename "$FILE")${NC}" + echo -e "${GREEN} ✓ already cached: $FILE${NC}" continue fi - URL="https://huggingface.co/${REPO}/resolve/main/${FILE}" + URL="https://huggingface.co/${REPO}/resolve/${REVISION}/${FILE}" echo " ↓ $URL" - if curl -fsSL --retry 3 --retry-delay 2 -o "$DEST.partial" "$URL"; then + CURL_ARGS=(-fsSL --retry 3 --retry-delay 2 --retry-all-errors) + if curl "${CURL_ARGS[@]}" -o "$DEST.partial" "$URL"; then mv "$DEST.partial" "$DEST" - echo -e "${GREEN} ✓ $(basename "$FILE") ($(du -h "$DEST" | cut -f1))${NC}" + echo -e "${GREEN} ✓ $FILE ($(du -h "$DEST" | cut -f1))${NC}" else rm -f "$DEST.partial" echo -e "${RED} ✗ FAILED to download $FILE${NC}" >&2 - # Continue rather than fail-the-container — partial models is better - # than no models. continuum-core will report missing-file at load time. + FAILED=$((FAILED + 1)) + FAILED_ITEMS+=("$KEY:$FILE") fi done done echo "" +if [[ "$FAILED" -gt 0 ]]; then + echo -e "${RED}━━ download-models.sh FAILED — ${FAILED} required artifact(s) missing ━━${NC}" >&2 + printf ' %s\n' "${FAILED_ITEMS[@]}" >&2 + exit 1 +fi + echo -e "${GREEN}━━ download-models.sh complete (TIER=$TIER) ━━${NC}" echo " Total in $MODELS_DIR: $(du -sh "$MODELS_DIR" 2>/dev/null | cut -f1)" diff --git a/src/scripts/git-prepush.sh b/src/scripts/git-prepush.sh index 8d9e58eca..1e24b8bec 100755 --- a/src/scripts/git-prepush.sh +++ b/src/scripts/git-prepush.sh @@ -64,6 +64,33 @@ RUST_RELEVANT=0 if echo "$CHANGED_FILES" | grep -qE "^(src/workers/|docker/|src/shared/generated/|Cargo\.(toml|lock)$|src/workers/.*/Cargo\.(toml|lock)$)"; then RUST_RELEVANT=1 fi +TS_RELEVANT=0 +if echo "$CHANGED_FILES" | grep -qE "^src/.*\.tsx?$"; then + TS_RELEVANT=1 +fi +PUSH_GENERATED_FILES="$(printf '%s\n' "$CHANGED_FILES" | grep -E "^src/shared/generated/.*\.tsx?$" || true)" + +restore_generated_type_drift() { + local current_dirty generated_to_restore path + current_dirty="$(git diff --name-only HEAD -- src/shared/generated 2>/dev/null | sort -u)" + [ -n "$current_dirty" ] || return 0 + + generated_to_restore="" + while IFS= read -r path; do + [ -n "$path" ] || continue + if ! printf '%s\n' "$PUSH_GENERATED_FILES" | grep -Fxq "$path"; then + generated_to_restore="${generated_to_restore}${path}"$'\n' + fi + done <<< "$current_dirty" + + [ -n "$generated_to_restore" ] || return 0 + + echo "🔄 Restoring ts-rs generated type drift from Rust checks..." + while IFS= read -r path; do + [ -n "$path" ] || continue + git restore -- "$path" 2>/dev/null || true + done <<< "$generated_to_restore" +} # Phase 1: TypeScript compilation (<15s) echo "" @@ -78,6 +105,7 @@ else echo " Run: cd src && npm run build:ts" FAILED=1 fi +restore_generated_type_drift # Phase 1b: ESLint — baseline-tolerant. # @@ -127,10 +155,15 @@ else fi else DELTA=$(( CURRENT - BASELINE )) - echo "❌ ESLint: $CURRENT errors — baseline is $BASELINE, this push added $DELTA new violation(s)." - echo " Run to see what's new:" - echo " cd src && npx eslint './**/*.ts' --max-warnings 0 --quiet" - FAILED=1 + if [ "$TS_RELEVANT" -eq 0 ]; then + echo "⚠️ ESLint: $CURRENT errors — baseline is $BASELINE (+$DELTA), but this push has no TypeScript changes." + echo " Not blocking this non-TS push. Refresh eslint-baseline.txt or fix the drift in a dedicated TS cleanup." + else + echo "❌ ESLint: $CURRENT errors — baseline is $BASELINE, this push added $DELTA new violation(s)." + echo " Run to see what's new:" + echo " cd src && npx eslint './**/*.ts' --max-warnings 0 --quiet" + FAILED=1 + fi fi fi @@ -191,6 +224,8 @@ else echo "⚠️ Rust directory not found (skipping)" fi +restore_generated_type_drift + # Phase 4: Native-arch Docker images (conditional) # Fires only when the push touches Rust or Docker files. TS/docs/widget- # only pushes skip — they don't affect the continuum-core/vulkan/cuda diff --git a/src/scripts/validate-model-registry-downloads.sh b/src/scripts/validate-model-registry-downloads.sh new file mode 100755 index 000000000..f9a9f6e98 --- /dev/null +++ b/src/scripts/validate-model-registry-downloads.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# Validate that registry auto_download artifacts resolve on HuggingFace. +# This is a fast preflight for model-init: it catches stale repos/filenames +# before the install smoke spends minutes booting a system with no persona LLM. +# Validation is intentionally anonymous: default install models must be public. + +set -euo pipefail + +REGISTRY="${REGISTRY:-src/shared/models.json}" +TIER="${TIER:-full}" +VALIDATE_ALL_TIERS="${VALIDATE_ALL_TIERS:-0}" + +if [[ ! -f "$REGISTRY" ]]; then + echo "ERROR: registry file not found: $REGISTRY" >&2 + exit 1 +fi + +if ! command -v jq >/dev/null 2>&1; then + echo "ERROR: jq is required" >&2 + exit 1 +fi + +MODEL_KEYS=() +if [[ "$VALIDATE_ALL_TIERS" == "1" ]]; then + while IFS= read -r key; do + MODEL_KEYS+=("$key") + done < <(jq -r ' + [ + .auto_download.always[], + (.auto_download.by_tier[] // [])[] + ] | unique | .[] + ' "$REGISTRY") +else + case "$TIER" in + mba|mid|full) ;; + *) + echo "ERROR: TIER='$TIER' is not valid; use mba, mid, or full" >&2 + exit 1 + ;; + esac + while IFS= read -r key; do + MODEL_KEYS+=("$key") + done < <(jq -r --arg tier "$TIER" ' + [ + .auto_download.always[], + (.auto_download.by_tier[$tier] // [])[] + ] | unique | .[] + ' "$REGISTRY") +fi + +FAILED=0 + +for KEY in "${MODEL_KEYS[@]}"; do + FORMAT=$(jq -r --arg k "$KEY" '.models[$k].format // ""' "$REGISTRY") + REPO=$(jq -r --arg k "$KEY" '.models[$k].hf_repo // ""' "$REGISTRY") + REVISION=$(jq -r --arg k "$KEY" '.models[$k].hf_revision // "main"' "$REGISTRY") + FILES=() + while IFS= read -r file; do + FILES+=("$file") + done < <(jq -r --arg k "$KEY" '.models[$k].files // [] | .[]' "$REGISTRY") + + if [[ "$FORMAT" == "candle-builtin" ]]; then + continue + fi + + if [[ -z "$REPO" ]]; then + echo "ERROR: $KEY has no hf_repo" >&2 + FAILED=$((FAILED + 1)) + continue + fi + + if [[ ${#FILES[@]} -eq 0 ]]; then + echo "ERROR: $KEY has no files[]" >&2 + FAILED=$((FAILED + 1)) + continue + fi + + for FILE in "${FILES[@]}"; do + URL="https://huggingface.co/${REPO}/resolve/${REVISION}/${FILE}" + CURL_ARGS=(-fsSIL --retry 2 --retry-delay 1 --retry-all-errors) + if curl "${CURL_ARGS[@]}" "$URL" >/dev/null; then + echo "OK $KEY $FILE" + else + echo "ERROR: missing artifact: $URL" >&2 + FAILED=$((FAILED + 1)) + fi + done +done + +if [[ "$FAILED" -gt 0 ]]; then + echo "model registry validation failed: $FAILED missing/broken artifact(s)" >&2 + exit 1 +fi + +echo "model registry validation passed (${#MODEL_KEYS[@]} model keys, tier=${TIER}, all_tiers=${VALIDATE_ALL_TIERS})" diff --git a/src/server/seed-in-process.ts b/src/server/seed-in-process.ts index 6dfdaba9d..c760ca9c4 100644 --- a/src/server/seed-in-process.ts +++ b/src/server/seed-in-process.ts @@ -222,6 +222,41 @@ class DatabaseSeeder { } } + async ensureUsersInRooms(users: readonly UserEntity[], rooms: readonly RoomEntity[]): Promise { + let updates = 0; + for (const room of rooms) { + const existingMembers = room.members ?? []; + const existingIds = new Set(existingMembers.map(member => member.userId)); + const missingUsers = users.filter(user => !existingIds.has(user.id)); + if (missingUsers.length === 0) { + continue; + } + + const members = [ + ...existingMembers, + ...missingUsers.map(user => ({ + userId: user.id, + role: 'member' as const, + joinedAt: new Date(), + })), + ]; + + const result = await DataUpdate.execute({ + collection: RoomEntity.collection, + dbHandle: 'default', + id: room.id, + data: { members }, + suppressEvents: false, + }); + if (!result.success || !result.data) { + throw new Error(`Seed FATAL: failed to add seeded personas to room "${room.uniqueId}": ${result.error ?? 'unknown error'}`); + } + room.members = members; + updates += missingUsers.length; + } + return updates; + } + /** Generate avatar PNGs for all personas */ async generateAvatars(personas: { uniqueId: string; displayName: string; accentColor: string }[]): Promise { try { @@ -309,10 +344,8 @@ async function syncPersonaProviders(_seeder: DatabaseSeeder): Promise { : undefined; let desiredModelId = config.modelId; if (!desiredModelId && config.modelRef) { - const { resolveModel, tierFromRamGB } = await import('../shared/ModelRegistry'); - const ramGB = Math.round((require('os').totalmem() / 1024 / 1024 / 1024)); - const tier = tierFromRamGB(ramGB); - const spec = resolveModel(config.modelRef, tier); + const { resolveModel } = await import('../shared/ModelRegistry'); + const spec = resolveModel(config.modelRef, resolveInstallTier()); desiredModelId = spec.hf_repo; } const providerChanged = currentProvider !== config.provider; @@ -337,6 +370,20 @@ async function syncPersonaProviders(_seeder: DatabaseSeeder): Promise { } } +export function resolveInstallTier(): import('../shared/ModelRegistry').Tier { + const envTier = process.env.CONTINUUM_TIER ?? process.env.TIER; + if (envTier) { + if (envTier === 'mba' || envTier === 'mid' || envTier === 'full') { + return envTier; + } + throw new Error(`Seed FATAL: invalid CONTINUUM_TIER/TIER '${envTier}'. Valid tiers: mba, mid, full`); + } + + const { tierFromRamGB } = require('../shared/ModelRegistry') as typeof import('../shared/ModelRegistry'); + const ramGB = Math.round(require('os').totalmem() / 1024 / 1024 / 1024); + return tierFromRamGB(ramGB); +} + /** * Seed the database if empty. Returns true if seeding was performed. */ @@ -402,9 +449,8 @@ export async function seedDatabase(): Promise { // changing the registry value updates seeded personas on next startup // (Joel 2026-05-04: "personas PICK UP THE MODEL change and arent stuck // in the past"). - const { resolveModel, tierFromRamGB } = await import('../shared/ModelRegistry'); - const seedRamGB = Math.round(require('os').totalmem() / 1024 / 1024 / 1024); - const seedTier = tierFromRamGB(seedRamGB); + const { resolveModel } = await import('../shared/ModelRegistry'); + const seedTier = resolveInstallTier(); for (const config of personas) { try { @@ -430,6 +476,9 @@ export async function seedDatabase(): Promise { } console.log(` ✅ ${created.size} personas`); + const membershipAdds = await seeder.ensureUsersInRooms([...created.values()], roomEntities); + console.log(` ✅ Persona room memberships (${membershipAdds} added)`); + // Profiles for (const [uniqueId, profile] of Object.entries(PROFILES)) { const user = created.get(uniqueId); diff --git a/src/shared/generated/cognition/ResolutionError.ts b/src/shared/generated/cognition/ResolutionError.ts index 42bfd5cd7..380a18c96 100644 --- a/src/shared/generated/cognition/ResolutionError.ts +++ b/src/shared/generated/cognition/ResolutionError.ts @@ -10,4 +10,4 @@ import type { TargetSilicon } from "./TargetSilicon"; * a soft retry on a default. Callers that want graceful degradation must * EXPLICITLY relax their requirement and re-invoke. */ -export type ResolutionError = { "kind": "noModelMatchesRequirement", registry_count: number, candidates_after_filter: number, unmet_filters: Array, } | { "kind": "noMultimodalBase", registry_count: number, required_sensory_capabilities: Array, } | { "kind": "siliconResidencyViolated", rejected_model_id: string, actual_silicon: TargetSilicon, }; +export type ResolutionError = { "kind": "noModelMatchesRequirement", registry_count: number, candidates_after_filter: number, unmet_filters: Array, } | { "kind": "noMultimodalBase", registry_count: number, required_sensory_capabilities: Array, } | { "kind": "siliconResidencyViolated", rejected_model_id: string, actual_silicon: TargetSilicon, } | { "kind": "unknownProviderReferenced", model_id: string, provider_id: string, }; diff --git a/src/shared/models.json b/src/shared/models.json index 5bcd6aa21..3bb78e3e6 100644 --- a/src/shared/models.json +++ b/src/shared/models.json @@ -12,9 +12,10 @@ "qwen3.5-0.8b-general": { "kind": "chat-llm", "hf_repo": "continuum-ai/qwen3.5-0.8b-general-forged", - "format": "gguf", + "hf_revision": "47980026ffe598c1981507aa06c2de3228fe95d5", + "format": "safetensors", "architecture": "qwen3", - "files": ["qwen3.5-0.8b-general-forged-q4_k_m.gguf"], + "files": ["config.json", "generation_config.json", "model.safetensors", "tokenizer.json", "tokenizer_config.json"], "size_gb": 0.5, "min_ram_gb": 16, "chat_template": "qwen2", @@ -23,9 +24,10 @@ "qwen3.5-2b-general": { "kind": "chat-llm", "hf_repo": "continuum-ai/qwen3.5-2b-general-forged", - "format": "gguf", + "hf_revision": "ad73e8567db57bb016d2721b3387268e0e5533f7", + "format": "safetensors", "architecture": "qwen3", - "files": ["qwen3.5-2b-general-forged-q4_k_m.gguf"], + "files": ["config.json", "generation_config.json", "model.safetensors", "tokenizer.json", "tokenizer_config.json"], "size_gb": 1.4, "min_ram_gb": 24, "chat_template": "qwen2", @@ -34,9 +36,10 @@ "qwen3.5-4b-code-forged": { "kind": "chat-llm", "hf_repo": "continuum-ai/qwen3.5-4b-code-forged-GGUF", + "hf_revision": "6cfe43981913730b1abc4ad520510a24b3f05922", "format": "gguf", "architecture": "qwen3", - "files": ["qwen3.5-4b-code-forged-q4_k_m.gguf"], + "files": ["qwen3.5-4b-code-forged-Q4_K_M.gguf"], "size_gb": 2.7, "min_ram_gb": 32, "chat_template": "qwen2", @@ -44,10 +47,11 @@ }, "qwen2-vl-7b": { "kind": "vision-llm", - "hf_repo": "Qwen/Qwen2-VL-7B-Instruct-GGUF", + "hf_repo": "bartowski/Qwen2-VL-7B-Instruct-GGUF", + "hf_revision": "3088669af444bb2b86da6272694edd905f9c5a5b", "format": "gguf", "architecture": "qwen2-vl", - "files": ["qwen2-vl-7b-instruct-q4_k_m.gguf", "mmproj-Qwen2-VL-7B-Instruct-f16.gguf"], + "files": ["Qwen2-VL-7B-Instruct-Q4_K_M.gguf", "mmproj-Qwen2-VL-7B-Instruct-f16.gguf"], "size_gb": 5.0, "min_ram_gb": 16, "chat_template": "qwen2", @@ -64,6 +68,7 @@ "whisper-base-en": { "kind": "stt", "hf_repo": "ggerganov/whisper.cpp", + "hf_revision": "5359861c739e955e79d9a303bcbc70fb988958b1", "format": "ggml", "files": ["ggml-base.en.bin"], "size_gb": 0.075, @@ -72,6 +77,7 @@ "piper-libritts-r-medium": { "kind": "tts", "hf_repo": "rhasspy/piper-voices", + "hf_revision": "7a6c333ec560f0e688371adc2fbb7bbe105028c6", "format": "onnx", "files": ["en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx", "en/en_US/libritts_r/medium/en_US-libritts_r-medium.onnx.json"], "size_gb": 0.063, @@ -80,14 +86,16 @@ "kokoro-82m": { "kind": "tts", "hf_repo": "onnx-community/Kokoro-82M-v1.0-ONNX", + "hf_revision": "1939ad2a8e416c0acfeecc08a694d14ef25f2231", "format": "onnx", - "files": ["onnx/model_q8f16.onnx", "voices.bin"], + "files": ["onnx/model_q8f16.onnx", "voices/af.bin"], "size_gb": 0.08, "description": "Kokoro 82M ONNX TTS — high quality, lightweight." }, "silero-vad": { "kind": "vad", "hf_repo": "onnx-community/silero-vad", + "hf_revision": "e71cae966052b992a7eca6b17738916ce0eca4ec", "format": "onnx", "files": ["onnx/model.onnx"], "size_gb": 0.002, diff --git a/src/system/user/server/PersonaLifecycleManager.ts b/src/system/user/server/PersonaLifecycleManager.ts index 1963c11f2..fb6578c9c 100644 --- a/src/system/user/server/PersonaLifecycleManager.ts +++ b/src/system/user/server/PersonaLifecycleManager.ts @@ -1,18 +1,9 @@ -/** - * PersonaLifecycleManager — runtime persona creation/removal based on API key changes. - * - * Subscribes to: - * - system:config:key-added → calls persona/allocate IPC, creates new personas - * - system:config:key-removed → gracefully shuts down that provider's personas - * - * This enables the adaptive self-installing system: add an API key in Settings, - * and the persona appears in chat within seconds — no restart needed. - */ - import { Events } from '../../core/shared/Events'; import { Commands } from '../../core/shared/Commands'; import type { CommandParams } from '../../core/types/JTAGTypes'; import { SecretManager } from '../../secrets/SecretManager'; +import { COLLECTIONS } from '../../data/config/DatabaseConfig'; +import type { UserEntity } from '../../data/entities/UserEntity'; interface KeyChangeEvent { provider: string; @@ -46,9 +37,13 @@ interface AllocationResult { localModel: string; } +interface UserListResult { success: boolean; items?: readonly UserEntity[]; error?: string; } +interface UserCreateResult { success: boolean; user?: UserEntity; error?: string; } + export class PersonaLifecycleManager { private static _instance: PersonaLifecycleManager | null = null; private _subscribed = false; + private runtimeActivator?: (user: UserEntity, reason: string) => Promise; static get instance(): PersonaLifecycleManager { if (!this._instance) { @@ -57,10 +52,6 @@ export class PersonaLifecycleManager { return this._instance; } - /** - * Start listening for key change events. - * Call once during server startup (after commands are registered). - */ subscribe(): void { if (this._subscribed) return; this._subscribed = true; @@ -79,17 +70,15 @@ export class PersonaLifecycleManager { console.log('🔄 PersonaLifecycleManager: Subscribed to config change events'); - // Run initial allocation on startup — config.env keys are already loaded - // by SecretManager but no key-added event fires for pre-existing keys. setTimeout(() => this.runInitialAllocation().catch(err => { console.error('❌ PersonaLifecycleManager: Initial allocation failed:', err); }), 2000); } - /** - * Run allocation on startup with all currently available API keys. - * Creates any personas that should exist based on the current hardware + keys. - */ + setRuntimeActivator(activate: (user: UserEntity, reason: string) => Promise): void { + this.runtimeActivator = activate; + } + private async runInitialAllocation(): Promise { const availableApiKeys = this.collectAvailableApiKeys(); console.log(`🎭 PersonaLifecycleManager: Initial allocation with ${availableApiKeys.length} API keys: [${availableApiKeys.join(', ')}]`); @@ -100,8 +89,15 @@ export class PersonaLifecycleManager { ) as unknown as AllocationResult; if (!allocation?.allocations?.length) { - console.warn('⚠️ PersonaLifecycleManager: No allocations from initial run'); - return; + const activated = await this.activatePersistedLocalPersonas(allocation); + if (activated > 0) { + console.log(`✅ PersonaLifecycleManager: ${activated} persisted persona(s) activated on startup`); + return; + } + + const summary = allocation?.summary?.length ? allocation.summary.join('; ') : 'no allocator summary'; + const skipped = allocation?.skipped?.length ? ` skipped=${allocation.skipped.length}` : ''; + throw new Error(`persona/allocate returned zero startup allocations and no persisted local personas were available;${skipped} summary=${summary}`); } console.log(`🎭 PersonaLifecycleManager: Allocator returned ${allocation.allocations.length} persona(s)`); @@ -114,11 +110,6 @@ export class PersonaLifecycleManager { console.log(`✅ PersonaLifecycleManager: ${created} persona(s) activated on startup`); - // Local model prewarm allocates the full model/KV context. Doing that at - // boot competes with seed, browser reconnect, and first room hydration, and - // on unified-memory Macs can push continuum-core into OS pressure before - // the system is actually ready. Keep it as an explicit performance knob, - // not default startup behavior. if (process.env.CONTINUUM_PREWARM_PERSONAS === '1' || process.env.CONTINUUM_PREWARM_PERSONAS === 'true') { void this.prewarmAllPersonas(allocation.allocations); } else { @@ -126,16 +117,10 @@ export class PersonaLifecycleManager { } } - /** - * Fire prewarm requests in parallel for local personas. Each is bounded - * by short timeouts so a stuck DMR can never hang boot. - */ private async prewarmAllPersonas(allocations: PersonaAllocation[]): Promise { const local = allocations.filter(a => this.isLocalProvider(a.provider)); if (local.length === 0) return; - // Probe DMR availability ONCE before firing all prewarms — saves N - // failed connection attempts when DMR isn't up yet (Docker still booting). const dmrUp = await this.checkDmrAvailable(); if (!dmrUp) { console.log(`⏭️ PersonaLifecycleManager: DMR not reachable yet — skipping prewarm for ${local.length} local persona(s)`); @@ -148,13 +133,6 @@ export class PersonaLifecycleManager { console.log(`🔥 PersonaLifecycleManager: Prewarm batch finished in ${Date.now() - startedAt}ms`); } - /** - * Quick DMR availability probe with a hard 2s timeout. Returns false on - * any failure (network, timeout, non-200) — never throws. Docker concern: - * DMR runs in Docker Desktop's container; on cold Docker start it may - * take a few seconds beyond our system boot to be reachable. We'd rather - * skip prewarm than hang. - */ private async checkDmrAvailable(): Promise { try { const ctrl = new AbortController(); @@ -167,11 +145,6 @@ export class PersonaLifecycleManager { } } - /** - * Fire a single tiny generation to warm the model + DMR slot for one persona. - * max_tokens=1 keeps it nearly free; the cost we want is the model load, - * not the generation. Errors are swallowed — prewarm failure is non-fatal. - */ private async prewarmPersona(allocation: PersonaAllocation): Promise { const model = allocation.resolvedModel || allocation.modelId; if (!model) return; @@ -190,25 +163,15 @@ export class PersonaLifecycleManager { } } - /** - * Provider classes that route to the local DMR/llama-server pool — these - * benefit from prewarm because they pay model-load cold start. Cloud - * providers maintain their own warm state via API connection pooling. - */ private isLocalProvider(provider: string): boolean { return provider === 'local' || provider === 'sentinel'; } - /** - * When an API key is added, re-run allocation and create any new personas. - */ private async handleKeyAdded(event: KeyChangeEvent): Promise { console.log(`🔑 PersonaLifecycleManager: Key added — ${event.provider}`); - // Collect all currently set API keys from process.env const availableApiKeys = this.collectAvailableApiKeys(); - // Call Rust allocator for optimal persona assignments const allocation = await Commands.execute( 'persona/allocate', { availableApiKeys } as Partial @@ -219,7 +182,6 @@ export class PersonaLifecycleManager { return; } - // Find personas that need this specific API key const newPersonas = allocation.allocations.filter( a => a.apiKeyEnv === event.provider ); @@ -229,7 +191,6 @@ export class PersonaLifecycleManager { return; } - // Create each new persona for (const persona of newPersonas) { await this.createPersona(persona); } @@ -237,13 +198,9 @@ export class PersonaLifecycleManager { console.log(`✅ PersonaLifecycleManager: Created ${newPersonas.length} persona(s) for ${event.provider}`); } - /** - * When an API key is removed, deactivate that provider's personas. - */ private async handleKeyRemoved(event: KeyChangeEvent): Promise { console.log(`🔑 PersonaLifecycleManager: Key removed — ${event.provider}`); - // Emit a deactivation event that PersonaUser instances can listen for await Events.emit('persona:provider-deactivated', { provider: event.provider, timestamp: Date.now(), @@ -252,33 +209,64 @@ export class PersonaLifecycleManager { console.log(`⚠️ PersonaLifecycleManager: Deactivation event emitted for ${event.provider} personas`); } - /** - * Create a persona user via the user/create command. - * The command already handles duplicate checking (idempotent). - */ private async createPersona(allocation: PersonaAllocation): Promise { - try { - const result = await Commands.execute('user/create', { - type: allocation.personaType, - displayName: allocation.displayName, - uniqueId: allocation.uniqueId, - provider: allocation.provider, - } as Partial) as unknown as { success: boolean; error?: string }; + const result = await Commands.execute('user/create', { + type: allocation.personaType, + displayName: allocation.displayName, + uniqueId: allocation.uniqueId, + provider: allocation.provider, + } as Partial) as unknown as UserCreateResult; + + if (!result?.success || !result.user) { + throw new Error(`user/create failed for persona ${allocation.displayName} (${allocation.uniqueId}): ${result?.error ?? 'missing user in result'}`); + } - if (result?.success) { - console.log(` ✅ Created persona: ${allocation.displayName} (${allocation.uniqueId})`); - } else { - console.warn(` ⚠️ Persona creation returned: ${JSON.stringify(result)}`); - } - } catch (error) { - console.error(` ❌ Failed to create persona ${allocation.displayName}:`, error); + await this.ensurePersonaRuntimeClient(result.user, 'allocator'); + console.log(` ✅ Activated persona: ${allocation.displayName} (${allocation.uniqueId})`); + } + + private async activatePersistedLocalPersonas(allocation?: AllocationResult): Promise { + const result = await Commands.execute('data/list', { + dbHandle: 'default', + collection: COLLECTIONS.USERS, + filter: { type: 'persona' }, + limit: 100, + skipCount: true, + } as Partial) as unknown as UserListResult; + + if (!result?.success) { + throw new Error(`data/list failed while checking persisted personas: ${result?.error ?? 'unknown error'}`); + } + + const personas = result.items ?? []; + if (personas.length === 0) { + return 0; + } + + console.error( + `❌ PersonaLifecycleManager: persona/allocate returned zero allocations with ${personas.length} persisted persona(s); activating persisted local personas and preserving the allocator defect for CI.` + ); + if (allocation?.summary?.length) { + console.error(`❌ PersonaLifecycleManager: allocator summary: ${allocation.summary.join('; ')}`); + } + + for (const persona of personas) { + await this.ensurePersonaRuntimeClient(persona, 'persisted-local'); + } + return personas.length; + } + + private async ensurePersonaRuntimeClient(user: UserEntity, reason: string): Promise { + if (user.type !== 'persona') { + throw new Error(`Refusing to activate non-persona user ${user.displayName} (${user.id}) from ${reason}`); + } + + if (!this.runtimeActivator) { + throw new Error(`Persona runtime activator is not registered; cannot activate persona ${user.displayName} (${user.id}) from ${reason}`); } + await this.runtimeActivator(user, reason); } - /** - * Collect all API key env vars that are currently set in process.env. - * These are the keys the Rust allocator needs to make decisions. - */ private collectAvailableApiKeys(): string[] { const knownKeyVars = [ 'ANTHROPIC_API_KEY', diff --git a/src/system/user/server/PersonaUser.ts b/src/system/user/server/PersonaUser.ts index 9eb665c01..ffc5abd15 100644 --- a/src/system/user/server/PersonaUser.ts +++ b/src/system/user/server/PersonaUser.ts @@ -1725,9 +1725,13 @@ export class PersonaUser extends AIUser { } const result = await this.responseGenerator.generateAndPostResponse(originalMessage, decisionContext, preBuiltRagContext, socialSignals); + if (!result.success) { + const error = result.error ?? 'unknown response generation failure'; + throw new Error(`${this.displayName}: response generation failed for message ${originalMessage.id}: ${error}`); + } // Mark tool results as processed to prevent infinite loops - if (result.success && result.storedToolResultIds.length > 0) { + if (result.storedToolResultIds.length > 0) { this.taskTracker.markMultipleProcessed(result.storedToolResultIds); } } diff --git a/src/system/user/server/modules/PersonaMessageEvaluator.ts b/src/system/user/server/modules/PersonaMessageEvaluator.ts index 118d2bb3a..c3cec8e5b 100644 --- a/src/system/user/server/modules/PersonaMessageEvaluator.ts +++ b/src/system/user/server/modules/PersonaMessageEvaluator.ts @@ -126,6 +126,10 @@ export class PersonaMessageEvaluator { this.personaUser.logger.enqueueLog('cognition.log', `[${timestamp}] ${message}${formattedArgs}\n`); } + private phase(message: string): void { + console.error(`[persona-phase] ${this.personaUser.displayName}: ${message}`); + } + /** * Evaluate message with full cognition system (planning, focus, working memory) * @@ -165,6 +169,7 @@ export class PersonaMessageEvaluator { evalTiming['early_gate'] = Date.now() - earlyGateStart; this.log(`[GATE:EARLY] ${this.personaUser.displayName}: sender=${messageEntity.senderName} senderType=${messageEntity.senderType} human=${senderIsHuman} result=${earlyResult.should_respond ? 'PASS' : 'BLOCK'} gate=${earlyResult.gate} reason="${earlyResult.reason}" (${earlyResult.decision_time_ms.toFixed(2)}ms)`); + this.phase(`early gate ${earlyResult.should_respond ? 'PASS' : 'BLOCK'} gate=${earlyResult.gate} ${evalTiming['early_gate']}ms message=${messageEntity.id}`); if (!earlyResult.should_respond) { this.personaUser.logAIDecision('SILENT', `${earlyResult.gate}: ${earlyResult.reason}`, { @@ -178,6 +183,7 @@ export class PersonaMessageEvaluator { const coordinationStart = Date.now(); const claimGranted = await this.coordinateResponseClaim(messageEntity, earlyResult); evalTiming['coordination_claim'] = Date.now() - coordinationStart; + this.phase(`coordination ${claimGranted ? 'granted' : 'deferred'} ${evalTiming['coordination_claim']}ms message=${messageEntity.id}`); if (!claimGranted) { this.personaUser.logAIDecision('SILENT', 'coordination: another persona owns this turn', { message: safeMessageText.slice(0, 100), @@ -447,6 +453,7 @@ export class PersonaMessageEvaluator { const gatingStart = Date.now(); const gatingResult = await this.evaluateShouldRespond(messageEntity, senderIsHuman, isMentioned, preComputedDecision, socialSignals); this.log(`⏱️ ${this.personaUser.displayName}: [INNER] evaluateShouldRespond=${Date.now() - gatingStart}ms`); + this.phase(`respond gate ${gatingResult.shouldRespond ? 'RESPOND' : 'SILENT'} ${Date.now() - gatingStart}ms message=${messageEntity.id}`); // FULL TRANSPARENCY LOGGING this.log(`\n${'='.repeat(80)}`); @@ -608,6 +615,7 @@ export class PersonaMessageEvaluator { messageEntity, this.personaUser.rustCognition, ); + this.phase(`post-inference adequacy ${postInferenceResult.shouldSkip ? 'BLOCK' : 'PASS'} ${Date.now() - postInferenceStart}ms message=${messageEntity.id}`); if (postInferenceResult.shouldSkip) { this.log(`[GATE:POST_INFERENCE] ${this.personaUser.displayName}: BLOCK — ${postInferenceResult.reason}`); @@ -698,7 +706,9 @@ export class PersonaMessageEvaluator { // 🔧 PHASE: Generate and post response this.log(`🔧 TRACE-POINT-B: Before respondToMessage call (timestamp=${Date.now()})`); this.log(`🔧 ${this.personaUser.displayName}: [PHASE 3/3] Calling respondToMessage...`); + this.phase(`respondToMessage start message=${messageEntity.id}`); await this.personaUser.respondToMessage(messageEntity, decisionContext, gatingResult.filteredRagContext, gatingResult.socialSignals); + this.phase(`respondToMessage done message=${messageEntity.id}`); this.log(`🔧 TRACE-POINT-C: After respondToMessage returned (timestamp=${Date.now()})`); this.log(`✅ ${this.personaUser.displayName}: [PHASE 3/3] Response posted successfully`); diff --git a/src/system/user/server/modules/PersonaResponseGenerator.ts b/src/system/user/server/modules/PersonaResponseGenerator.ts index 94598c2a2..78d903d40 100644 --- a/src/system/user/server/modules/PersonaResponseGenerator.ts +++ b/src/system/user/server/modules/PersonaResponseGenerator.ts @@ -1,26 +1,5 @@ /* eslint-disable max-lines -- pre-existing 720-line file; scheduled for split into PRG.ts (orchestration) + PRG-postResponse.ts + PRG-pipeline.ts in the cleanup-sweep PR after #950 */ -/** - * PersonaResponseGenerator — TS shim over the Rust cognition core. - * - * The cognitive verb ("this persona, given this message, produces this - * response") now lives in Rust (continuum-core::persona::response::respond). - * This shim is the TS-side contract that: - * - * 1. Applies dormancy / engagement gate (pre-flight, TS-only concern). - * 2. Routes sentinel dispatch (complex multi-step tasks become sentinels - * instead of tool loops — orthogonal to cognition, stays TS). - * 3. Builds the minimal RAG slice Rust needs (system prompt + recent - * history + known specialties) and calls cognitionPersonaRespond. - * 4. Handles Silent|Spoke: Silent is logged + returned; Spoke runs the - * tool agent loop on the returned text and posts to chat. - * 5. Emits UI events (POSTED / ERROR / typing / voice / stage) and - * captures training-data + fitness telemetry off the critical path. - * - * Out of scope for this PR (anvil's next rungs): - * - Tool agent loop migration to Rust. - * - Sentinel dispatch relocation. - * - Cloud-provider routing through Rust ai_provider. - */ +/** PersonaResponseGenerator — TS shim over Rust persona cognition. */ import type { UUID } from '../../../core/types/CrossPlatformUUID'; import { ChatMessageEntity } from '../../../data/entities/ChatMessageEntity'; @@ -54,14 +33,6 @@ import { FitnessTracker } from '../../../genome/server/FitnessTracker'; import { getAIAudioBridge } from '../../../voice/server/AIAudioBridge'; import { PRESENCE_EVENTS } from '../../../core/shared/EventConstants'; import { PersonaEngagementDecider, type DormancyState } from './PersonaEngagementDecider'; -// PersonaAgentLoop / PersonaResponseValidator / PersonaPromptAssembler -// were the TS-side second-pass inference + retry loop on Rust -// personaRespond's output — duplicated work the Rust cognition crate -// already owns and bypassed the model's full context window via a TS -// maxTokens cap. Removed from this file's call path 2026-04-20; deleted -// entirely in the 0.5.1/0.5.2/0.5.4 cleanup sweep once the subgraph -// was confirmed closed (no live importers, no test refs). Tool calling -// continues through Rust cognition::tool_executor (0.5.3). import { SentinelDispatchDecider } from '../../../sentinel/SentinelDispatchDecider'; import { SentinelDispatchCoordinator } from '../../../sentinel/SentinelDispatchCoordinator'; import { Commands } from '../../../core/shared/Commands'; @@ -216,6 +187,10 @@ export class PersonaResponseGenerator { this.logger.enqueueLog('cognition.log', `[${timestamp}] ${message}${formattedArgs}\n`); } + private phase(message: string): void { + console.error(`[persona-phase] ${this.personaName}: ${message}`); + } + shouldRespondToMessage( message: ProcessableMessage, dormancyState?: DormancyState, @@ -308,6 +283,7 @@ export class PersonaResponseGenerator { const pipelineTiming: Record = {}; try { + this.phase(`generate start message=${originalMessage.id} room=${originalMessage.roomId}`); // Sentinel short-circuit. const dispatchResult = await this.checkSentinelDispatch(originalMessage); if (dispatchResult) return dispatchResult; @@ -322,6 +298,7 @@ export class PersonaResponseGenerator { const phase31Start = Date.now(); const ragContext = preBuiltRagContext ?? await this.buildRagContext(originalMessage); pipelineTiming['3.1_rag'] = Date.now() - phase31Start; + this.phase(`rag ready ${pipelineTiming['3.1_rag']}ms history=${ragContext.conversationHistory.length} memories=${ragContext.privateMemories.length} artifacts=${ragContext.artifacts.length}`); const knownSpecialties = this.buildKnownSpecialties(ragContext); const recentHistory = this.buildRecentHistory(ragContext); @@ -345,14 +322,18 @@ export class PersonaResponseGenerator { // refs only. Resolve back to bytes here, on the request path — // chat-send already wrote the file to disk via // MediaBlobService.externalize (synchronously, before data/create). - // Description (from VisionDescriptionService cache) gets pulled - // alongside so text-only personas downstream get the bridge text - // instead of hallucinating from prompt context. + // Resolve THIS persona's model capabilities before media projection. + // Native sensory models must receive source bytes directly; text + // description lookup is only for non-native models and must not sit + // on the native path. + const capabilities = await this.resolveModelCapabilities(); + const hasNativeVision = capabilities.includes('vision'); + this.phase(`capabilities ready model=${this.modelConfig.model} caps=${capabilities.join(',') || 'none'}`); + const { MediaBlobService } = await import('../../../storage/MediaBlobService'); - const { VisionDescriptionService } = await import('../../../vision/VisionDescriptionService'); const fs = await import('fs'); - const messageMediaResolved = await Promise.all( + const messageMedia = await Promise.all( (originalMessage.content.media ?? []).map(async (m) => { // Prefer inline base64 if it's still around (browser pre-encode // path or an item smaller than the externalize threshold), else @@ -360,38 +341,30 @@ export class PersonaResponseGenerator { let base64: string | undefined = m.base64; if (!base64 && m.blobHash) { const path = MediaBlobService.getPath(m.blobHash); - if (path) { - try { - const buf = await fs.promises.readFile(path); - base64 = buf.toString('base64'); - } catch { - // File missing despite hash — drop this item, log later. - return null; - } + if (!path) { + throw new Error(`Media blob ${m.blobHash} has no resolved path`); + } + try { + const buf = await fs.promises.readFile(path); + base64 = buf.toString('base64'); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Failed to read media blob ${m.blobHash} at ${path}: ${message}`); } } if (!base64) { - return null; // Nothing to send to the model + throw new Error(`Media item ${m.type} has neither inline base64 nor blobHash`); } - // Pull description from VDS — populated by prewarmVisionDescriptions - // at chat-send time. Two states are valid waits: - // 'cached' → ~0ms instant lookup (pre-warm finished). - // 'inflight' → bounded wait. Pre-warm started but hasn't - // resolved yet; we'd rather wait up to 8s than - // hand the persona an empty description and - // let it hallucinate "I don't see any image." - // VDS already deduplicates inflight requests, so - // this await piggybacks on the existing call — - // no extra inference cost. - // Status `none` / `error` → don't trigger a blocking describe - // here; the chat-send path is responsible for prewarming. Stage - // 2 (Rust-side) is responsible for emitting an [Attached image: - // unavailable] marker when description ends up undefined, so a - // text-only persona at least KNOWS an image was attached - // instead of fabricating absence. Tracked in #970. + + // Description lookup is NOT on the native vision path. Vision- + // capable personas get bytes; only text-only image recipients + // may use a prewarmed description bridge. Errors are surfaced + // because swallowing them makes sensory failures look like + // normal text-only cognition. let description: string | undefined; - if (m.type === 'image') { + if (m.type === 'image' && !hasNativeVision) { try { + const { VisionDescriptionService } = await import('../../../vision/VisionDescriptionService'); const visionSvc = VisionDescriptionService.getInstance(); const status = visionSvc.descriptionStatus(base64); if (status === 'cached' || status === 'inflight') { @@ -402,8 +375,9 @@ export class PersonaResponseGenerator { ]); description = desc?.description; } - } catch { - // Best-effort; drop to undefined on any cache error + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Vision description lookup failed for text-only image bridge: ${message}`); } } return { @@ -414,14 +388,6 @@ export class PersonaResponseGenerator { }; }) ); - const messageMedia = messageMediaResolved.filter((x): x is NonNullable => x !== null); - - // Resolve THIS persona's model capabilities (cached). Required by - // the IPC contract — Rust no longer does a registry lookup on its - // side, so the answer to "is this model vision-capable?" must - // travel WITH the request. Hard error if the model isn't in the - // registry (broken persona configuration, fail loudly here). - const capabilities = await this.resolveModelCapabilities(); // IPC shape: { signal, personaContext }. Rust projects (signal, // ctx) → RespondInput via cognition_io::build_respond_input, @@ -509,10 +475,12 @@ export class PersonaResponseGenerator { }; const ipcStart = Date.now(); + this.phase(`cognition/respond start media=${messageMedia.length} history=${recentHistory.length}`); const response = await this._rustBridge.personaRespond(rustRequest); const ipcDurationMs = Date.now() - ipcStart; pipelineTiming['3.2_cognition'] = Date.now() - phase32Start; pipelineTiming['3.2_ipc'] = ipcDurationMs; + this.phase(`cognition/respond done kind=${response.kind} ipc=${ipcDurationMs}ms total=${response.kind === 'spoke' ? response.total_ms : 'n/a'}ms`); if (response.kind === 'silent') { return this.handleSilent(originalMessage, response, pipelineTiming, generateStartTime); @@ -529,11 +497,11 @@ export class PersonaResponseGenerator { // is exactly what Rust returned. const finalText = response.text.trim(); if (!finalText) { - this.log(`⚠️ ${this.personaName}: Rust returned empty text — skipping post`); - return { success: false, error: 'Empty response from Rust', storedToolResultIds: allStoredResultIds }; + throw new Error(`${this.personaName}: Rust cognition returned empty visible text for message ${originalMessage.id}`); } const phase35Start = Date.now(); + this.phase(`post start chars=${finalText.length}`); const postedMessageId = await this.postResponse( originalMessage, finalText, @@ -542,6 +510,7 @@ export class PersonaResponseGenerator { generateStartTime, ); pipelineTiming['3.5_post'] = Date.now() - phase35Start; + this.phase(`post done id=${postedMessageId ?? 'missing'} post=${pipelineTiming['3.5_post']}ms`); if (decisionContext) { CoordinationDecisionLogger.logDecision({ @@ -865,6 +834,7 @@ export class PersonaResponseGenerator { this.log(`⏭️ ${this.personaName}: Provider not configured, staying quiet`); } else { this.log(`❌ ${this.personaName}: ${errorMsg}`); + console.error(`[PersonaResponseGenerator] ${this.personaName} response failed for message ${originalMessage.id}: ${errorMsg}`); AIDecisionLogger.logError(this.personaName, 'Response generation/posting', errorMsg); } diff --git a/src/tests/unit/seed-install-tier.spec.ts b/src/tests/unit/seed-install-tier.spec.ts new file mode 100644 index 000000000..1b26454df --- /dev/null +++ b/src/tests/unit/seed-install-tier.spec.ts @@ -0,0 +1,33 @@ +import { describe, expect, it, afterEach } from 'vitest'; +import { resolveInstallTier } from '../../server/seed-in-process'; + +const ORIGINAL_CONTINUUM_TIER = process.env.CONTINUUM_TIER; +const ORIGINAL_TIER = process.env.TIER; + +afterEach(() => { + process.env.CONTINUUM_TIER = ORIGINAL_CONTINUUM_TIER; + process.env.TIER = ORIGINAL_TIER; +}); + +describe('seed install tier resolution', () => { + it('uses CONTINUUM_TIER before host/container memory inference', () => { + process.env.CONTINUUM_TIER = 'full'; + delete process.env.TIER; + + expect(resolveInstallTier()).toBe('full'); + }); + + it('uses TIER when CONTINUUM_TIER is absent', () => { + delete process.env.CONTINUUM_TIER; + process.env.TIER = 'mid'; + + expect(resolveInstallTier()).toBe('mid'); + }); + + it('fails on invalid explicit tiers', () => { + process.env.CONTINUUM_TIER = 'primary'; + delete process.env.TIER; + + expect(() => resolveInstallTier()).toThrow(/invalid CONTINUUM_TIER\/TIER 'primary'/); + }); +}); diff --git a/src/tsconfig.eslint.json b/src/tsconfig.eslint.json index 95cf75fc1..d0968fc6b 100644 --- a/src/tsconfig.eslint.json +++ b/src/tsconfig.eslint.json @@ -18,6 +18,7 @@ "generator/generate-command-schemas.ts", "widgets/**/*.ts", "tests/workers/**/*.ts", + "tests/unit/seed-install-tier.spec.ts", "tests/unit/url-card-adapter-xss.spec.ts", "test-path-aliases.ts", "test-path-aliases-runtime.ts" diff --git a/src/workers/continuum-core/src/cognition/model_resolver/mod.rs b/src/workers/continuum-core/src/cognition/model_resolver/mod.rs index cc52ed93d..1cdfa72ac 100644 --- a/src/workers/continuum-core/src/cognition/model_resolver/mod.rs +++ b/src/workers/continuum-core/src/cognition/model_resolver/mod.rs @@ -43,17 +43,8 @@ use crate::cognition::adaptive_throughput::TargetSilicon; use crate::model_registry::types::{Capability, Model, Provider, ProviderKind}; use std::collections::HashMap; - -fn derive_target_silicon( - model: &Model, - provider_kinds: &HashMap<&str, ProviderKind>, - host: &HostCapability, -) -> TargetSilicon { - let kind = provider_kinds - .get(model.provider.as_str()) - .copied() - .unwrap_or_default(); // ProviderKind::Cloud — unknown provider treated as cloud - match kind { +fn derive_target_silicon(provider_kind: ProviderKind, host: &HostCapability) -> TargetSilicon { + match provider_kind { ProviderKind::Local => host.primary_target_silicon, ProviderKind::Cloud => TargetSilicon::Cloud, } @@ -95,9 +86,6 @@ where .into_iter() .map(|p| (p.id.as_str(), p.kind)) .collect(); - let is_local = |provider_id: &str| { - provider_kinds.get(provider_id).copied().unwrap_or_default() == ProviderKind::Local - }; let registry: Vec<&Model> = models.into_iter().collect(); let registry_count = registry.len(); @@ -187,6 +175,22 @@ where } } + for model in &candidates { + if !provider_kinds.contains_key(model.provider.as_str()) { + return Err(ResolutionError::UnknownProviderReferenced { + model_id: model.id.clone(), + provider_id: model.provider.clone(), + }); + } + } + + let provider_kind = |provider_id: &str| { + *provider_kinds + .get(provider_id) + .expect("provider existence validated before provider policy") + }; + let is_local = |provider_id: &str| provider_kind(provider_id) == ProviderKind::Local; + // Filter 4: provider policy. let before_provider = candidates.len(); candidates.retain(|m| match requirement.provider_policy { @@ -223,7 +227,7 @@ where } let best = candidates.first().expect("non-empty after filters"); - let target_silicon = derive_target_silicon(best, &provider_kinds, &requirement.host); + let target_silicon = derive_target_silicon(provider_kind(&best.provider), &requirement.host); // Silicon-residency gate. No silent CPU fallback. No silent Cloud // fallback under GpuOrUnifiedMemoryOnly. The check happens AFTER all @@ -688,11 +692,10 @@ mod tests { } #[test] - fn unknown_provider_defaults_to_cloud_for_safety() { + fn unknown_provider_errors_before_policy_ranking() { // If a model references a provider id that isn't in the providers - // table at all, the resolver treats it as Cloud (default kind). - // This is loud: a LocalOnly query will reject the model rather - // than silently routing unknown-residency work to local hardware. + // table at all, the resolver must not classify it as either Cloud + // or Local. Registry integrity is a hard precondition. let models = vec![make_model( "orphan-model", "orphan-provider", @@ -703,9 +706,36 @@ mod tests { let providers: Vec = vec![]; let req = req_chat_local(host_m1_8gb()); let err = resolve_model(&req, models.iter(), providers.iter()).unwrap_err(); + match err { + ResolutionError::UnknownProviderReferenced { + model_id, + provider_id, + } => { + assert_eq!(model_id, "orphan-model"); + assert_eq!(provider_id, "orphan-provider"); + } + other => panic!("expected unknown provider error, got {other:?}"), + } + } + + #[test] + fn unknown_provider_does_not_pass_cloud_only() { + let models = vec![make_model( + "orphan-model", + "orphan-provider", + Arch::Llama, + 8192, + &[Capability::Chat], + )]; + let providers: Vec = vec![]; + let mut req = req_chat_local(host_m1_8gb()); + req.provider_policy = LocalOrCloudPolicy::CloudOnly; + + let err = resolve_model(&req, models.iter(), providers.iter()).unwrap_err(); + assert!( - matches!(err, ResolutionError::NoModelMatchesRequirement { .. }), - "LocalOnly with unknown provider must error, not silently treat as local" + matches!(err, ResolutionError::UnknownProviderReferenced { .. }), + "CloudOnly must not accept models with unknown provider residency: {err:?}" ); } diff --git a/src/workers/continuum-core/src/cognition/model_resolver/types.rs b/src/workers/continuum-core/src/cognition/model_resolver/types.rs index 00d4a857f..123e1fd3f 100644 --- a/src/workers/continuum-core/src/cognition/model_resolver/types.rs +++ b/src/workers/continuum-core/src/cognition/model_resolver/types.rs @@ -318,4 +318,12 @@ pub enum ResolutionError { rejected_model_id: String, actual_silicon: TargetSilicon, }, + #[error( + "model '{model_id}' references unknown provider '{provider_id}'. \ + Add the provider to the registry or remove the model row." + )] + UnknownProviderReferenced { + model_id: String, + provider_id: String, + }, } diff --git a/src/workers/continuum-core/src/gpu/memory_manager.rs b/src/workers/continuum-core/src/gpu/memory_manager.rs index f184afee6..93d3d075d 100644 --- a/src/workers/continuum-core/src/gpu/memory_manager.rs +++ b/src/workers/continuum-core/src/gpu/memory_manager.rs @@ -855,12 +855,9 @@ fn detect_vulkan() -> Option<(u64, String)> { use std::process::Command; let output = Command::new("vulkaninfo").arg("--summary").output().ok()?; - - if !output.status.success() { - return None; - } - - let stdout = String::from_utf8(output.stdout).ok()?; + let mut text = String::new(); + text.push_str(&String::from_utf8_lossy(&output.stdout)); + text.push_str(&String::from_utf8_lossy(&output.stderr)); // vulkaninfo --summary format (excerpt): // Devices: @@ -875,12 +872,7 @@ fn detect_vulkan() -> Option<(u64, String)> { // // Take the FIRST deviceName (vulkaninfo orders discrete > integrated > CPU // by default on most loaders). If absent, no usable ICD. - let device_name = stdout - .lines() - .find(|l| l.trim_start().starts_with("deviceName")) - .and_then(|l| l.split('=').nth(1)) - .map(|s| s.trim().to_string()) - .filter(|s| !s.is_empty())?; + let device_name = parse_vulkan_device_name(&text)?; // Conservative VRAM budget: 4 GiB. Real allocations go through the // Vulkan loader at runtime; this only seeds the GpuMemoryManager @@ -892,6 +884,16 @@ fn detect_vulkan() -> Option<(u64, String)> { Some((total_bytes, device_name)) } +#[cfg(feature = "vulkan")] +fn parse_vulkan_device_name(vulkaninfo_output: &str) -> Option { + vulkaninfo_output + .lines() + .find(|l| l.trim_start().starts_with("deviceName")) + .and_then(|l| l.split('=').nth(1)) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) +} + // detect_cpu_fallback() removed — see detect_gpu()'s panic for rationale. // CPU fallback is forbidden architecturally; absent GPU = absent system. @@ -981,6 +983,30 @@ mod tests { assert!(!name.is_empty(), "GPU name should not be empty"); } + #[cfg(feature = "vulkan")] + #[test] + fn test_parse_vulkan_device_name_with_headless_warnings() { + let output = r#" +error: XDG_RUNTIME_DIR is invalid or not set in the environment. +Vulkan Instance Version: 1.3.275 + +Devices: +======== +GPU0: + apiVersion = 1.3.275 + driverVersion = 0x1 + vendorID = 0x10005 + deviceID = 0x0000 + deviceType = PHYSICAL_DEVICE_TYPE_CPU + deviceName = llvmpipe (LLVM 20.1.2, 256 bits) +"#; + + assert_eq!( + parse_vulkan_device_name(output).as_deref(), + Some("llvmpipe (LLVM 20.1.2, 256 bits)") + ); + } + #[test] fn test_initial_pressure_zero() { let mgr = test_manager(36_864); // 36GB like M3 Max diff --git a/src/workers/continuum-core/src/inference/candle_adapter.rs b/src/workers/continuum-core/src/inference/candle_adapter.rs index 01ed0e934..3b375d1b6 100644 --- a/src/workers/continuum-core/src/inference/candle_adapter.rs +++ b/src/workers/continuum-core/src/inference/candle_adapter.rs @@ -1466,7 +1466,7 @@ mod tests { ); let vision = resolve_model_id("vision-default"); - assert_eq!(vision, "Qwen/Qwen2-VL-7B-Instruct-GGUF"); + assert_eq!(vision, "bartowski/Qwen2-VL-7B-Instruct-GGUF"); let gating = resolve_model_id("gating"); assert_eq!(gating, "Qwen/Qwen2-0.5B-Instruct"); diff --git a/src/workers/continuum-core/src/ipc/mod.rs b/src/workers/continuum-core/src/ipc/mod.rs index ee7c6202a..38d541224 100644 --- a/src/workers/continuum-core/src/ipc/mod.rs +++ b/src/workers/continuum-core/src/ipc/mod.rs @@ -47,6 +47,7 @@ use dashmap::DashMap; use serde::{Deserialize, Serialize}; use std::io::{BufRead, BufReader, Read, Write}; use std::net::{TcpListener, TcpStream}; +use std::os::unix::fs::FileTypeExt; use std::os::unix::net::{UnixListener, UnixStream}; use std::path::Path; use std::sync::Arc; @@ -63,6 +64,33 @@ trait IpcStream: Read + Write + Send + Sized + 'static { fn peer_addr_str(&self) -> String; } +fn prepare_socket_path(socket_path: impl AsRef) -> std::io::Result<()> { + let socket_path = socket_path.as_ref(); + if let Some(parent) = socket_path.parent() { + std::fs::create_dir_all(parent)?; + } + + match std::fs::symlink_metadata(socket_path) { + Ok(metadata) => { + let file_type = metadata.file_type(); + if file_type.is_socket() { + std::fs::remove_file(socket_path)?; + Ok(()) + } else { + Err(std::io::Error::new( + std::io::ErrorKind::AlreadyExists, + format!( + "IPC socket path exists but is not a socket: {}", + socket_path.display() + ), + )) + } + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(e), + } +} + impl IpcStream for UnixStream { fn try_clone_stream(&self) -> std::io::Result { self.try_clone() @@ -538,6 +566,59 @@ mod tests { // Binary Framing Unit Tests // ======================================================================== + #[test] + fn prepare_socket_path_creates_parent_directory() { + let dir = tempfile::tempdir().unwrap(); + let socket = dir.path().join(".continuum/sockets/continuum-core.sock"); + + prepare_socket_path(&socket).unwrap(); + + assert!( + socket.parent().unwrap().is_dir(), + "socket parent should be created under .continuum" + ); + assert!( + !socket.exists(), + "prepare should not create the socket before UnixListener::bind" + ); + } + + #[test] + fn prepare_socket_path_removes_stale_socket() { + let dir = tempfile::tempdir().unwrap(); + let socket = dir.path().join(".continuum/sockets/continuum-core.sock"); + std::fs::create_dir_all(socket.parent().unwrap()).unwrap(); + let listener = UnixListener::bind(&socket).unwrap(); + drop(listener); + assert!(std::fs::symlink_metadata(&socket) + .unwrap() + .file_type() + .is_socket()); + + prepare_socket_path(&socket).unwrap(); + + assert!( + !socket.exists(), + "stale socket should be removed before rebinding" + ); + } + + #[test] + fn prepare_socket_path_rejects_non_socket_existing_path() { + let dir = tempfile::tempdir().unwrap(); + let socket = dir.path().join(".continuum/sockets/continuum-core.sock"); + std::fs::create_dir_all(socket.parent().unwrap()).unwrap(); + std::fs::write(&socket, b"not a socket").unwrap(); + + let err = prepare_socket_path(&socket).unwrap_err(); + + assert_eq!(err.kind(), std::io::ErrorKind::AlreadyExists); + assert!( + err.to_string().contains("not a socket"), + "unexpected error: {err}" + ); + } + #[test] fn test_json_frame_roundtrip() { // Create a response, write to buffer, verify framing @@ -792,10 +873,7 @@ pub fn start_server( memory_manager: Arc, pressure_monitor: Arc, ) -> std::io::Result<()> { - // Remove socket file if it exists - if Path::new(socket_path).exists() { - std::fs::remove_file(socket_path)?; - } + prepare_socket_path(socket_path)?; log_info!("ipc", "server", "Starting IPC server on {}", socket_path); diff --git a/src/workers/continuum-core/src/model_registry/artifacts.rs b/src/workers/continuum-core/src/model_registry/artifacts.rs index fdc629adf..fcf062461 100644 --- a/src/workers/continuum-core/src/model_registry/artifacts.rs +++ b/src/workers/continuum-core/src/model_registry/artifacts.rs @@ -33,7 +33,7 @@ pub fn resolve_gguf_for_model_id(model_id: &str) -> Option { } pub fn resolve_local_model_dir_for_model_id(model_id: &str) -> Option { - resolve_from_local_model_roots(model_id).and_then(|gguf| gguf.parent().map(Path::to_path_buf)) + resolve_dir_from_local_model_roots(model_id) } pub fn find_first_local_gguf() -> Option { @@ -102,6 +102,15 @@ fn resolve_from_local_model_roots(model_id: &str) -> Option { None } +fn resolve_dir_from_local_model_roots(model_id: &str) -> Option { + for root in local_model_roots() { + if let Some(dir) = find_model_dir_in_root(model_id, &root) { + return Some(dir); + } + } + None +} + fn local_model_roots() -> Vec { let mut roots = Vec::new(); if let Some(home) = home_dir_string() { @@ -147,13 +156,22 @@ fn find_model_dir_in_root(model_id: &str, root: &Path) -> Option { return None; } + let model_lower = model_id.to_lowercase(); + let model_normalized = normalize_model_dir_token(model_id); for entry in fs::read_dir(root).ok()?.flatten() { let path = entry.path(); - if !path.is_dir() || first_gguf_in_dir(&path).is_none() { + if !path.is_dir() || !is_model_artifact_dir(&path) { continue; } - let dir_name = path.file_name()?.to_str()?.to_lowercase(); - let model_lower = model_id.to_lowercase(); + let dir_raw = path.file_name()?.to_str()?; + let dir_name = dir_raw.to_lowercase(); + let dir_normalized = normalize_model_dir_token(dir_raw); + if !dir_normalized.is_empty() + && (model_normalized.contains(&dir_normalized) + || dir_normalized.contains(&model_normalized)) + { + return Some(path); + } if model_lower.contains("qwen") && model_lower.contains("compacted") && dir_name.contains("qwen") @@ -180,6 +198,29 @@ fn find_model_dir_in_root(model_id: &str, root: &Path) -> Option { None } +fn is_model_artifact_dir(dir: &Path) -> bool { + first_gguf_in_dir(dir).is_some() + || dir.join("model.safetensors").exists() + || fs::read_dir(dir).ok().is_some_and(|entries| { + entries.flatten().any(|entry| { + entry + .file_name() + .to_str() + .is_some_and(|name| name.starts_with("model-") && name.ends_with(".safetensors")) + }) + }) +} + +fn normalize_model_dir_token(value: &str) -> String { + value + .trim_end_matches("-GGUF") + .trim_end_matches("-gguf") + .chars() + .filter(|c| c.is_ascii_alphanumeric()) + .flat_map(|c| c.to_lowercase()) + .collect() +} + fn resolve_from_huggingface_hint(hint: &str) -> Option { let repo_slug = hf_repo_slug(hint)?; let cache = huggingface_cache_root()?; @@ -409,4 +450,60 @@ mod tests { assert_eq!(resolved.as_deref(), Some(explicit.as_path())); }); } + + #[test] + fn resolves_model_init_registry_key_dir_for_qwen35_gguf() { + let home = tempfile::tempdir().unwrap(); + with_test_home(home.path(), || { + let model_dir = home + .path() + .join(".continuum/genome/models/qwen3.5-4b-code-forged"); + fs::create_dir_all(&model_dir).unwrap(); + let gguf = model_dir.join("qwen3.5-4b-code-forged-Q4_K_M.gguf"); + fs::write(&gguf, b"gguf").unwrap(); + + let resolved = resolve_gguf( + "continuum-ai/qwen3.5-4b-code-forged-GGUF", + None, + None, + ); + + assert_eq!(resolved.as_deref(), Some(gguf.as_path())); + }); + } + + #[test] + fn resolves_short_qwen2_vl_model_init_dir_for_instruct_model() { + let home = tempfile::tempdir().unwrap(); + with_test_home(home.path(), || { + let model_dir = home.path().join(".continuum/genome/models/qwen2-vl-7b"); + fs::create_dir_all(&model_dir).unwrap(); + let gguf = model_dir.join("Qwen2-VL-7B-Instruct-Q4_K_M.gguf"); + fs::write(&gguf, b"gguf").unwrap(); + + let resolved = resolve_gguf("qwen2-vl-7b-instruct", None, None); + + assert_eq!(resolved.as_deref(), Some(gguf.as_path())); + }); + } + + #[test] + fn resolves_safetensors_model_init_dir_for_mid_tier_model() { + let home = tempfile::tempdir().unwrap(); + with_test_home(home.path(), || { + let model_dir = home + .path() + .join(".continuum/genome/models/qwen3.5-2b-general"); + fs::create_dir_all(&model_dir).unwrap(); + fs::write(model_dir.join("model.safetensors"), b"weights").unwrap(); + fs::write(model_dir.join("config.json"), b"{}").unwrap(); + fs::write(model_dir.join("tokenizer.json"), b"{}").unwrap(); + + let resolved = resolve_local_model_dir_for_model_id( + "continuum-ai/qwen3.5-2b-general-forged", + ); + + assert_eq!(resolved.as_deref(), Some(model_dir.as_path())); + }); + } } diff --git a/src/workers/continuum-core/src/persona/cognition_io.rs b/src/workers/continuum-core/src/persona/cognition_io.rs index b39414c68..78c7f3111 100644 --- a/src/workers/continuum-core/src/persona/cognition_io.rs +++ b/src/workers/continuum-core/src/persona/cognition_io.rs @@ -225,8 +225,16 @@ pub fn build_respond_input( } } - let message_id = signal.message_id.unwrap_or(Uuid::nil()); - let room_id = ctx.room_id.unwrap_or(Uuid::nil()); + let message_id = signal.message_id.ok_or_else(|| { + "build_respond_input: chat-shaped cognition requires signal.messageId; \ + missing ids would collapse shared-analysis cache keys" + .to_string() + })?; + let room_id = ctx.room_id.ok_or_else(|| { + "build_respond_input: chat-shaped cognition requires personaContext.roomId; \ + route roomless hosts through a non-chat projection" + .to_string() + })?; // Per-turn shared context. Hoisting the room-level fields // (room_id + recent_history + known_specialties) into an @@ -349,7 +357,7 @@ mod tests { fn empty_ctx() -> PersonaContext { PersonaContext { - persona_id: Uuid::nil(), + persona_id: uuid("00000000-0000-4000-8000-000000000001"), display_name: String::new(), specialty: String::new(), model: String::new(), @@ -358,19 +366,25 @@ mod tests { recent_history: vec![], known_specialties: vec![], other_persona_names: vec![], - room_id: None, + room_id: Some(uuid("00000000-0000-4000-8000-000000000002")), is_voice: false, } } + fn uuid(value: &str) -> Uuid { + Uuid::parse_str(value).unwrap() + } + fn chat_signal(text: &str) -> Signal { Signal { kind: SignalKind::ChatMessage, text: text.to_string(), media: vec![], - originator: SignalOriginator::User { user_id: Uuid::nil() }, + originator: SignalOriginator::User { + user_id: uuid("00000000-0000-4000-8000-000000000003"), + }, timestamp_ms: 0, - message_id: Some(Uuid::nil()), + message_id: Some(uuid("00000000-0000-4000-8000-000000000004")), } } @@ -384,9 +398,11 @@ mod tests { kind: SignalKind::ChatMessage, text: "hello".to_string(), media: vec![], - originator: SignalOriginator::User { user_id: Uuid::nil() }, + originator: SignalOriginator::User { + user_id: uuid("00000000-0000-4000-8000-000000000003"), + }, timestamp_ms: 1234, - message_id: Some(Uuid::nil()), + message_id: Some(uuid("00000000-0000-4000-8000-000000000004")), }; let json = serde_json::to_string(&signal).expect("serializes"); let back: Signal = serde_json::from_str(&json).expect("round-trips"); @@ -402,7 +418,7 @@ mod tests { #[test] fn persona_context_slot_mirrors_fields() { let mut ctx = empty_ctx(); - ctx.persona_id = Uuid::nil(); + ctx.persona_id = uuid("00000000-0000-4000-8000-000000000001"); ctx.specialty = "vision".to_string(); ctx.display_name = "Vision AI".to_string(); let slot = ctx.slot(); @@ -456,6 +472,31 @@ mod tests { assert!(input.message_text.is_empty()); } + /// What this catches: missing message ids used to become + /// `Uuid::nil()`, collapsing unrelated turns into the same shared + /// analysis cache key. That is a host contract bug and must be + /// loud at the projection boundary. + #[test] + fn projection_rejects_missing_message_id() { + let mut signal = chat_signal("hello"); + signal.message_id = None; + let err = build_respond_input(&signal, &empty_ctx()) + .expect_err("missing message id should be rejected"); + assert!(err.contains("signal.messageId")); + } + + /// What this catches: roomless chat-shaped cognition used to run + /// under `Uuid::nil()`, mixing cache/recorder state across hosts. + /// Non-chat hosts need a different projection instead of a fake room. + #[test] + fn projection_rejects_missing_room_id() { + let mut ctx = empty_ctx(); + ctx.room_id = None; + let err = build_respond_input(&chat_signal("hello"), &ctx) + .expect_err("missing room id should be rejected"); + assert!(err.contains("personaContext.roomId")); + } + /// What this catches: media on the signal passes through to /// `RespondInput::message_media` unchanged. Downstream /// `MediaPolicy` decides byte-vs-marker; the projection stays diff --git a/src/workers/continuum-core/src/persona/response.rs b/src/workers/continuum-core/src/persona/response.rs index b926ce16d..4e18fea65 100644 --- a/src/workers/continuum-core/src/persona/response.rs +++ b/src/workers/continuum-core/src/persona/response.rs @@ -322,6 +322,14 @@ async fn respond_inner( "visible_chars": visible_text.len(), }), ); + if visible_text.trim().is_empty() { + return Err(format!( + "persona '{}' produced empty visible text after post-processing (raw_chars={}, think_blocks={})", + input.persona.display_name, + raw_response.text.len(), + think_count + )); + } Ok(PersonaResponse::Spoke { persona_id: input.persona.persona_id, @@ -454,7 +462,7 @@ async fn run_render( assembled.messages, &input.message_media, &input.capabilities, - ); + )?; let request = TextGenerationRequest { messages, @@ -517,32 +525,33 @@ async fn run_render( /// the FINAL user-role message — but only when the persona's resolved /// model declares the matching capability (`Vision` for image, /// `AudioInput` for audio). Native-multimodal models receive the source -/// bytes directly; text-only models fall back to the simple text path -/// (the sensory bridge would inject a description upstream — its job, -/// not ours). +/// bytes directly; text-only models use the simple text path plus +/// explicit description markers from the sensory bridge. /// /// Behavior contract: /// - empty `media` → identical to the legacy text-only path. /// - non-empty `media` + model has Vision/AudioInput → last user /// message becomes `MessageContent::Parts(text + media)`. /// - non-empty `media` + model lacks the capability → text-only -/// path; the bridge layer (VisionDescriptionService etc.) is -/// expected to have already converted media → text upstream. +/// path with description markers; the bridge layer +/// (VisionDescriptionService etc.) is expected to have already +/// converted media → text upstream. /// - `media` items whose `item_type` doesn't match a capability the -/// model has are dropped (e.g. audio sent to a vision-only model). -/// - no user-role messages found → media silently dropped (rare — -/// would mean the assembler produced an unusual shape). +/// model has are emitted as description markers only. +/// - no user-role messages found or target user message already uses +/// `MessageContent::Parts` → hard error. These shapes mean the +/// assembler contract changed and media cannot be attached safely. pub fn build_messages_with_media( prompt_messages: Vec, media: &[MediaItemLite], model_caps: &std::collections::HashSet, -) -> Vec { +) -> Result, String> { use crate::ai::types::{AudioInput, ChatMessage, ContentPart, ImageInput, MessageContent}; use crate::persona::media_policy::MediaPolicy; // Default text-only path. Always start here; we may rewrite the // last user message below if the policy chose an attachable item. - let mut messages: Vec = prompt_messages + let messages: Vec = prompt_messages .into_iter() .map(|m| ChatMessage { role: m.role, @@ -552,7 +561,7 @@ pub fn build_messages_with_media( .collect(); if media.is_empty() { - return messages; + return Ok(messages); } // Apply the AT-MOST-ONE-LATEST policy. The byte-attachment slot @@ -625,8 +634,17 @@ pub fn build_messages_with_media( emitted_parts.push(ContentPart::Text { text }); } + attach_media_parts_to_last_user(messages, emitted_parts) +} + +fn attach_media_parts_to_last_user( + mut messages: Vec, + emitted_parts: Vec, +) -> Result, String> { + use crate::ai::types::{ContentPart, MessageContent}; + if emitted_parts.is_empty() { - return messages; + return Ok(messages); } // Find the LAST user-role message and convert it to Parts (text + @@ -634,17 +652,20 @@ pub fn build_messages_with_media( // turn after assemble(). let last_user_idx = messages.iter().rposition(|m| m.role == "user"); let Some(idx) = last_user_idx else { - // No user message to attach to. Drop media silently — caller - // shape was unusual; assembling new user messages here would - // hide the actual bug. - return messages; + return Err( + "build_messages_with_media: media was provided but prompt has no user message to attach it to" + .to_string(), + ); }; let existing_text = match &messages[idx].content { MessageContent::Text(t) => t.clone(), - // Defensive: if the assembler somehow already produced Parts, - // we don't try to merge — leave it alone. - MessageContent::Parts(_) => return messages, + MessageContent::Parts(_) => { + return Err( + "build_messages_with_media: media was provided but target user message already has parts" + .to_string(), + ); + } }; let mut parts: Vec = Vec::with_capacity(emitted_parts.len() + 1); @@ -655,7 +676,7 @@ pub fn build_messages_with_media( } parts.extend(emitted_parts); messages[idx].content = MessageContent::Parts(parts); - messages + Ok(messages) } /// each as a `cognition:think-block` event for the (future) hippocampus @@ -872,6 +893,18 @@ mod tests { assert_eq!(count, 2); } + #[test] + fn all_think_output_leaves_no_postable_text() { + let raw = "plan only"; + let (think_stripped, count) = strip_thinks_emit_events(raw, Uuid::nil(), Uuid::nil()); + let visible = strip_leaked_tool_markup(&think_stripped); + assert_eq!(count, 1); + assert!( + visible.trim().is_empty(), + "all-think model output must trip the hard empty-visible-text guard" + ); + } + #[test] fn strip_thinks_handles_multiline_thinks() { let raw = "\nLine one\nLine two\n\nVisible response."; @@ -1009,7 +1042,7 @@ mod tests { // attached to the LAST user message; media without capability → // text path (the bridge is upstream's job, not ours). - use crate::ai::types::{ContentPart, MessageContent}; + use crate::ai::types::{ChatMessage, ContentPart, MessageContent}; use crate::cognition::tool_executor::types::MediaItemLite; use crate::model_registry::Capability; use crate::persona::prompt_assembly::PromptMessage; @@ -1049,7 +1082,7 @@ mod tests { fn no_media_returns_text_only_messages() { let prompt = vec![pm("system", "you are helpful"), pm("user", "hello")]; let caps = HashSet::new(); - let out = build_messages_with_media(prompt, &[], &caps); + let out = build_messages_with_media(prompt, &[], &caps).unwrap(); assert_eq!(out.len(), 2); assert!(matches!(out[0].content, MessageContent::Text(_))); assert!(matches!(out[1].content, MessageContent::Text(_))); @@ -1072,7 +1105,7 @@ mod tests { let prompt = vec![pm("user", "describe this")]; let media = vec![img_b64("AAAA")]; let caps = HashSet::new(); // model has NO Vision capability - let out = build_messages_with_media(prompt, &media, &caps); + let out = build_messages_with_media(prompt, &media, &caps).unwrap(); assert_eq!(out.len(), 1); // New contract (2026-04-22): when model lacks the matching // capability, ContentPart::Image bytes MUST NOT attach. The @@ -1114,7 +1147,7 @@ mod tests { let media = vec![img_b64("PNG_BASE64_DATA")]; let mut caps = HashSet::new(); caps.insert(Capability::Vision); - let out = build_messages_with_media(prompt, &media, &caps); + let out = build_messages_with_media(prompt, &media, &caps).unwrap(); assert_eq!(out.len(), 2); // System message untouched. assert!(matches!(out[0].content, MessageContent::Text(_))); @@ -1156,7 +1189,7 @@ mod tests { let media = vec![img_b64("X")]; let mut caps = HashSet::new(); caps.insert(Capability::Vision); - let out = build_messages_with_media(prompt, &media, &caps); + let out = build_messages_with_media(prompt, &media, &caps).unwrap(); // First user message stays text. match &out[0].content { MessageContent::Text(t) => assert_eq!(t, "earlier turn"), @@ -1192,7 +1225,8 @@ mod tests { }; let mut vision_only = HashSet::new(); vision_only.insert(Capability::Vision); - let out = build_messages_with_media(prompt.clone(), &[audio.clone()], &vision_only); + let out = + build_messages_with_media(prompt.clone(), &[audio.clone()], &vision_only).unwrap(); // Vision-only model: audio bytes MUST NOT attach. Wrapper MAY // be Parts(Text-marker) per the new policy contract — what // matters is no ContentPart::Audio carrying real bytes. @@ -1210,7 +1244,7 @@ mod tests { let mut audio_capable = HashSet::new(); audio_capable.insert(Capability::AudioInput); - let out = build_messages_with_media(prompt, &[audio], &audio_capable); + let out = build_messages_with_media(prompt, &[audio], &audio_capable).unwrap(); // Audio-capable model: audio attaches. match &out[0].content { MessageContent::Parts(p) => { @@ -1219,4 +1253,44 @@ mod tests { _ => panic!("audio-capable model should receive Parts"), } } + + #[test] + fn media_with_no_user_message_fails_loud() { + let prompt = vec![pm("system", "you describe images")]; + let media = vec![img_b64("PNG_BASE64_DATA")]; + let mut caps = HashSet::new(); + caps.insert(Capability::Vision); + + let err = build_messages_with_media(prompt, &media, &caps).unwrap_err(); + + assert!( + err.contains("no user message"), + "unexpected error for impossible media attachment shape: {err}" + ); + } + + #[test] + fn media_with_existing_parts_fails_loud() { + let messages = vec![ChatMessage { + role: "user".to_string(), + content: MessageContent::Parts(vec![ContentPart::Text { + text: "already structured".to_string(), + }]), + name: None, + }]; + let media_parts = vec![ContentPart::Image { + image: crate::ai::types::ImageInput { + url: None, + base64: Some("PNG_BASE64_DATA".to_string()), + mime_type: Some("image/png".to_string()), + }, + }]; + + let err = attach_media_parts_to_last_user(messages, media_parts).unwrap_err(); + + assert!( + err.contains("already has parts"), + "unexpected error for pre-structured user media target: {err}" + ); + } } diff --git a/src/workers/continuum-core/tests/fixture_assembly_replay.rs b/src/workers/continuum-core/tests/fixture_assembly_replay.rs index c4edc7eda..8df0cf4bc 100644 --- a/src/workers/continuum-core/tests/fixture_assembly_replay.rs +++ b/src/workers/continuum-core/tests/fixture_assembly_replay.rs @@ -65,10 +65,10 @@ use continuum_core::ai::types::{ContentPart, MessageContent}; use continuum_core::cognition::tool_executor::types::MediaItemLite; use continuum_core::model_registry::Capability; -use continuum_core::persona::prompt_assembly::PromptMessage; use continuum_core::persona::cognition_io::{ build_respond_input, PersonaContext, Signal, SignalKind, SignalOriginator, }; +use continuum_core::persona::prompt_assembly::PromptMessage; use continuum_core::persona::response::build_messages_with_media; use serde_json::Value; use std::collections::HashSet; @@ -215,9 +215,10 @@ fn signal_and_ctx_from_legacy_fixture( // New shape (post-IPC-reshape commit 983d30102): rust_request already // has `signal` + `personaContext` as nested objects matching the wire // shape exactly. Deserialize directly. No reconstruction needed. - if let (Some(signal_json), Some(ctx_json)) = - (rust_request.get("signal"), rust_request.get("personaContext")) - { + if let (Some(signal_json), Some(ctx_json)) = ( + rust_request.get("signal"), + rust_request.get("personaContext"), + ) { let signal: Signal = serde_json::from_value(signal_json.clone()) .map_err(|e| format!("new-shape signal deserialize failed: {e}"))?; let ctx: PersonaContext = serde_json::from_value(ctx_json.clone()) @@ -286,7 +287,9 @@ fn signal_and_ctx_from_legacy_fixture( kind: SignalKind::ChatMessage, text: message_text, media, - originator: SignalOriginator::User { user_id: Uuid::nil() }, + originator: SignalOriginator::User { + user_id: Uuid::nil(), + }, timestamp_ms: 0, message_id: Some(message_id), }; @@ -332,9 +335,21 @@ fn fixtures_replay_through_message_builder() { let media = extract_media(rust_request); let caps = extract_capabilities(rust_request); let prompt = synth_prompt_messages(rust_request); - let out = build_messages_with_media(prompt, &media, &caps); + let out = match build_messages_with_media(prompt, &media, &caps) { + Ok(out) => out, + Err(e) => { + failures.push(format!( + "[{}] build_messages_with_media failed: {}", + path.file_name().unwrap().to_string_lossy(), + e + )); + continue; + } + }; - let last = out.last().expect("builder always returns at least one message"); + let last = out + .last() + .expect("builder always returns at least one message"); let image_parts: Vec<&ContentPart> = match &last.content { MessageContent::Text(_) => Vec::new(), MessageContent::Parts(parts) => parts @@ -493,8 +508,10 @@ async fn ensure_llamacpp_qwen2vl_registered() -> Option<()> { if !gguf_path.exists() { continue; } - let mut adapter: Box = - Box::new(LlamaCppAdapter::with_model_id(gguf_path.clone(), m.id.clone())); + let mut adapter: Box = Box::new(LlamaCppAdapter::with_model_id( + gguf_path.clone(), + m.id.clone(), + )); adapter .initialize() .await @@ -537,10 +554,7 @@ async fn vision_fixture_describes_image_via_real_model() { let caps = extract_capabilities(rust_request); let has_real_image = media.iter().any(|m| { m.item_type == "image" - && m.base64 - .as_deref() - .map(|b| !b.is_empty()) - .unwrap_or(false) + && m.base64.as_deref().map(|b| !b.is_empty()).unwrap_or(false) }); has_real_image && caps.contains(&Capability::Vision) }) @@ -602,7 +616,9 @@ async fn vision_fixture_describes_image_via_real_model() { let (signal, ctx) = match signal_and_ctx_from_legacy_fixture(rust_request) { Ok(pair) => pair, Err(e) => { - failures.push(format!("[{fname}] could not build Signal+PersonaContext: {e}")); + failures.push(format!( + "[{fname}] could not build Signal+PersonaContext: {e}" + )); continue; } }; @@ -647,7 +663,9 @@ async fn vision_fixture_describes_image_via_real_model() { a response. reason: {reason}" )); } - PersonaResponse::Spoke { text, model_used, .. } => { + PersonaResponse::Spoke { + text, model_used, .. + } => { let trimmed = text.trim(); if trimmed.len() < 30 { failures.push(format!(