diff --git a/src/workers/continuum-core/src/inference/llamacpp_adapter.rs b/src/workers/continuum-core/src/inference/llamacpp_adapter.rs
index ec55dcd11..9d410dbb3 100644
--- a/src/workers/continuum-core/src/inference/llamacpp_adapter.rs
+++ b/src/workers/continuum-core/src/inference/llamacpp_adapter.rs
@@ -118,6 +118,29 @@ fn decode_data_url_or_base64(
     }
 }
 
+/// Typed failure for [`LlamaCppAdapter::try_new`] when the model
+/// registry has no `llamacpp-local` row with a resolved
+/// `gguf_local_path`. Surfaces install-time-no-Qwen state as observable
+/// runtime health rather than a process panic. Operators see this in
+/// install/health output and know exactly what's missing.
+///
+/// 2026-05-11: continuum-8e97 RTX 5090 finding showed cuda stack ready,
+/// VRAM available, zero personas replying — root cause was no Qwen
+/// GGUF seeded by carl install. Without this typed error the silent
+/// state was indistinguishable from "personas just slow."
+#[derive(Debug, thiserror::Error)]
+#[error(
+    "no `{provider_id}` model with `gguf_local_path` resolved on disk \
+     ({rows_in_registry} provider rows, {rows_with_gguf_local_path} with \
+     a path on disk). Install seeded no local Qwen GGUF — run model-init \
+     downloader or seed manually."
+)]
+pub struct NoLocalModelLoadable {
+    pub provider_id: String,
+    pub rows_in_registry: usize,
+    pub rows_with_gguf_local_path: usize,
+}
+
 /// In-process llama.cpp adapter. Lazy-loads the model on first
 /// `generate_text` call (so adapter registration doesn't pay the
 /// 5-10s model-load cost up front). After load, the backend lives for
@@ -157,27 +180,71 @@ impl LlamaCppAdapter {
     /// and uses its id + path. If the registry has no such row, panics
     /// — that's a config bug, not a runtime failure mode (per the
     /// no-fallback rule).
+    ///
+    /// Prefer [`Self::try_new`] when calling from a path that should
+    /// surface the missing-Qwen state as observable runtime health
+    /// rather than crashing the process. Boot-time health checks
+    /// (continuum status, ai/status, install-time validators) MUST use
+    /// `try_new` so an install with no Qwen seeded reports
+    /// `NoLocalModelLoadable` cleanly instead of crash-looping.
+    ///
+    /// # Panics
+    ///
+    /// Panics with the [`NoLocalModelLoadable`] message when
+    /// [`Self::try_new`] fails.
     pub fn new() -> Self {
+        Self::try_new().unwrap_or_else(|err| panic!("{err}"))
+    }
+
+    /// Result-returning variant of [`Self::new`]. Returns
+    /// [`NoLocalModelLoadable`] when the registry has no `llamacpp-local`
+    /// row with a resolved `gguf_local_path` — the typed failure mode
+    /// for "install seeded no local Qwen GGUF" which surfaces at
+    /// install-time on hosts where the model-init container did not
+    /// download a chat-capable model (RTX 5090 finding, 2026-05-11). The
+    /// caller decides whether to crash (legacy `new()` behavior),
+    /// degrade, or report the error to operators.
+    pub fn try_new() -> Result<Self, NoLocalModelLoadable> {
         let reg = crate::model_registry::global();
-        let model = reg
-            .models_for_provider(LLAMACPP_PROVIDER_ID)
-            .find(|m| m.gguf_local_path.is_some())
-            .expect(
-                "no llamacpp-local model with gguf_local_path in config/models.toml — \
-                 the in-process adapter has nothing to load",
-            );
+        Self::try_new_from(reg.models_for_provider(LLAMACPP_PROVIDER_ID))
+    }
+
+    /// Pure variant of [`Self::try_new`] taking a model iterator
+    /// directly — lets tests assemble synthetic registries without going
+    /// through the global singleton. Production code uses
+    /// [`Self::try_new`] which calls this with `global().models_for_provider(...)`.
+    ///
+    /// Selects the first row whose `gguf_local_path` is `Some`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`NoLocalModelLoadable`] when no supplied row has a
+    /// `gguf_local_path`; `rows_in_registry` is the number of rows seen.
+    pub fn try_new_from<'a, I>(models: I) -> Result<Self, NoLocalModelLoadable>
+    where
+        I: IntoIterator<Item = &'a crate::model_registry::Model>,
+    {
+        // `find_map` stops at the first match, so `rows_seen` equals the full
+        // row count only when nothing matched — the one case that reports it.
+        let mut rows_seen = 0usize;
+        let selected = models
+            .into_iter()
+            .inspect(|_| rows_seen += 1)
+            .find_map(|m| m.gguf_local_path.clone().map(|path| (m, path)));
+        let (model, model_path) = selected.ok_or_else(|| NoLocalModelLoadable {
+            provider_id: LLAMACPP_PROVIDER_ID.to_string(),
+            rows_in_registry: rows_seen,
+            // Necessarily zero here: a resolved row would have been selected.
+            rows_with_gguf_local_path: 0,
+        })?;
-        let model_path = model
-            .gguf_local_path
-            .clone()
-            .expect("gguf_local_path present — filtered by find()");
-        Self {
+        Ok(Self {
             backend: Arc::new(RwLock::new(None)),
             model_path,
             last_throughput_tok_s: Arc::new(RwLock::new(0.0)),
             default_model: model.id.clone(),
             context_length_override: None,
             kv_quant_policy: crate::inference::kv_quant::KvQuantPolicy::default(),
-        }
+        })
     }
 
     /// Override the model path. Useful for tests + when the model isn't
@@ -807,3 +874,100 @@ impl AIProviderAdapter for LlamaCppAdapter {
         self.default_model.eq_ignore_ascii_case(model_name)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::model_registry::types::{Arch, MultiPartyChatStrategy};
+    use crate::model_registry::Model;
+    use std::collections::BTreeSet;
+
+    fn synthetic_llamacpp_local_model(id: &str, gguf_path: Option<PathBuf>) -> Model {
+        Model {
+            id: id.into(),
+            name: None,
+            provider: LLAMACPP_PROVIDER_ID.into(),
+            arch: Arch::Qwen35,
+            context_window: 32_768,
+            max_output_tokens: 4096,
+            tokens_per_second: 33.0,
+            capabilities: BTreeSet::new(),
+            cost_input_per_1k: 0.0,
+            cost_output_per_1k: 0.0,
+            gguf_hint: None,
+            gguf_local_path: gguf_path,
+            mmproj_local_path: None,
+            chat_template: None,
+            multi_party_strategy: MultiPartyChatStrategy::default(),
+            stop_sequences: vec![],
+        }
+    }
+
+    #[test]
+    fn try_new_from_errors_when_no_llamacpp_local_rows() {
+        // Empty iterator — no llamacpp-local rows at all (the worst-case
+        // install state continuum-8e97 saw on RTX 5090: install seeded
+        // only voice-models, registry has no llamacpp-local Qwen row).
+        let models: Vec<Model> = vec![];
+        match LlamaCppAdapter::try_new_from(models.iter()) {
+            Err(err) => {
+                assert_eq!(err.provider_id, LLAMACPP_PROVIDER_ID);
+                assert_eq!(err.rows_in_registry, 0);
+                assert_eq!(err.rows_with_gguf_local_path, 0);
+                // Error message must name the actionable next step so
+                // operators see what to do (run model-init / seed manually).
+                let msg = format!("{err}");
+                assert!(
+                    msg.contains("model-init"),
+                    "error must name the actionable remediation: {msg}"
+                );
+            }
+            Ok(_) => panic!("expected NoLocalModelLoadable on empty registry"),
+        }
+    }
+
+    #[test]
+    fn try_new_from_errors_when_llamacpp_rows_exist_but_none_have_gguf_path() {
+        // Registry has llamacpp-local rows but artifact resolver couldn't
+        // find the GGUF on disk for any of them — `gguf_local_path` is
+        // None for every row. This is the SAME observable state as
+        // "registry empty" from the adapter's perspective: nothing to
+        // load. Operator-actionable signal must distinguish "registry is
+        // wrong" (zero rows) from "files aren't seeded" (rows exist,
+        // paths unresolved).
+        let models = vec![
+            synthetic_llamacpp_local_model("qwen3.5-4b-code-forged-GGUF", None),
+            synthetic_llamacpp_local_model("qwen2-vl-7b-instruct", None),
+        ];
+        match LlamaCppAdapter::try_new_from(models.iter()) {
+            Err(err) => {
+                assert_eq!(err.provider_id, LLAMACPP_PROVIDER_ID);
+                assert_eq!(err.rows_in_registry, 2);
+                assert_eq!(err.rows_with_gguf_local_path, 0);
+            }
+            Ok(_) => panic!("expected NoLocalModelLoadable when no row has gguf_local_path"),
+        }
+    }
+
+    #[test]
+    fn try_new_from_succeeds_with_at_least_one_resolved_path() {
+        // Mixed registry: one row has the path resolved, one doesn't.
+        // Adapter should pick the resolved row (matches the existing
+        // production behavior of legacy `new()`).
+        let resolved_path = PathBuf::from("/tmp/synthetic-test-only.gguf");
+        let models = vec![
+            synthetic_llamacpp_local_model("qwen3.5-4b-code-forged-GGUF", None),
+            synthetic_llamacpp_local_model(
+                "qwen2-vl-7b-instruct",
+                Some(resolved_path.clone()),
+            ),
+        ];
+        match LlamaCppAdapter::try_new_from(models.iter()) {
+            Ok(adapter) => {
+                assert_eq!(adapter.model_path, resolved_path);
+                assert_eq!(adapter.default_model, "qwen2-vl-7b-instruct");
+            }
+            Err(err) => panic!("expected Ok with resolved path; got {err:?}"),
+        }
+    }
+}