From f92307452eb1e27b7ed506a96458c9722cb9981b Mon Sep 17 00:00:00 2001
From: Test <joel@cambriantech.com>
Date: Thu, 7 May 2026 22:03:47 -0500
Subject: [PATCH 1/3] Add Rust model resolver with hardware capability tiers
 (Lane C)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-D from docs/architecture/ALPHA-GAP-RUST-PERSONA-RUNTIME.md: capability-shaped
model resolution with no-fallback contract. Builds on the typed model_registry
SSOT (models.toml + providers.toml + Arch/Capability vocab) and the TargetSilicon
2-axis from #1062 (and dropped_no_budget loud-fail from #1063).

cognition/model_resolver.rs (pure module — no IPC, no ORM, no inference):
- ModelRequirement: required_capabilities, arch_preference, context_window_min,
  memory_budget_mb, provider_policy, host
- ResolvedModel: model_id, provider_id, expected_memory_mb, target_silicon,
  hw_capability_tier, reason
- HwCapabilityTier: finer-grained than TargetSilicon (M1Uma8Gb..M3UmaProMax,
  Sm70..Sm120, VulkanAmd, Cloud)
- LocalOrCloudPolicy: LocalOnly | CloudOnly | PreferLocal | PreferCloud | Any
- HostCapability: per-machine snapshot (tier + memory + primary silicon)
- ResolutionError: NoModelMatchesRequirement{registry_count,
  candidates_after_filter, unmet_filters} — typed, no fallback
- resolve_model(): pure function over IntoIterator<&Model>

target_silicon derivation: local providers (llamacpp-local, docker-model-runner)
inherit host.primary_target_silicon; cloud providers always TargetSilicon::Cloud.
Hardcoded local-provider list for v1; follow-up moves it to a kind:
local|cloud field on Provider in providers.toml.

expected_memory_mb stays None until Model schema gains an estimated_memory_mb
field — separate followup. Today's resolver still rejects cloud models from
LocalOnly queries, which prevents the worst class of mis-routing.

model_registry/types.rs: Arch gains #[derive(TS)] + ts(export) parallel to the
existing Capability derivation. Backwards-compatible additive change; required
because ModelRequirement.arch_preference: Vec<Arch> crosses the TS boundary.

10 logic tests + 6 ts-rs export-binding tests = 16/16 green:
- local_chat_resolves_to_qwen35_on_m1
- vision_request_resolves_to_qwen2_vl
- cloud_only_skips_local_models
- missing_capability_errors_no_fallback (NO FALLBACK assertion)
- vision_with_local_only_on_cpu_host_still_finds_local_vision_model
- context_window_min_filters_small_models
- arch_preference_filters_to_qwen35_only
- prefer_local_ranks_local_first
- prefer_cloud_ranks_cloud_first
- five_persona_resolution_smoke (Lane C contract test)

Validation:
- cargo test --features metal,accelerate cognition::model_resolver: 16/16
- npx tsx scripts/build-with-loud-failure.ts: TypeScript compilation succeeded

Two SSOTs noted (TOML registry vs shared/models.json) — out of Lane C scope,
filed for separate consolidation followup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../generated/cognition/HostCapability.ts     |  23 +
 .../generated/cognition/HwCapabilityTier.ts   |  15 +
 .../generated/cognition/LocalOrCloudPolicy.ts |   6 +
 .../generated/cognition/ModelRequirement.ts   |  40 +
 .../generated/cognition/ResolutionError.ts    |  12 +
 .../generated/cognition/ResolvedModel.ts      |  26 +
 src/shared/generated/cognition/index.ts       |  15 +
 src/shared/generated/model_registry/Arch.ts   |  12 +
 src/shared/generated/model_registry/index.ts  |   1 +
 .../continuum-core/src/cognition/mod.rs       |   2 +
 .../src/cognition/model_resolver.rs           | 718 ++++++++++++++++++
 .../src/model_registry/types.rs               |   8 +-
 12 files changed, 877 insertions(+), 1 deletion(-)
 create mode 100644 src/shared/generated/cognition/HostCapability.ts
 create mode 100644 src/shared/generated/cognition/HwCapabilityTier.ts
 create mode 100644 src/shared/generated/cognition/LocalOrCloudPolicy.ts
 create mode 100644 src/shared/generated/cognition/ModelRequirement.ts
 create mode 100644 src/shared/generated/cognition/ResolutionError.ts
 create mode 100644 src/shared/generated/cognition/ResolvedModel.ts
 create mode 100644 src/shared/generated/model_registry/Arch.ts
 create mode 100644 src/workers/continuum-core/src/cognition/model_resolver.rs
diff --git a/src/shared/generated/cognition/HostCapability.ts b/src/shared/generated/cognition/HostCapability.ts
new file mode 100644
index 000000000..6cdf6a163
--- /dev/null
+++ b/src/shared/generated/cognition/HostCapability.ts
@@ -0,0 +1,23 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { HwCapabilityTier } from "./HwCapabilityTier";
+import type { TargetSilicon } from "./TargetSilicon";
+
+/**
+ * What the resolver knows about THIS machine. Caller populates from a
+ * hardware-detection probe at boot (see future `device_probe` module).
+ * The resolver consumes this as a snapshot — re-invoke when probe values
+ * change.
+ */
+export type HostCapability = { hwCapabilityTier: HwCapabilityTier, 
+/**
+ * Memory available for inference workloads in megabytes. For unified-
+ * memory hosts this is the share inference is willing to claim, not
+ * total system RAM.
+ */
+availableMemoryMb: number, 
+/**
+ * Which physical-budget pool inference workloads on this host should
+ * admit against. Mac M-series → `UnifiedMemory`; nVidia → `Gpu`;
+ * CPU-only → `Cpu`.
+ */
+primaryTargetSilicon: TargetSilicon, };
diff --git a/src/shared/generated/cognition/HwCapabilityTier.ts b/src/shared/generated/cognition/HwCapabilityTier.ts
new file mode 100644
index 000000000..2f239ec18
--- /dev/null
+++ b/src/shared/generated/cognition/HwCapabilityTier.ts
@@ -0,0 +1,15 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Finer-grained hardware tier than [`TargetSilicon`]. Selects which model
+ * VARIANT a host can run, not which physical-budget POOL admission uses.
+ *
+ * Example: `M1Uma8Gb` and `M3UmaProMax` both have
+ * `target_silicon == TargetSilicon::UnifiedMemory`, but only the latter
+ * can hold a 4B-parameter model alongside a 7B vision model.
+ *
+ * Lane B's lease layer + adaptive_throughput's budgets care about the
+ * pool (TargetSilicon). Lane C's resolver cares about the variant
+ * (HwCapabilityTier).
+ */
+export type HwCapabilityTier = "cpu_only" | "m1_uma8_gb" | "m1_uma16_gb" | "m2_uma_pro_max" | "m3_uma_pro_max" | "sm70" | "sm80" | "sm86" | "sm89" | "sm90" | "sm120" | "vulkan_amd" | "cloud";
diff --git a/src/shared/generated/cognition/LocalOrCloudPolicy.ts b/src/shared/generated/cognition/LocalOrCloudPolicy.ts
new file mode 100644
index 000000000..5e643cc06
--- /dev/null
+++ b/src/shared/generated/cognition/LocalOrCloudPolicy.ts
@@ -0,0 +1,6 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * How aggressively to prefer local vs cloud providers.
+ */
+export type LocalOrCloudPolicy = "local_only" | "cloud_only" | "prefer_local" | "prefer_cloud" | "any";
diff --git a/src/shared/generated/cognition/ModelRequirement.ts b/src/shared/generated/cognition/ModelRequirement.ts
new file mode 100644
index 000000000..95c4e8de9
--- /dev/null
+++ b/src/shared/generated/cognition/ModelRequirement.ts
@@ -0,0 +1,40 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { Arch } from "../model_registry/Arch";
+import type { Capability } from "../model_registry/Capability";
+import type { HostCapability } from "./HostCapability";
+import type { LocalOrCloudPolicy } from "./LocalOrCloudPolicy";
+
+/**
+ * Capability-shaped query for the resolver. Callers describe what the
+ * model needs to DO (generate text, see images, etc.) — not which model
+ * to use. Per Joel's axiom: code knows ARCHETYPES, models are data.
+ */
+export type ModelRequirement = { 
+/**
+ * Capabilities every candidate must advertise. Empty set matches any
+ * model (rare — usually callers want at least `Chat`).
+ */
+requiredCapabilities: Array<Capability>, 
+/**
+ * Architectural family preference. Empty = any architecture qualifies.
+ * When non-empty, candidates outside the preference are filtered out
+ * rather than down-ranked — caller wants this family or none.
+ */
+archPreference: Array<Arch>, 
+/**
+ * Minimum context window in tokens. `0` = any.
+ */
+contextWindowMin: number, 
+/**
+ * Maximum memory the resolved model may consume on this host, in MB.
+ * `None` = use `host.available_memory_mb` as the implicit cap.
+ */
+memoryBudgetMb?: number, 
+/**
+ * Local-vs-cloud preference. See [`LocalOrCloudPolicy`].
+ */
+providerPolicy: LocalOrCloudPolicy, 
+/**
+ * Host capability snapshot. See [`HostCapability`].
+ */
+host: HostCapability, };
diff --git a/src/shared/generated/cognition/ResolutionError.ts b/src/shared/generated/cognition/ResolutionError.ts
new file mode 100644
index 000000000..23cfbf2e1
--- /dev/null
+++ b/src/shared/generated/cognition/ResolutionError.ts
@@ -0,0 +1,12 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Why a [`resolve_model`] call failed. Each variant names the SPECIFIC
+ * filter that eliminated all candidates so the caller's error message
+ * can be actionable.
+ *
+ * No `Fallback` variant. Per Joel's rule: missing-model is an error, not
+ * a soft retry on a default. Callers that want graceful degradation must
+ * EXPLICITLY relax their requirement and re-invoke.
+ */
+export type ResolutionError = { "kind": "noModelMatchesRequirement", registry_count: number, candidates_after_filter: number, unmet_filters: Array<string>, };
diff --git a/src/shared/generated/cognition/ResolvedModel.ts b/src/shared/generated/cognition/ResolvedModel.ts
new file mode 100644
index 000000000..abc3635b6
--- /dev/null
+++ b/src/shared/generated/cognition/ResolvedModel.ts
@@ -0,0 +1,26 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { HwCapabilityTier } from "./HwCapabilityTier";
+import type { TargetSilicon } from "./TargetSilicon";
+
+/**
+ * Resolver output. Includes the silicon target so the caller can plumb it
+ * straight into a [`ThroughputJob`] without re-deriving it from the
+ * model + host.
+ */
+export type ResolvedModel = { modelId: string, providerId: string, 
+/**
+ * Expected memory footprint in megabytes if the registry knows it.
+ * `None` for cloud models (always-fits) and for local models whose
+ * row in `models.toml` doesn't yet declare a memory estimate. A
+ * follow-up adds an `estimated_memory_mb` field to the Model schema;
+ * until then memory-budget filtering is best-effort on local models
+ * (the resolver still rejects cloud models from `LocalOnly` queries).
+ */
+expectedMemoryMb?: number, targetSilicon: TargetSilicon, hwCapabilityTier: HwCapabilityTier, 
+/**
+ * Human-readable explanation of why this model was chosen. Surfaced
+ * in logs + UI when a persona's resolution changes (e.g., "switched
+ * from gpt-4o to claude-sonnet-4-5 because PreferLocal couldn't
+ * satisfy required Capability::Vision on this host").
+ */
+reason: string, };
diff --git a/src/shared/generated/cognition/index.ts b/src/shared/generated/cognition/index.ts
index 2bb2b8802..0b7a2861f 100644
--- a/src/shared/generated/cognition/index.ts
+++ b/src/shared/generated/cognition/index.ts
@@ -2,9 +2,15 @@
 // Source: generator/generate-rust-bindings.ts
 // Re-generate: npx tsx generator/generate-rust-bindings.ts
 
+export type { AdaptiveThroughputPlan } from './AdaptiveThroughputPlan';
+export type { AdaptiveThroughputRequest } from './AdaptiveThroughputRequest';
+export type { HostCapability } from './HostCapability';
+export type { HwCapabilityTier } from './HwCapabilityTier';
 export type { LeverCall } from './LeverCall';
 export type { LeverName } from './LeverName';
+export type { LocalOrCloudPolicy } from './LocalOrCloudPolicy';
 export type { MediaItemLite } from './MediaItemLite';
+export type { ModelRequirement } from './ModelRequirement';
 export type { NativeBatchOutcome } from './NativeBatchOutcome';
 export type { ParsedToolBatch } from './ParsedToolBatch';
 export type { PersonaMediaConfigLite } from './PersonaMediaConfigLite';
@@ -18,10 +24,19 @@ export type { RecipeRagSourcePolicy } from './RecipeRagSourcePolicy';
 export type { RecipeTurnBatchPlan } from './RecipeTurnBatchPlan';
 export type { RecipeTurnBatchRequest } from './RecipeTurnBatchRequest';
 export type { RecipeTurnTrigger } from './RecipeTurnTrigger';
+export type { ResolutionError } from './ResolutionError';
+export type { ResolvedModel } from './ResolvedModel';
+export type { ResourceClass } from './ResourceClass';
 export type { ResponderDecision } from './ResponderDecision';
 export type { SharedAnalysis } from './SharedAnalysis';
 export type { SharedAnalysisIntent } from './SharedAnalysisIntent';
 export type { SharedRagSourcePlan } from './SharedRagSourcePlan';
+export type { TargetSilicon } from './TargetSilicon';
+export type { ThroughputJob } from './ThroughputJob';
+export type { ThroughputLaneBudget } from './ThroughputLaneBudget';
+export type { ThroughputLease } from './ThroughputLease';
+export type { ThroughputLeaseRevocationPolicy } from './ThroughputLeaseRevocationPolicy';
+export type { ThroughputLeaseSnapshot } from './ThroughputLeaseSnapshot';
 export type { ToolExecutionContext } from './ToolExecutionContext';
 export type { ToolInvocation } from './ToolInvocation';
 export type { ToolOutcome } from './ToolOutcome';
diff --git a/src/shared/generated/model_registry/Arch.ts b/src/shared/generated/model_registry/Arch.ts
new file mode 100644
index 000000000..1a5a81282
--- /dev/null
+++ b/src/shared/generated/model_registry/Arch.ts
@@ -0,0 +1,12 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Model architecture family. Typed (not stringly-typed) so call sites
+ * use enum matching, not string comparison. Adding a new arch means:
+ * (a) add the variant here, (b) add a TOML row with `arch = "new_arch"`.
+ * Code that dispatches by arch gets a compile error reminding the author
+ * to handle the new variant — precisely the pattern Joel's axiom calls
+ * for ("code should NEVER know the model" — code knows the ARCHETYPES
+ * via this enum, models are data).
+ */
+export type Arch = "qwen2" | "qwen3" | "qwen35" | "llama" | "claude" | "gpt" | "gemini" | "grok" | "deepseek" | "unknown";
diff --git a/src/shared/generated/model_registry/index.ts b/src/shared/generated/model_registry/index.ts
index 700da966a..afd28d110 100644
--- a/src/shared/generated/model_registry/index.ts
+++ b/src/shared/generated/model_registry/index.ts
@@ -2,4 +2,5 @@
 // Source: generator/generate-rust-bindings.ts
 // Re-generate: npx tsx generator/generate-rust-bindings.ts
 
+export type { Arch } from './Arch';
 export type { Capability } from './Capability';
diff --git a/src/workers/continuum-core/src/cognition/mod.rs b/src/workers/continuum-core/src/cognition/mod.rs
index 08358c12e..93156f21c 100644
--- a/src/workers/continuum-core/src/cognition/mod.rs
+++ b/src/workers/continuum-core/src/cognition/mod.rs
@@ -28,6 +28,7 @@
 //!                                  `ResponderDecision`)
 
 pub mod adaptive_throughput;
+pub mod model_resolver;
 pub mod response_orchestrator;
 pub mod response_validator;
 pub mod shared_analysis;
@@ -37,6 +38,7 @@ pub mod turn_batch;
 pub mod types;
 
 pub use adaptive_throughput::*;
+pub use model_resolver::*;
 pub use response_orchestrator::{
     DEFAULT_RELEVANCE_THRESHOLD, PersonaSlot, orchestrate, score_persona,
 };
diff --git a/src/workers/continuum-core/src/cognition/model_resolver.rs b/src/workers/continuum-core/src/cognition/model_resolver.rs
new file mode 100644
index 000000000..de754e247
--- /dev/null
+++ b/src/workers/continuum-core/src/cognition/model_resolver.rs
@@ -0,0 +1,718 @@
+//! Model resolver — capability-shaped model selection.
+//!
+//! Pure contract for "given a ModelRequirement, which concrete model_id
+//! satisfies it on this host?" Does not load models, initialize backends,
+//! or call providers. Does not invent fallbacks: a requirement that cannot
+//! be satisfied returns a typed [`ResolutionError`], not a best-guess model.
+//!
+//! Per Joel's rule (`fallbacks are illegal`): callers handle the error
+//! explicitly. There is no fall-through to a base model — that turns silent
+//! capability mismatches into runtime failures downstream.
+//!
+//! The resolver is the lookup half of the Adaptive Throughput Substrate.
+//! `adaptive_throughput` plans LANES; this module picks WHICH MODEL fills
+//! a given lane's request. The two share [`TargetSilicon`] as the join
+//! key — `ResolvedModel.target_silicon` flows into
+//! `ThroughputJob.target_silicon` when the resolver's output is admitted.
+//!
+//! Symmetrical to `adaptive_throughput.rs`: pure planner, callers re-invoke
+//! when host capabilities change (e.g., another model evicted, GPU
+//! pressure shifted).
+//!
+//! Source-of-truth ordering for model data: this module reads Models from
+//! the typed registry (`crate::model_registry`). It does NOT itself read
+//! `models.toml` or `models.json` — the registry already loaded both.
+
+use crate::cognition::adaptive_throughput::TargetSilicon;
+use crate::model_registry::types::{Arch, Capability, Model};
+use serde::{Deserialize, Serialize};
+use std::collections::BTreeSet;
+use ts_rs::TS;
+
+/// Finer-grained hardware tier than [`TargetSilicon`]. Selects which model
+/// VARIANT a host can run, not which physical-budget POOL admission uses.
+///
+/// Example: `M1Uma8Gb` and `M3UmaProMax` both have
+/// `target_silicon == TargetSilicon::UnifiedMemory`, but only the latter
+/// can hold a 4B-parameter model alongside a 7B vision model.
+///
+/// Lane B's lease layer + adaptive_throughput's budgets care about the
+/// pool (TargetSilicon). Lane C's resolver cares about the variant
+/// (HwCapabilityTier).
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize, TS)]
+#[serde(rename_all = "snake_case")] // wire names per the generated TS, e.g. `M1Uma8Gb` -> "m1_uma8_gb"
+#[ts(
+    export,
+    export_to = "../../../shared/generated/cognition/HwCapabilityTier.ts"
+)]
+pub enum HwCapabilityTier {
+    /// No GPU, no NPU. Inference happens on CPU only.
+    CpuOnly,
+    /// Apple M1, 8GB unified memory. MBA-tier baseline.
+    M1Uma8Gb,
+    /// Apple M1/M2, 16GB unified memory.
+    M1Uma16Gb,
+    /// Apple M2/M3 Pro/Max, 32GB+ unified memory. NOTE(review): M3 overlaps `M3UmaProMax`; confirm.
+    M2UmaProMax,
+    /// Apple M3 Pro/Max/Ultra, 32GB+ unified memory.
+    M3UmaProMax,
+    /// nVidia compute capability 7.0 (V100).
+    Sm70,
+    /// nVidia compute capability 8.0 (A100).
+    Sm80,
+    /// nVidia compute capability 8.6 (RTX 30xx, A40).
+    Sm86,
+    /// nVidia compute capability 8.9 (RTX 40xx).
+    Sm89,
+    /// nVidia compute capability 9.0 (H100).
+    Sm90,
+    /// nVidia compute capability 12.0 (RTX 50xx).
+    Sm120,
+    /// AMD GPU via Vulkan backend.
+    VulkanAmd,
+    /// Remote inference — host capability irrelevant.
+    Cloud,
+}
+
+/// How aggressively to prefer local vs cloud providers.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize, TS)]
+#[serde(rename_all = "snake_case")]
+#[ts(
+    export,
+    export_to = "../../../shared/generated/cognition/LocalOrCloudPolicy.ts"
+)]
+pub enum LocalOrCloudPolicy {
+    /// Match local providers only. Cloud models are filtered out.
+    LocalOnly,
+    /// Match cloud providers only. Local models are filtered out.
+    CloudOnly,
+    /// Both eligible; a surviving local model is picked ahead of any cloud model.
+    PreferLocal,
+    /// Both eligible; a surviving cloud model is picked ahead of any local model.
+    PreferCloud,
+    /// Both eligible; no reordering, so the first survivor in catalog order is picked.
+    Any,
+}
+
+/// What the resolver knows about THIS machine. Caller populates from a
+/// hardware-detection probe at boot (see future `device_probe` module).
+/// The resolver consumes this as a snapshot — re-invoke when probe values
+/// change.
+#[derive(Debug, Clone, Serialize, Deserialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(
+    export,
+    export_to = "../../../shared/generated/cognition/HostCapability.ts"
+)]
+pub struct HostCapability {
+    pub hw_capability_tier: HwCapabilityTier, // echoed unchanged into `ResolvedModel.hw_capability_tier`
+    /// Memory available for inference workloads in megabytes. For unified-
+    /// memory hosts this is the share inference is willing to claim, not
+    /// total system RAM.
+    pub available_memory_mb: u32, // NOTE(review): not read by `resolve_model` in this version
+    /// Which physical-budget pool inference workloads on this host should
+    /// admit against. Mac M-series → `UnifiedMemory`; nVidia → `Gpu`;
+    /// CPU-only → `Cpu`.
+    pub primary_target_silicon: TargetSilicon, // becomes `target_silicon` when a local provider is picked
+}
+
+/// Capability-shaped query for the resolver. Callers describe what the
+/// model needs to DO (generate text, see images, etc.) — not which model
+/// to use. Per Joel's axiom: code knows ARCHETYPES, models are data.
+#[derive(Debug, Clone, Serialize, Deserialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(
+    export,
+    export_to = "../../../shared/generated/cognition/ModelRequirement.ts"
+)]
+pub struct ModelRequirement {
+    /// Capabilities every candidate must advertise. Empty set matches any
+    /// model (rare — usually callers want at least `Chat`).
+    pub required_capabilities: BTreeSet<Capability>,
+    /// Architectural family preference. Empty = any architecture qualifies.
+    /// When non-empty, candidates outside the preference are filtered out
+    /// rather than down-ranked — caller wants this family or none.
+    #[serde(default)]
+    pub arch_preference: Vec<Arch>,
+    /// Minimum context window in tokens. `0` = any.
+    #[serde(default)]
+    pub context_window_min: u32,
+    /// Maximum memory the resolved model may consume on this host, in MB.
+    /// `None` = use `host.available_memory_mb` as the implicit cap.
+    #[ts(optional)]
+    pub memory_budget_mb: Option<u32>, // NOTE(review): not read by `resolve_model` yet, so no cap is enforced
+    /// Local-vs-cloud preference. See [`LocalOrCloudPolicy`].
+    pub provider_policy: LocalOrCloudPolicy,
+    /// Host capability snapshot. See [`HostCapability`].
+    pub host: HostCapability,
+}
+
+/// Resolver output. Includes the silicon target so the caller can plumb it
+/// straight into a [`ThroughputJob`] without re-deriving it from the
+/// model + host.
+#[derive(Debug, Clone, Serialize, Deserialize, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(
+    export,
+    export_to = "../../../shared/generated/cognition/ResolvedModel.ts"
+)]
+pub struct ResolvedModel {
+    pub model_id: String,
+    pub provider_id: String,
+    /// Expected memory footprint in megabytes if the registry knows it.
+    /// `None` for cloud models (always-fits) and for local models whose
+    /// row in `models.toml` doesn't yet declare a memory estimate. A
+    /// follow-up adds an `estimated_memory_mb` field to the Model schema;
+    /// until then memory-budget filtering is best-effort on local models
+    /// (the resolver still rejects cloud models from `LocalOnly` queries).
+    #[ts(optional)] // NOTE(review): TS type is `?: number`, but no `skip_serializing_if`, so `None` likely goes out as `null`
+    pub expected_memory_mb: Option<u32>, // `resolve_model` always sets this to `None` today
+    pub target_silicon: TargetSilicon,
+    pub hw_capability_tier: HwCapabilityTier, // copied from the request's host, even when a cloud model is picked
+    /// Human-readable explanation of why this model was chosen. Surfaced
+    /// in logs + UI when a persona's resolution changes (e.g., "switched
+    /// from gpt-4o to claude-sonnet-4-5 because PreferLocal couldn't
+    /// satisfy required Capability::Vision on this host").
+    pub reason: String,
+}
+
+/// Why a [`resolve_model`] call failed. Each variant names the SPECIFIC
+/// filter that eliminated all candidates so the caller's error message
+/// can be actionable.
+///
+/// No `Fallback` variant. Per Joel's rule: missing-model is an error, not
+/// a soft retry on a default. Callers that want graceful degradation must
+/// EXPLICITLY relax their requirement and re-invoke.
+#[derive(Debug, Clone, Serialize, Deserialize, TS, thiserror::Error)]
+#[serde(rename_all = "camelCase", tag = "kind")] // renames the variant tag only; fields stay snake_case on the wire
+#[ts(
+    export,
+    export_to = "../../../shared/generated/cognition/ResolutionError.ts"
+)]
+pub enum ResolutionError {
+    #[error(
+        "no model satisfies requirement: {registry_count} models in registry, \
+         {candidates_after_filter} survived filtering. unmet: {unmet_filters:?}"
+    )]
+    NoModelMatchesRequirement {
+        registry_count: usize, // size of the catalog handed to `resolve_model`, before any filtering
+        candidates_after_filter: usize, // every return site in `resolve_model` passes 0 here
+        unmet_filters: Vec<String>, // human-readable `name=value` descriptions, not machine-parseable keys
+    },
+}
+
+/// Provider ids treated as local. Hardcoded for v1; a follow-up moves this
+/// to a `kind: local|cloud` field on `Provider` in `providers.toml`.
+const LOCAL_PROVIDER_IDS: &[&str] = &["llamacpp-local", "docker-model-runner"]; // anything else counts as cloud
+
+fn is_local_provider(provider_id: &str) -> bool { // exact, case-sensitive match against the list above
+    LOCAL_PROVIDER_IDS.contains(&provider_id)
+}
+
+fn derive_target_silicon(model: &Model, host: &HostCapability) -> TargetSilicon { // picks the budget pool for `model`
+    if is_local_provider(&model.provider) {
+        host.primary_target_silicon // local inference runs against this host's own pool
+    } else {
+        TargetSilicon::Cloud // every provider id outside LOCAL_PROVIDER_IDS lands here, including unknown ones
+    }
+}
+
+/// Resolve a [`ModelRequirement`] against a model catalog. Pure: caller
+/// supplies the iterator of [`Model`] (typically `registry.models()`).
+///
+/// Filter order (each step records the unmet predicate when it eliminates
+/// the last candidate, so the error names the specific cause):
+/// 1. `required_capabilities` — every cap must be advertised
+/// 2. `arch_preference` — when non-empty, must match
+/// 3. `context_window_min` — model's window ≥ requirement; skipped when `0`
+/// 4. `provider_policy` — `LocalOnly`/`CloudOnly` filter; other policies keep everything
+/// 5. memory budget — NOT applied yet: `memory_budget_mb` and `host.available_memory_mb` are never read
+///
+/// Returns the first survivor under the policy's ranking. `PreferLocal`
+/// puts local providers first; `PreferCloud` puts cloud providers first;
+/// other policies preserve registry order.
+pub fn resolve_model<'a, I>(
+    requirement: &ModelRequirement,
+    models: I,
+) -> Result<ResolvedModel, ResolutionError>
+where
+    I: IntoIterator<Item = &'a Model>,
+{
+    let registry: Vec<&Model> = models.into_iter().collect();
+    let registry_count = registry.len();
+    let mut unmet: Vec<String> = Vec::new();
+
+    // Filter 1: required capabilities. An empty requirement set keeps every model.
+    let mut candidates: Vec<&Model> = registry
+        .iter()
+        .copied()
+        .filter(|m| {
+            requirement
+                .required_capabilities
+                .iter()
+                .all(|c| m.has(*c))
+        })
+        .collect();
+    if candidates.is_empty() && !requirement.required_capabilities.is_empty() {
+        unmet.push(format!(
+            "required_capabilities={:?}",
+            requirement.required_capabilities
+        ));
+        return Err(ResolutionError::NoModelMatchesRequirement {
+            registry_count,
+            candidates_after_filter: 0,
+            unmet_filters: unmet,
+        });
+    }
+
+    // Filter 2: arch preference. Skipped entirely when the preference list is empty.
+    if !requirement.arch_preference.is_empty() {
+        let after_arch: Vec<&Model> = candidates
+            .iter()
+            .copied()
+            .filter(|m| requirement.arch_preference.contains(&m.arch))
+            .collect();
+        if after_arch.is_empty() {
+            unmet.push(format!(
+                "arch_preference={:?} (no survivor matched)",
+                requirement.arch_preference
+            ));
+            return Err(ResolutionError::NoModelMatchesRequirement {
+                registry_count,
+                candidates_after_filter: 0,
+                unmet_filters: unmet,
+            });
+        }
+        candidates = after_arch;
+    }
+
+    // Filter 3: context window minimum. A minimum of 0 means "any", so the filter is skipped.
+    if requirement.context_window_min > 0 {
+        let before = candidates.len();
+        candidates.retain(|m| m.context_window >= requirement.context_window_min);
+        if candidates.is_empty() {
+            unmet.push(format!(
+                "context_window_min={} (eliminated {} candidates)",
+                requirement.context_window_min, before
+            ));
+            return Err(ResolutionError::NoModelMatchesRequirement {
+                registry_count,
+                candidates_after_filter: 0,
+                unmet_filters: unmet,
+            });
+        }
+    }
+
+    // Filter 4: provider policy. This emptiness check always runs; the `expect` below relies on it.
+    let before_provider = candidates.len();
+    candidates.retain(|m| match requirement.provider_policy {
+        LocalOrCloudPolicy::LocalOnly => is_local_provider(&m.provider),
+        LocalOrCloudPolicy::CloudOnly => !is_local_provider(&m.provider),
+        LocalOrCloudPolicy::PreferLocal
+        | LocalOrCloudPolicy::PreferCloud
+        | LocalOrCloudPolicy::Any => true,
+    });
+    if candidates.is_empty() {
+        unmet.push(format!(
+            "provider_policy={:?} (eliminated {} candidates)",
+            requirement.provider_policy, before_provider
+        ));
+        return Err(ResolutionError::NoModelMatchesRequirement {
+            registry_count,
+            candidates_after_filter: 0,
+            unmet_filters: unmet,
+        });
+    }
+
+    // Rank: PreferLocal/PreferCloud stable-sort (ties keep registry order); others leave order alone.
+    match requirement.provider_policy {
+        LocalOrCloudPolicy::PreferLocal => {
+            candidates.sort_by_key(|m| u8::from(!is_local_provider(&m.provider)));
+        }
+        LocalOrCloudPolicy::PreferCloud => {
+            candidates.sort_by_key(|m| u8::from(is_local_provider(&m.provider)));
+        }
+        _ => {}
+    }
+
+    let best = candidates.first().expect("non-empty after filters");
+    let target_silicon = derive_target_silicon(best, &requirement.host);
+    let reason = format!(
+        "matched {} required capability(ies) on arch={:?}, context={}, provider={}, policy={:?}",
+        requirement.required_capabilities.len(),
+        best.arch,
+        best.context_window,
+        best.provider,
+        requirement.provider_policy,
+    );
+
+    Ok(ResolvedModel {
+        model_id: best.id.clone(),
+        provider_id: best.provider.clone(),
+        // expected_memory_mb stays None until the Model schema gains an
+        // `estimated_memory_mb` field. Not blocking for v1; the
+        // LocalOnly/CloudOnly filter already prevents the worst class of
+        // mis-routing (running a 7B model on the cloud lane).
+        expected_memory_mb: None,
+        target_silicon,
+        hw_capability_tier: requirement.host.hw_capability_tier,
+        reason,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::model_registry::types::MultiPartyChatStrategy;
+
+    fn make_model(
+        id: &str,
+        provider: &str,
+        arch: Arch,
+        context_window: u32,
+        caps: &[Capability],
+    ) -> Model {
+        Model { // test fixture: only the five arguments vary; every other field is fixed filler
+            id: id.into(),
+            name: None,
+            provider: provider.into(),
+            arch,
+            context_window,
+            max_output_tokens: 4096,
+            tokens_per_second: 50.0,
+            capabilities: caps.iter().copied().collect(),
+            cost_input_per_1k: 0.0,
+            cost_output_per_1k: 0.0,
+            gguf_hint: None,
+            gguf_local_path: None,
+            mmproj_local_path: None,
+            chat_template: None,
+            multi_party_strategy: MultiPartyChatStrategy::default(),
+            stop_sequences: vec![],
+        }
+    }
+
+    fn host_m1_8gb() -> HostCapability {
+        HostCapability {
+            hw_capability_tier: HwCapabilityTier::M1Uma8Gb,
+            available_memory_mb: 6144, // 6 GiB: the inference share of the tier's 8GB, not the whole machine
+            primary_target_silicon: TargetSilicon::UnifiedMemory,
+        }
+    }
+
+    fn host_rtx5090() -> HostCapability {
+        HostCapability {
+            hw_capability_tier: HwCapabilityTier::Sm120, // compute capability 12.0, the RTX 50xx tier
+            available_memory_mb: 32768, // 32 GiB
+            primary_target_silicon: TargetSilicon::Gpu,
+        }
+    }
+
+    fn host_cpu_only() -> HostCapability {
+        HostCapability {
+            hw_capability_tier: HwCapabilityTier::CpuOnly,
+            available_memory_mb: 8192,
+            primary_target_silicon: TargetSilicon::Cpu,
+        }
+    }
+
+    fn registry() -> Vec<Model> {
+        vec![
+            make_model(
+                "claude-sonnet-4-5-20250929",
+                "anthropic",
+                Arch::Claude,
+                200_000,
+                &[
+                    Capability::TextGeneration,
+                    Capability::Chat,
+                    Capability::ToolUse,
+                    Capability::Vision,
+                    Capability::Streaming,
+                ],
+            ),
+            make_model(
+                "gpt-4o",
+                "openai",
+                Arch::Gpt,
+                128_000,
+                &[
+                    Capability::TextGeneration,
+                    Capability::Chat,
+                    Capability::Vision,
+                    Capability::AudioInput,
+                    Capability::AudioOutput,
+                ],
+            ),
+            make_model(
+                "continuum-ai/qwen3.5-4b-code-forged-GGUF",
+                "llamacpp-local",
+                Arch::Qwen35,
+                262_144,
+                &[
+                    Capability::TextGeneration,
+                    Capability::Chat,
+                    Capability::ToolUse,
+                ],
+            ),
+            make_model(
+                "qwen2-vl-7b-instruct",
+                "llamacpp-local",
+                Arch::Qwen2,
+                32_768,
+                &[
+                    Capability::TextGeneration,
+                    Capability::Chat,
+                    Capability::Vision,
+                ],
+            ),
+            make_model(
+                "qwen2-0.5b-gating",
+                "llamacpp-local",
+                Arch::Qwen2,
+                8_192,
+                &[Capability::TextGeneration, Capability::Chat],
+            ),
+        ]
+    }
+
+    fn req_chat_local(host: HostCapability) -> ModelRequirement {
+        ModelRequirement {
+            required_capabilities: [Capability::Chat].iter().copied().collect(),
+            arch_preference: vec![],
+            context_window_min: 0,
+            memory_budget_mb: None,
+            provider_policy: LocalOrCloudPolicy::LocalOnly,
+            host,
+        }
+    }
+
+    fn req_vision_local(host: HostCapability) -> ModelRequirement {
+        ModelRequirement {
+            required_capabilities: [Capability::Chat, Capability::Vision]
+                .iter()
+                .copied()
+                .collect(),
+            arch_preference: vec![],
+            context_window_min: 0,
+            memory_budget_mb: None,
+            provider_policy: LocalOrCloudPolicy::LocalOnly,
+            host,
+        }
+    }
+
+    #[test]
+    fn local_chat_resolves_to_qwen35_on_m1() {
+        let r = registry();
+        let resolved = resolve_model(&req_chat_local(host_m1_8gb()), r.iter()).unwrap();
+        assert_eq!(resolved.provider_id, "llamacpp-local");
+        assert!(
+            resolved.model_id.starts_with("continuum-ai/qwen3.5") || resolved.model_id.starts_with("qwen2"),
+            "expected a local qwen model, got {}",
+            resolved.model_id,
+        );
+        assert_eq!(resolved.target_silicon, TargetSilicon::UnifiedMemory);
+        assert_eq!(resolved.hw_capability_tier, HwCapabilityTier::M1Uma8Gb);
+    }
+
+    #[test]
+    fn vision_request_resolves_to_qwen2_vl() {
+        let r = registry();
+        let resolved = resolve_model(&req_vision_local(host_rtx5090()), r.iter()).unwrap();
+        assert_eq!(resolved.model_id, "qwen2-vl-7b-instruct");
+        assert_eq!(resolved.provider_id, "llamacpp-local");
+        assert_eq!(resolved.target_silicon, TargetSilicon::Gpu);
+        assert_eq!(resolved.hw_capability_tier, HwCapabilityTier::Sm120);
+    }
+
+    #[test]
+    fn cloud_only_skips_local_models() {
+        let r = registry();
+        let mut req = req_chat_local(host_rtx5090());
+        req.provider_policy = LocalOrCloudPolicy::CloudOnly;
+        let resolved = resolve_model(&req, r.iter()).unwrap();
+        assert!(
+            ["anthropic", "openai"].contains(&resolved.provider_id.as_str()),
+            "expected cloud provider, got {}",
+            resolved.provider_id,
+        );
+        assert_eq!(resolved.target_silicon, TargetSilicon::Cloud);
+    }
+
+    #[test]
+    fn missing_capability_errors_no_fallback() {
+        let r = registry();
+        let req = ModelRequirement {
+            required_capabilities: [Capability::ImageGeneration].iter().copied().collect(),
+            arch_preference: vec![],
+            context_window_min: 0,
+            memory_budget_mb: None,
+            provider_policy: LocalOrCloudPolicy::Any,
+            host: host_rtx5090(),
+        };
+        let err = resolve_model(&req, r.iter()).unwrap_err();
+        let ResolutionError::NoModelMatchesRequirement {
+            registry_count,
+            candidates_after_filter,
+            unmet_filters,
+        } = err;
+        assert_eq!(registry_count, r.len());
+        assert_eq!(candidates_after_filter, 0);
+        assert!(
+            unmet_filters.iter().any(|f| f.contains("ImageGeneration")),
+            "unmet filters should name ImageGeneration: {unmet_filters:?}"
+        );
+    }
+
+    #[test]
+    fn vision_with_local_only_on_cpu_host_still_finds_local_vision_model() {
+        // Even on a CPU-only host, the resolver should return the local
+        // vision model — admission/feasibility is the substrate's job
+        // (adaptive_throughput will refuse the lane if the host can't
+        // run it). The resolver answers "what fits the requirement,"
+        // not "what will succeed at inference time."
+        let r = registry();
+        let resolved = resolve_model(&req_vision_local(host_cpu_only()), r.iter()).unwrap();
+        assert_eq!(resolved.model_id, "qwen2-vl-7b-instruct");
+        assert_eq!(resolved.target_silicon, TargetSilicon::Cpu);
+        assert_eq!(resolved.hw_capability_tier, HwCapabilityTier::CpuOnly);
+    }
+
+    #[test]
+    fn context_window_min_filters_small_models() {
+        let r = registry();
+        let req = ModelRequirement {
+            required_capabilities: [Capability::Chat].iter().copied().collect(),
+            arch_preference: vec![],
+            context_window_min: 100_000,
+            memory_budget_mb: None,
+            provider_policy: LocalOrCloudPolicy::LocalOnly,
+            host: host_rtx5090(),
+        };
+        let resolved = resolve_model(&req, r.iter()).unwrap();
+        // Only qwen3.5-4b (262144 ctx) survives among local with ≥100k window.
+        assert_eq!(resolved.model_id, "continuum-ai/qwen3.5-4b-code-forged-GGUF");
+    }
+
+    #[test]
+    fn arch_preference_filters_to_qwen35_only() {
+        let r = registry();
+        let req = ModelRequirement {
+            required_capabilities: [Capability::Chat].iter().copied().collect(),
+            arch_preference: vec![Arch::Qwen35],
+            context_window_min: 0,
+            memory_budget_mb: None,
+            provider_policy: LocalOrCloudPolicy::Any,
+            host: host_rtx5090(),
+        };
+        let resolved = resolve_model(&req, r.iter()).unwrap();
+        assert_eq!(resolved.model_id, "continuum-ai/qwen3.5-4b-code-forged-GGUF");
+    }
+
+    #[test]
+    fn prefer_local_ranks_local_first() {
+        let r = registry();
+        let req = ModelRequirement {
+            required_capabilities: [Capability::Chat, Capability::Vision]
+                .iter()
+                .copied()
+                .collect(),
+            arch_preference: vec![],
+            context_window_min: 0,
+            memory_budget_mb: None,
+            provider_policy: LocalOrCloudPolicy::PreferLocal,
+            host: host_rtx5090(),
+        };
+        let resolved = resolve_model(&req, r.iter()).unwrap();
+        assert_eq!(resolved.provider_id, "llamacpp-local");
+        assert_eq!(resolved.model_id, "qwen2-vl-7b-instruct");
+    }
+
+    #[test]
+    fn prefer_cloud_ranks_cloud_first() {
+        let r = registry();
+        let req = ModelRequirement {
+            required_capabilities: [Capability::Chat, Capability::Vision]
+                .iter()
+                .copied()
+                .collect(),
+            arch_preference: vec![],
+            context_window_min: 0,
+            memory_budget_mb: None,
+            provider_policy: LocalOrCloudPolicy::PreferCloud,
+            host: host_rtx5090(),
+        };
+        let resolved = resolve_model(&req, r.iter()).unwrap();
+        assert!(
+            ["anthropic", "openai"].contains(&resolved.provider_id.as_str()),
+            "expected cloud first, got {}",
+            resolved.provider_id,
+        );
+    }
+
+    #[test]
+    fn five_persona_resolution_smoke() {
+        // Lane C contract test: 5 personas with different needs all
+        // resolve to the correct concrete model + missing path errors.
+        let r = registry();
+
+        // Persona 1: Helper AI — local chat.
+        let helper = resolve_model(&req_chat_local(host_m1_8gb()), r.iter()).unwrap();
+        assert_eq!(helper.provider_id, "llamacpp-local");
+
+        // Persona 2: Vision AI — local vision.
+        let vision = resolve_model(&req_vision_local(host_m1_8gb()), r.iter()).unwrap();
+        assert_eq!(vision.model_id, "qwen2-vl-7b-instruct");
+
+        // Persona 3: Cloud-only persona — wants vision via cloud.
+        let mut cloud_vision_req = req_vision_local(host_m1_8gb());
+        cloud_vision_req.provider_policy = LocalOrCloudPolicy::CloudOnly;
+        let cloud_vision = resolve_model(&cloud_vision_req, r.iter()).unwrap();
+        assert!(
+            ["anthropic", "openai"].contains(&cloud_vision.provider_id.as_str()),
+            "expected cloud, got {}",
+            cloud_vision.provider_id,
+        );
+
+        // Persona 4: Audio-input persona under policy `Any` (no local audio
+        // model in registry — should resolve to gpt-4o which has audio-input).
+        let mut audio_req = req_chat_local(host_rtx5090());
+        audio_req.required_capabilities = [Capability::Chat, Capability::AudioInput]
+            .iter()
+            .copied()
+            .collect();
+        audio_req.provider_policy = LocalOrCloudPolicy::Any;
+        let audio = resolve_model(&audio_req, r.iter()).unwrap();
+        assert_eq!(audio.model_id, "gpt-4o");
+
+        // Persona 5: Code persona requiring tool-use — qwen3.5 OR claude.
+        let mut code_req = req_chat_local(host_rtx5090());
+        code_req.required_capabilities = [Capability::Chat, Capability::ToolUse]
+            .iter()
+            .copied()
+            .collect();
+        code_req.provider_policy = LocalOrCloudPolicy::PreferLocal;
+        let code = resolve_model(&code_req, r.iter()).unwrap();
+        assert_eq!(code.provider_id, "llamacpp-local");
+        assert_eq!(code.model_id, "continuum-ai/qwen3.5-4b-code-forged-GGUF");
+
+        // Missing-model error path: persona requires ImageGeneration which
+        // none of the registered models advertise. Must error, not fall
+        // back.
+        let img_req = ModelRequirement {
+            required_capabilities: [Capability::ImageGeneration].iter().copied().collect(),
+            arch_preference: vec![],
+            context_window_min: 0,
+            memory_budget_mb: None,
+            provider_policy: LocalOrCloudPolicy::Any,
+            host: host_rtx5090(),
+        };
+        assert!(
+            matches!(
+                resolve_model(&img_req, r.iter()),
+                Err(ResolutionError::NoModelMatchesRequirement { .. })
+            ),
+            "missing capability must error, not fall back"
+        );
+    }
+}
diff --git a/src/workers/continuum-core/src/model_registry/types.rs b/src/workers/continuum-core/src/model_registry/types.rs
index 42eb461b9..33aa1376c 100644
--- a/src/workers/continuum-core/src/model_registry/types.rs
+++ b/src/workers/continuum-core/src/model_registry/types.rs
@@ -16,7 +16,13 @@ use std::path::PathBuf;
 /// to handle the new variant — precisely the pattern Joel's axiom calls
 /// for ("code should NEVER know the model" — code knows the ARCHETYPES
 /// via this enum, models are data).
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, ts_rs::TS,
+)]
+#[ts(
+    export,
+    export_to = "../../../shared/generated/model_registry/Arch.ts"
+)]
 #[serde(rename_all = "snake_case")]
 pub enum Arch {
     Qwen2,

From d45788e24c29f60d141807e91730460044c43e20 Mon Sep 17 00:00:00 2001
From: Test <joel@cambriantech.com>
Date: Thu, 7 May 2026 22:08:29 -0500
Subject: [PATCH 2/3] Doc: HwCapabilityTier closed-by-design + memory_budget_mb
 not-yet-enforced
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address sibling Mac review on PR #1066 — non-blocking doc-clarity flags:

(α) HwCapabilityTier doc: spell out the closed-enum design choice. New
hardware classes require enum-edit + ts-rs regen + an explicit alias
decision. No Other(String) / wildcard fallback variant by design — silent
routing to a default tier hides exactly the capacity-mismatch bugs the
resolver exists to catch. Per Joel's no-fallback rule.

(β) ModelRequirement.memory_budget_mb doc: explicitly state OBSERVED but
NOT ENFORCED until Model schema gains estimated_memory_mb. Without this
note, callers may pass it expecting filtering and silently get over-
budget models. Loud-fail on memory pressure is a downstream Lane B
(FootprintRegistry / PressureBroker) concern, not a resolver filter.

ts-rs regenerated HwCapabilityTier.ts + ModelRequirement.ts with new
docstrings. cargo test --features metal,accelerate cognition::model_resolver:
16/16 still green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../generated/cognition/HwCapabilityTier.ts   | 10 ++++++++++
 .../generated/cognition/ModelRequirement.ts   | 10 ++++++++++
 .../src/cognition/model_resolver.rs           | 20 +++++++++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/src/shared/generated/cognition/HwCapabilityTier.ts b/src/shared/generated/cognition/HwCapabilityTier.ts
index 2f239ec18..bdcd75b0f 100644
--- a/src/shared/generated/cognition/HwCapabilityTier.ts
+++ b/src/shared/generated/cognition/HwCapabilityTier.ts
@@ -11,5 +11,15 @@
  * Lane B's lease layer + adaptive_throughput's budgets care about the
  * pool (TargetSilicon). Lane C's resolver cares about the variant
  * (HwCapabilityTier).
+ *
+ * **Closed enum by design.** New hardware classes (RTX 6090 → `Sm130`,
+ * M4, future Apple silicon) require an enum-edit + ts-rs regen + an
+ * explicit decision on which existing variant — if any — they alias to.
+ * There is intentionally no `Other(String)` or wildcard fallback variant:
+ * "unknown hardware" silently routing to a default tier hides
+ * capacity-mismatch bugs the resolver exists to catch. See Joel's rule
+ * on no fallbacks (`docs/architecture/...`). Adding a tier means the
+ * caller's hardware probe must produce it AND every match-on-tier site
+ * gets a compile error reminding the author to handle it.
  */
 export type HwCapabilityTier = "cpu_only" | "m1_uma8_gb" | "m1_uma16_gb" | "m2_uma_pro_max" | "m3_uma_pro_max" | "sm70" | "sm80" | "sm86" | "sm89" | "sm90" | "sm120" | "vulkan_amd" | "cloud";
diff --git a/src/shared/generated/cognition/ModelRequirement.ts b/src/shared/generated/cognition/ModelRequirement.ts
index 95c4e8de9..c547d256e 100644
--- a/src/shared/generated/cognition/ModelRequirement.ts
+++ b/src/shared/generated/cognition/ModelRequirement.ts
@@ -28,6 +28,16 @@ contextWindowMin: number,
 /**
  * Maximum memory the resolved model may consume on this host, in MB.
  * `None` = use `host.available_memory_mb` as the implicit cap.
+ *
+ * **Currently OBSERVED but NOT ENFORCED.** Memory-budget filtering
+ * requires the [`Model`] schema to gain an `estimated_memory_mb`
+ * field — tracked as a separate followup. Until then, callers that
+ * pass this expecting filtering will silently get over-budget
+ * models. The `LocalOnly` / `CloudOnly` filter still prevents the
+ * worst class of mis-routing (running a 7B local model on the cloud
+ * lane). Loud-fail on memory pressure is a Lane B
+ * (FootprintRegistry / PressureBroker) concern downstream of
+ * resolution, not a resolver-side filter.
  */
 memoryBudgetMb?: number, 
 /**
diff --git a/src/workers/continuum-core/src/cognition/model_resolver.rs b/src/workers/continuum-core/src/cognition/model_resolver.rs
index de754e247..df9518fe1 100644
--- a/src/workers/continuum-core/src/cognition/model_resolver.rs
+++ b/src/workers/continuum-core/src/cognition/model_resolver.rs
@@ -39,6 +39,16 @@ use ts_rs::TS;
 /// Lane B's lease layer + adaptive_throughput's budgets care about the
 /// pool (TargetSilicon). Lane C's resolver cares about the variant
 /// (HwCapabilityTier).
+///
+/// **Closed enum by design.** New hardware classes (RTX 6090 → `Sm130`,
+/// M4, future Apple silicon) require an enum-edit + ts-rs regen + an
+/// explicit decision on which existing variant — if any — they alias to.
+/// There is intentionally no `Other(String)` or wildcard fallback variant:
+/// "unknown hardware" silently routing to a default tier hides
+/// capacity-mismatch bugs the resolver exists to catch. See Joel's rule
+/// on no fallbacks (`docs/architecture/...`). Adding a tier means the
+/// caller's hardware probe must produce it AND every match-on-tier site
+/// gets a compile error reminding the author to handle it.
 #[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize, TS)]
 #[serde(rename_all = "snake_case")]
 #[ts(
@@ -139,6 +149,16 @@ pub struct ModelRequirement {
     pub context_window_min: u32,
     /// Maximum memory the resolved model may consume on this host, in MB.
     /// `None` = use `host.available_memory_mb` as the implicit cap.
+    ///
+    /// **Currently OBSERVED but NOT ENFORCED.** Memory-budget filtering
+    /// requires the [`Model`] schema to gain an `estimated_memory_mb`
+    /// field — tracked as a separate followup. Until then, callers that
+    /// pass this expecting filtering will silently get over-budget
+    /// models. The `LocalOnly` / `CloudOnly` filter still prevents the
+    /// worst class of mis-routing (running a 7B local model on the cloud
+    /// lane). Loud-fail on memory pressure is a Lane B
+    /// (FootprintRegistry / PressureBroker) concern downstream of
+    /// resolution, not a resolver-side filter.
     #[ts(optional)]
     pub memory_budget_mb: Option<u32>,
     /// Local-vs-cloud preference. See [`LocalOrCloudPolicy`].

From fbdc357f8d703098052a8807da0a710351f7e57e Mon Sep 17 00:00:00 2001
From: Test <test@test.com>
Date: Thu, 7 May 2026 22:16:36 -0500
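Subject: [PATCH 3/3] Make model resolver provider residency data-driven

Replace the hardcoded LOCAL_PROVIDER_IDS list in cognition/model_resolver.rs
with a ProviderKind (local | cloud) read from each Provider row:

- providers.toml: llamacpp-local and docker-model-runner declare
  kind = "local"; rows without a kind line default to cloud.
- resolve_model() takes the provider table as a third argument and keys
  the LocalOnly/CloudOnly filter, the PreferLocal/PreferCloud ranking and
  target_silicon derivation on ProviderKind. This is a signature change;
  callers must pass the providers. A model whose provider id is missing
  from the table is treated as cloud (ProviderKind default).

Also in this patch, beyond the subject line:

- ModelRequirement.memory_budget_mb is removed (it was documented as
  observed but not enforced). Callers that set it must drop the field.
- HwCapabilityTier gains Sm75 and Sm100.
- ts-rs output regenerated (HwCapabilityTier.ts, ModelRequirement.ts,
  new ProviderKind.ts).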
---
 .../generated/cognition/HwCapabilityTier.ts   |   2 +-
 .../generated/cognition/ModelRequirement.ts   |  15 --
 .../generated/model_registry/ProviderKind.ts  |  10 +
 src/shared/generated/model_registry/index.ts  |   1 +
 .../continuum-core/config/providers.toml      |   2 +
 .../src/cognition/model_resolver.rs           | 229 ++++++++++++------
 .../src/model_registry/types.rs               |  46 +++-
 7 files changed, 208 insertions(+), 97 deletions(-)
 create mode 100644 src/shared/generated/model_registry/ProviderKind.ts

diff --git a/src/shared/generated/cognition/HwCapabilityTier.ts b/src/shared/generated/cognition/HwCapabilityTier.ts
index bdcd75b0f..e8ea51d22 100644
--- a/src/shared/generated/cognition/HwCapabilityTier.ts
+++ b/src/shared/generated/cognition/HwCapabilityTier.ts
@@ -22,4 +22,4 @@
  * caller's hardware probe must produce it AND every match-on-tier site
  * gets a compile error reminding the author to handle it.
  */
-export type HwCapabilityTier = "cpu_only" | "m1_uma8_gb" | "m1_uma16_gb" | "m2_uma_pro_max" | "m3_uma_pro_max" | "sm70" | "sm80" | "sm86" | "sm89" | "sm90" | "sm120" | "vulkan_amd" | "cloud";
+export type HwCapabilityTier = "cpu_only" | "m1_uma8_gb" | "m1_uma16_gb" | "m2_uma_pro_max" | "m3_uma_pro_max" | "sm70" | "sm75" | "sm80" | "sm86" | "sm89" | "sm90" | "sm100" | "sm120" | "vulkan_amd" | "cloud";
diff --git a/src/shared/generated/cognition/ModelRequirement.ts b/src/shared/generated/cognition/ModelRequirement.ts
index c547d256e..643bbe1cb 100644
--- a/src/shared/generated/cognition/ModelRequirement.ts
+++ b/src/shared/generated/cognition/ModelRequirement.ts
@@ -25,21 +25,6 @@ archPreference: Array<Arch>,
  * Minimum context window in tokens. `0` = any.
  */
 contextWindowMin: number, 
-/**
- * Maximum memory the resolved model may consume on this host, in MB.
- * `None` = use `host.available_memory_mb` as the implicit cap.
- *
- * **Currently OBSERVED but NOT ENFORCED.** Memory-budget filtering
- * requires the [`Model`] schema to gain an `estimated_memory_mb`
- * field — tracked as a separate followup. Until then, callers that
- * pass this expecting filtering will silently get over-budget
- * models. The `LocalOnly` / `CloudOnly` filter still prevents the
- * worst class of mis-routing (running a 7B local model on the cloud
- * lane). Loud-fail on memory pressure is a Lane B
- * (FootprintRegistry / PressureBroker) concern downstream of
- * resolution, not a resolver-side filter.
- */
-memoryBudgetMb?: number, 
 /**
  * Local-vs-cloud preference. See [`LocalOrCloudPolicy`].
  */
diff --git a/src/shared/generated/model_registry/ProviderKind.ts b/src/shared/generated/model_registry/ProviderKind.ts
new file mode 100644
index 000000000..82d216be9
--- /dev/null
+++ b/src/shared/generated/model_registry/ProviderKind.ts
@@ -0,0 +1,10 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Where a provider runs its inference. Resolver consumes this to honor
+ * `LocalOrCloudPolicy` without needing a hardcoded provider-id list.
+ * Providers default to [`ProviderKind::Cloud`] so adding a new cloud
+ * provider TOML row doesn't require an explicit `kind` line; local
+ * providers MUST declare `kind = "local"` explicitly.
+ */
+export type ProviderKind = "local" | "cloud";
diff --git a/src/shared/generated/model_registry/index.ts b/src/shared/generated/model_registry/index.ts
index afd28d110..fa4bac8f0 100644
--- a/src/shared/generated/model_registry/index.ts
+++ b/src/shared/generated/model_registry/index.ts
@@ -4,3 +4,4 @@
 
 export type { Arch } from './Arch';
 export type { Capability } from './Capability';
+export type { ProviderKind } from './ProviderKind';
diff --git a/src/workers/continuum-core/config/providers.toml b/src/workers/continuum-core/config/providers.toml
index baa631081..6bad70160 100644
--- a/src/workers/continuum-core/config/providers.toml
+++ b/src/workers/continuum-core/config/providers.toml
@@ -82,6 +82,7 @@ model_prefixes = ["gemini"]
 [[provider]]
 id = "docker-model-runner"
 name = "Docker Model Runner (local Metal/CUDA)"
+kind = "local"
 # IPv4 literal on purpose — `localhost` on macOS resolves to both ::1 and
 # 127.0.0.1 and Docker Desktop's model runner listens on IPv4 only. When
 # the hyper client tries ::1 first it waits for the connect path to fall
@@ -98,6 +99,7 @@ auth = "none"
 [[provider]]
 id = "llamacpp-local"
 name = "Llama.cpp (in-process Metal/CUDA)"
+kind = "local"
 base_url = "in-process"
 auth = "none"
 default_model = "continuum-ai/qwen3.5-4b-code-forged-GGUF"
diff --git a/src/workers/continuum-core/src/cognition/model_resolver.rs b/src/workers/continuum-core/src/cognition/model_resolver.rs
index df9518fe1..45f13b850 100644
--- a/src/workers/continuum-core/src/cognition/model_resolver.rs
+++ b/src/workers/continuum-core/src/cognition/model_resolver.rs
@@ -24,9 +24,9 @@
 //! `models.toml` or `models.json` — the registry already loaded both.
 
 use crate::cognition::adaptive_throughput::TargetSilicon;
-use crate::model_registry::types::{Arch, Capability, Model};
+use crate::model_registry::types::{Arch, Capability, Model, Provider, ProviderKind};
 use serde::{Deserialize, Serialize};
-use std::collections::BTreeSet;
+use std::collections::{BTreeSet, HashMap};
 use ts_rs::TS;
 
 /// Finer-grained hardware tier than [`TargetSilicon`]. Selects which model
@@ -68,6 +68,9 @@ pub enum HwCapabilityTier {
     M3UmaProMax,
     /// nVidia compute capability 7.0 (V100).
     Sm70,
+    /// nVidia compute capability 7.5 (T4 datacenter, RTX 20xx, GTX 16xx).
+    /// Common on cloud GPU inference instances.
+    Sm75,
     /// nVidia compute capability 8.0 (A100).
     Sm80,
     /// nVidia compute capability 8.6 (RTX 30xx, A40).
@@ -76,7 +79,11 @@ pub enum HwCapabilityTier {
     Sm89,
     /// nVidia compute capability 9.0 (H100).
     Sm90,
-    /// nVidia compute capability 12.0 (RTX 50xx).
+    /// nVidia compute capability 10.0 (Blackwell datacenter B100/B200,
+    /// HBM3e). Distinct from `Sm120` — Blackwell-consumer (RTX 50xx) and
+    /// Blackwell-datacenter take different driver paths.
+    Sm100,
+    /// nVidia compute capability 12.0 (RTX 50xx Blackwell-consumer).
     Sm120,
     /// AMD GPU via Vulkan backend.
     VulkanAmd,
@@ -147,20 +154,6 @@ pub struct ModelRequirement {
     /// Minimum context window in tokens. `0` = any.
     #[serde(default)]
     pub context_window_min: u32,
-    /// Maximum memory the resolved model may consume on this host, in MB.
-    /// `None` = use `host.available_memory_mb` as the implicit cap.
-    ///
-    /// **Currently OBSERVED but NOT ENFORCED.** Memory-budget filtering
-    /// requires the [`Model`] schema to gain an `estimated_memory_mb`
-    /// field — tracked as a separate followup. Until then, callers that
-    /// pass this expecting filtering will silently get over-budget
-    /// models. The `LocalOnly` / `CloudOnly` filter still prevents the
-    /// worst class of mis-routing (running a 7B local model on the cloud
-    /// lane). Loud-fail on memory pressure is a Lane B
-    /// (FootprintRegistry / PressureBroker) concern downstream of
-    /// resolution, not a resolver-side filter.
-    #[ts(optional)]
-    pub memory_budget_mb: Option<u32>,
     /// Local-vs-cloud preference. See [`LocalOrCloudPolicy`].
     pub provider_policy: LocalOrCloudPolicy,
     /// Host capability snapshot. See [`HostCapability`].
@@ -221,43 +214,54 @@ pub enum ResolutionError {
     },
 }
 
-/// Provider ids treated as local. Hardcoded for v1; a follow-up moves this
-/// to a `kind: local|cloud` field on `Provider` in `providers.toml`.
-const LOCAL_PROVIDER_IDS: &[&str] = &["llamacpp-local", "docker-model-runner"];
-
-fn is_local_provider(provider_id: &str) -> bool {
-    LOCAL_PROVIDER_IDS.contains(&provider_id)
-}
-
-fn derive_target_silicon(model: &Model, host: &HostCapability) -> TargetSilicon {
-    if is_local_provider(&model.provider) {
-        host.primary_target_silicon
-    } else {
-        TargetSilicon::Cloud
+fn derive_target_silicon(
+    model: &Model,
+    provider_kinds: &HashMap<&str, ProviderKind>,
+    host: &HostCapability,
+) -> TargetSilicon {
+    let kind = provider_kinds
+        .get(model.provider.as_str())
+        .copied()
+        .unwrap_or_default(); // ProviderKind::Cloud — unknown provider treated as cloud
+    match kind {
+        ProviderKind::Local => host.primary_target_silicon,
+        ProviderKind::Cloud => TargetSilicon::Cloud,
     }
 }
 
-/// Resolve a [`ModelRequirement`] against a model catalog. Pure: caller
-/// supplies the iterator of [`Model`] (typically `registry.models()`).
+/// Resolve a [`ModelRequirement`] against a model catalog + provider table.
+/// Pure: caller supplies iterators of [`Model`] and [`Provider`] (typically
+/// `registry.models()` and `registry.providers()`).
 ///
 /// Filter order (each step records the unmet predicate when it eliminates
 /// the last candidate, so the error names the specific cause):
 /// 1. `required_capabilities` — every cap must be advertised
 /// 2. `arch_preference` — when non-empty, must match
 /// 3. `context_window_min` — model's window ≥ requirement
-/// 4. `provider_policy` — Local/Cloud filter
-/// 5. memory budget — local models with declared estimates only
+/// 4. `provider_policy` — Local/Cloud filter keyed on each provider's
+///    [`ProviderKind`] from `providers.toml` (no hardcoded id list); a
+///    provider id absent from `providers` is treated as cloud
 ///
 /// Returns the first survivor under the policy's ranking. `PreferLocal`
 /// puts local providers first; `PreferCloud` puts cloud providers first;
 /// other policies preserve registry order.
-pub fn resolve_model<'a, I>(
+pub fn resolve_model<'a, M, P>(
     requirement: &ModelRequirement,
-    models: I,
+    models: M,
+    providers: P,
 ) -> Result<ResolvedModel, ResolutionError>
 where
-    I: IntoIterator<Item = &'a Model>,
+    M: IntoIterator<Item = &'a Model>,
+    P: IntoIterator<Item = &'a Provider>,
 {
+    let provider_kinds: HashMap<&str, ProviderKind> = providers
+        .into_iter()
+        .map(|p| (p.id.as_str(), p.kind))
+        .collect();
+    let is_local = |provider_id: &str| {
+        provider_kinds.get(provider_id).copied().unwrap_or_default() == ProviderKind::Local
+    };
+
     let registry: Vec<&Model> = models.into_iter().collect();
     let registry_count = registry.len();
     let mut unmet: Vec<String> = Vec::new();
@@ -266,12 +270,7 @@ where
     let mut candidates: Vec<&Model> = registry
         .iter()
         .copied()
-        .filter(|m| {
-            requirement
-                .required_capabilities
-                .iter()
-                .all(|c| m.has(*c))
-        })
+        .filter(|m| requirement.required_capabilities.iter().all(|c| m.has(*c)))
         .collect();
     if candidates.is_empty() && !requirement.required_capabilities.is_empty() {
         unmet.push(format!(
@@ -326,8 +325,8 @@ where
     // Filter 4: provider policy.
     let before_provider = candidates.len();
     candidates.retain(|m| match requirement.provider_policy {
-        LocalOrCloudPolicy::LocalOnly => is_local_provider(&m.provider),
-        LocalOrCloudPolicy::CloudOnly => !is_local_provider(&m.provider),
+        LocalOrCloudPolicy::LocalOnly => is_local(&m.provider),
+        LocalOrCloudPolicy::CloudOnly => !is_local(&m.provider),
         LocalOrCloudPolicy::PreferLocal
         | LocalOrCloudPolicy::PreferCloud
         | LocalOrCloudPolicy::Any => true,
@@ -347,16 +346,16 @@ where
     // Rank: PreferLocal/PreferCloud reorder; other policies preserve order.
     match requirement.provider_policy {
         LocalOrCloudPolicy::PreferLocal => {
-            candidates.sort_by_key(|m| u8::from(!is_local_provider(&m.provider)));
+            candidates.sort_by_key(|m| u8::from(!is_local(&m.provider)));
         }
         LocalOrCloudPolicy::PreferCloud => {
-            candidates.sort_by_key(|m| u8::from(is_local_provider(&m.provider)));
+            candidates.sort_by_key(|m| u8::from(is_local(&m.provider)));
         }
         _ => {}
     }
 
     let best = candidates.first().expect("non-empty after filters");
-    let target_silicon = derive_target_silicon(best, &requirement.host);
+    let target_silicon = derive_target_silicon(best, &provider_kinds, &requirement.host);
     let reason = format!(
         "matched {} required capability(ies) on arch={:?}, context={}, provider={}, policy={:?}",
         requirement.required_capabilities.len(),
@@ -383,7 +382,7 @@ where
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::model_registry::types::MultiPartyChatStrategy;
+    use crate::model_registry::types::{AuthKind, MultiPartyChatStrategy};
 
     fn make_model(
         id: &str,
@@ -412,6 +411,27 @@ mod tests {
         }
     }
 
+    fn make_provider(id: &str, kind: ProviderKind) -> Provider { // fixture: only id/kind vary
+        Provider {
+            id: id.into(),
+            name: None,
+            base_url: "http://test".into(), // fixed dummy value for tests
+            api_key_env: None,
+            default_model: None,
+            auth: AuthKind::None,
+            model_prefixes: vec![],
+            kind, // the Local/Cloud classification these tests exercise
+        }
+    }
+
+    fn providers() -> Vec<Provider> { // provider table shared by this module's tests
+        vec![
+            make_provider("anthropic", ProviderKind::Cloud),
+            make_provider("openai", ProviderKind::Cloud),
+            make_provider("llamacpp-local", ProviderKind::Local), // the only Local entry
+        ]
+    }
+
     fn host_m1_8gb() -> HostCapability {
         HostCapability {
             hw_capability_tier: HwCapabilityTier::M1Uma8Gb,
@@ -501,7 +521,6 @@ mod tests {
             required_capabilities: [Capability::Chat].iter().copied().collect(),
             arch_preference: vec![],
             context_window_min: 0,
-            memory_budget_mb: None,
             provider_policy: LocalOrCloudPolicy::LocalOnly,
             host,
         }
@@ -515,7 +534,6 @@ mod tests {
                 .collect(),
             arch_preference: vec![],
             context_window_min: 0,
-            memory_budget_mb: None,
             provider_policy: LocalOrCloudPolicy::LocalOnly,
             host,
         }
@@ -524,12 +542,12 @@ mod tests {
     #[test]
     fn local_chat_resolves_to_qwen35_on_m1() {
         let r = registry();
-        let resolved = resolve_model(&req_chat_local(host_m1_8gb()), r.iter()).unwrap();
+        let resolved =
+            resolve_model(&req_chat_local(host_m1_8gb()), r.iter(), providers().iter()).unwrap();
         assert_eq!(resolved.provider_id, "llamacpp-local");
-        assert!(
-            resolved.model_id.starts_with("continuum-ai/qwen3.5") || resolved.model_id.starts_with("qwen2"),
-            "expected a local qwen model, got {}",
+        assert_eq!(
             resolved.model_id,
+            "continuum-ai/qwen3.5-4b-code-forged-GGUF"
         );
         assert_eq!(resolved.target_silicon, TargetSilicon::UnifiedMemory);
         assert_eq!(resolved.hw_capability_tier, HwCapabilityTier::M1Uma8Gb);
@@ -538,7 +556,12 @@ mod tests {
     #[test]
     fn vision_request_resolves_to_qwen2_vl() {
         let r = registry();
-        let resolved = resolve_model(&req_vision_local(host_rtx5090()), r.iter()).unwrap();
+        let resolved = resolve_model(
+            &req_vision_local(host_rtx5090()),
+            r.iter(),
+            providers().iter(),
+        )
+        .unwrap();
         assert_eq!(resolved.model_id, "qwen2-vl-7b-instruct");
         assert_eq!(resolved.provider_id, "llamacpp-local");
         assert_eq!(resolved.target_silicon, TargetSilicon::Gpu);
@@ -550,7 +573,7 @@ mod tests {
         let r = registry();
         let mut req = req_chat_local(host_rtx5090());
         req.provider_policy = LocalOrCloudPolicy::CloudOnly;
-        let resolved = resolve_model(&req, r.iter()).unwrap();
+        let resolved = resolve_model(&req, r.iter(), providers().iter()).unwrap();
         assert!(
             ["anthropic", "openai"].contains(&resolved.provider_id.as_str()),
             "expected cloud provider, got {}",
@@ -566,11 +589,10 @@ mod tests {
             required_capabilities: [Capability::ImageGeneration].iter().copied().collect(),
             arch_preference: vec![],
             context_window_min: 0,
-            memory_budget_mb: None,
             provider_policy: LocalOrCloudPolicy::Any,
             host: host_rtx5090(),
         };
-        let err = resolve_model(&req, r.iter()).unwrap_err();
+        let err = resolve_model(&req, r.iter(), providers().iter()).unwrap_err();
         let ResolutionError::NoModelMatchesRequirement {
             registry_count,
             candidates_after_filter,
@@ -592,7 +614,12 @@ mod tests {
         // run it). The resolver answers "what fits the requirement,"
         // not "what will succeed at inference time."
         let r = registry();
-        let resolved = resolve_model(&req_vision_local(host_cpu_only()), r.iter()).unwrap();
+        let resolved = resolve_model(
+            &req_vision_local(host_cpu_only()),
+            r.iter(),
+            providers().iter(),
+        )
+        .unwrap();
         assert_eq!(resolved.model_id, "qwen2-vl-7b-instruct");
         assert_eq!(resolved.target_silicon, TargetSilicon::Cpu);
         assert_eq!(resolved.hw_capability_tier, HwCapabilityTier::CpuOnly);
@@ -605,13 +632,15 @@ mod tests {
             required_capabilities: [Capability::Chat].iter().copied().collect(),
             arch_preference: vec![],
             context_window_min: 100_000,
-            memory_budget_mb: None,
             provider_policy: LocalOrCloudPolicy::LocalOnly,
             host: host_rtx5090(),
         };
-        let resolved = resolve_model(&req, r.iter()).unwrap();
+        let resolved = resolve_model(&req, r.iter(), providers().iter()).unwrap();
         // Only qwen3.5-4b (262144 ctx) survives among local with ≥100k window.
-        assert_eq!(resolved.model_id, "continuum-ai/qwen3.5-4b-code-forged-GGUF");
+        assert_eq!(
+            resolved.model_id,
+            "continuum-ai/qwen3.5-4b-code-forged-GGUF"
+        );
     }
 
     #[test]
@@ -621,12 +650,14 @@ mod tests {
             required_capabilities: [Capability::Chat].iter().copied().collect(),
             arch_preference: vec![Arch::Qwen35],
             context_window_min: 0,
-            memory_budget_mb: None,
             provider_policy: LocalOrCloudPolicy::Any,
             host: host_rtx5090(),
         };
-        let resolved = resolve_model(&req, r.iter()).unwrap();
-        assert_eq!(resolved.model_id, "continuum-ai/qwen3.5-4b-code-forged-GGUF");
+        let resolved = resolve_model(&req, r.iter(), providers().iter()).unwrap();
+        assert_eq!(
+            resolved.model_id,
+            "continuum-ai/qwen3.5-4b-code-forged-GGUF"
+        );
     }
 
     #[test]
@@ -639,11 +670,10 @@ mod tests {
                 .collect(),
             arch_preference: vec![],
             context_window_min: 0,
-            memory_budget_mb: None,
             provider_policy: LocalOrCloudPolicy::PreferLocal,
             host: host_rtx5090(),
         };
-        let resolved = resolve_model(&req, r.iter()).unwrap();
+        let resolved = resolve_model(&req, r.iter(), providers().iter()).unwrap();
         assert_eq!(resolved.provider_id, "llamacpp-local");
         assert_eq!(resolved.model_id, "qwen2-vl-7b-instruct");
     }
@@ -658,11 +688,10 @@ mod tests {
                 .collect(),
             arch_preference: vec![],
             context_window_min: 0,
-            memory_budget_mb: None,
             provider_policy: LocalOrCloudPolicy::PreferCloud,
             host: host_rtx5090(),
         };
-        let resolved = resolve_model(&req, r.iter()).unwrap();
+        let resolved = resolve_model(&req, r.iter(), providers().iter()).unwrap();
         assert!(
             ["anthropic", "openai"].contains(&resolved.provider_id.as_str()),
             "expected cloud first, got {}",
@@ -670,6 +699,47 @@ mod tests {
         );
     }
 
+    #[test]
+    fn provider_kind_drives_local_classification_not_id() {
+        // Locality comes from Provider.kind, not from the provider id (the
+        // old LOCAL_PROVIDER_IDS list): a Local provider with an unrelated
+        // id must satisfy a LocalOnly request and resolve to host silicon.
+        let models = vec![make_model(
+            "custom-local-model",
+            "custom-local-provider",
+            Arch::Llama,
+            8192,
+            &[Capability::Chat],
+        )];
+        let providers = vec![make_provider("custom-local-provider", ProviderKind::Local)];
+        let req = req_chat_local(host_m1_8gb());
+        let resolved = resolve_model(&req, models.iter(), providers.iter()).unwrap();
+        assert_eq!(resolved.model_id, "custom-local-model");
+        assert_eq!(resolved.target_silicon, TargetSilicon::UnifiedMemory);
+    }
+
+    #[test]
+    fn unknown_provider_defaults_to_cloud_for_safety() {
+        // A model whose provider id is missing from the providers table is
+        // treated as Cloud (ProviderKind's default), so a LocalOnly request
+        // must fail with a typed error instead of silently routing work of
+        // unknown residency onto local hardware.
+        let models = vec![make_model(
+            "orphan-model",
+            "orphan-provider",
+            Arch::Llama,
+            8192,
+            &[Capability::Chat],
+        )];
+        let providers: Vec<Provider> = vec![];
+        let req = req_chat_local(host_m1_8gb());
+        let err = resolve_model(&req, models.iter(), providers.iter()).unwrap_err();
+        assert!(
+            matches!(err, ResolutionError::NoModelMatchesRequirement { .. }),
+            "LocalOnly with unknown provider must error, not silently treat as local"
+        );
+    }
+
     #[test]
     fn five_persona_resolution_smoke() {
         // Lane C contract test: 5 personas with different needs all
@@ -677,17 +747,23 @@ mod tests {
         let r = registry();
 
         // Persona 1: Helper AI — local chat.
-        let helper = resolve_model(&req_chat_local(host_m1_8gb()), r.iter()).unwrap();
+        let helper =
+            resolve_model(&req_chat_local(host_m1_8gb()), r.iter(), providers().iter()).unwrap();
         assert_eq!(helper.provider_id, "llamacpp-local");
 
         // Persona 2: Vision AI — local vision.
-        let vision = resolve_model(&req_vision_local(host_m1_8gb()), r.iter()).unwrap();
+        let vision = resolve_model(
+            &req_vision_local(host_m1_8gb()),
+            r.iter(),
+            providers().iter(),
+        )
+        .unwrap();
         assert_eq!(vision.model_id, "qwen2-vl-7b-instruct");
 
         // Persona 3: Cloud-only persona — wants vision via cloud.
         let mut cloud_vision_req = req_vision_local(host_m1_8gb());
         cloud_vision_req.provider_policy = LocalOrCloudPolicy::CloudOnly;
-        let cloud_vision = resolve_model(&cloud_vision_req, r.iter()).unwrap();
+        let cloud_vision = resolve_model(&cloud_vision_req, r.iter(), providers().iter()).unwrap();
         assert!(
             ["anthropic", "openai"].contains(&cloud_vision.provider_id.as_str()),
             "expected cloud, got {}",
@@ -702,7 +778,7 @@ mod tests {
             .copied()
             .collect();
         audio_req.provider_policy = LocalOrCloudPolicy::Any;
-        let audio = resolve_model(&audio_req, r.iter()).unwrap();
+        let audio = resolve_model(&audio_req, r.iter(), providers().iter()).unwrap();
         assert_eq!(audio.model_id, "gpt-4o");
 
         // Persona 5: Code persona requiring tool-use — qwen3.5 OR claude.
@@ -712,7 +788,7 @@ mod tests {
             .copied()
             .collect();
         code_req.provider_policy = LocalOrCloudPolicy::PreferLocal;
-        let code = resolve_model(&code_req, r.iter()).unwrap();
+        let code = resolve_model(&code_req, r.iter(), providers().iter()).unwrap();
         assert_eq!(code.provider_id, "llamacpp-local");
         assert_eq!(code.model_id, "continuum-ai/qwen3.5-4b-code-forged-GGUF");
 
@@ -723,13 +799,12 @@ mod tests {
             required_capabilities: [Capability::ImageGeneration].iter().copied().collect(),
             arch_preference: vec![],
             context_window_min: 0,
-            memory_budget_mb: None,
             provider_policy: LocalOrCloudPolicy::Any,
             host: host_rtx5090(),
         };
         assert!(
             matches!(
-                resolve_model(&img_req, r.iter()),
+                resolve_model(&img_req, r.iter(), providers().iter()),
                 Err(ResolutionError::NoModelMatchesRequirement { .. })
             ),
             "missing capability must error, not fall back"
diff --git a/src/workers/continuum-core/src/model_registry/types.rs b/src/workers/continuum-core/src/model_registry/types.rs
index 33aa1376c..127462592 100644
--- a/src/workers/continuum-core/src/model_registry/types.rs
+++ b/src/workers/continuum-core/src/model_registry/types.rs
@@ -19,10 +19,7 @@ use std::path::PathBuf;
 #[derive(
     Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize, ts_rs::TS,
 )]
-#[ts(
-    export,
-    export_to = "../../../shared/generated/model_registry/Arch.ts"
-)]
+#[ts(export, export_to = "../../../shared/generated/model_registry/Arch.ts")]
 #[serde(rename_all = "snake_case")]
 pub enum Arch {
     Qwen2,
@@ -85,6 +82,41 @@ pub enum Capability {
     Reranking,
 }
 
+/// Where a provider runs its inference. The resolver consumes this to honor
+/// `LocalOrCloudPolicy` without a hardcoded provider-id list. Serialized in
+/// snake_case (`"local"` / `"cloud"`). Defaults to [`ProviderKind::Cloud`],
+/// so a new cloud provider TOML row needs no explicit `kind` line; local
+/// providers MUST declare `kind = "local"` explicitly.
+#[derive(
+    Debug,
+    Clone,
+    Copy,
+    PartialEq,
+    Eq,
+    Hash,
+    PartialOrd,
+    Ord,
+    Default,
+    Serialize,
+    Deserialize,
+    ts_rs::TS,
+)]
+#[ts(
+    export,
+    export_to = "../../../shared/generated/model_registry/ProviderKind.ts"
+)]
+#[serde(rename_all = "snake_case")]
+pub enum ProviderKind {
+    /// In-process or localhost backend. Inference runs on this host's
+    /// hardware (CPU / GPU / unified memory). Examples: `llamacpp-local`,
+    /// `docker-model-runner`.
+    Local,
+    /// Remote HTTP API. Inference runs off-host; this provider counts
+    /// toward `TargetSilicon::Cloud` admission. Default for new providers.
+    #[default]
+    Cloud,
+}
+
 /// HTTP authentication mode for a provider's API.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
@@ -286,6 +318,12 @@ pub struct Provider {
     /// dispatch via live /v1/models probes instead.
     #[serde(default)]
     pub model_prefixes: Vec<String>,
+    /// Where this provider runs inference. See [`ProviderKind`]. Defaults
+    /// to `Cloud` when omitted in TOML — local providers must declare
+    /// `kind = "local"` explicitly so adding a new cloud provider doesn't
+    /// require touching this field.
+    #[serde(default)]
+    pub kind: ProviderKind,
 }
 
 impl Provider {