CambrianTech · joelteply · May 8, 2026 · May 8, 2026 · May 8, 2026 · May 8, 2026
diff --git a/src/shared/generated/cognition/HostCapability.ts b/src/shared/generated/cognition/HostCapability.ts
@@ -0,0 +1,23 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { HwCapabilityTier } from "./HwCapabilityTier";
+import type { TargetSilicon } from "./TargetSilicon";
+
+/**
+ * What the resolver knows about THIS machine. The caller populates it from
+ * a hardware-detection probe at boot (a `device_probe` module is planned).
+ * The resolver treats this as a point-in-time snapshot — re-invoke the
+ * resolver whenever the probe values change.
+ */
+export type HostCapability = { hwCapabilityTier: HwCapabilityTier, 
+/**
+ * Memory available for inference workloads, in megabytes. On unified-
+ * memory hosts this is the share inference is willing to claim, not the
+ * total system RAM.
+ */
+availableMemoryMb: number, 
+/**
+ * Which physical-budget pool inference workloads on this host should be
+ * admitted against. Mac M-series → `UnifiedMemory`; NVIDIA → `Gpu`;
+ * CPU-only → `Cpu`.
+ */
+primaryTargetSilicon: TargetSilicon, };
diff --git a/src/shared/generated/cognition/HwCapabilityTier.ts b/src/shared/generated/cognition/HwCapabilityTier.ts
@@ -0,0 +1,25 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Finer-grained hardware tier than [`TargetSilicon`]. Selects which model
+ * VARIANT a host can run, not which physical-budget POOL admission uses.
+ *
+ * Example: `M1Uma8Gb` (`"m1_uma8_gb"`) and `M3UmaProMax` (`"m3_uma_pro_max"`)
+ * both have `target_silicon == TargetSilicon::UnifiedMemory`, but only the
+ * latter can hold a 4B-parameter model alongside a 7B vision model.
+ *
+ * Lane B's lease layer + adaptive_throughput's budgets care about the
+ * pool (TargetSilicon). Lane C's resolver cares about the variant
+ * (HwCapabilityTier).
+ *
+ * **Closed enum by design.** New hardware classes (RTX 6090 → `Sm130`,
+ * M4, future Apple silicon) require an enum edit + ts-rs regen + an
+ * explicit decision on which existing variant — if any — they alias to.
+ * There is intentionally no `Other(String)` or wildcard fallback variant:
+ * silently routing "unknown hardware" to a default tier hides the
+ * capacity-mismatch bugs the resolver exists to catch. See Joel's rule
+ * on no fallbacks (`docs/architecture/...` — TODO: give the full path).
+ * Adding a tier means the caller's hardware probe must produce it AND every
+ * match-on-tier site gets a compile error reminding the author to handle it.
+ */
+export type HwCapabilityTier = "cpu_only" | "m1_uma8_gb" | "m1_uma16_gb" | "m2_uma_pro_max" | "m3_uma_pro_max" | "sm70" | "sm75" | "sm80" | "sm86" | "sm89" | "sm90" | "sm100" | "sm120" | "vulkan_amd" | "cloud";
diff --git a/src/shared/generated/cognition/LocalOrCloudPolicy.ts b/src/shared/generated/cognition/LocalOrCloudPolicy.ts
@@ -0,0 +1,6 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * How strongly to prefer local vs. cloud providers; honored by the model resolver.
+ */
+export type LocalOrCloudPolicy = "local_only" | "cloud_only" | "prefer_local" | "prefer_cloud" | "any";
diff --git a/src/shared/generated/cognition/ModelRequirement.ts b/src/shared/generated/cognition/ModelRequirement.ts
@@ -0,0 +1,35 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { Arch } from "../model_registry/Arch";
+import type { Capability } from "../model_registry/Capability";
+import type { HostCapability } from "./HostCapability";
+import type { LocalOrCloudPolicy } from "./LocalOrCloudPolicy";
+
+/**
+ * Capability-shaped query for the resolver. Callers describe what the
+ * model needs to DO (generate text, see images, etc.) — not which model
+ * to use. Per Joel's axiom: code knows ARCHETYPES, models are data.
+ */
+export type ModelRequirement = { 
+/**
+ * Capabilities every candidate must advertise. An empty array matches
+ * any model (rare — callers usually want at least `Chat`).
+ */
+requiredCapabilities: Array<Capability>, 
+/**
+ * Architectural family preference. Empty = any architecture qualifies.
+ * When non-empty this is a hard filter, not a ranking hint: candidates
+ * outside the listed families are removed — this family or none.
+ */
+archPreference: Array<Arch>, 
+/**
+ * Minimum context window, in tokens. `0` = no minimum.
+ */
+contextWindowMin: number, 
+/**
+ * Local-vs-cloud preference. See [`LocalOrCloudPolicy`].
+ */
+providerPolicy: LocalOrCloudPolicy, 
+/**
+ * Snapshot of this host's capabilities. See [`HostCapability`].
+ */
+host: HostCapability, };
diff --git a/src/shared/generated/cognition/ResolutionError.ts b/src/shared/generated/cognition/ResolutionError.ts
@@ -0,0 +1,12 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Why a [`resolve_model`] call failed. Each variant names the SPECIFIC
+ * filter that eliminated all candidates so the caller's error message
+ * can be actionable. No `Fallback` variant: per Joel's rule a missing
+ * model is an error, not a soft retry on a default; callers wanting
+ * graceful degradation must EXPLICITLY relax the requirement and re-invoke.
+ * NOTE(review): payload fields are snake_case while sibling generated types
+ * use camelCase — confirm whether the Rust enum needs `rename_all_fields`.
+ */
+export type ResolutionError = { "kind": "noModelMatchesRequirement", registry_count: number, candidates_after_filter: number, unmet_filters: Array<string>, };
diff --git a/src/shared/generated/cognition/ResolvedModel.ts b/src/shared/generated/cognition/ResolvedModel.ts
@@ -0,0 +1,26 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { HwCapabilityTier } from "./HwCapabilityTier";
+import type { TargetSilicon } from "./TargetSilicon";
+
+/**
+ * Resolver output. Carries the silicon target so the caller can plumb it
+ * straight into a [`ThroughputJob`] without re-deriving it from the
+ * model + host.
+ */
+export type ResolvedModel = { modelId: string, providerId: string, 
+/**
+ * Expected memory footprint in megabytes, if the registry knows it.
+ * Absent (Rust `None`) for cloud models (always-fits) and for local
+ * models whose row in `models.toml` doesn't yet declare a memory
+ * estimate. A follow-up adds an `estimated_memory_mb` field to the Model
+ * schema; until then memory-budget filtering is best-effort on local
+ * models (the resolver still rejects cloud models from `LocalOnly` queries).
+ */
+expectedMemoryMb?: number, targetSilicon: TargetSilicon, hwCapabilityTier: HwCapabilityTier, 
+/**
+ * Human-readable explanation of why this model was chosen. Surfaced in
+ * logs + UI when a persona's resolution changes (e.g., "switched from
+ * gpt-4o to claude-sonnet-4-5 because PreferLocal couldn't satisfy
+ * required Capability::Vision on this host").
+ */
+reason: string, };
diff --git a/src/shared/generated/cognition/index.ts b/src/shared/generated/cognition/index.ts
@@ -2,9 +2,15 @@
 // Source: generator/generate-rust-bindings.ts
 // Re-generate: npx tsx generator/generate-rust-bindings.ts
 
+export type { AdaptiveThroughputPlan } from './AdaptiveThroughputPlan';
+export type { AdaptiveThroughputRequest } from './AdaptiveThroughputRequest';
+export type { HostCapability } from './HostCapability';
+export type { HwCapabilityTier } from './HwCapabilityTier';
 export type { LeverCall } from './LeverCall';
 export type { LeverName } from './LeverName';
+export type { LocalOrCloudPolicy } from './LocalOrCloudPolicy';
 export type { MediaItemLite } from './MediaItemLite';
+export type { ModelRequirement } from './ModelRequirement';
 export type { NativeBatchOutcome } from './NativeBatchOutcome';
 export type { ParsedToolBatch } from './ParsedToolBatch';
 export type { PersonaMediaConfigLite } from './PersonaMediaConfigLite';
@@ -18,10 +24,19 @@ export type { RecipeRagSourcePolicy } from './RecipeRagSourcePolicy';
 export type { RecipeTurnBatchPlan } from './RecipeTurnBatchPlan';
 export type { RecipeTurnBatchRequest } from './RecipeTurnBatchRequest';
 export type { RecipeTurnTrigger } from './RecipeTurnTrigger';
+export type { ResolutionError } from './ResolutionError';
+export type { ResolvedModel } from './ResolvedModel';
+export type { ResourceClass } from './ResourceClass';
 export type { ResponderDecision } from './ResponderDecision';
 export type { SharedAnalysis } from './SharedAnalysis';
 export type { SharedAnalysisIntent } from './SharedAnalysisIntent';
 export type { SharedRagSourcePlan } from './SharedRagSourcePlan';
+export type { TargetSilicon } from './TargetSilicon';
+export type { ThroughputJob } from './ThroughputJob';
+export type { ThroughputLaneBudget } from './ThroughputLaneBudget';
+export type { ThroughputLease } from './ThroughputLease';
+export type { ThroughputLeaseRevocationPolicy } from './ThroughputLeaseRevocationPolicy';
+export type { ThroughputLeaseSnapshot } from './ThroughputLeaseSnapshot';
 export type { ToolExecutionContext } from './ToolExecutionContext';
 export type { ToolInvocation } from './ToolInvocation';
 export type { ToolOutcome } from './ToolOutcome';
diff --git a/src/shared/generated/model_registry/Arch.ts b/src/shared/generated/model_registry/Arch.ts
@@ -0,0 +1,12 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Model architecture family. Typed (not stringly-typed) so call sites use
+ * enum matching, not string comparison. Adding a new arch means: (a) add
+ * the variant to the Rust enum and regenerate this file, (b) add a TOML
+ * row with `arch = "new_arch"`. Code that dispatches by arch gets a
+ * compile error reminding the author to handle the new variant —
+ * precisely the pattern Joel's axiom calls for ("code should NEVER know
+ * the model" — code knows the ARCHETYPES via this enum, models are data).
+ */
+export type Arch = "qwen2" | "qwen3" | "qwen35" | "llama" | "claude" | "gpt" | "gemini" | "grok" | "deepseek" | "unknown";
diff --git a/src/shared/generated/model_registry/ProviderKind.ts b/src/shared/generated/model_registry/ProviderKind.ts
@@ -0,0 +1,10 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Where a provider runs its inference. The resolver consumes this to
+ * honor `LocalOrCloudPolicy` without a hardcoded provider-id list.
+ * Providers default to [`ProviderKind::Cloud`], so a new cloud provider's
+ * TOML row needs no explicit `kind` line; local providers MUST declare
+ * `kind = "local"` explicitly.
+ */
+export type ProviderKind = "local" | "cloud";
diff --git a/src/shared/generated/model_registry/index.ts b/src/shared/generated/model_registry/index.ts
@@ -2,4 +2,6 @@
 // Source: generator/generate-rust-bindings.ts
 // Re-generate: npx tsx generator/generate-rust-bindings.ts
 
+export type { Arch } from './Arch';
 export type { Capability } from './Capability';
+export type { ProviderKind } from './ProviderKind';
diff --git a/src/workers/continuum-core/config/providers.toml b/src/workers/continuum-core/config/providers.toml
@@ -82,6 +82,7 @@ model_prefixes = ["gemini"]
 [[provider]]
 id = "docker-model-runner"
 name = "Docker Model Runner (local Metal/CUDA)"
+kind = "local"
 # IPv4 literal on purpose — `localhost` on macOS resolves to both ::1 and
 # 127.0.0.1 and Docker Desktop's model runner listens on IPv4 only. When
 # the hyper client tries ::1 first it waits for the connect path to fall
@@ -98,6 +99,7 @@ auth = "none"
 [[provider]]
 id = "llamacpp-local"
 name = "Llama.cpp (in-process Metal/CUDA)"
+kind = "local"
 base_url = "in-process"
 auth = "none"
 default_model = "continuum-ai/qwen3.5-4b-code-forged-GGUF"

diff --git a/src/workers/continuum-core/src/cognition/mod.rs b/src/workers/continuum-core/src/cognition/mod.rs
@@ -28,6 +28,7 @@
 //!                                  `ResponderDecision`)
 
 pub mod adaptive_throughput;
+pub mod model_resolver;
 pub mod response_orchestrator;
 pub mod response_validator;
 pub mod shared_analysis;
@@ -37,6 +38,7 @@ pub mod turn_batch;
 pub mod types;
 
 pub use adaptive_throughput::*;
+pub use model_resolver::*;
 pub use response_orchestrator::{
     DEFAULT_RELEVANCE_THRESHOLD, PersonaSlot, orchestrate, score_persona,
 };