Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/shared/generated/cognition/HostCapability.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
import type { HwCapabilityTier } from "./HwCapabilityTier";
import type { TargetSilicon } from "./TargetSilicon";

/**
 * Snapshot of what the resolver knows about THIS machine.
 *
 * The caller fills it in from a hardware-detection probe at boot (see the
 * future `device_probe` module). The resolver treats the value as a
 * point-in-time snapshot, so re-invoke it whenever the probe values change.
 */
export type HostCapability = {
  hwCapabilityTier: HwCapabilityTier;
  /**
   * Memory, in megabytes, available to inference workloads. On
   * unified-memory hosts this is the share inference is willing to claim,
   * not total system RAM.
   */
  availableMemoryMb: number;
  /**
   * Physical-budget pool that inference workloads on this host should admit
   * against. Mac M-series → `UnifiedMemory`; NVIDIA → `Gpu`;
   * CPU-only → `Cpu`.
   */
  primaryTargetSilicon: TargetSilicon;
};
25 changes: 25 additions & 0 deletions src/shared/generated/cognition/HwCapabilityTier.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.

/**
 * Hardware tier, finer-grained than [`TargetSilicon`]. It selects which model
 * VARIANT a host can run, not which physical-budget POOL admission uses.
 *
 * Example: `M1Uma8Gb` and `M3UmaProMax` both have
 * `target_silicon == TargetSilicon::UnifiedMemory`, but only the latter can
 * hold a 4B-parameter model alongside a 7B vision model.
 *
 * Lane B's lease layer and adaptive_throughput's budgets care about the pool
 * (TargetSilicon); Lane C's resolver cares about the variant
 * (HwCapabilityTier).
 *
 * **Closed enum by design.** New hardware classes (RTX 6090 → `Sm130`, M4,
 * future Apple silicon) require an enum edit, a ts-rs regen, and an explicit
 * decision on which existing variant — if any — they alias to. There is
 * intentionally no `Other(String)` or wildcard fallback variant: "unknown
 * hardware" silently routing to a default tier hides the capacity-mismatch
 * bugs the resolver exists to catch. See Joel's rule on no fallbacks
 * (`docs/architecture/...`). Adding a tier means the caller's hardware probe
 * must produce it AND every match-on-tier site gets a compile error
 * reminding the author to handle it.
 */
export type HwCapabilityTier =
  | "cpu_only"
  | "m1_uma8_gb"
  | "m1_uma16_gb"
  | "m2_uma_pro_max"
  | "m3_uma_pro_max"
  | "sm70"
  | "sm75"
  | "sm80"
  | "sm86"
  | "sm89"
  | "sm90"
  | "sm100"
  | "sm120"
  | "vulkan_amd"
  | "cloud";
6 changes: 6 additions & 0 deletions src/shared/generated/cognition/LocalOrCloudPolicy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.

/**
 * How strongly the resolver should favor local providers over cloud ones.
 */
export type LocalOrCloudPolicy =
  | "local_only"
  | "cloud_only"
  | "prefer_local"
  | "prefer_cloud"
  | "any";
35 changes: 35 additions & 0 deletions src/shared/generated/cognition/ModelRequirement.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
import type { Arch } from "../model_registry/Arch";
import type { Capability } from "../model_registry/Capability";
import type { HostCapability } from "./HostCapability";
import type { LocalOrCloudPolicy } from "./LocalOrCloudPolicy";

/**
 * Capability-shaped query handed to the resolver. Callers describe what the
 * model needs to DO (generate text, see images, etc.) — not which model to
 * use. Per Joel's axiom: code knows ARCHETYPES, models are data.
 */
export type ModelRequirement = {
  /**
   * Capabilities that every candidate must advertise. An empty set matches
   * any model (rare — callers usually want at least `Chat`).
   */
  requiredCapabilities: Capability[];
  /**
   * Preferred architectural families. Empty = any architecture qualifies.
   * When non-empty, candidates outside the preference are filtered out
   * rather than down-ranked — the caller wants this family or none.
   */
  archPreference: Arch[];
  /**
   * Minimum context window, in tokens. `0` = any.
   */
  contextWindowMin: number;
  /**
   * Local-vs-cloud preference. See [`LocalOrCloudPolicy`].
   */
  providerPolicy: LocalOrCloudPolicy;
  /**
   * Snapshot of the host's capabilities. See [`HostCapability`].
   */
  host: HostCapability;
};
12 changes: 12 additions & 0 deletions src/shared/generated/cognition/ResolutionError.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.

/**
 * Reason a [`resolve_model`] call failed. Each variant names the SPECIFIC
 * filter that eliminated all candidates, so the caller can produce an
 * actionable error message.
 *
 * There is no `Fallback` variant. Per Joel's rule: a missing model is an
 * error, not a soft retry on a default. Callers that want graceful
 * degradation must EXPLICITLY relax their requirement and re-invoke.
 *
 * NOTE(review): the payload keys below are snake_case, whereas the object
 * types generated alongside this one (e.g. `ResolvedModel`) use camelCase
 * keys — confirm this is the intended wire format.
 */
export type ResolutionError = {
  kind: "noModelMatchesRequirement";
  registry_count: number;
  candidates_after_filter: number;
  unmet_filters: string[];
};
26 changes: 26 additions & 0 deletions src/shared/generated/cognition/ResolvedModel.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
import type { HwCapabilityTier } from "./HwCapabilityTier";
import type { TargetSilicon } from "./TargetSilicon";

/**
 * What the resolver returns. The silicon target is included so the caller
 * can plumb it straight into a [`ThroughputJob`] without re-deriving it from
 * the model + host.
 */
export type ResolvedModel = {
  modelId: string;
  providerId: string;
  /**
   * Expected memory footprint, in megabytes, when the registry knows it.
   * Omitted (Rust `None`) for cloud models (always-fits) and for local
   * models whose row in `models.toml` doesn't yet declare a memory
   * estimate. A follow-up adds an `estimated_memory_mb` field to the Model
   * schema; until then memory-budget filtering is best-effort on local
   * models (the resolver still rejects cloud models from `LocalOnly`
   * queries).
   */
  expectedMemoryMb?: number;
  targetSilicon: TargetSilicon;
  hwCapabilityTier: HwCapabilityTier;
  /**
   * Human-readable explanation of why this model was picked. Surfaced in
   * logs + UI when a persona's resolution changes (e.g., "switched from
   * gpt-4o to claude-sonnet-4-5 because PreferLocal couldn't satisfy
   * required Capability::Vision on this host").
   */
  reason: string;
};
15 changes: 15 additions & 0 deletions src/shared/generated/cognition/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@
// Source: generator/generate-rust-bindings.ts
// Re-generate: npx tsx generator/generate-rust-bindings.ts

export type { AdaptiveThroughputPlan } from './AdaptiveThroughputPlan';
export type { AdaptiveThroughputRequest } from './AdaptiveThroughputRequest';
export type { HostCapability } from './HostCapability';
export type { HwCapabilityTier } from './HwCapabilityTier';
export type { LeverCall } from './LeverCall';
export type { LeverName } from './LeverName';
export type { LocalOrCloudPolicy } from './LocalOrCloudPolicy';
export type { MediaItemLite } from './MediaItemLite';
export type { ModelRequirement } from './ModelRequirement';
export type { NativeBatchOutcome } from './NativeBatchOutcome';
export type { ParsedToolBatch } from './ParsedToolBatch';
export type { PersonaMediaConfigLite } from './PersonaMediaConfigLite';
Expand All @@ -18,10 +24,19 @@ export type { RecipeRagSourcePolicy } from './RecipeRagSourcePolicy';
export type { RecipeTurnBatchPlan } from './RecipeTurnBatchPlan';
export type { RecipeTurnBatchRequest } from './RecipeTurnBatchRequest';
export type { RecipeTurnTrigger } from './RecipeTurnTrigger';
export type { ResolutionError } from './ResolutionError';
export type { ResolvedModel } from './ResolvedModel';
export type { ResourceClass } from './ResourceClass';
export type { ResponderDecision } from './ResponderDecision';
export type { SharedAnalysis } from './SharedAnalysis';
export type { SharedAnalysisIntent } from './SharedAnalysisIntent';
export type { SharedRagSourcePlan } from './SharedRagSourcePlan';
export type { TargetSilicon } from './TargetSilicon';
export type { ThroughputJob } from './ThroughputJob';
export type { ThroughputLaneBudget } from './ThroughputLaneBudget';
export type { ThroughputLease } from './ThroughputLease';
export type { ThroughputLeaseRevocationPolicy } from './ThroughputLeaseRevocationPolicy';
export type { ThroughputLeaseSnapshot } from './ThroughputLeaseSnapshot';
export type { ToolExecutionContext } from './ToolExecutionContext';
export type { ToolInvocation } from './ToolInvocation';
export type { ToolOutcome } from './ToolOutcome';
12 changes: 12 additions & 0 deletions src/shared/generated/model_registry/Arch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.

/**
 * Model architecture family. It is typed (not stringly-typed) so call sites
 * use enum matching rather than string comparison. Adding a new arch means:
 * (a) add the variant here, (b) add a TOML row with `arch = "new_arch"`.
 * Code that dispatches by arch then gets a compile error reminding the
 * author to handle the new variant — precisely the pattern Joel's axiom
 * calls for ("code should NEVER know the model" — code knows the ARCHETYPES
 * via this enum, models are data).
 */
export type Arch =
  | "qwen2"
  | "qwen3"
  | "qwen35"
  | "llama"
  | "claude"
  | "gpt"
  | "gemini"
  | "grok"
  | "deepseek"
  | "unknown";
10 changes: 10 additions & 0 deletions src/shared/generated/model_registry/ProviderKind.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.

/**
 * Where a provider runs its inference. The resolver consumes this to honor
 * `LocalOrCloudPolicy` without needing a hardcoded provider-id list.
 * Providers default to [`ProviderKind::Cloud`], so adding a new cloud
 * provider TOML row doesn't require an explicit `kind` line; local providers
 * MUST declare `kind = "local"` explicitly.
 */
export type ProviderKind =
  | "local"
  | "cloud";
2 changes: 2 additions & 0 deletions src/shared/generated/model_registry/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@
// Source: generator/generate-rust-bindings.ts
// Re-generate: npx tsx generator/generate-rust-bindings.ts

export type { Arch } from './Arch';
export type { Capability } from './Capability';
export type { ProviderKind } from './ProviderKind';
2 changes: 2 additions & 0 deletions src/workers/continuum-core/config/providers.toml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ model_prefixes = ["gemini"]
[[provider]]
id = "docker-model-runner"
name = "Docker Model Runner (local Metal/CUDA)"
kind = "local"
# IPv4 literal on purpose — `localhost` on macOS resolves to both ::1 and
# 127.0.0.1 and Docker Desktop's model runner listens on IPv4 only. When
# the hyper client tries ::1 first it waits for the connect path to fall
Expand All @@ -98,6 +99,7 @@ auth = "none"
[[provider]]
id = "llamacpp-local"
name = "Llama.cpp (in-process Metal/CUDA)"
kind = "local"
base_url = "in-process"
auth = "none"
default_model = "continuum-ai/qwen3.5-4b-code-forged-GGUF"
Expand Down
2 changes: 2 additions & 0 deletions src/workers/continuum-core/src/cognition/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
//! `ResponderDecision`)

pub mod adaptive_throughput;
pub mod model_resolver;
pub mod response_orchestrator;
pub mod response_validator;
pub mod shared_analysis;
Expand All @@ -37,6 +38,7 @@ pub mod turn_batch;
pub mod types;

pub use adaptive_throughput::*;
pub use model_resolver::*;
pub use response_orchestrator::{
DEFAULT_RELEVANCE_THRESHOLD, PersonaSlot, orchestrate, score_persona,
};
Expand Down
Loading
Loading