diff --git a/src/shared/generated/forge/AlloyHardware.ts b/src/shared/generated/forge/AlloyHardware.ts new file mode 100644 index 000000000..b5c0774cf --- /dev/null +++ b/src/shared/generated/forge/AlloyHardware.ts @@ -0,0 +1,30 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Hardware envelope for the recipe. Tells the foundry what device + * tier to target + estimates resource needs. Mirrors the existing + * Python `AlloyHardware` shape. + */ +export type AlloyHardware = { +/** + * Minimum VRAM (GB) required to run the foundry pipeline. + */ +min_vram_gb?: number, +/** + * Recommended VRAM (GB) for comfortable headroom. + */ +recommended_vram_gb?: number, +/** + * Estimated wall-clock duration for a full forge run (informational). + */ +estimated_duration_minutes?: number, +/** + * Whether the pipeline can fall back to CPU if no GPU available. + */ +supports_cpu: boolean, +/** + * Devices the recipe has been validated on (informational; the + * artifact's `hardware_verified` is the authoritative post-run + * list). + */ +tested_on: Array<string>, }; diff --git a/src/shared/generated/forge/AlloySource.ts b/src/shared/generated/forge/AlloySource.ts new file mode 100644 index 000000000..531452fc5 --- /dev/null +++ b/src/shared/generated/forge/AlloySource.ts @@ -0,0 +1,31 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Source model identifier — what the foundry forges from. + * + * Mirrors the `AlloySource` shape from + * `forge-alloy/python/forge_alloy/types.py`. Phase 2 replaces the Python + * type with a `derive(TS)` import of this Rust type as the source of + * truth. + */ +export type AlloySource = { +/** + * Hugging Face model identifier (e.g., "Qwen/Qwen3.5-4B-Instruct"). + */ +base_model: string, +/** + * Architecture family (e.g., "qwen3", "llama", "mistral"). 
+ */ +architecture: string, +/** + * Optional pinned revision (commit / branch / tag) for reproducibility. + */ +revision?: string, +/** + * MoE indicator. Defaults to false (dense models). + */ +is_moe: boolean, +/** + * Number of experts in the MoE (None for dense). + */ +total_experts?: number, }; diff --git a/src/shared/generated/forge/BenchmarkDef.ts b/src/shared/generated/forge/BenchmarkDef.ts new file mode 100644 index 000000000..0d9a54331 --- /dev/null +++ b/src/shared/generated/forge/BenchmarkDef.ts @@ -0,0 +1,25 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Benchmark to run during evaluation. Mirrors the existing Python + * `BenchmarkDef` shape so Phase 2 can swap the Python type to a + * generated client of this Rust type. + */ +export type BenchmarkDef = { +/** + * Benchmark name (e.g., "humaneval", "mmlu", "hellaswag"). + */ +name: string, +/** + * Optional sub-task / split name within the benchmark. + */ +subset?: string, +/** + * N-shot setting. None = benchmark default. + */ +n_shot?: number, +/** + * Whether this benchmark's result should be submitted to a + * leaderboard. Defaults to false. + */ +submit_to_leaderboard: boolean, }; diff --git a/src/shared/generated/forge/CorpusRef.ts b/src/shared/generated/forge/CorpusRef.ts new file mode 100644 index 000000000..f2a655d4e --- /dev/null +++ b/src/shared/generated/forge/CorpusRef.ts @@ -0,0 +1,36 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Pointer to the calibration corpus used for the importance profile + + * (eventual) compensation LoRA. Held-out from `evaluation_benchmarks`. + * + * Bytes don't live in Continuum's ORM (corpora can be MB-GB). The + * recipe carries a pointer; the bytes live in HF datasets, foundry- + * node-local storage, or wherever the `source_url` resolves. 
+ * + * `content_hash` uses the canonical `"sha256:<hex>"` format that + * matches `persona::admission` content_hash on the engram side + * (consensus position #8 from the design review). Cross-domain + * consistency: any two subsystems comparing hashes can do + * string-equality without normalization. + */ +export type CorpusRef = { +/** + * Human-readable corpus name (e.g., "wikitext-103-v1"). + */ +name: string, +/** + * SHA-256 of the canonical corpus contents in `"sha256:<hex>"` form. + * Tamper-detection anchor + cross-domain equality with admission's + * content_hash convention. + */ +content_hash: string, +/** + * Size in bytes (informational; helps the foundry pre-flight storage). + */ +size_bytes: number, +/** + * Where the bytes live (HF dataset id, file:// URL, etc.). Optional + * because some corpora are foundry-node-local with no shareable URL. + */ +source_url?: string, }; diff --git a/src/shared/generated/forge/ForgeArtifact.ts b/src/shared/generated/forge/ForgeArtifact.ts new file mode 100644 index 000000000..dd2ae0a7b --- /dev/null +++ b/src/shared/generated/forge/ForgeArtifact.ts @@ -0,0 +1,139 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { AlloyHardware } from "./AlloyHardware"; +import type { AlloySource } from "./AlloySource"; +import type { BenchmarkDef } from "./BenchmarkDef"; +import type { CorpusRef } from "./CorpusRef"; +import type { HardwareProfile } from "./HardwareProfile"; +import type { PriorBaseline } from "./PriorBaseline"; +import type { QuantTier } from "./QuantTier"; + +/** + * Foundry-generated output. Combines (a) a snapshot of the recipe + * fields the foundry consumed + (b) execution outputs that only the + * foundry knows. + * + * Stored as a Continuum entity (Phase 3 wires the registry). Read by + * `publish_model.py` as the source of truth for what gets published. + * Never authored by hand. 
+ */ +export type ForgeArtifact = { +/** + * Stable artifact id (different from recipe id — one recipe can + * produce many artifacts across multiple runs / hardware tiers). + */ +id: string, +/** + * Which recipe produced this artifact. + */ +recipe_id: string, +/** + * Recipe version at run time (semver). Pinned so a later recipe + * revision doesn't retroactively change what this artifact claims + * to come from. + */ +recipe_version: string, +/** + * Recipe `name` snapshot (denormalized — lets the artifact card + * render without re-fetching the recipe entity). + */ +recipe_name: string, +/** + * Paragraph for the README/card. + */ +description: string, +/** + * One-line plain-English headline. + */ +user_summary: string, +/** + * Recipe author at the time of run. + */ +author: string, +/** + * Tags from the recipe at run time. + */ +tags: Array<string>, +/** + * SPDX license identifier. + */ +license: string, +/** + * Methodology paper URL from the recipe at run time. + */ +methodology_paper_url?: string, +/** + * Limitations from the recipe at run time. + */ +limitations: Array<string>, +/** + * §4.1.3.4 negative-baselines preserved from the recipe. + */ +prior_metric_baselines: Array<PriorBaseline>, +/** + * Source model snapshot. + */ +source: AlloySource, +/** + * Calibration corpus pointer used for THIS forge. + */ +calibration_corpus: CorpusRef, +/** + * Quant tiers requested by the recipe. + */ +quant_tiers: Array<QuantTier>, +/** + * Benchmarks requested by the recipe. + */ +evaluation_benchmarks: Array<BenchmarkDef>, +/** + * Hardware target from the recipe. + */ +hardware: AlloyHardware, +/** + * When the foundry started this run (epoch milliseconds UTC). + */ +forged_at_ms: number, +/** + * Total wall-clock duration of the forge run (minutes). + */ +duration_minutes?: number, +/** + * Final parameter count after prune/compact (in billions). + */ +forged_params_b?: number, +/** + * Active params per token for MoE artifacts (in billions). None + * for dense models. 
+ */ +active_params_b?: number, +/** + * Devices the artifact has been verified on, with measured + * throughput + memory. Drives the published card's device grid. + */ +hardware_verified: Array<HardwareProfile>, +/** + * Content-addressable hash of the populated artifact JSON. Used + * as the verification anchor by `publish_model.py` and by the + * proof-contract trust layer (see grid/FORGE-ALLOY-PROOF-CONTRACTS.md). + */ +alloy_hash?: string, +/** + * Full execution results blob. v1 carries this as opaque JSON + * matching the existing Python `AlloyResults` shape (benchmarks, + * perplexity, samples, integrity attestation). Phase 2 types this + * as a first-class Rust struct once the foundry executor needs it. + */ +results?: unknown, +/** + * Publication receipt blob. Same Phase 2 deferral as `results` — + * opaque JSON for v1, typed when the publish path is ported into + * Rust. Mirrors the existing Python `AlloyReceipt`. + */ +receipt?: unknown, +/** + * Integrity attestation blob. Carries the IntegrityAttestation + * (signed proof of the forge run) when the run was attested. + * Opaque JSON for v1; typed when the proof-contract integration + * (grid/FORGE-ALLOY-PROOF-CONTRACTS.md) lands in Rust. + */ +integrity?: unknown, }; diff --git a/src/shared/generated/forge/ForgeRecipe.ts b/src/shared/generated/forge/ForgeRecipe.ts new file mode 100644 index 000000000..e67bcbcce --- /dev/null +++ b/src/shared/generated/forge/ForgeRecipe.ts @@ -0,0 +1,122 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { AlloyHardware } from "./AlloyHardware"; +import type { AlloySource } from "./AlloySource"; +import type { BenchmarkDef } from "./BenchmarkDef"; +import type { CorpusRef } from "./CorpusRef"; +import type { PriorBaseline } from "./PriorBaseline"; +import type { QuantTier } from "./QuantTier"; + +/** + * Authored recipe — the input the foundry consumes. 
+ * + * Stored as a Continuum entity (Phase 3 wires the entity registry). + * Edited via standard `Commands.execute('data/...')` primitives. Never + * consumed directly by `publish_model.py` — that script reads the + * `ForgeArtifact` (sibling type) the foundry emits. + * + * All prose fields the model card renders live HERE, not in a hand- + * authored `.alloy.json`. + */ +export type ForgeRecipe = { +/** + * Stable recipe identifier. Generated at recipe creation time. + */ +id: string, +/** + * Recipe name (e.g., "qwen3.5-4b-code-aggressive"). + */ +name: string, +/** + * Semantic version of THIS recipe (semver). Bump when revising + * the recipe; lineage chain via `parent_recipe_id`. + */ +version: string, +/** + * Paragraph for the README/card. + */ +description: string, +/** + * One-line plain-English headline (used as the model card subtitle). + */ +user_summary: string, +/** + * Recipe author (e.g., "continuum-ai" or a user handle). + */ +author: string, +/** + * Tags for discovery (e.g., ["code", "pruning", "4b"]). + */ +tags: Array<string>, +/** + * SPDX license identifier or shorthand. Default "apache-2.0"; the + * caller is responsible for inheriting the source model's license + * when applicable (consensus position #10 — `license_strategy` + * auto-inheritance lands in v2). + */ +license: string, +/** + * Optional link to the methodology paper. + */ +methodology_paper_url?: string, +/** + * Known limitations of the recipe (rendered into the model card). + */ +limitations: Array<string>, +/** + * §4.1.3.4 negative-baselines preserved for falsifiability. + */ +prior_metric_baselines: Array<PriorBaseline>, +/** + * Base model + architecture metadata. + */ +source: AlloySource, +/** + * Ordered pipeline of recipe stages. v1 carries stages as opaque + * JSON values matching the existing `AlloyStage` discriminated + * union in `forge-alloy/python/forge_alloy/types.py`. 
Phase 2 + * replaces this with a typed `Vec<RecipeStage>` enum where each + * variant carries an optional `notes: String` field for the + * methodology blockquote (consensus position #2 from the design + * review — per-variant notes, not index-keyed sidecar). + */ +stages: Array<unknown>, +/** + * How many times to repeat the prune→train cycle (1 = single pass). + * Most recipes are 1. + */ +cycles: number, +/** + * Held-out corpus pointer (importance profile + LoRA training). + */ +calibration_corpus: CorpusRef, +/** + * Which output formats / tiers to produce (top-level per consensus + * position #3 — quant tiers are an artifact property, not a stage + * config). + */ +quant_tiers: Array<QuantTier>, +/** + * Benchmarks to run during evaluation. + */ +evaluation_benchmarks: Array<BenchmarkDef>, +/** + * Target hardware envelope (VRAM, device list, CPU fallback). + */ +hardware: AlloyHardware, +/** + * Parent recipe id, if this recipe was forked from another. None + * for net-new recipes. v1 lineage is one-directional (recipe → + * recipe); bidirectional lineage (recipe ← artifact) is a future + * `parent_artifact_ids` field per consensus position #9. + */ +parent_recipe_id?: string, +/** + * When the recipe was authored (epoch milliseconds UTC). Same + * convention as `Engram.admitted_at_ms` from the engram thread — + * `u64` epoch ms, not chrono::DateTime. + */ +authored_at_ms: number, +/** + * When the recipe was last edited (epoch milliseconds UTC). + */ +updated_at_ms: number, }; diff --git a/src/shared/generated/forge/HardwareProfile.ts b/src/shared/generated/forge/HardwareProfile.ts new file mode 100644 index 000000000..757470b9b --- /dev/null +++ b/src/shared/generated/forge/HardwareProfile.ts @@ -0,0 +1,35 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * One device the foundry actually ran the artifact on. 
Composes into + * `ForgeArtifact.hardware_verified` so the model card's device-grid + * reflects measured reality, not just the recipe's `tested_on` claim. + * + * Mirrors the existing Python `HardwareProfile` shape; Phase 2 makes + * the Rust type the source of truth. + */ +export type HardwareProfile = { +/** + * Device label (e.g., "m5-pro", "rtx-5090", "linux-amd64"). + */ +device: string, +/** + * Format the device ran (e.g., "gguf-Q4_K_M", "mlx", "safetensors"). + */ +format: string, +/** + * On-disk size in GB. + */ +size_gb?: number, +/** + * Measured throughput. + */ +tokens_per_sec?: number, +/** + * Peak memory usage during inference. + */ +memory_usage_gb?: number, +/** + * Whether the verification run actually completed without error. + */ +verified: boolean, }; diff --git a/src/shared/generated/forge/PriorBaseline.ts b/src/shared/generated/forge/PriorBaseline.ts new file mode 100644 index 000000000..dcc4e8ae8 --- /dev/null +++ b/src/shared/generated/forge/PriorBaseline.ts @@ -0,0 +1,28 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * §4.1.3.4 negative-baseline metric the artifact preserves for + * falsifiability. Each baseline names a metric + measured value + + * source so a reader can falsify the published improvement claim. + */ +export type PriorBaseline = { +/** + * Metric name (e.g., "perplexity", "humaneval-pass1"). + */ +metric: string, +/** + * Measured baseline value. + */ +value: number, +/** + * Where the baseline came from (e.g., "qwen3.5-4b base @ revision XYZ"). + */ +source: string, +/** + * ISO-8601 timestamp of when the measurement was taken. + */ +measured_at: string, +/** + * Free-text description of how the measurement was performed. 
+ */ +measurement_method: string, }; diff --git a/src/shared/generated/forge/QuantTier.ts b/src/shared/generated/forge/QuantTier.ts new file mode 100644 index 000000000..5488f6630 --- /dev/null +++ b/src/shared/generated/forge/QuantTier.ts @@ -0,0 +1,25 @@ +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Which GGUF / MLX / safetensors / onnx tier(s) get published from + * one recipe. Top-level on the recipe (consensus position #3 from the + * design review) rather than nested inside a `QuantStage` — quant + * tiers are a property of the published artifact, NOT a property of + * the pipeline stage that produces them. + */ +export type QuantTier = { +/** + * Output format (e.g., "gguf", "mlx", "safetensors", "onnx"). + */ +format: string, +/** + * Quantization variants for this format (e.g., ["Q4_K_M", "Q5_K_M", + * "Q8_0"] for gguf). + */ +variants: Array<string>, +/** + * Which device tiers this tier targets (e.g., ["m1-8gb", "m5-pro", + * "rtx-5090"]). Helps the foundry decide which devices to verify + * the quantized output on. 
+ */ +target_devices: Array<string>, }; diff --git a/src/shared/generated/forge/index.ts b/src/shared/generated/forge/index.ts new file mode 100644 index 000000000..34c7d4979 --- /dev/null +++ b/src/shared/generated/forge/index.ts @@ -0,0 +1,13 @@ +// Auto-generated barrel export — do not edit manually +// Source: generator/generate-rust-bindings.ts +// Re-generate: npx tsx generator/generate-rust-bindings.ts + +export type { AlloyHardware } from './AlloyHardware'; +export type { AlloySource } from './AlloySource'; +export type { BenchmarkDef } from './BenchmarkDef'; +export type { CorpusRef } from './CorpusRef'; +export type { ForgeArtifact } from './ForgeArtifact'; +export type { ForgeRecipe } from './ForgeRecipe'; +export type { HardwareProfile } from './HardwareProfile'; +export type { PriorBaseline } from './PriorBaseline'; +export type { QuantTier } from './QuantTier'; diff --git a/src/shared/generated/index.ts b/src/shared/generated/index.ts index 0ef869930..1156dd319 100644 --- a/src/shared/generated/index.ts +++ b/src/shared/generated/index.ts @@ -35,6 +35,7 @@ export type { VideoInput } from './ai'; export * from './code'; export * from './cognition'; export * from './dataset'; +export * from './forge'; export * from './gpu'; export * from './grid'; export * from './inference'; diff --git a/src/workers/continuum-core/src/forge/artifact.rs b/src/workers/continuum-core/src/forge/artifact.rs new file mode 100644 index 000000000..2fe15f761 --- /dev/null +++ b/src/workers/continuum-core/src/forge/artifact.rs @@ -0,0 +1,358 @@ +//! ForgeArtifact — foundry-generated output for a recipe. +//! +//! Per the design at docs/architecture/FORGE-RECIPE-AS-ENTITY.md. +//! The artifact is what the foundry emits AFTER consuming a `ForgeRecipe` +//! and running its stages. It carries the recipe lineage (so you can +//! always answer "which recipe produced this?") plus everything the +//! foundry measured during the run that no human could have known +//! 
beforehand: benchmark results, hardware-verified device list, alloy +//! content hash, publication receipt, integrity attestation. +//! +//! The artifact is what `publish_model.py` reads. The recipe is what +//! a human authors. The foundry is the function recipe → artifact. +//! +//! # What this PR ships (Phase 1a of #1164) +//! +//! - `ForgeArtifact` Rust value type with ts-rs bindings + tests +//! - Recipe lineage fields (`recipe_id`, `recipe_version`, `forged_at_ms`) +//! - Result fields kept opaque (`serde_json::Value`) for v1 — Phase 2 +//! types `AlloyResults`, `AlloyReceipt`, `IntegrityAttestation` as +//! first-class Rust structs once the foundry executor lands and +//! needs them. +//! +//! # Naming (consensus position #1) +//! +//! "ForgeAlloy" → "ForgeArtifact" rename happens in **Phase 1b** (TS +//! side, 15 file references; separate slice). This Rust file ships +//! with the new name from day 1. + +use serde::{Deserialize, Serialize}; +use ts_rs::TS; +use uuid::Uuid; + +use super::recipe::{AlloyHardware, AlloySource, BenchmarkDef, CorpusRef, PriorBaseline, QuantTier}; + +//============================================================================= +// HARDWARE PROFILE — verified post-run +//============================================================================= + +/// One device the foundry actually ran the artifact on. Composes into +/// `ForgeArtifact.hardware_verified` so the model card's device-grid +/// reflects measured reality, not just the recipe's `tested_on` claim. +/// +/// Mirrors the existing Python `HardwareProfile` shape; Phase 2 makes +/// the Rust type the source of truth. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/HardwareProfile.ts")] +pub struct HardwareProfile { + /// Device label (e.g., "m5-pro", "rtx-5090", "linux-amd64"). + pub device: String, + /// Format the device ran (e.g., "gguf-Q4_K_M", "mlx", "safetensors"). 
+ pub format: String, + /// On-disk size in GB. + #[ts(optional)] + pub size_gb: Option, + /// Measured throughput. + #[ts(optional)] + pub tokens_per_sec: Option, + /// Peak memory usage during inference. + #[ts(optional)] + pub memory_usage_gb: Option, + /// Whether the verification run actually completed without error. + #[serde(default)] + pub verified: bool, +} + +//============================================================================= +// FORGE ARTIFACT +//============================================================================= + +/// Foundry-generated output. Combines (a) a snapshot of the recipe +/// fields the foundry consumed + (b) execution outputs that only the +/// foundry knows. +/// +/// Stored as a Continuum entity (Phase 3 wires the registry). Read by +/// `publish_model.py` as the source of truth for what gets published. +/// Never authored by hand. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/ForgeArtifact.ts")] +pub struct ForgeArtifact { + //--- Identity ---------------------------------------------------------- + + /// Stable artifact id (different from recipe id — one recipe can + /// produce many artifacts across multiple runs / hardware tiers). + #[ts(type = "string")] + pub id: Uuid, + + //--- Recipe lineage (frozen at run time) ------------------------------ + + /// Which recipe produced this artifact. + #[ts(type = "string")] + pub recipe_id: Uuid, + + /// Recipe version at run time (semver). Pinned so a later recipe + /// revision doesn't retroactively change what this artifact claims + /// to come from. + pub recipe_version: String, + + /// Recipe `name` snapshot (denormalized — lets the artifact card + /// render without re-fetching the recipe entity). 
+ pub recipe_name: String, + + //--- Snapshot of recipe authored fields ------------------------------- + // + // Denormalized so the artifact carries everything the model card + // needs without joining back to the recipe. If the recipe edits a + // field after this artifact was forged, this artifact's snapshot + // stays as-was — the recipe lineage points to the recipe-version + // that was current at run time. + + /// Paragraph for the README/card. + pub description: String, + /// One-line plain-English headline. + pub user_summary: String, + /// Recipe author at the time of run. + pub author: String, + /// Tags from the recipe at run time. + #[serde(default)] + pub tags: Vec, + /// SPDX license identifier. + pub license: String, + /// Methodology paper URL from the recipe at run time. + #[ts(optional)] + pub methodology_paper_url: Option, + /// Limitations from the recipe at run time. + #[serde(default)] + pub limitations: Vec, + /// §4.1.3.4 negative-baselines preserved from the recipe. + #[serde(default)] + pub prior_metric_baselines: Vec, + /// Source model snapshot. + pub source: AlloySource, + /// Calibration corpus pointer used for THIS forge. + pub calibration_corpus: CorpusRef, + /// Quant tiers requested by the recipe. + #[serde(default)] + pub quant_tiers: Vec, + /// Benchmarks requested by the recipe. + #[serde(default)] + pub evaluation_benchmarks: Vec, + /// Hardware target from the recipe. + pub hardware: AlloyHardware, + + //--- Execution outputs (only the foundry knows these) ----------------- + + /// When the foundry started this run (epoch milliseconds UTC). + #[ts(type = "number")] + pub forged_at_ms: u64, + + /// Total wall-clock duration of the forge run (minutes). + #[ts(optional)] + pub duration_minutes: Option, + + /// Final parameter count after prune/compact (in billions). + #[ts(optional)] + pub forged_params_b: Option, + + /// Active params per token for MoE artifacts (in billions). None + /// for dense models. 
+ #[ts(optional)] + pub active_params_b: Option, + + /// Devices the artifact has been verified on, with measured + /// throughput + memory. Drives the published card's device grid. + #[serde(default)] + pub hardware_verified: Vec, + + /// Content-addressable hash of the populated artifact JSON. Used + /// as the verification anchor by `publish_model.py` and by the + /// proof-contract trust layer (see grid/FORGE-ALLOY-PROOF-CONTRACTS.md). + #[ts(optional)] + pub alloy_hash: Option, + + /// Full execution results blob. v1 carries this as opaque JSON + /// matching the existing Python `AlloyResults` shape (benchmarks, + /// perplexity, samples, integrity attestation). Phase 2 types this + /// as a first-class Rust struct once the foundry executor needs it. + #[ts(optional, type = "unknown")] + pub results: Option, + + /// Publication receipt blob. Same Phase 2 deferral as `results` — + /// opaque JSON for v1, typed when the publish path is ported into + /// Rust. Mirrors the existing Python `AlloyReceipt`. + #[ts(optional, type = "unknown")] + pub receipt: Option, + + /// Integrity attestation blob. Carries the IntegrityAttestation + /// (signed proof of the forge run) when the run was attested. + /// Opaque JSON for v1; typed when the proof-contract integration + /// (grid/FORGE-ALLOY-PROOF-CONTRACTS.md) lands in Rust. 
+ #[ts(optional, type = "unknown")] + pub integrity: Option, +} + +//============================================================================= +// TESTS +//============================================================================= + +#[cfg(test)] +mod tests { + use super::*; + + fn fixed_now_ms() -> u64 { + 1_715_625_600_000 + } + + fn sample_artifact() -> ForgeArtifact { + ForgeArtifact { + id: Uuid::new_v4(), + recipe_id: Uuid::nil(), + recipe_version: "1.0.0".to_string(), + recipe_name: "qwen3.5-4b-code-aggressive".to_string(), + description: "Forged from the qwen3.5-4b-code-aggressive recipe.".to_string(), + user_summary: "Smaller, faster Qwen3.5-4B for code.".to_string(), + author: "continuum-ai".to_string(), + tags: vec!["code".to_string(), "pruning".to_string()], + license: "apache-2.0".to_string(), + methodology_paper_url: None, + limitations: vec!["English-only".to_string()], + prior_metric_baselines: vec![], + source: AlloySource { + base_model: "Qwen/Qwen3.5-4B-Instruct".to_string(), + architecture: "qwen3".to_string(), + revision: None, + is_moe: false, + total_experts: None, + }, + calibration_corpus: CorpusRef { + name: "wikitext-103-v1".to_string(), + content_hash: "sha256:abc".to_string(), + size_bytes: 100, + source_url: None, + }, + quant_tiers: vec![], + evaluation_benchmarks: vec![], + hardware: AlloyHardware { + min_vram_gb: Some(8.0), + recommended_vram_gb: Some(16.0), + estimated_duration_minutes: None, + supports_cpu: false, + tested_on: vec![], + }, + forged_at_ms: fixed_now_ms(), + duration_minutes: Some(75.0), + forged_params_b: Some(2.4), + active_params_b: None, + hardware_verified: vec![HardwareProfile { + device: "m5-pro".to_string(), + format: "gguf-Q4_K_M".to_string(), + size_gb: Some(2.6), + tokens_per_sec: Some(45.0), + memory_usage_gb: Some(3.2), + verified: true, + }], + alloy_hash: Some("sha256:aa61c4bdf463847c".to_string()), + results: Some(serde_json::json!({ + "benchmarks": [{"name": "humaneval", "metrics": {"pass1": 
0.32}}] + })), + receipt: None, + integrity: None, + } + } + + /// What this catches: full ForgeArtifact round-trips through serde + /// without dropping any of the recipe-snapshot or execution fields. + /// publish_model.py reads this; field loss = silent publish bugs. + #[test] + fn forge_artifact_serde_roundtrip_preserves_all_fields() { + let original = sample_artifact(); + let json = serde_json::to_string(&original).expect("serialize"); + let back: ForgeArtifact = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(original.recipe_id, back.recipe_id); + assert_eq!(original.recipe_version, back.recipe_version); + assert_eq!(original.recipe_name, back.recipe_name); + assert_eq!(original.description, back.description); + assert_eq!(original.author, back.author); + assert_eq!(original.tags, back.tags); + assert_eq!(original.limitations, back.limitations); + assert_eq!(original.source.base_model, back.source.base_model); + assert_eq!( + original.calibration_corpus.content_hash, + back.calibration_corpus.content_hash + ); + assert_eq!(original.forged_at_ms, back.forged_at_ms); + assert_eq!(original.forged_params_b, back.forged_params_b); + assert_eq!(back.hardware_verified.len(), 1); + assert_eq!( + original.hardware_verified[0].device, + back.hardware_verified[0].device + ); + assert_eq!(original.alloy_hash, back.alloy_hash); + assert_eq!(original.results, back.results); + } + + /// What this catches: opaque results/receipt/integrity blobs round- + /// trip exactly. Phase 2 types these; until then, faithful + /// pass-through is the contract. 
+ #[test] + fn opaque_blob_fields_round_trip_unchanged() { + let mut artifact = sample_artifact(); + artifact.receipt = Some(serde_json::json!({ + "publications": [{"target": "huggingface", "url": "https://example.com"}] + })); + artifact.integrity = Some(serde_json::json!({ + "trustLevel": "self-attested", + "modelHash": "sha256:def", + })); + let json = serde_json::to_string(&artifact).expect("serialize"); + let back: ForgeArtifact = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(artifact.results, back.results); + assert_eq!(artifact.receipt, back.receipt); + assert_eq!(artifact.integrity, back.integrity); + } + + /// What this catches: an artifact with no execution results yet + /// (e.g., partial run that errored before benchmarks completed) + /// still serializes. Critical for forensic captures of failed runs + /// — the artifact entity must survive partial state. + #[test] + fn partial_artifact_with_none_results_serializes() { + let mut artifact = sample_artifact(); + artifact.results = None; + artifact.receipt = None; + artifact.integrity = None; + artifact.alloy_hash = None; + artifact.duration_minutes = None; + artifact.forged_params_b = None; + let json = serde_json::to_string(&artifact).expect("serialize"); + let back: ForgeArtifact = serde_json::from_str(&json).expect("deserialize"); + assert!(back.results.is_none()); + assert!(back.alloy_hash.is_none()); + assert_eq!(back.recipe_id, artifact.recipe_id, "lineage preserved even on partial"); + } + + /// What this catches: recipe_id + recipe_version pinning means a + /// later recipe edit can't retroactively rewrite what this artifact + /// claims to come from. Snapshot semantics for the lineage fields. + #[test] + fn recipe_lineage_fields_are_not_optional() { + // Compile-time: the struct definition forces non-optional + // recipe_id + recipe_version + recipe_name. This test is the + // runtime spec that they're populated. 
+ let artifact = sample_artifact(); + assert!(!artifact.recipe_version.is_empty(), "recipe_version is required"); + assert!(!artifact.recipe_name.is_empty(), "recipe_name is required"); + } + + // ── ts-rs bindings — same pattern as persona/engram.rs ────────────── + + #[test] + fn export_bindings_hardware_profile() { + HardwareProfile::export_all(&ts_rs::Config::default()).unwrap(); + } + + #[test] + fn export_bindings_forge_artifact() { + ForgeArtifact::export_all(&ts_rs::Config::default()).unwrap(); + } +} diff --git a/src/workers/continuum-core/src/forge/mod.rs b/src/workers/continuum-core/src/forge/mod.rs new file mode 100644 index 000000000..71cb623ed --- /dev/null +++ b/src/workers/continuum-core/src/forge/mod.rs @@ -0,0 +1,17 @@ +//! Forge — recipe-as-entity and foundry artifact types. +//! +//! Per the design at `docs/architecture/FORGE-RECIPE-AS-ENTITY.md` +//! (continuum#1164/#1165). Phase 1a: pure value types (recipe, artifact, +//! and supporting structs). Phase 1b: rename existing TS-side `ForgeAlloy` +//! to `ForgeArtifact` across the 15 referencing files. Phase 2: typed +//! `RecipeStage` enum and typed `AlloyResults`/`AlloyReceipt`/ +//! `IntegrityAttestation` (currently `serde_json::Value` blobs). Phase 3: +//! entity registry registration plus the `forge/run` IPC. + +pub mod artifact; +pub mod recipe; + +pub use artifact::{ForgeArtifact, HardwareProfile}; +pub use recipe::{ + AlloyHardware, AlloySource, BenchmarkDef, CorpusRef, ForgeRecipe, PriorBaseline, QuantTier, +}; diff --git a/src/workers/continuum-core/src/forge/recipe.rs b/src/workers/continuum-core/src/forge/recipe.rs new file mode 100644 index 000000000..4d2aab1a1 --- /dev/null +++ b/src/workers/continuum-core/src/forge/recipe.rs @@ -0,0 +1,545 @@ +//! ForgeRecipe — authored input for the foundry pipeline. +//! +//! Per the design at docs/architecture/FORGE-RECIPE-AS-ENTITY.md +//! (continuum#1164/#1165). The recipe captures everything a human +//! 
decides BEFORE running the foundry: prose fields, source model, +//! pipeline stages with notes, calibration corpus, quant tiers, +//! evaluation benchmarks, prior baselines, hardware target. The +//! foundry consumes a recipe + execution results and emits a +//! `ForgeArtifact` (see sibling `artifact.rs`). +//! +//! # What this PR ships (Phase 1a of #1164) +//! +//! - Pure Rust value types for ForgeRecipe + supporting structs +//! - ts-rs bindings to `shared/generated/forge/` +//! - Serde roundtrip + ts-rs export tests +//! +//! # Deferred to later phases +//! +//! - **Phase 1b:** rename existing TS-side `ForgeAlloy` → `ForgeArtifact` +//! (15 TS files reference the old name; separate slice). +//! - **Phase 2:** typed `RecipeStage` enum matching the existing +//! `AlloyStage` discriminated union from forge-alloy/python/forge_alloy/types.py +//! (ports the stage zoo into Rust as the source of truth). v1 carries +//! stages as `Vec` so the recipe is usable today. +//! - **Phase 2:** typed `AlloyResults`, `AlloyReceipt`, `IntegrityAttestation` +//! on the artifact side. +//! - **Phase 3:** entity registry registration + `data/*` collection wiring +//! (the recipe types ship first; storage hooks them up next). +//! +//! # Conventions (matching existing persona/* modules) +//! +//! - `Uuid` fields use `#[ts(type = "string")]` for the TS export. +//! - Strings + bools + numbers map directly via ts-rs defaults. +//! - Nested types that aren't yet in Rust use `serde_json::Value` with +//! `#[ts(type = "unknown")]` so the TS side gets `unknown` (caller +//! must validate via the existing Python pydantic schemas until +//! Phase 2 ports the types). + +use serde::{Deserialize, Serialize}; +use ts_rs::TS; +use uuid::Uuid; + +//============================================================================= +// SUPPORTING TYPES +//============================================================================= + +/// Source model identifier — what the foundry forges from. 
+/// +/// Mirrors the `AlloySource` shape from +/// `forge-alloy/python/forge_alloy/types.py`. Phase 2 replaces the Python +/// type with a `derive(TS)` import of this Rust type as the source of +/// truth. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/AlloySource.ts")] +pub struct AlloySource { + /// Hugging Face model identifier (e.g., "Qwen/Qwen3.5-4B-Instruct"). + pub base_model: String, + /// Architecture family (e.g., "qwen3", "llama", "mistral"). + pub architecture: String, + /// Optional pinned revision (commit / branch / tag) for reproducibility. + #[ts(optional)] + pub revision: Option, + /// MoE indicator. Defaults to false (dense models). + #[serde(default)] + pub is_moe: bool, + /// Number of experts in the MoE (None for dense). + #[ts(optional)] + pub total_experts: Option, +} + +/// §4.1.3.4 negative-baseline metric the artifact preserves for +/// falsifiability. Each baseline names a metric + measured value + +/// source so a reader can falsify the published improvement claim. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/PriorBaseline.ts")] +pub struct PriorBaseline { + /// Metric name (e.g., "perplexity", "humaneval-pass1"). + pub metric: String, + /// Measured baseline value. + pub value: f64, + /// Where the baseline came from (e.g., "qwen3.5-4b base @ revision XYZ"). + pub source: String, + /// ISO-8601 timestamp of when the measurement was taken. + pub measured_at: String, + /// Free-text description of how the measurement was performed. + pub measurement_method: String, +} + +/// Pointer to the calibration corpus used for the importance profile + +/// (eventual) compensation LoRA. Held-out from `evaluation_benchmarks`. +/// +/// Bytes don't live in Continuum's ORM (corpora can be MB-GB). 
The +/// recipe carries a pointer; the bytes live in HF datasets, foundry- +/// node-local storage, or wherever the `source_url` resolves. +/// +/// `content_hash` uses the canonical `"sha256:"` format that +/// matches `persona::admission` content_hash on the engram side +/// (consensus position #8 from the design review). Cross-domain +/// consistency: any two subsystems comparing hashes can do +/// string-equality without normalization. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/CorpusRef.ts")] +pub struct CorpusRef { + /// Human-readable corpus name (e.g., "wikitext-103-v1"). + pub name: String, + /// SHA-256 of the canonical corpus contents in `"sha256:"` form. + /// Tamper-detection anchor + cross-domain equality with admission's + /// content_hash convention. + pub content_hash: String, + /// Size in bytes (informational; helps the foundry pre-flight storage). + #[ts(type = "number")] + pub size_bytes: u64, + /// Where the bytes live (HF dataset id, file:// URL, etc.). Optional + /// because some corpora are foundry-node-local with no shareable URL. + #[ts(optional)] + pub source_url: Option, +} + +/// Which GGUF / MLX / safetensors / onnx tier(s) get published from +/// one recipe. Top-level on the recipe (consensus position #3 from the +/// design review) rather than nested inside a `QuantStage` — quant +/// tiers are a property of the published artifact, NOT a property of +/// the pipeline stage that produces them. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/QuantTier.ts")] +pub struct QuantTier { + /// Output format (e.g., "gguf", "mlx", "safetensors", "onnx"). + pub format: String, + /// Quantization variants for this format (e.g., ["Q4_K_M", "Q5_K_M", + /// "Q8_0"] for gguf). + pub variants: Vec, + /// Which device tiers this tier targets (e.g., ["m1-8gb", "m5-pro", + /// "rtx-5090"]). 
Helps the foundry decide which devices to verify + /// the quantized output on. + #[serde(default)] + pub target_devices: Vec, +} + +/// Benchmark to run during evaluation. Mirrors the existing Python +/// `BenchmarkDef` shape so Phase 2 can swap the Python type to a +/// generated client of this Rust type. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/BenchmarkDef.ts")] +pub struct BenchmarkDef { + /// Benchmark name (e.g., "humaneval", "mmlu", "hellaswag"). + pub name: String, + /// Optional sub-task / split name within the benchmark. + #[ts(optional)] + pub subset: Option, + /// N-shot setting. None = benchmark default. + #[ts(optional)] + pub n_shot: Option, + /// Whether this benchmark's result should be submitted to a + /// leaderboard. Defaults to false. + #[serde(default)] + pub submit_to_leaderboard: bool, +} + +/// Hardware envelope for the recipe. Tells the foundry what device +/// tier to target + estimates resource needs. Mirrors the existing +/// Python `AlloyHardware` shape. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/AlloyHardware.ts")] +pub struct AlloyHardware { + /// Minimum VRAM (GB) required to run the foundry pipeline. + #[ts(optional)] + pub min_vram_gb: Option, + /// Recommended VRAM (GB) for comfortable headroom. + #[ts(optional)] + pub recommended_vram_gb: Option, + /// Estimated wall-clock duration for a full forge run (informational). + #[ts(optional)] + pub estimated_duration_minutes: Option, + /// Whether the pipeline can fall back to CPU if no GPU available. + #[serde(default)] + pub supports_cpu: bool, + /// Devices the recipe has been validated on (informational; the + /// artifact's `hardware_verified` is the authoritative post-run + /// list). 
+ #[serde(default)] + pub tested_on: Vec, +} + +//============================================================================= +// FORGE RECIPE +//============================================================================= + +/// Authored recipe — the input the foundry consumes. +/// +/// Stored as a Continuum entity (Phase 3 wires the entity registry). +/// Edited via standard `Commands.execute('data/...')` primitives. Never +/// consumed directly by `publish_model.py` — that script reads the +/// `ForgeArtifact` (sibling type) the foundry emits. +/// +/// All prose fields the model card renders live HERE, not in a hand- +/// authored `.alloy.json`. +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "../../../shared/generated/forge/ForgeRecipe.ts")] +pub struct ForgeRecipe { + //--- Identity ---------------------------------------------------------- + + /// Stable recipe identifier. Generated at recipe creation time. + #[ts(type = "string")] + pub id: Uuid, + + /// Recipe name (e.g., "qwen3.5-4b-code-aggressive"). + pub name: String, + + /// Semantic version of THIS recipe (semver). Bump when revising + /// the recipe; lineage chain via `parent_recipe_id`. + pub version: String, + + /// Paragraph for the README/card. + pub description: String, + + /// One-line plain-English headline (used as the model card subtitle). + pub user_summary: String, + + /// Recipe author (e.g., "continuum-ai" or a user handle). + pub author: String, + + /// Tags for discovery (e.g., ["code", "pruning", "4b"]). + #[serde(default)] + pub tags: Vec, + + /// SPDX license identifier or shorthand. Default "apache-2.0"; the + /// caller is responsible for inheriting the source model's license + /// when applicable (consensus position #10 — `license_strategy` + /// auto-inheritance lands in v2). + pub license: String, + + //--- Methodology / falsifiability prose -------------------------------- + + /// Optional link to the methodology paper. 
+ #[ts(optional)] + pub methodology_paper_url: Option, + + /// Known limitations of the recipe (rendered into the model card). + #[serde(default)] + pub limitations: Vec, + + /// §4.1.3.4 negative-baselines preserved for falsifiability. + #[serde(default)] + pub prior_metric_baselines: Vec, + + //--- Source ----------------------------------------------------------- + + /// Base model + architecture metadata. + pub source: AlloySource, + + //--- Pipeline --------------------------------------------------------- + + /// Ordered pipeline of recipe stages. v1 carries stages as opaque + /// JSON values matching the existing `AlloyStage` discriminated + /// union in `forge-alloy/python/forge_alloy/types.py`. Phase 2 + /// replaces this with a typed `Vec` enum where each + /// variant carries an optional `notes: String` field for the + /// methodology blockquote (consensus position #2 from the design + /// review — per-variant notes, not index-keyed sidecar). + #[ts(type = "Array")] + pub stages: Vec, + + /// How many times to repeat the prune→train cycle (1 = single pass). + /// Most recipes are 1. + pub cycles: u32, + + //--- Calibration / eval inputs ---------------------------------------- + + /// Held-out corpus pointer (importance profile + LoRA training). + pub calibration_corpus: CorpusRef, + + /// Which output formats / tiers to produce (top-level per consensus + /// position #3 — quant tiers are an artifact property, not a stage + /// config). + #[serde(default)] + pub quant_tiers: Vec, + + /// Benchmarks to run during evaluation. + #[serde(default)] + pub evaluation_benchmarks: Vec, + + //--- Hardware target -------------------------------------------------- + + /// Target hardware envelope (VRAM, device list, CPU fallback). + pub hardware: AlloyHardware, + + //--- Lineage ---------------------------------------------------------- + + /// Parent recipe id, if this recipe was forked from another. None + /// for net-new recipes. 
v1 lineage is one-directional (recipe → + /// recipe); bidirectional lineage (recipe ← artifact) is a future + /// `parent_artifact_ids` field per consensus position #9. + #[ts(optional, type = "string")] + pub parent_recipe_id: Option, + + //--- Timestamps ------------------------------------------------------- + + /// When the recipe was authored (epoch milliseconds UTC). Same + /// convention as `Engram.admitted_at_ms` from the engram thread — + /// `u64` epoch ms, not chrono::DateTime. + #[ts(type = "number")] + pub authored_at_ms: u64, + + /// When the recipe was last edited (epoch milliseconds UTC). + #[ts(type = "number")] + pub updated_at_ms: u64, +} + +//============================================================================= +// TESTS +//============================================================================= + +#[cfg(test)] +mod tests { + use super::*; + + fn fixed_now_ms() -> u64 { + 1_715_625_600_000 + } + + fn sample_corpus() -> CorpusRef { + CorpusRef { + name: "wikitext-103-v1".to_string(), + content_hash: "sha256:abcdef0123456789".to_string(), + size_bytes: 100_000_000, + source_url: Some("hf://datasets/wikitext".to_string()), + } + } + + fn sample_recipe() -> ForgeRecipe { + ForgeRecipe { + id: Uuid::nil(), + name: "qwen3.5-4b-code-aggressive".to_string(), + version: "1.0.0".to_string(), + description: "Aggressive prune + LoRA on a code corpus.".to_string(), + user_summary: "Smaller, faster Qwen3.5-4B for code tasks.".to_string(), + author: "continuum-ai".to_string(), + tags: vec!["code".to_string(), "pruning".to_string(), "4b".to_string()], + license: "apache-2.0".to_string(), + methodology_paper_url: Some("https://example.com/forge-methodology.pdf".to_string()), + limitations: vec!["English-only training corpus".to_string()], + prior_metric_baselines: vec![PriorBaseline { + metric: "perplexity".to_string(), + value: 12.34, + source: "qwen3.5-4b base @ revision XYZ".to_string(), + measured_at: "2026-05-14T00:00:00Z".to_string(), + 
measurement_method: "wikitext-103 eval split, fp16, batch=1".to_string(), + }], + source: AlloySource { + base_model: "Qwen/Qwen3.5-4B-Instruct".to_string(), + architecture: "qwen3".to_string(), + revision: None, + is_moe: false, + total_experts: None, + }, + stages: vec![ + serde_json::json!({"type": "prune", "strategy": "entropy", "level": 0.4}), + serde_json::json!({"type": "lora", "rank": 32, "epochs": 3}), + serde_json::json!({"type": "quant", "format": "gguf", "quantTypes": ["Q4_K_M"]}), + ], + cycles: 1, + calibration_corpus: sample_corpus(), + quant_tiers: vec![QuantTier { + format: "gguf".to_string(), + variants: vec!["Q4_K_M".to_string(), "Q5_K_M".to_string(), "Q8_0".to_string()], + target_devices: vec!["m1-8gb".to_string(), "m5-pro".to_string()], + }], + evaluation_benchmarks: vec![BenchmarkDef { + name: "humaneval".to_string(), + subset: None, + n_shot: Some(0), + submit_to_leaderboard: true, + }], + hardware: AlloyHardware { + min_vram_gb: Some(8.0), + recommended_vram_gb: Some(16.0), + estimated_duration_minutes: Some(120.0), + supports_cpu: false, + tested_on: vec!["m5-pro".to_string()], + }, + parent_recipe_id: None, + authored_at_ms: fixed_now_ms(), + updated_at_ms: fixed_now_ms(), + } + } + + /// What this catches: full ForgeRecipe round-trips through serde + /// without losing fields. The recipe is the source of truth; if it + /// silently drops a field on serialization the foundry would forge + /// against a mutated input. 
+ #[test] + fn forge_recipe_serde_roundtrip_preserves_all_fields() { + let original = sample_recipe(); + let json = serde_json::to_string(&original).expect("serialize"); + let back: ForgeRecipe = serde_json::from_str(&json).expect("deserialize"); + assert_eq!(original.name, back.name); + assert_eq!(original.version, back.version); + assert_eq!(original.description, back.description); + assert_eq!(original.user_summary, back.user_summary); + assert_eq!(original.tags, back.tags); + assert_eq!(original.limitations, back.limitations); + assert_eq!(original.prior_metric_baselines.len(), 1); + assert_eq!(original.source.base_model, back.source.base_model); + assert_eq!(original.stages.len(), back.stages.len()); + assert_eq!(original.cycles, back.cycles); + assert_eq!( + original.calibration_corpus.content_hash, + back.calibration_corpus.content_hash + ); + assert_eq!(original.quant_tiers.len(), 1); + assert_eq!(original.quant_tiers[0].variants.len(), 3); + assert_eq!(original.evaluation_benchmarks.len(), 1); + assert_eq!(original.hardware.min_vram_gb, back.hardware.min_vram_gb); + assert_eq!(original.parent_recipe_id, back.parent_recipe_id); + assert_eq!(original.authored_at_ms, back.authored_at_ms); + } + + /// What this catches: minimal recipe (only required fields) serializes + /// and deserializes cleanly. `serde(default)` lets all the Vec fields + /// be omitted from the JSON without breaking deserialization. This + /// means a recipe author can supply just the essentials in v1 and + /// add tags/limitations/baselines later. 
+ #[test] + fn minimal_recipe_serde_roundtrip_uses_defaults() { + let json = r#"{ + "id": "00000000-0000-0000-0000-000000000000", + "name": "minimal-recipe", + "version": "0.1.0", + "description": "Smallest viable recipe.", + "userSummary": "Just enough fields to compile.", + "author": "test", + "license": "apache-2.0", + "source": { + "baseModel": "Qwen/Qwen3.5-4B-Instruct", + "architecture": "qwen3" + }, + "stages": [], + "cycles": 1, + "calibrationCorpus": { + "name": "x", + "contentHash": "sha256:x", + "sizeBytes": 0 + }, + "hardware": {}, + "authoredAtMs": 0, + "updatedAtMs": 0 + }"#; + // Note: ts-rs uses snake_case by default; our fields ARE snake_case + // in the Rust struct. Pydantic-style camelCase is supplied by the + // TS layer when it converts. For this Rust-side test, use snake_case + // JSON to match the actual serde output. + let json_snake = json + .replace("userSummary", "user_summary") + .replace("baseModel", "base_model") + .replace("calibrationCorpus", "calibration_corpus") + .replace("contentHash", "content_hash") + .replace("sizeBytes", "size_bytes") + .replace("authoredAtMs", "authored_at_ms") + .replace("updatedAtMs", "updated_at_ms"); + let recipe: ForgeRecipe = serde_json::from_str(&json_snake) + .unwrap_or_else(|e| panic!("deserialize minimal: {e}\nJSON:\n{json_snake}")); + assert_eq!(recipe.name, "minimal-recipe"); + assert!(recipe.tags.is_empty(), "tags default to empty Vec"); + assert!( + recipe.limitations.is_empty(), + "limitations default to empty Vec" + ); + assert!( + recipe.prior_metric_baselines.is_empty(), + "prior_metric_baselines default to empty Vec" + ); + assert!( + recipe.quant_tiers.is_empty(), + "quant_tiers default to empty Vec" + ); + assert!( + recipe.evaluation_benchmarks.is_empty(), + "evaluation_benchmarks default to empty Vec" + ); + } + + /// What this catches: stages are opaque JSON in v1 — they must + /// round-trip without normalization. 
Phase 2's typed enum will + /// replace this; until then, faithful pass-through is the contract. + #[test] + fn stages_round_trip_as_opaque_json() { + let original = sample_recipe(); + let json = serde_json::to_string(&original).expect("serialize"); + let back: ForgeRecipe = serde_json::from_str(&json).expect("deserialize"); + // Each stage is a serde_json::Value; equality is structural. + for (orig, back_stage) in original.stages.iter().zip(back.stages.iter()) { + assert_eq!(orig, back_stage, "stage value must round-trip exactly"); + } + } + + /// What this catches: content_hash uses the canonical "sha256:" + /// format that matches admission's content_hash convention. Cross- + /// domain consistency check. + #[test] + fn corpus_content_hash_uses_canonical_format() { + let corpus = sample_corpus(); + assert!( + corpus.content_hash.starts_with("sha256:"), + "content_hash must use canonical sha256: format, got {}", + corpus.content_hash + ); + } + + // ── ts-rs binding tests — same pattern as persona/engram.rs ───────── + + #[test] + fn export_bindings_alloy_source() { + AlloySource::export_all(&ts_rs::Config::default()).unwrap(); + } + + #[test] + fn export_bindings_prior_baseline() { + PriorBaseline::export_all(&ts_rs::Config::default()).unwrap(); + } + + #[test] + fn export_bindings_corpus_ref() { + CorpusRef::export_all(&ts_rs::Config::default()).unwrap(); + } + + #[test] + fn export_bindings_quant_tier() { + QuantTier::export_all(&ts_rs::Config::default()).unwrap(); + } + + #[test] + fn export_bindings_benchmark_def() { + BenchmarkDef::export_all(&ts_rs::Config::default()).unwrap(); + } + + #[test] + fn export_bindings_alloy_hardware() { + AlloyHardware::export_all(&ts_rs::Config::default()).unwrap(); + } + + #[test] + fn export_bindings_forge_recipe() { + ForgeRecipe::export_all(&ts_rs::Config::default()).unwrap(); + } +} diff --git a/src/workers/continuum-core/src/lib.rs b/src/workers/continuum-core/src/lib.rs index 3296f9a9a..1e77a4334 100644 --- 
a/src/workers/continuum-core/src/lib.rs +++ b/src/workers/continuum-core/src/lib.rs @@ -22,6 +22,7 @@ pub mod code; pub mod cognition; pub mod concurrent; pub mod ffi; +pub mod forge; pub mod gpu; pub mod http; pub mod inference;