From 91ca41c41bd02b9bb682bd99a3c84b7af2a20e6f Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Wed, 3 Jun 2026 02:16:32 -0400 Subject: [PATCH 1/4] docs: tighten authority-first narrative --- README.md | 106 +++++++++++++++++++++++-------------------- demos/README.md | 17 +++---- docs/README.md | 1 + docs/architecture.md | 86 +++++++++++++++++++++++++++++++++++ examples/README.md | 20 ++++---- 5 files changed, 163 insertions(+), 67 deletions(-) create mode 100644 docs/architecture.md diff --git a/README.md b/README.md index 1952f5a..759faef 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ from context_compiler import ( engine = create_engine() -user_input = "prohibit peanuts" +user_input = "set premise current project uses uv" decision = engine.step(user_input) if is_clarify(decision): @@ -206,23 +206,23 @@ clarification instead of silently overwriting state. ## 10-Second Example -User sets a constraint once: +User sets a premise once: ```text -User: prohibit peanuts +User: set premise current project uses uv ``` -Outcome: policy state includes `"peanuts": "prohibit"`. +Outcome: premise state includes `"current project uses uv"`. Later in the conversation: ```text -User: how should I make this curry? +User: how should I run the tests? ``` -Your host sends the saved policy state with this later request, so the model is -constrained by explicit state (`peanuts: prohibit`) instead of relying on memory -of earlier conversation text. +Your host sends the saved authoritative state with this later request, so the +model answers in the context of the saved premise (`current project uses uv`) +instead of relying on memory of earlier conversation text. --- @@ -353,7 +353,14 @@ Policy value constants are exported for explicit policy comparisons: ## State Model -The compiler keeps a current state snapshot that your app can trust. +The state model represents explicit user commitments that the host can treat as +authoritative for future turns. + +- `premise` = authoritative context that changes how future answers should be interpreted +- `use` = affirmative selection or preference +- `prohibit` = explicit exclusion + +The compiler keeps this state snapshot in a form that your app can trust. - Premise is a single value that can be set or replaced - Policies are per-item (`use` or `prohibit`) @@ -368,6 +375,46 @@ For normal reads, prefer `get_premise_value(state)` and --- +### When to use `premise` + +The `premise` is intended for **persistent context that changes how all answers should be interpreted**, especially when it: + +- applies across many turns +- significantly changes what solutions are valid +- cannot be fully captured as simple `use` / `prohibit` policies + +Examples: + +- “Current medications: …” +- “Outdoor event; no seating available” +- “GDPR data handling requirements apply” +- “System is deployed across multiple regions” +- “Limited time available” + +In these cases, the premise acts as an **authoritative context anchor** that the host supplies to the model on every turn. + +Use policies instead when the constraint is explicit and enforceable: + +- “prohibit foods that may cause GI upset” +- “use handheld foods” +- “prohibit storing personal data beyond immediate use” +- “prohibit introducing new external dependencies” +- “use single-step preparation methods” + +### Example domains + +Hosts define what policy items and premise mean in context. Common patterns: + +- safety-oriented constraints (for example, prohibited materials or tools) +- authority/evidence constraints (for example, cite only approved sources) +- software workflow constraints (for example, require `uv`, prohibit `npm`) +- accessibility/environment constraints (for example, no audio-only outputs) + +Context Compiler enforces explicit directive/state mechanics. Domain reasoning +still belongs to the host and model workflow. + +--- + ## Checkpoint Contract `export_json()` / `import_json()` and checkpoint APIs serve different boundaries: @@ -424,46 +471,6 @@ When to use checkpoint APIs: --- -### When to use `premise` - -The `premise` is intended for **persistent context that changes how all answers should be interpreted**, especially when it: - -- applies across many turns -- significantly changes what solutions are valid -- cannot be fully captured as simple `use` / `prohibit` policies - -Examples: - -- “Current medications: …” -- “Outdoor event; no seating available” -- “GDPR data handling requirements apply” -- “System is deployed across multiple regions” -- “Limited time available” - -In these cases, the premise acts as an **authoritative context anchor** that the host supplies to the model on every turn. - -Use policies instead when the constraint is explicit and enforceable: - -- “prohibit foods that may cause GI upset” -- “use handheld foods” -- “prohibit storing personal data beyond immediate use” -- “prohibit introducing new external dependencies” -- “use single-step preparation methods” - -### Example domains - -Hosts define what policy items and premise mean in context. Common patterns: - -- safety-oriented constraints (for example, prohibited materials or tools) -- authority/evidence constraints (for example, cite only approved sources) -- software workflow constraints (for example, require `uv`, prohibit `npm`) -- accessibility/environment constraints (for example, no audio-only outputs) - -Context Compiler enforces explicit directive/state mechanics. Domain reasoning -still belongs to the host and model workflow. - ---- - ## Directive Examples Set and change premise: @@ -560,6 +567,7 @@ For a full documentation map, see [docs/README.md](docs/README.md). More detailed design and milestone documents are available in: +- [Architecture boundaries](docs/architecture.md) - [Project overview](docs/DescriptionAndMilestones.md) - [Directive grammar specification](docs/DirectiveGrammarSpec.md) diff --git a/demos/README.md b/demos/README.md index 9deb519..733c3af 100644 --- a/demos/README.md +++ b/demos/README.md @@ -6,8 +6,9 @@ They compare normal prompting with an approach where the application tracks important instructions explicitly instead of relying only on the conversation history. The scripts are designed to produce consistent results so the behavior is easy to see. -This demo set shows what users notice: rules and corrections can keep applying -later in the conversation, and where your app needs explicit state rules. +This demo set shows what users notice: saved authoritative state and explicit +corrections continue to affect later turns, and where your app needs +deterministic state-transition rules. Scored demos now compare four paths: - baseline @@ -19,15 +20,15 @@ Scored demos now compare four paths: | Demo | Behavior | Concept | Most visible with | | :--: | --- | :--: | --- | +| [03](./03_llm_premise_guardrail.py) | Premise updates stay authoritative | fixed, repeatable premise updates | models that summarize conversation | | [01](./01_llm_contradiction_clarify.py) | Contradiction blocking | clarification gate | small instruct models | -| [02](./02_llm_constraint_guardrail.py) | Rules stop applying over time | persistent policy enforcement | small or quantized models | -| [03](./03_llm_premise_guardrail.py) | Premise updates stop sticking | fixed, repeatable premise updates | models that summarize conversation | -| [04](./04_llm_tool_denylist_guardrail.py) | Tool governance | host-side denylist | general assistant models | -| [05](./05_llm_prompt_drift_vs_state.py) | Prompt drift | long transcript failure | weaker long-context models ([see Demo 5 note](#demo-5-stress-ladder-turns)) | -| [06](./06_llm_context_compaction.py) | Context compaction | saved compiler state replacing transcript context | small or local models | -| [07](./07_llm_prompt_vs_state.py) | Prompt engineering comparison | prompting vs saved compiler state | any model with long transcript sensitivity | | [08](./08_llm_replacement_precondition.py) | Replacement precondition | invalid replacement blocked without state mutation | any model | | [09](./09_llm_pending_clarification.py) | Pending clarification continuation | confirmation-only resolution of suspended mutation | any model | +| [06](./06_llm_context_compaction.py) | Context compaction | saved compiler state replacing transcript context | small or local models | +| [07](./07_llm_prompt_vs_state.py) | Prompt engineering comparison | prompting vs saved compiler state | any model with long transcript sensitivity | +| [02](./02_llm_constraint_guardrail.py) | Policy state stays active across turns | authoritative policy state | small or quantized models | +| [04](./04_llm_tool_denylist_guardrail.py) | Tool governance | application-layer tool gating from saved state | general assistant models | +| [05](./05_llm_prompt_drift_vs_state.py) | Prompt drift | long transcript failure | weaker long-context models ([see Demo 5 note](#demo-5-stress-ladder-turns)) | Stronger frontier models may show these behaviors less often, but the same patterns still appear in real applications. diff --git a/docs/README.md b/docs/README.md index 122b7ba..874f8b0 100644 --- a/docs/README.md +++ b/docs/README.md @@ -4,6 +4,7 @@ - [Project README](../README.md) ## Core Concepts +- [Architecture boundaries](architecture.md) - [Directive Grammar (exact command forms the engine accepts)](DirectiveGrammarSpec.md) ## Integrations diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..3fff74c --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,86 @@ +# Architecture Boundaries + +Context Compiler is best understood as a deterministic conversational state +authority inside a larger host application stack. + +## Acquisition Layer + +Responsibilities: +- recognize possible user state updates before core compilation +- normalize candidate inputs conservatively +- abstain when intent is uncertain + +Examples: +- the optional heuristic/LLM preprocessor +- host-side input shaping before `engine.step(...)` + +Out of scope: +- authoritative state mutation +- final conflict resolution +- semantic classification as source of truth + +## Authority Layer + +Responsibilities: +- apply deterministic state transitions +- enforce clarification and confirmation gates +- export/import authoritative state and checkpoints + +Examples: +- Context Compiler core engine +- transcript replay +- checkpoint continuation behavior + +Out of scope: +- prompt rendering +- tool selection +- moderation or policy classification + +## Application Layer + +Responsibilities: +- decide how compiler state affects runtime behavior +- render prompts and acknowledgements +- select schemas, gate tools, route workflows, and apply runtime controls + +Examples: +- Open WebUI, LiteLLM, and Ollama structured-output integrations +- host-controlled prompt construction from saved state + +Out of scope: +- changing compiler semantics +- inferring new state without explicit directives + +## Classification Layer + +Responsibilities: +- safety, moderation, semantic intent detection, and ontology/classification work +- external policy analysis before or around model calls + +Examples: +- moderation systems +- safety classifiers +- semantic routing/classification services + +Out of scope: +- deterministic compiler state transitions +- checkpoint and clarification authority + +## Composition Layer + +Responsibilities: +- combine outputs from multiple authorities +- track provenance and resolve conflicts across authorities +- coordinate host behavior across multiple authorities + +Examples: +- host-owned coordination that merges multiple state authorities +- provenance-aware conflict handling across authority outputs + +Out of scope: +- current Context Compiler core behavior +- any implied built-in coordinator semantics today + +The preprocessor belongs to the Acquisition Layer. It is optional, +conservative, and never the source of truth. Context Compiler core belongs to +the Authority Layer. Host applications own Application Layer behavior. diff --git a/examples/README.md b/examples/README.md index da8c127..a604eac 100644 --- a/examples/README.md +++ b/examples/README.md @@ -7,12 +7,12 @@ Non-integration example files in this directory are standalone scripts and can b ## 01_persistent_guardrails.py -Shows how a prohibition stays in saved state across later turns. -Shows the app sending saved state so the rule keeps applying. +Shows how explicit policy state stays authoritative across later turns. +Shows the app sending saved state so later answers are interpreted in that context. ## 02_configuration_and_correction.py -Demonstrates explicit premise lifecycle in 0.5. +Demonstrates premise as authoritative context for future turns. Shows `set premise ...` followed by `change premise to ...`. ## 03_ambiguity_with_clarification.py @@ -20,10 +20,11 @@ Shows `set premise ...` followed by `change premise to ...`. Shows `clarify` behavior before state changes. Shows how the app handles `clarify` and skips the LLM call. -## 04_tool_governance_denylist.py +## 08_controller_preview_diff.py -Shows tool-governance policy handling via `prohibit ...` directives. -Shows how apps can prevent denied tools from being selected. +Shows controller-layer dry-run behavior with `preview(engine, user_input)`. +Shows structural state inspection with `state_diff(state_before, state_after)`. +Shows `step(engine, user_input)` after preview to apply the same input. ## 05_llm_integration_pattern.py @@ -41,8 +42,7 @@ Shows `compile_transcript(messages)` from a fresh engine and `engine.apply_trans Demonstrates explicit single-policy correction without `reset policies`. Shows `prohibit peanuts` -> `remove policy peanuts` -> `use peanuts`. -## 08_controller_preview_diff.py +## 04_tool_governance_denylist.py -Shows controller-layer dry-run behavior with `preview(engine, user_input)`. -Shows structural state inspection with `state_diff(state_before, state_after)`. -Shows `step(engine, user_input)` after preview to apply the same input. +Shows an application-layer use of authoritative policy state for tool selection. +Shows how apps can prevent denied tools from being selected without changing compiler identity. From cef31965952be518e95ea5562a0acc7682f7bd0a Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Wed, 3 Jun 2026 02:20:40 -0400 Subject: [PATCH 2/4] docs: simplify wording follow-up --- demos/README.md | 6 +++--- docs/architecture.md | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/demos/README.md b/demos/README.md index 733c3af..5cffcc2 100644 --- a/demos/README.md +++ b/demos/README.md @@ -6,9 +6,9 @@ They compare normal prompting with an approach where the application tracks important instructions explicitly instead of relying only on the conversation history. The scripts are designed to produce consistent results so the behavior is easy to see. -This demo set shows what users notice: saved authoritative state and explicit -corrections continue to affect later turns, and where your app needs -deterministic state-transition rules. +This demo set shows what users notice: saved authoritative state continues to +affect later turns, and where your app needs deterministic state-transition +rules. Scored demos now compare four paths: - baseline diff --git a/docs/architecture.md b/docs/architecture.md index 3fff74c..720690d 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -69,17 +69,17 @@ Out of scope: ## Composition Layer Responsibilities: -- combine outputs from multiple authorities -- track provenance and resolve conflicts across authorities -- coordinate host behavior across multiple authorities +- coordinate multiple authority instances when a host uses them +- decide which authority outputs apply to a request Examples: -- host-owned coordination that merges multiple state authorities -- provenance-aware conflict handling across authority outputs +- separate project contexts +- separate user profiles +- independent authority instances Out of scope: - current Context Compiler core behavior -- any implied built-in coordinator semantics today +- built-in coordination behavior The preprocessor belongs to the Acquisition Layer. It is optional, conservative, and never the source of truth. Context Compiler core belongs to From bdfc1c9f4da1375251c3836d0a8195bd5f2a43af Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Wed, 3 Jun 2026 02:31:20 -0400 Subject: [PATCH 3/4] docs: reorder README narrative --- README.md | 167 ++++++++++++++++++++++++++---------------------------- 1 file changed, 80 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index 759faef..9bf859b 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,80 @@ Interpretation guide: → [Full results and demo output](demos/README.md) Canonical matrix: [docs/demos-results.md](docs/demos-results.md) +## 10-Second Example + +User sets a premise once: + +```text +User: set premise current project uses uv +``` + +Outcome: premise state includes `"current project uses uv"`. + +Later in the conversation: + +```text +User: how should I run the tests? +``` + +Your host sends the saved authoritative state with this later request, so the +model answers in the context of the saved premise (`current project uses uv`) +instead of relying on memory of earlier conversation text. + +--- + +## Deterministic behavior (examples) + +Context Compiler makes mutation rules explicit so behavior stays repeatable. + +**Explicit directive** +```text +set premise concise replies +``` +- Base model: silently accepts / rewrites +- Context Compiler: applies a repeatable state update + +**State-dependent operation** +```text +clear state +use podman instead of docker +``` +- Without explicit state transition rules: behavior depends on host/model handling +- Context Compiler: returns `clarify` before changing state + +**Lifecycle enforcement** +```text +clear state +change premise to formal tone +``` +- Without explicit transition checks: behavior depends on host/model handling +- Context Compiler: asks for clarification and keeps saved state unchanged + +--- + +## Architecture + +```text +User Input + │ + ▼ +Context Compiler + │ + ▼ +Decision + │ + ▼ +Host Application + ├─ clarify → ask user + ├─ passthrough → call LLM + └─ update → authoritative state mutated; host may call LLM with compiled state +``` + +The compiler owns state updates and never calls the LLM. +Your app decides whether to call the model based on the returned `Decision`. + +--- + ## Quickstart ```bash @@ -204,80 +278,6 @@ clarification instead of silently overwriting state. --- -## 10-Second Example - -User sets a premise once: - -```text -User: set premise current project uses uv -``` - -Outcome: premise state includes `"current project uses uv"`. - -Later in the conversation: - -```text -User: how should I run the tests? -``` - -Your host sends the saved authoritative state with this later request, so the -model answers in the context of the saved premise (`current project uses uv`) -instead of relying on memory of earlier conversation text. - ---- - -## Deterministic behavior (examples) - -Context Compiler makes mutation rules explicit so behavior stays repeatable. - -**Explicit directive** -```text -set premise concise replies -``` -- Base model: silently accepts / rewrites -- Context Compiler: applies a repeatable state update - -**State-dependent operation** -```text -clear state -use podman instead of docker -``` -- Without explicit state transition rules: behavior depends on host/model handling -- Context Compiler: returns `clarify` before changing state - -**Lifecycle enforcement** -```text -clear state -change premise to formal tone -``` -- Without explicit transition checks: behavior depends on host/model handling -- Context Compiler: asks for clarification and keeps saved state unchanged - ---- - -## Architecture - -```text -User Input - │ - ▼ -Context Compiler - │ - ▼ -Decision - │ - ▼ -Host Application - ├─ clarify → ask user - ├─ passthrough → call LLM - └─ update → authoritative state mutated; host may call LLM with compiled state -``` - -The compiler owns state updates and never calls the LLM. -Your app decides whether to call the model based on the returned `Decision`. - ---- - ## Decision API Each user message produces a `Decision`. @@ -519,7 +519,9 @@ those turns to the downstream LLM. --- -## Guarantees +## Advanced topics + +### Guarantees - State changes only through explicit user directives or confirmation. - Identical input sequences produce identical compiler state. @@ -528,9 +530,7 @@ those turns to the downstream LLM. These invariants are verified through behavioral tests and Hypothesis-based property tests. ---- - -## Optional: LLM Preprocessor (Experimental) +### Optional: LLM Preprocessor (Experimental) An optional host-side preprocessor can conservatively convert some natural-language instructions into canonical directives before compilation. @@ -549,31 +549,24 @@ See [LLM preprocessor](docs/llm-preprocessor.md) and [`experimental/preprocessor/`](experimental/preprocessor/) for details. -## Advanced topics - - [Multiple engines](docs/multi-engine.md) For a full documentation map, see [docs/README.md](docs/README.md). --- -## Design Rationale - -- [Design philosophy](docs/DesignPhilosophy.md) - ---- - ## Design Notes More detailed design and milestone documents are available in: +- [Design philosophy](docs/DesignPhilosophy.md) - [Architecture boundaries](docs/architecture.md) - [Project overview](docs/DescriptionAndMilestones.md) - [Directive grammar specification](docs/DirectiveGrammarSpec.md) --- -## Conformance Fixtures +### Conformance Fixtures Cross-language conformance tests are defined in [`tests/fixtures/`](tests/fixtures/). These fixtures serve as the behavioral contract for compiler semantics across implementations. From b3ced2cc8638651010d5bad92626b556cc32acac Mon Sep 17 00:00:00 2001 From: Robert Lippmann Date: Wed, 3 Jun 2026 02:34:27 -0400 Subject: [PATCH 4/4] docs: polish README narrative --- README.md | 59 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 9bf859b..d1b0f28 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Some behaviors require explicit host-side state handling. -Context Compiler is a deterministic host-side state layer for LLM applications. +Context Compiler is a deterministic conversational state authority for LLM applications. It applies explicit premise and policy updates so state changes stay fixed and repeatable. @@ -198,34 +198,6 @@ else: render(call_llm(user_input)) ``` -Controller quick example: - -```python -from context_compiler import ( - get_decision_state, - is_update, - create_engine, - preview, - state_diff, - step, -) - -engine = create_engine() - -before = engine.state -dry_run = preview(engine, "prohibit peanuts") -print(dry_run["would_mutate"]) # True -planned_change = state_diff(before, dry_run["state_after"]) -print(planned_change["changed"]) # True - -after_preview = engine.state -print(state_diff(before, after_preview)["changed"]) # False (preview does not mutate state) - -applied = step(engine, "prohibit peanuts") -print(is_update(applied["decision"])) # True -print(get_decision_state(applied["decision"]) is not None) # True -``` - ## Installation Requirements: @@ -327,6 +299,34 @@ instead of direct key traversal. These controller APIs are public package exports and can be used directly in app code (not just inside the REPL). +Controller quick example: + +```python +from context_compiler import ( + get_decision_state, + is_update, + create_engine, + preview, + state_diff, + step, +) + +engine = create_engine() + +before = engine.state +dry_run = preview(engine, "prohibit peanuts") +print(dry_run["would_mutate"]) # True +planned_change = state_diff(before, dry_run["state_after"]) +print(planned_change["changed"]) # True + +after_preview = engine.state +print(state_diff(before, after_preview)["changed"]) # False (preview does not mutate state) + +applied = step(engine, "prohibit peanuts") +print(is_update(applied["decision"])) # True +print(get_decision_state(applied["decision"]) is not None) # True +``` + | API | Description | |---|---| | `step(engine, user_input)` | Run one turn through the engine and return `StepResult` (`output_version`, `mode`, `decision`, `state`). | @@ -548,6 +548,7 @@ Boundary policy is false-negative-preferred: abstain rather than risk unsafe sta See [LLM preprocessor](docs/llm-preprocessor.md) and [`experimental/preprocessor/`](experimental/preprocessor/) for details. +### Multiple engines - [Multiple engines](docs/multi-engine.md)