diff --git a/.agents/skills/grill-with-docs/ADR-FORMAT.md b/.agents/skills/grill-with-docs/ADR-FORMAT.md new file mode 100644 index 0000000..da7e78e --- /dev/null +++ b/.agents/skills/grill-with-docs/ADR-FORMAT.md @@ -0,0 +1,47 @@ +# ADR Format + +ADRs live in `docs/adr/` and use sequential numbering: `0001-slug.md`, `0002-slug.md`, etc. + +Create the `docs/adr/` directory lazily — only when the first ADR is needed. + +## Template + +```md +# {Short title of the decision} + +{1-3 sentences: what's the context, what did we decide, and why.} +``` + +That's it. An ADR can be a single paragraph. The value is in recording *that* a decision was made and *why* — not in filling out sections. + +## Optional sections + +Only include these when they add genuine value. Most ADRs won't need them. + +- **Status** frontmatter (`proposed | accepted | deprecated | superseded by ADR-NNNN`) — useful when decisions are revisited +- **Considered Options** — only when the rejected alternatives are worth remembering +- **Consequences** — only when non-obvious downstream effects need to be called out + +## Numbering + +Scan `docs/adr/` for the highest existing number and increment by one. + +## When to offer an ADR + +All three of these must be true: + +1. **Hard to reverse** — the cost of changing your mind later is meaningful +2. **Surprising without context** — a future reader will look at the code and wonder "why on earth did they do it this way?" +3. **The result of a real trade-off** — there were genuine alternatives and you picked one for specific reasons + +If a decision is easy to reverse, skip it — you'll just reverse it. If it's not surprising, nobody will wonder why. If there was no real alternative, there's nothing to record beyond "we did the obvious thing." + +### What qualifies + +- **Architectural shape.** "We're using a monorepo." "The write model is event-sourced, the read model is projected into Postgres." 
+- **Integration patterns between contexts.** "Ordering and Billing communicate via domain events, not synchronous HTTP." +- **Technology choices that carry lock-in.** Database, message bus, auth provider, deployment target. Not every library — just the ones that would take a quarter to swap out. +- **Boundary and scope decisions.** "Customer data is owned by the Customer context; other contexts reference it by ID only." The explicit noes are as valuable as the yeses. +- **Deliberate deviations from the obvious path.** "We're using manual SQL instead of an ORM because X." Anything where a reasonable reader would assume the opposite. These stop the next engineer from "fixing" something that was deliberate. +- **Constraints not visible in the code.** "We can't use AWS because of compliance requirements." "Response times must be under 200ms because of the partner API contract." +- **Rejected alternatives when the rejection is non-obvious.** If you considered GraphQL and picked REST for subtle reasons, record it — otherwise someone will suggest GraphQL again in six months. diff --git a/.agents/skills/grill-with-docs/CONTEXT-FORMAT.md b/.agents/skills/grill-with-docs/CONTEXT-FORMAT.md new file mode 100644 index 0000000..ddfa247 --- /dev/null +++ b/.agents/skills/grill-with-docs/CONTEXT-FORMAT.md @@ -0,0 +1,77 @@ +# CONTEXT.md Format + +## Structure + +```md +# {Context Name} + +{One or two sentence description of what this context is and why it exists.} + +## Language + +**Order**: +A customer's request to purchase one or more products. +_Avoid_: Purchase, transaction + +**Invoice**: +A request for payment sent to a customer after delivery. +_Avoid_: Bill, payment request + +**Customer**: +A person or organization that places orders. 
+_Avoid_: Client, buyer, account + +## Relationships + +- An **Order** produces one or more **Invoices** +- An **Invoice** belongs to exactly one **Customer** + +## Example dialogue + +> **Dev:** "When a **Customer** places an **Order**, do we create the **Invoice** immediately?" +> **Domain expert:** "No — an **Invoice** is only generated once a **Fulfillment** is confirmed." + +## Flagged ambiguities + +- "account" was used to mean both **Customer** and **User** — resolved: these are distinct concepts. +``` + +## Rules + +- **Be opinionated.** When multiple words exist for the same concept, pick the best one and list the others as aliases to avoid. +- **Flag conflicts explicitly.** If a term is used ambiguously, call it out in "Flagged ambiguities" with a clear resolution. +- **Keep definitions tight.** One sentence max. Define what it IS, not what it does. +- **Show relationships.** Use bold term names and express cardinality where obvious. +- **Only include terms specific to this project's context.** General programming concepts (timeouts, error types, utility patterns) don't belong even if the project uses them extensively. Before adding a term, ask: is this a concept unique to this context, or a general programming concept? Only the former belongs. +- **Group terms under subheadings** when natural clusters emerge. If all terms belong to a single cohesive area, a flat list is fine. +- **Write an example dialogue.** A conversation between a dev and a domain expert that demonstrates how the terms interact naturally and clarifies boundaries between related concepts. + +## Single vs multi-context repos + +**Single context (most repos):** One `CONTEXT.md` at the repo root. 
+ +**Multiple contexts:** A `CONTEXT-MAP.md` at the repo root lists the contexts, where they live, and how they relate to each other: + +```md +# Context Map + +## Contexts + +- [Ordering](./src/ordering/CONTEXT.md) — receives and tracks customer orders +- [Billing](./src/billing/CONTEXT.md) — generates invoices and processes payments +- [Fulfillment](./src/fulfillment/CONTEXT.md) — manages warehouse picking and shipping + +## Relationships + +- **Ordering → Fulfillment**: Ordering emits `OrderPlaced` events; Fulfillment consumes them to start picking +- **Fulfillment → Billing**: Fulfillment emits `ShipmentDispatched` events; Billing consumes them to generate invoices +- **Ordering ↔ Billing**: Shared types for `CustomerId` and `Money` +``` + +The skill infers which structure applies: + +- If `CONTEXT-MAP.md` exists, read it to find contexts +- If only a root `CONTEXT.md` exists, single context +- If neither exists, create a root `CONTEXT.md` lazily when the first term is resolved + +When multiple contexts exist, infer which one the current topic relates to. If unclear, ask. diff --git a/.agents/skills/grill-with-docs/SKILL.md b/.agents/skills/grill-with-docs/SKILL.md new file mode 100644 index 0000000..6dad6ad --- /dev/null +++ b/.agents/skills/grill-with-docs/SKILL.md @@ -0,0 +1,88 @@ +--- +name: grill-with-docs +description: Grilling session that challenges your plan against the existing domain model, sharpens terminology, and updates documentation (CONTEXT.md, ADRs) inline as decisions crystallise. Use when user wants to stress-test a plan against their project's language and documented decisions. +--- + + + +Interview me relentlessly about every aspect of this plan until we reach a shared understanding. Walk down each branch of the design tree, resolving dependencies between decisions one-by-one. For each question, provide your recommended answer. + +Ask the questions one at a time, waiting for feedback on each question before continuing. 
+ +If a question can be answered by exploring the codebase, explore the codebase instead. + + + + + +## Domain awareness + +During codebase exploration, also look for existing documentation: + +### File structure + +Most repos have a single context: + +``` +/ +├── CONTEXT.md +├── docs/ +│ └── adr/ +│ ├── 0001-event-sourced-orders.md +│ └── 0002-postgres-for-write-model.md +└── src/ +``` + +If a `CONTEXT-MAP.md` exists at the root, the repo has multiple contexts. The map points to where each one lives: + +``` +/ +├── CONTEXT-MAP.md +├── docs/ +│ └── adr/ ← system-wide decisions +├── src/ +│ ├── ordering/ +│ │ ├── CONTEXT.md +│ │ └── docs/adr/ ← context-specific decisions +│ └── billing/ +│ ├── CONTEXT.md +│ └── docs/adr/ +``` + +Create files lazily — only when you have something to write. If no `CONTEXT.md` exists, create one when the first term is resolved. If no `docs/adr/` exists, create it when the first ADR is needed. + +## During the session + +### Challenge against the glossary + +When the user uses a term that conflicts with the existing language in `CONTEXT.md`, call it out immediately. "Your glossary defines 'cancellation' as X, but you seem to mean Y — which is it?" + +### Sharpen fuzzy language + +When the user uses vague or overloaded terms, propose a precise canonical term. "You're saying 'account' — do you mean the Customer or the User? Those are different things." + +### Discuss concrete scenarios + +When domain relationships are being discussed, stress-test them with specific scenarios. Invent scenarios that probe edge cases and force the user to be precise about the boundaries between concepts. + +### Cross-reference with code + +When the user states how something works, check whether the code agrees. If you find a contradiction, surface it: "Your code cancels entire Orders, but you just said partial cancellation is possible — which is right?" + +### Update CONTEXT.md inline + +When a term is resolved, update `CONTEXT.md` right there. 
Don't batch these up — capture them as they happen. Use the format in [CONTEXT-FORMAT.md](./CONTEXT-FORMAT.md). + +Don't couple `CONTEXT.md` to implementation details. Only include terms that are meaningful to domain experts. + +### Offer ADRs sparingly + +Only offer to create an ADR when all three are true: + +1. **Hard to reverse** — the cost of changing your mind later is meaningful +2. **Surprising without context** — a future reader will wonder "why did they do it this way?" +3. **The result of a real trade-off** — there were genuine alternatives and you picked one for specific reasons + +If any of the three is missing, skip the ADR. Use the format in [ADR-FORMAT.md](./ADR-FORMAT.md). + + diff --git a/.agents/skills/improve-codebase-architecture/DEEPENING.md b/.agents/skills/improve-codebase-architecture/DEEPENING.md new file mode 100644 index 0000000..ecaf5d7 --- /dev/null +++ b/.agents/skills/improve-codebase-architecture/DEEPENING.md @@ -0,0 +1,37 @@ +# Deepening + +How to deepen a cluster of shallow modules safely, given its dependencies. Assumes the vocabulary in [LANGUAGE.md](LANGUAGE.md) — **module**, **interface**, **seam**, **adapter**. + +## Dependency categories + +When assessing a candidate for deepening, classify its dependencies. The category determines how the deepened module is tested across its seam. + +### 1. In-process + +Pure computation, in-memory state, no I/O. Always deepenable — merge the modules and test through the new interface directly. No adapter needed. + +### 2. Local-substitutable + +Dependencies that have local test stand-ins (PGLite for Postgres, in-memory filesystem). Deepenable if the stand-in exists. The deepened module is tested with the stand-in running in the test suite. The seam is internal; no port at the module's external interface. + +### 3. Remote but owned (Ports & Adapters) + +Your own services across a network boundary (microservices, internal APIs). Define a **port** (interface) at the seam. 
The deep module owns the logic; the transport is injected as an **adapter**. Tests use an in-memory adapter. Production uses an HTTP/gRPC/queue adapter. + +Recommendation shape: *"Define a port at the seam, implement an HTTP adapter for production and an in-memory adapter for testing, so the logic sits in one deep module even though it's deployed across a network."* + +### 4. True external (Mock) + +Third-party services (Stripe, Twilio, etc.) you don't control. The deepened module takes the external dependency as an injected port; tests provide a mock adapter. + +## Seam discipline + +- **One adapter means a hypothetical seam. Two adapters means a real one.** Don't introduce a port unless at least two adapters are justified (typically production + test). A single-adapter seam is just indirection. +- **Internal seams vs external seams.** A deep module can have internal seams (private to its implementation, used by its own tests) as well as the external seam at its interface. Don't expose internal seams through the interface just because tests use them. + +## Testing strategy: replace, don't layer + +- Old unit tests on shallow modules become waste once tests at the deepened module's interface exist — delete them. +- Write new tests at the deepened module's interface. The **interface is the test surface**. +- Tests assert on observable outcomes through the interface, not internal state. +- Tests should survive internal refactors — they describe behaviour, not implementation. If a test has to change when the implementation changes, it's testing past the interface. 
diff --git a/.agents/skills/improve-codebase-architecture/INTERFACE-DESIGN.md b/.agents/skills/improve-codebase-architecture/INTERFACE-DESIGN.md new file mode 100644 index 0000000..3197723 --- /dev/null +++ b/.agents/skills/improve-codebase-architecture/INTERFACE-DESIGN.md @@ -0,0 +1,44 @@ +# Interface Design + +When the user wants to explore alternative interfaces for a chosen deepening candidate, use this parallel sub-agent pattern. Based on "Design It Twice" (Ousterhout) — your first idea is unlikely to be the best. + +Uses the vocabulary in [LANGUAGE.md](LANGUAGE.md) — **module**, **interface**, **seam**, **adapter**, **leverage**. + +## Process + +### 1. Frame the problem space + +Before spawning sub-agents, write a user-facing explanation of the problem space for the chosen candidate: + +- The constraints any new interface would need to satisfy +- The dependencies it would rely on, and which category they fall into (see [DEEPENING.md](DEEPENING.md)) +- A rough illustrative code sketch to ground the constraints — not a proposal, just a way to make the constraints concrete + +Show this to the user, then immediately proceed to Step 2. The user reads and thinks while the sub-agents work in parallel. + +### 2. Spawn sub-agents + +Spawn 3+ sub-agents in parallel using the Agent tool. Each must produce a **radically different** interface for the deepened module. + +Prompt each sub-agent with a separate technical brief (file paths, coupling details, dependency category from [DEEPENING.md](DEEPENING.md), what sits behind the seam). The brief is independent of the user-facing problem-space explanation in Step 1. Give each agent a different design constraint: + +- Agent 1: "Minimize the interface — aim for 1–3 entry points max. Maximise leverage per entry point." +- Agent 2: "Maximise flexibility — support many use cases and extension." +- Agent 3: "Optimise for the most common caller — make the default case trivial." 
+- Agent 4 (if applicable): "Design around ports & adapters for cross-seam dependencies." + +Include both [LANGUAGE.md](LANGUAGE.md) vocabulary and CONTEXT.md vocabulary in the brief so each sub-agent names things consistently with the architecture language and the project's domain language. + +Each sub-agent outputs: + +1. Interface (types, methods, params — plus invariants, ordering, error modes) +2. Usage example showing how callers use it +3. What the implementation hides behind the seam +4. Dependency strategy and adapters (see [DEEPENING.md](DEEPENING.md)) +5. Trade-offs — where leverage is high, where it's thin + +### 3. Present and compare + +Present designs sequentially so the user can absorb each one, then compare them in prose. Contrast by **depth** (leverage at the interface), **locality** (where change concentrates), and **seam placement**. + +After comparing, give your own recommendation: which design you think is strongest and why. If elements from different designs would combine well, propose a hybrid. Be opinionated — the user wants a strong read, not a menu. diff --git a/.agents/skills/improve-codebase-architecture/LANGUAGE.md b/.agents/skills/improve-codebase-architecture/LANGUAGE.md new file mode 100644 index 0000000..530c276 --- /dev/null +++ b/.agents/skills/improve-codebase-architecture/LANGUAGE.md @@ -0,0 +1,53 @@ +# Language + +Shared vocabulary for every suggestion this skill makes. Use these terms exactly — don't substitute "component," "service," "API," or "boundary." Consistent language is the whole point. + +## Terms + +**Module** +Anything with an interface and an implementation. Deliberately scale-agnostic — applies equally to a function, class, package, or tier-spanning slice. +_Avoid_: unit, component, service. + +**Interface** +Everything a caller must know to use the module correctly. Includes the type signature, but also invariants, ordering constraints, error modes, required configuration, and performance characteristics. 
+_Avoid_: API, signature (too narrow — those refer only to the type-level surface). + +**Implementation** +What's inside a module — its body of code. Distinct from **Adapter**: a thing can be a small adapter with a large implementation (a Postgres repo) or a large adapter with a small implementation (an in-memory fake). Reach for "adapter" when the seam is the topic; "implementation" otherwise. + +**Depth** +Leverage at the interface — the amount of behaviour a caller (or test) can exercise per unit of interface they have to learn. A module is **deep** when a large amount of behaviour sits behind a small interface. A module is **shallow** when the interface is nearly as complex as the implementation. + +**Seam** _(from Michael Feathers)_ +A place where you can alter behaviour without editing in that place. The *location* at which a module's interface lives. Choosing where to put the seam is its own design decision, distinct from what goes behind it. +_Avoid_: boundary (overloaded with DDD's bounded context). + +**Adapter** +A concrete thing that satisfies an interface at a seam. Describes *role* (what slot it fills), not substance (what's inside). + +**Leverage** +What callers get from depth. More capability per unit of interface they have to learn. One implementation pays back across N call sites and M tests. + +**Locality** +What maintainers get from depth. Change, bugs, knowledge, and verification concentrate at one place rather than spreading across callers. Fix once, fixed everywhere. + +## Principles + +- **Depth is a property of the interface, not the implementation.** A deep module can be internally composed of small, mockable, swappable parts — they just aren't part of the interface. A module can have **internal seams** (private to its implementation, used by its own tests) as well as the **external seam** at its interface. +- **The deletion test.** Imagine deleting the module. 
If complexity vanishes, the module wasn't hiding anything (it was a pass-through). If complexity reappears across N callers, the module was earning its keep. +- **The interface is the test surface.** Callers and tests cross the same seam. If you want to test *past* the interface, the module is probably the wrong shape. +- **One adapter means a hypothetical seam. Two adapters means a real one.** Don't introduce a seam unless something actually varies across it. + +## Relationships + +- A **Module** has exactly one **Interface** (the surface it presents to callers and tests). +- **Depth** is a property of a **Module**, measured against its **Interface**. +- A **Seam** is where a **Module**'s **Interface** lives. +- An **Adapter** sits at a **Seam** and satisfies the **Interface**. +- **Depth** produces **Leverage** for callers and **Locality** for maintainers. + +## Rejected framings + +- **Depth as ratio of implementation-lines to interface-lines** (Ousterhout): rewards padding the implementation. We use depth-as-leverage instead. +- **"Interface" as the TypeScript `interface` keyword or a class's public methods**: too narrow — interface here includes every fact a caller must know. +- **"Boundary"**: overloaded with DDD's bounded context. Say **seam** or **interface**. diff --git a/.agents/skills/improve-codebase-architecture/SKILL.md b/.agents/skills/improve-codebase-architecture/SKILL.md new file mode 100644 index 0000000..05984a6 --- /dev/null +++ b/.agents/skills/improve-codebase-architecture/SKILL.md @@ -0,0 +1,71 @@ +--- +name: improve-codebase-architecture +description: Find deepening opportunities in a codebase, informed by the domain language in CONTEXT.md and the decisions in docs/adr/. Use when the user wants to improve architecture, find refactoring opportunities, consolidate tightly-coupled modules, or make a codebase more testable and AI-navigable. 
+--- + +# Improve Codebase Architecture + +Surface architectural friction and propose **deepening opportunities** — refactors that turn shallow modules into deep ones. The aim is testability and AI-navigability. + +## Glossary + +Use these terms exactly in every suggestion. Consistent language is the point — don't drift into "component," "service," "API," or "boundary." Full definitions in [LANGUAGE.md](LANGUAGE.md). + +- **Module** — anything with an interface and an implementation (function, class, package, slice). +- **Interface** — everything a caller must know to use the module: types, invariants, error modes, ordering, config. Not just the type signature. +- **Implementation** — the code inside. +- **Depth** — leverage at the interface: a lot of behaviour behind a small interface. **Deep** = high leverage. **Shallow** = interface nearly as complex as the implementation. +- **Seam** — where an interface lives; a place behaviour can be altered without editing in place. (Use this, not "boundary.") +- **Adapter** — a concrete thing satisfying an interface at a seam. +- **Leverage** — what callers get from depth. +- **Locality** — what maintainers get from depth: change, bugs, knowledge concentrated in one place. + +Key principles (see [LANGUAGE.md](LANGUAGE.md) for the full list): + +- **Deletion test**: imagine deleting the module. If complexity vanishes, it was a pass-through. If complexity reappears across N callers, it was earning its keep. +- **The interface is the test surface.** +- **One adapter = hypothetical seam. Two adapters = real seam.** + +This skill is _informed_ by the project's domain model. The domain language gives names to good seams; ADRs record decisions the skill should not re-litigate. + +## Process + +### 1. Explore + +Read the project's domain glossary and any ADRs in the area you're touching first. + +Then use the Agent tool with `subagent_type=Explore` to walk the codebase. 
Don't follow rigid heuristics — explore organically and note where you experience friction: + +- Where does understanding one concept require bouncing between many small modules? +- Where are modules **shallow** — interface nearly as complex as the implementation? +- Where have pure functions been extracted just for testability, but the real bugs hide in how they're called (no **locality**)? +- Where do tightly-coupled modules leak across their seams? +- Which parts of the codebase are untested, or hard to test through their current interface? + +Apply the **deletion test** to anything you suspect is shallow: would deleting it concentrate complexity, or just move it? A "yes, concentrates" is the signal you want. + +### 2. Present candidates + +Present a numbered list of deepening opportunities. For each candidate: + +- **Files** — which files/modules are involved +- **Problem** — why the current architecture is causing friction +- **Solution** — plain English description of what would change +- **Benefits** — explained in terms of locality and leverage, and also in how tests would improve + +**Use CONTEXT.md vocabulary for the domain, and [LANGUAGE.md](LANGUAGE.md) vocabulary for the architecture.** If `CONTEXT.md` defines "Order," talk about "the Order intake module" — not "the FooBarHandler," and not "the Order service." + +**ADR conflicts**: if a candidate contradicts an existing ADR, only surface it when the friction is real enough to warrant revisiting the ADR. Mark it clearly (e.g. _"contradicts ADR-0007 — but worth reopening because…"_). Don't list every theoretical refactor an ADR forbids. + +Do NOT propose interfaces yet. Ask the user: "Which of these would you like to explore?" + +### 3. Grilling loop + +Once the user picks a candidate, drop into a grilling conversation. Walk the design tree with them — constraints, dependencies, the shape of the deepened module, what sits behind the seam, what tests survive. 
+ +Side effects happen inline as decisions crystallize: + +- **Naming a deepened module after a concept not in `CONTEXT.md`?** Add the term to `CONTEXT.md` — same discipline as `/grill-with-docs` (see [CONTEXT-FORMAT.md](../grill-with-docs/CONTEXT-FORMAT.md)). Create the file lazily if it doesn't exist. +- **Sharpening a fuzzy term during the conversation?** Update `CONTEXT.md` right there. +- **User rejects the candidate with a load-bearing reason?** Offer an ADR, framed as: _"Want me to record this as an ADR so future architecture reviews don't re-suggest it?"_ Only offer when the reason would actually be needed by a future explorer to avoid re-suggesting the same thing — skip ephemeral reasons ("not worth it right now") and self-evident ones. See [ADR-FORMAT.md](../grill-with-docs/ADR-FORMAT.md). +- **Want to explore alternative interfaces for the deepened module?** See [INTERFACE-DESIGN.md](INTERFACE-DESIGN.md). diff --git a/.agents/skills/tdd/SKILL.md b/.agents/skills/tdd/SKILL.md new file mode 100644 index 0000000..7a98941 --- /dev/null +++ b/.agents/skills/tdd/SKILL.md @@ -0,0 +1,109 @@ +--- +name: tdd +description: Test-driven development with red-green-refactor loop. Use when user wants to build features or fix bugs using TDD, mentions "red-green-refactor", wants integration tests, or asks for test-first development. +--- + +# Test-Driven Development + +## Philosophy + +**Core principle**: Tests should verify behavior through public interfaces, not implementation details. Code can change entirely; tests shouldn't. + +**Good tests** are integration-style: they exercise real code paths through public APIs. They describe _what_ the system does, not _how_ it does it. A good test reads like a specification - "user can checkout with valid cart" tells you exactly what capability exists. These tests survive refactors because they don't care about internal structure. + +**Bad tests** are coupled to implementation. 
They mock internal collaborators, test private methods, or verify through external means (like querying a database directly instead of using the interface). The warning sign: your test breaks when you refactor, but behavior hasn't changed. If you rename an internal function and tests fail, those tests were testing implementation, not behavior. + +See [tests.md](tests.md) for examples and [mocking.md](mocking.md) for mocking guidelines. + +## Anti-Pattern: Horizontal Slices + +**DO NOT write all tests first, then all implementation.** This is "horizontal slicing" - treating RED as "write all tests" and GREEN as "write all code." + +This produces **crap tests**: + +- Tests written in bulk test _imagined_ behavior, not _actual_ behavior +- You end up testing the _shape_ of things (data structures, function signatures) rather than user-facing behavior +- Tests become insensitive to real changes - they pass when behavior breaks, fail when behavior is fine +- You outrun your headlights, committing to test structure before understanding the implementation + +**Correct approach**: Vertical slices via tracer bullets. One test → one implementation → repeat. Each test responds to what you learned from the previous cycle. Because you just wrote the code, you know exactly what behavior matters and how to verify it. + +``` +WRONG (horizontal): + RED: test1, test2, test3, test4, test5 + GREEN: impl1, impl2, impl3, impl4, impl5 + +RIGHT (vertical): + RED→GREEN: test1→impl1 + RED→GREEN: test2→impl2 + RED→GREEN: test3→impl3 + ... +``` + +## Workflow + +### 1. Planning + +When exploring the codebase, use the project's domain glossary so that test names and interface vocabulary match the project's language, and respect ADRs in the area you're touching. 
+ +Before writing any code: + +- [ ] Confirm with user what interface changes are needed +- [ ] Confirm with user which behaviors to test (prioritize) +- [ ] Identify opportunities for [deep modules](deep-modules.md) (small interface, deep implementation) +- [ ] Design interfaces for [testability](interface-design.md) +- [ ] List the behaviors to test (not implementation steps) +- [ ] Get user approval on the plan + +Ask: "What should the public interface look like? Which behaviors are most important to test?" + +**You can't test everything.** Confirm with the user exactly which behaviors matter most. Focus testing effort on critical paths and complex logic, not every possible edge case. + +### 2. Tracer Bullet + +Write ONE test that confirms ONE thing about the system: + +``` +RED: Write test for first behavior → test fails +GREEN: Write minimal code to pass → test passes +``` + +This is your tracer bullet - proves the path works end-to-end. + +### 3. Incremental Loop + +For each remaining behavior: + +``` +RED: Write next test → fails +GREEN: Minimal code to pass → passes +``` + +Rules: + +- One test at a time +- Only enough code to pass current test +- Don't anticipate future tests +- Keep tests focused on observable behavior + +### 4. Refactor + +After all tests pass, look for [refactor candidates](refactoring.md): + +- [ ] Extract duplication +- [ ] Deepen modules (move complexity behind simple interfaces) +- [ ] Apply SOLID principles where natural +- [ ] Consider what new code reveals about existing code +- [ ] Run tests after each refactor step + +**Never refactor while RED.** Get to GREEN first. 
+ +## Checklist Per Cycle + +``` +[ ] Test describes behavior, not implementation +[ ] Test uses public interface only +[ ] Test would survive internal refactor +[ ] Code is minimal for this test +[ ] No speculative features added +``` diff --git a/.agents/skills/tdd/deep-modules.md b/.agents/skills/tdd/deep-modules.md new file mode 100644 index 0000000..0d9720c --- /dev/null +++ b/.agents/skills/tdd/deep-modules.md @@ -0,0 +1,33 @@ +# Deep Modules + +From "A Philosophy of Software Design": + +**Deep module** = small interface + lots of implementation + +``` +┌─────────────────────┐ +│ Small Interface │ ← Few methods, simple params +├─────────────────────┤ +│ │ +│ │ +│ Deep Implementation│ ← Complex logic hidden +│ │ +│ │ +└─────────────────────┘ +``` + +**Shallow module** = large interface + little implementation (avoid) + +``` +┌─────────────────────────────────┐ +│ Large Interface │ ← Many methods, complex params +├─────────────────────────────────┤ +│ Thin Implementation │ ← Just passes through +└─────────────────────────────────┘ +``` + +When designing interfaces, ask: + +- Can I reduce the number of methods? +- Can I simplify the parameters? +- Can I hide more complexity inside? diff --git a/.agents/skills/tdd/interface-design.md b/.agents/skills/tdd/interface-design.md new file mode 100644 index 0000000..a0a20ca --- /dev/null +++ b/.agents/skills/tdd/interface-design.md @@ -0,0 +1,31 @@ +# Interface Design for Testability + +Good interfaces make testing natural: + +1. **Accept dependencies, don't create them** + + ```typescript + // Testable + function processOrder(order, paymentGateway) {} + + // Hard to test + function processOrder(order) { + const gateway = new StripeGateway(); + } + ``` + +2. **Return results, don't produce side effects** + + ```typescript + // Testable + function calculateDiscount(cart): Discount {} + + // Hard to test + function applyDiscount(cart): void { + cart.total -= discount; + } + ``` + +3. 
**Small surface area** + - Fewer methods = fewer tests needed + - Fewer params = simpler test setup diff --git a/.agents/skills/tdd/mocking.md b/.agents/skills/tdd/mocking.md new file mode 100644 index 0000000..71cbfee --- /dev/null +++ b/.agents/skills/tdd/mocking.md @@ -0,0 +1,59 @@ +# When to Mock + +Mock at **system boundaries** only: + +- External APIs (payment, email, etc.) +- Databases (sometimes - prefer test DB) +- Time/randomness +- File system (sometimes) + +Don't mock: + +- Your own classes/modules +- Internal collaborators +- Anything you control + +## Designing for Mockability + +At system boundaries, design interfaces that are easy to mock: + +**1. Use dependency injection** + +Pass external dependencies in rather than creating them internally: + +```typescript +// Easy to mock +function processPayment(order, paymentClient) { + return paymentClient.charge(order.total); +} + +// Hard to mock +function processPayment(order) { + const client = new StripeClient(process.env.STRIPE_KEY); + return client.charge(order.total); +} +``` + +**2. 
Prefer SDK-style interfaces over generic fetchers** + +Create specific functions for each external operation instead of one generic function with conditional logic: + +```typescript +// GOOD: Each function is independently mockable +const api = { + getUser: (id) => fetch(`/users/${id}`), + getOrders: (userId) => fetch(`/users/${userId}/orders`), + createOrder: (data) => fetch('/orders', { method: 'POST', body: data }), +}; + +// BAD: Mocking requires conditional logic inside the mock +const api = { + fetch: (endpoint, options) => fetch(endpoint, options), +}; +``` + +The SDK approach means: +- Each mock returns one specific shape +- No conditional logic in test setup +- Easier to see which endpoints a test exercises +- Type safety per endpoint diff --git a/.agents/skills/tdd/refactoring.md b/.agents/skills/tdd/refactoring.md new file mode 100644 index 0000000..8a44439 --- /dev/null +++ b/.agents/skills/tdd/refactoring.md @@ -0,0 +1,10 @@ +# Refactor Candidates + +After TDD cycle, look for: + +- **Duplication** → Extract function/class +- **Long methods** → Break into private helpers (keep tests on public interface) +- **Shallow modules** → Combine or deepen +- **Feature envy** → Move logic to where data lives +- **Primitive obsession** → Introduce value objects +- **Existing code** the new code reveals as problematic diff --git a/.agents/skills/tdd/tests.md b/.agents/skills/tdd/tests.md new file mode 100644 index 0000000..ff22f80 --- /dev/null +++ b/.agents/skills/tdd/tests.md @@ -0,0 +1,61 @@ +# Good and Bad Tests + +## Good Tests + +**Integration-style**: Test through real interfaces, not mocks of internal parts. 
+ +```typescript +// GOOD: Tests observable behavior +test("user can checkout with valid cart", async () => { + const cart = createCart(); + cart.add(product); + const result = await checkout(cart, paymentMethod); + expect(result.status).toBe("confirmed"); +}); +``` + +Characteristics: + +- Tests behavior users/callers care about +- Uses public API only +- Survives internal refactors +- Describes WHAT, not HOW +- One logical assertion per test + +## Bad Tests + +**Implementation-detail tests**: Coupled to internal structure. + +```typescript +// BAD: Tests implementation details +test("checkout calls paymentService.process", async () => { + const mockPayment = jest.mock(paymentService); + await checkout(cart, payment); + expect(mockPayment.process).toHaveBeenCalledWith(cart.total); +}); +``` + +Red flags: + +- Mocking internal collaborators +- Testing private methods +- Asserting on call counts/order +- Test breaks when refactoring without behavior change +- Test name describes HOW not WHAT +- Verifying through external means instead of interface + +```typescript +// BAD: Bypasses interface to verify +test("createUser saves to database", async () => { + await createUser({ name: "Alice" }); + const row = await db.query("SELECT * FROM users WHERE name = ?", ["Alice"]); + expect(row).toBeDefined(); +}); + +// GOOD: Verifies through interface +test("createUser makes user retrievable", async () => { + const user = await createUser({ name: "Alice" }); + const retrieved = await getUser(user.id); + expect(retrieved.name).toBe("Alice"); +}); +``` diff --git a/.agents/skills/to-issues/SKILL.md b/.agents/skills/to-issues/SKILL.md new file mode 100644 index 0000000..5a40716 --- /dev/null +++ b/.agents/skills/to-issues/SKILL.md @@ -0,0 +1,81 @@ +--- +name: to-issues +description: Break a plan, spec, or PRD into independently-grabbable issues on the project issue tracker using tracer-bullet vertical slices. 
Use when user wants to convert a plan into issues, create implementation tickets, or break down work into issues.
+---
+
+# To Issues
+
+Break a plan into independently-grabbable issues using vertical slices (tracer bullets).
+
+The issue tracker and triage label vocabulary should have been provided to you — run `/setup-matt-pocock-skills` if not.
+
+## Process
+
+### 1. Gather context
+
+Work from whatever is already in the conversation context. If the user passes an issue reference (issue number, URL, or path) as an argument, fetch it from the issue tracker and read its full body and comments.
+
+### 2. Explore the codebase (optional)
+
+If you have not already explored the codebase, do so to understand the current state of the code. Issue titles and descriptions should use the project's domain glossary vocabulary, and respect ADRs in the area you're touching.
+
+### 3. Draft vertical slices
+
+Break the plan into **tracer bullet** issues. Each issue is a thin vertical slice that cuts through ALL integration layers end-to-end, NOT a horizontal slice of one layer.
+
+Slices may be 'HITL' (human-in-the-loop) or 'AFK' (away-from-keyboard). HITL slices require human interaction, such as an architectural decision or a design review. AFK slices can be implemented and merged without human interaction. Prefer AFK over HITL where possible.
+
+
+- Each slice delivers a narrow but COMPLETE path through every layer (schema, API, UI, tests)
+- A completed slice is demoable or verifiable on its own
+- Prefer many thin slices over few thick ones
+
+
+### 4. Quiz the user
+
+Present the proposed breakdown as a numbered list. For each slice, show:
+
+- **Title**: short descriptive name
+- **Type**: HITL / AFK
+- **Blocked by**: which other slices (if any) must complete first
+- **User stories covered**: which user stories this addresses (if the source material has them)
+
+Ask the user:
+
+- Does the granularity feel right? (too coarse / too fine)
+- Are the dependency relationships correct?
+- Should any slices be merged or split further? +- Are the correct slices marked as HITL and AFK? + +Iterate until the user approves the breakdown. + +### 5. Publish the issues to the issue tracker + +For each approved slice, publish a new issue to the issue tracker. Use the issue body template below. Apply the `needs-triage` triage label so each issue enters the normal triage flow. + +Publish issues in dependency order (blockers first) so you can reference real issue identifiers in the "Blocked by" field. + + +## Parent + +A reference to the parent issue on the issue tracker (if the source was an existing issue, otherwise omit this section). + +## What to build + +A concise description of this vertical slice. Describe the end-to-end behavior, not layer-by-layer implementation. + +## Acceptance criteria + +- [ ] Criterion 1 +- [ ] Criterion 2 +- [ ] Criterion 3 + +## Blocked by + +- A reference to the blocking ticket (if any) + +Or "None - can start immediately" if no blockers. + + + +Do NOT close or modify any parent issue. diff --git a/.agents/skills/to-prd/SKILL.md b/.agents/skills/to-prd/SKILL.md new file mode 100644 index 0000000..7bdc82a --- /dev/null +++ b/.agents/skills/to-prd/SKILL.md @@ -0,0 +1,74 @@ +--- +name: to-prd +description: Turn the current conversation context into a PRD and publish it to the project issue tracker. Use when user wants to create a PRD from the current context. +--- + +This skill takes the current conversation context and codebase understanding and produces a PRD. Do NOT interview the user — just synthesize what you already know. + +The issue tracker and triage label vocabulary should have been provided to you — run `/setup-matt-pocock-skills` if not. + +## Process + +1. Explore the repo to understand the current state of the codebase, if you haven't already. Use the project's domain glossary vocabulary throughout the PRD, and respect any ADRs in the area you're touching. + +2. 
Sketch out the major modules you will need to build or modify to complete the implementation. Actively look for opportunities to extract deep modules that can be tested in isolation.
+
+A deep module (as opposed to a shallow module) is one which encapsulates a lot of functionality in a simple, testable interface which rarely changes.
+
+Check with the user that these modules match their expectations. Check with the user which modules they want tests written for.
+
+3. Write the PRD using the template below, then publish it to the project issue tracker. Apply the `needs-triage` triage label so it enters the normal triage flow.
+
+
+## Problem Statement
+
+The problem that the user is facing, from the user's perspective.
+
+## Solution
+
+The solution to the problem, from the user's perspective.
+
+## User Stories
+
+A LONG, numbered list of user stories. Each user story should be in the format of:
+
+1. As a {persona}, I want {capability}, so that {benefit}
+
+For example:
+
+1. As a mobile bank customer, I want to see the balance on my accounts, so that I can make better informed decisions about my spending
+
+
+This list of user stories should be extremely extensive and cover all aspects of the feature.
+
+## Implementation Decisions
+
+A list of implementation decisions that were made. This can include:
+
+- The modules that will be built/modified
+- The interfaces of those modules that will be modified
+- Technical clarifications from the developer
+- Architectural decisions
+- Schema changes
+- API contracts
+- Specific interactions
+
+Do NOT include specific file paths or code snippets. They may end up being outdated very quickly.
+
+## Testing Decisions
+
+A list of testing decisions that were made. Include:
+
+- A description of what makes a good test (only test external behavior, not implementation details)
+- Which modules will be tested
+- Prior art for the tests (i.e. similar types of tests in the codebase)
+
+## Out of Scope
+
+A description of the things that are out of scope for this PRD.
+ +## Further Notes + +Any further notes about the feature. + + diff --git a/.pi/skills/grill-with-docs/ADR-FORMAT.md b/.pi/skills/grill-with-docs/ADR-FORMAT.md new file mode 100644 index 0000000..da7e78e --- /dev/null +++ b/.pi/skills/grill-with-docs/ADR-FORMAT.md @@ -0,0 +1,47 @@ +# ADR Format + +ADRs live in `docs/adr/` and use sequential numbering: `0001-slug.md`, `0002-slug.md`, etc. + +Create the `docs/adr/` directory lazily — only when the first ADR is needed. + +## Template + +```md +# {Short title of the decision} + +{1-3 sentences: what's the context, what did we decide, and why.} +``` + +That's it. An ADR can be a single paragraph. The value is in recording *that* a decision was made and *why* — not in filling out sections. + +## Optional sections + +Only include these when they add genuine value. Most ADRs won't need them. + +- **Status** frontmatter (`proposed | accepted | deprecated | superseded by ADR-NNNN`) — useful when decisions are revisited +- **Considered Options** — only when the rejected alternatives are worth remembering +- **Consequences** — only when non-obvious downstream effects need to be called out + +## Numbering + +Scan `docs/adr/` for the highest existing number and increment by one. + +## When to offer an ADR + +All three of these must be true: + +1. **Hard to reverse** — the cost of changing your mind later is meaningful +2. **Surprising without context** — a future reader will look at the code and wonder "why on earth did they do it this way?" +3. **The result of a real trade-off** — there were genuine alternatives and you picked one for specific reasons + +If a decision is easy to reverse, skip it — you'll just reverse it. If it's not surprising, nobody will wonder why. If there was no real alternative, there's nothing to record beyond "we did the obvious thing." + +### What qualifies + +- **Architectural shape.** "We're using a monorepo." "The write model is event-sourced, the read model is projected into Postgres." 
+- **Integration patterns between contexts.** "Ordering and Billing communicate via domain events, not synchronous HTTP." +- **Technology choices that carry lock-in.** Database, message bus, auth provider, deployment target. Not every library — just the ones that would take a quarter to swap out. +- **Boundary and scope decisions.** "Customer data is owned by the Customer context; other contexts reference it by ID only." The explicit no-s are as valuable as the yes-s. +- **Deliberate deviations from the obvious path.** "We're using manual SQL instead of an ORM because X." Anything where a reasonable reader would assume the opposite. These stop the next engineer from "fixing" something that was deliberate. +- **Constraints not visible in the code.** "We can't use AWS because of compliance requirements." "Response times must be under 200ms because of the partner API contract." +- **Rejected alternatives when the rejection is non-obvious.** If you considered GraphQL and picked REST for subtle reasons, record it — otherwise someone will suggest GraphQL again in six months. diff --git a/.pi/skills/grill-with-docs/CONTEXT-FORMAT.md b/.pi/skills/grill-with-docs/CONTEXT-FORMAT.md new file mode 100644 index 0000000..ddfa247 --- /dev/null +++ b/.pi/skills/grill-with-docs/CONTEXT-FORMAT.md @@ -0,0 +1,77 @@ +# CONTEXT.md Format + +## Structure + +```md +# {Context Name} + +{One or two sentence description of what this context is and why it exists.} + +## Language + +**Order**: +{A concise description of the term} +_Avoid_: Purchase, transaction + +**Invoice**: +A request for payment sent to a customer after delivery. +_Avoid_: Bill, payment request + +**Customer**: +A person or organization that places orders. 
+_Avoid_: Client, buyer, account + +## Relationships + +- An **Order** produces one or more **Invoices** +- An **Invoice** belongs to exactly one **Customer** + +## Example dialogue + +> **Dev:** "When a **Customer** places an **Order**, do we create the **Invoice** immediately?" +> **Domain expert:** "No — an **Invoice** is only generated once a **Fulfillment** is confirmed." + +## Flagged ambiguities + +- "account" was used to mean both **Customer** and **User** — resolved: these are distinct concepts. +``` + +## Rules + +- **Be opinionated.** When multiple words exist for the same concept, pick the best one and list the others as aliases to avoid. +- **Flag conflicts explicitly.** If a term is used ambiguously, call it out in "Flagged ambiguities" with a clear resolution. +- **Keep definitions tight.** One sentence max. Define what it IS, not what it does. +- **Show relationships.** Use bold term names and express cardinality where obvious. +- **Only include terms specific to this project's context.** General programming concepts (timeouts, error types, utility patterns) don't belong even if the project uses them extensively. Before adding a term, ask: is this a concept unique to this context, or a general programming concept? Only the former belongs. +- **Group terms under subheadings** when natural clusters emerge. If all terms belong to a single cohesive area, a flat list is fine. +- **Write an example dialogue.** A conversation between a dev and a domain expert that demonstrates how the terms interact naturally and clarifies boundaries between related concepts. + +## Single vs multi-context repos + +**Single context (most repos):** One `CONTEXT.md` at the repo root. 
+ +**Multiple contexts:** A `CONTEXT-MAP.md` at the repo root lists the contexts, where they live, and how they relate to each other: + +```md +# Context Map + +## Contexts + +- [Ordering](./src/ordering/CONTEXT.md) — receives and tracks customer orders +- [Billing](./src/billing/CONTEXT.md) — generates invoices and processes payments +- [Fulfillment](./src/fulfillment/CONTEXT.md) — manages warehouse picking and shipping + +## Relationships + +- **Ordering → Fulfillment**: Ordering emits `OrderPlaced` events; Fulfillment consumes them to start picking +- **Fulfillment → Billing**: Fulfillment emits `ShipmentDispatched` events; Billing consumes them to generate invoices +- **Ordering ↔ Billing**: Shared types for `CustomerId` and `Money` +``` + +The skill infers which structure applies: + +- If `CONTEXT-MAP.md` exists, read it to find contexts +- If only a root `CONTEXT.md` exists, single context +- If neither exists, create a root `CONTEXT.md` lazily when the first term is resolved + +When multiple contexts exist, infer which one the current topic relates to. If unclear, ask. diff --git a/.pi/skills/grill-with-docs/SKILL.md b/.pi/skills/grill-with-docs/SKILL.md new file mode 100644 index 0000000..6dad6ad --- /dev/null +++ b/.pi/skills/grill-with-docs/SKILL.md @@ -0,0 +1,88 @@ +--- +name: grill-with-docs +description: Grilling session that challenges your plan against the existing domain model, sharpens terminology, and updates documentation (CONTEXT.md, ADRs) inline as decisions crystallise. Use when user wants to stress-test a plan against their project's language and documented decisions. +--- + + + +Interview me relentlessly about every aspect of this plan until we reach a shared understanding. Walk down each branch of the design tree, resolving dependencies between decisions one-by-one. For each question, provide your recommended answer. + +Ask the questions one at a time, waiting for feedback on each question before continuing. 
+ +If a question can be answered by exploring the codebase, explore the codebase instead. + + + + + +## Domain awareness + +During codebase exploration, also look for existing documentation: + +### File structure + +Most repos have a single context: + +``` +/ +├── CONTEXT.md +├── docs/ +│ └── adr/ +│ ├── 0001-event-sourced-orders.md +│ └── 0002-postgres-for-write-model.md +└── src/ +``` + +If a `CONTEXT-MAP.md` exists at the root, the repo has multiple contexts. The map points to where each one lives: + +``` +/ +├── CONTEXT-MAP.md +├── docs/ +│ └── adr/ ← system-wide decisions +├── src/ +│ ├── ordering/ +│ │ ├── CONTEXT.md +│ │ └── docs/adr/ ← context-specific decisions +│ └── billing/ +│ ├── CONTEXT.md +│ └── docs/adr/ +``` + +Create files lazily — only when you have something to write. If no `CONTEXT.md` exists, create one when the first term is resolved. If no `docs/adr/` exists, create it when the first ADR is needed. + +## During the session + +### Challenge against the glossary + +When the user uses a term that conflicts with the existing language in `CONTEXT.md`, call it out immediately. "Your glossary defines 'cancellation' as X, but you seem to mean Y — which is it?" + +### Sharpen fuzzy language + +When the user uses vague or overloaded terms, propose a precise canonical term. "You're saying 'account' — do you mean the Customer or the User? Those are different things." + +### Discuss concrete scenarios + +When domain relationships are being discussed, stress-test them with specific scenarios. Invent scenarios that probe edge cases and force the user to be precise about the boundaries between concepts. + +### Cross-reference with code + +When the user states how something works, check whether the code agrees. If you find a contradiction, surface it: "Your code cancels entire Orders, but you just said partial cancellation is possible — which is right?" + +### Update CONTEXT.md inline + +When a term is resolved, update `CONTEXT.md` right there. 
Don't batch these up — capture them as they happen. Use the format in [CONTEXT-FORMAT.md](./CONTEXT-FORMAT.md). + +Don't couple `CONTEXT.md` to implementation details. Only include terms that are meaningful to domain experts. + +### Offer ADRs sparingly + +Only offer to create an ADR when all three are true: + +1. **Hard to reverse** — the cost of changing your mind later is meaningful +2. **Surprising without context** — a future reader will wonder "why did they do it this way?" +3. **The result of a real trade-off** — there were genuine alternatives and you picked one for specific reasons + +If any of the three is missing, skip the ADR. Use the format in [ADR-FORMAT.md](./ADR-FORMAT.md). + + diff --git a/.pi/skills/improve-codebase-architecture/DEEPENING.md b/.pi/skills/improve-codebase-architecture/DEEPENING.md new file mode 100644 index 0000000..ecaf5d7 --- /dev/null +++ b/.pi/skills/improve-codebase-architecture/DEEPENING.md @@ -0,0 +1,37 @@ +# Deepening + +How to deepen a cluster of shallow modules safely, given its dependencies. Assumes the vocabulary in [LANGUAGE.md](LANGUAGE.md) — **module**, **interface**, **seam**, **adapter**. + +## Dependency categories + +When assessing a candidate for deepening, classify its dependencies. The category determines how the deepened module is tested across its seam. + +### 1. In-process + +Pure computation, in-memory state, no I/O. Always deepenable — merge the modules and test through the new interface directly. No adapter needed. + +### 2. Local-substitutable + +Dependencies that have local test stand-ins (PGLite for Postgres, in-memory filesystem). Deepenable if the stand-in exists. The deepened module is tested with the stand-in running in the test suite. The seam is internal; no port at the module's external interface. + +### 3. Remote but owned (Ports & Adapters) + +Your own services across a network boundary (microservices, internal APIs). Define a **port** (interface) at the seam. 
The deep module owns the logic; the transport is injected as an **adapter**. Tests use an in-memory adapter. Production uses an HTTP/gRPC/queue adapter. + +Recommendation shape: *"Define a port at the seam, implement an HTTP adapter for production and an in-memory adapter for testing, so the logic sits in one deep module even though it's deployed across a network."* + +### 4. True external (Mock) + +Third-party services (Stripe, Twilio, etc.) you don't control. The deepened module takes the external dependency as an injected port; tests provide a mock adapter. + +## Seam discipline + +- **One adapter means a hypothetical seam. Two adapters means a real one.** Don't introduce a port unless at least two adapters are justified (typically production + test). A single-adapter seam is just indirection. +- **Internal seams vs external seams.** A deep module can have internal seams (private to its implementation, used by its own tests) as well as the external seam at its interface. Don't expose internal seams through the interface just because tests use them. + +## Testing strategy: replace, don't layer + +- Old unit tests on shallow modules become waste once tests at the deepened module's interface exist — delete them. +- Write new tests at the deepened module's interface. The **interface is the test surface**. +- Tests assert on observable outcomes through the interface, not internal state. +- Tests should survive internal refactors — they describe behaviour, not implementation. If a test has to change when the implementation changes, it's testing past the interface. 
diff --git a/.pi/skills/improve-codebase-architecture/INTERFACE-DESIGN.md b/.pi/skills/improve-codebase-architecture/INTERFACE-DESIGN.md
new file mode 100644
index 0000000..3197723
--- /dev/null
+++ b/.pi/skills/improve-codebase-architecture/INTERFACE-DESIGN.md
@@ -0,0 +1,44 @@
+# Interface Design
+
+When the user wants to explore alternative interfaces for a chosen deepening candidate, use this parallel sub-agent pattern. Based on "Design It Twice" (Ousterhout) — your first idea is unlikely to be the best.
+
+Uses the vocabulary in [LANGUAGE.md](LANGUAGE.md) — **module**, **interface**, **seam**, **adapter**, **leverage**.
+
+## Process
+
+### 1. Frame the problem space
+
+Before spawning sub-agents, write a user-facing explanation of the problem space for the chosen candidate:
+
+- The constraints any new interface would need to satisfy
+- The dependencies it would rely on, and which category they fall into (see [DEEPENING.md](DEEPENING.md))
+- A rough illustrative code sketch to ground the constraints — not a proposal, just a way to make the constraints concrete
+
+Show this to the user, then immediately proceed to Step 2. The user reads and thinks while the sub-agents work in parallel.
+
+### 2. Spawn sub-agents
+
+Spawn 3+ sub-agents in parallel using the Agent tool. Each must produce a **radically different** interface for the deepened module.
+
+Prompt each sub-agent with a separate technical brief (file paths, coupling details, dependency category from [DEEPENING.md](DEEPENING.md), what sits behind the seam). The brief is independent of the user-facing problem-space explanation in Step 1. Give each agent a different design constraint:
+
+- Agent 1: "Minimise the interface — aim for 1–3 entry points max. Maximise leverage per entry point."
+- Agent 2: "Maximise flexibility — support many use cases and extension."
+- Agent 3: "Optimise for the most common caller — make the default case trivial."
+- Agent 4 (if applicable): "Design around ports & adapters for cross-seam dependencies." + +Include both [LANGUAGE.md](LANGUAGE.md) vocabulary and CONTEXT.md vocabulary in the brief so each sub-agent names things consistently with the architecture language and the project's domain language. + +Each sub-agent outputs: + +1. Interface (types, methods, params — plus invariants, ordering, error modes) +2. Usage example showing how callers use it +3. What the implementation hides behind the seam +4. Dependency strategy and adapters (see [DEEPENING.md](DEEPENING.md)) +5. Trade-offs — where leverage is high, where it's thin + +### 3. Present and compare + +Present designs sequentially so the user can absorb each one, then compare them in prose. Contrast by **depth** (leverage at the interface), **locality** (where change concentrates), and **seam placement**. + +After comparing, give your own recommendation: which design you think is strongest and why. If elements from different designs would combine well, propose a hybrid. Be opinionated — the user wants a strong read, not a menu. diff --git a/.pi/skills/improve-codebase-architecture/LANGUAGE.md b/.pi/skills/improve-codebase-architecture/LANGUAGE.md new file mode 100644 index 0000000..530c276 --- /dev/null +++ b/.pi/skills/improve-codebase-architecture/LANGUAGE.md @@ -0,0 +1,53 @@ +# Language + +Shared vocabulary for every suggestion this skill makes. Use these terms exactly — don't substitute "component," "service," "API," or "boundary." Consistent language is the whole point. + +## Terms + +**Module** +Anything with an interface and an implementation. Deliberately scale-agnostic — applies equally to a function, class, package, or tier-spanning slice. +_Avoid_: unit, component, service. + +**Interface** +Everything a caller must know to use the module correctly. Includes the type signature, but also invariants, ordering constraints, error modes, required configuration, and performance characteristics. 
+_Avoid_: API, signature (too narrow — those refer only to the type-level surface). + +**Implementation** +What's inside a module — its body of code. Distinct from **Adapter**: a thing can be a small adapter with a large implementation (a Postgres repo) or a large adapter with a small implementation (an in-memory fake). Reach for "adapter" when the seam is the topic; "implementation" otherwise. + +**Depth** +Leverage at the interface — the amount of behaviour a caller (or test) can exercise per unit of interface they have to learn. A module is **deep** when a large amount of behaviour sits behind a small interface. A module is **shallow** when the interface is nearly as complex as the implementation. + +**Seam** _(from Michael Feathers)_ +A place where you can alter behaviour without editing in that place. The *location* at which a module's interface lives. Choosing where to put the seam is its own design decision, distinct from what goes behind it. +_Avoid_: boundary (overloaded with DDD's bounded context). + +**Adapter** +A concrete thing that satisfies an interface at a seam. Describes *role* (what slot it fills), not substance (what's inside). + +**Leverage** +What callers get from depth. More capability per unit of interface they have to learn. One implementation pays back across N call sites and M tests. + +**Locality** +What maintainers get from depth. Change, bugs, knowledge, and verification concentrate at one place rather than spreading across callers. Fix once, fixed everywhere. + +## Principles + +- **Depth is a property of the interface, not the implementation.** A deep module can be internally composed of small, mockable, swappable parts — they just aren't part of the interface. A module can have **internal seams** (private to its implementation, used by its own tests) as well as the **external seam** at its interface. +- **The deletion test.** Imagine deleting the module. 
If complexity vanishes, the module wasn't hiding anything (it was a pass-through). If complexity reappears across N callers, the module was earning its keep. +- **The interface is the test surface.** Callers and tests cross the same seam. If you want to test *past* the interface, the module is probably the wrong shape. +- **One adapter means a hypothetical seam. Two adapters means a real one.** Don't introduce a seam unless something actually varies across it. + +## Relationships + +- A **Module** has exactly one **Interface** (the surface it presents to callers and tests). +- **Depth** is a property of a **Module**, measured against its **Interface**. +- A **Seam** is where a **Module**'s **Interface** lives. +- An **Adapter** sits at a **Seam** and satisfies the **Interface**. +- **Depth** produces **Leverage** for callers and **Locality** for maintainers. + +## Rejected framings + +- **Depth as ratio of implementation-lines to interface-lines** (Ousterhout): rewards padding the implementation. We use depth-as-leverage instead. +- **"Interface" as the TypeScript `interface` keyword or a class's public methods**: too narrow — interface here includes every fact a caller must know. +- **"Boundary"**: overloaded with DDD's bounded context. Say **seam** or **interface**. diff --git a/.pi/skills/improve-codebase-architecture/SKILL.md b/.pi/skills/improve-codebase-architecture/SKILL.md new file mode 100644 index 0000000..05984a6 --- /dev/null +++ b/.pi/skills/improve-codebase-architecture/SKILL.md @@ -0,0 +1,71 @@ +--- +name: improve-codebase-architecture +description: Find deepening opportunities in a codebase, informed by the domain language in CONTEXT.md and the decisions in docs/adr/. Use when the user wants to improve architecture, find refactoring opportunities, consolidate tightly-coupled modules, or make a codebase more testable and AI-navigable. 
+--- + +# Improve Codebase Architecture + +Surface architectural friction and propose **deepening opportunities** — refactors that turn shallow modules into deep ones. The aim is testability and AI-navigability. + +## Glossary + +Use these terms exactly in every suggestion. Consistent language is the point — don't drift into "component," "service," "API," or "boundary." Full definitions in [LANGUAGE.md](LANGUAGE.md). + +- **Module** — anything with an interface and an implementation (function, class, package, slice). +- **Interface** — everything a caller must know to use the module: types, invariants, error modes, ordering, config. Not just the type signature. +- **Implementation** — the code inside. +- **Depth** — leverage at the interface: a lot of behaviour behind a small interface. **Deep** = high leverage. **Shallow** = interface nearly as complex as the implementation. +- **Seam** — where an interface lives; a place behaviour can be altered without editing in place. (Use this, not "boundary.") +- **Adapter** — a concrete thing satisfying an interface at a seam. +- **Leverage** — what callers get from depth. +- **Locality** — what maintainers get from depth: change, bugs, knowledge concentrated in one place. + +Key principles (see [LANGUAGE.md](LANGUAGE.md) for the full list): + +- **Deletion test**: imagine deleting the module. If complexity vanishes, it was a pass-through. If complexity reappears across N callers, it was earning its keep. +- **The interface is the test surface.** +- **One adapter = hypothetical seam. Two adapters = real seam.** + +This skill is _informed_ by the project's domain model. The domain language gives names to good seams; ADRs record decisions the skill should not re-litigate. + +## Process + +### 1. Explore + +Read the project's domain glossary and any ADRs in the area you're touching first. + +Then use the Agent tool with `subagent_type=Explore` to walk the codebase. 
Don't follow rigid heuristics — explore organically and note where you experience friction: + +- Where does understanding one concept require bouncing between many small modules? +- Where are modules **shallow** — interface nearly as complex as the implementation? +- Where have pure functions been extracted just for testability, but the real bugs hide in how they're called (no **locality**)? +- Where do tightly-coupled modules leak across their seams? +- Which parts of the codebase are untested, or hard to test through their current interface? + +Apply the **deletion test** to anything you suspect is shallow: would deleting it concentrate complexity, or just move it? A "yes, concentrates" is the signal you want. + +### 2. Present candidates + +Present a numbered list of deepening opportunities. For each candidate: + +- **Files** — which files/modules are involved +- **Problem** — why the current architecture is causing friction +- **Solution** — plain English description of what would change +- **Benefits** — explained in terms of locality and leverage, and also in how tests would improve + +**Use CONTEXT.md vocabulary for the domain, and [LANGUAGE.md](LANGUAGE.md) vocabulary for the architecture.** If `CONTEXT.md` defines "Order," talk about "the Order intake module" — not "the FooBarHandler," and not "the Order service." + +**ADR conflicts**: if a candidate contradicts an existing ADR, only surface it when the friction is real enough to warrant revisiting the ADR. Mark it clearly (e.g. _"contradicts ADR-0007 — but worth reopening because…"_). Don't list every theoretical refactor an ADR forbids. + +Do NOT propose interfaces yet. Ask the user: "Which of these would you like to explore?" + +### 3. Grilling loop + +Once the user picks a candidate, drop into a grilling conversation. Walk the design tree with them — constraints, dependencies, the shape of the deepened module, what sits behind the seam, what tests survive. 
+ +Side effects happen inline as decisions crystallize: + +- **Naming a deepened module after a concept not in `CONTEXT.md`?** Add the term to `CONTEXT.md` — same discipline as `/grill-with-docs` (see [CONTEXT-FORMAT.md](../grill-with-docs/CONTEXT-FORMAT.md)). Create the file lazily if it doesn't exist. +- **Sharpening a fuzzy term during the conversation?** Update `CONTEXT.md` right there. +- **User rejects the candidate with a load-bearing reason?** Offer an ADR, framed as: _"Want me to record this as an ADR so future architecture reviews don't re-suggest it?"_ Only offer when the reason would actually be needed by a future explorer to avoid re-suggesting the same thing — skip ephemeral reasons ("not worth it right now") and self-evident ones. See [ADR-FORMAT.md](../grill-with-docs/ADR-FORMAT.md). +- **Want to explore alternative interfaces for the deepened module?** See [INTERFACE-DESIGN.md](INTERFACE-DESIGN.md). diff --git a/.pi/skills/tdd/SKILL.md b/.pi/skills/tdd/SKILL.md new file mode 100644 index 0000000..7a98941 --- /dev/null +++ b/.pi/skills/tdd/SKILL.md @@ -0,0 +1,109 @@ +--- +name: tdd +description: Test-driven development with red-green-refactor loop. Use when user wants to build features or fix bugs using TDD, mentions "red-green-refactor", wants integration tests, or asks for test-first development. +--- + +# Test-Driven Development + +## Philosophy + +**Core principle**: Tests should verify behavior through public interfaces, not implementation details. Code can change entirely; tests shouldn't. + +**Good tests** are integration-style: they exercise real code paths through public APIs. They describe _what_ the system does, not _how_ it does it. A good test reads like a specification - "user can checkout with valid cart" tells you exactly what capability exists. These tests survive refactors because they don't care about internal structure. + +**Bad tests** are coupled to implementation. 
They mock internal collaborators, test private methods, or verify through external means (like querying a database directly instead of using the interface). The warning sign: your test breaks when you refactor, but behavior hasn't changed. If you rename an internal function and tests fail, those tests were testing implementation, not behavior. + +See [tests.md](tests.md) for examples and [mocking.md](mocking.md) for mocking guidelines. + +## Anti-Pattern: Horizontal Slices + +**DO NOT write all tests first, then all implementation.** This is "horizontal slicing" - treating RED as "write all tests" and GREEN as "write all code." + +This produces **crap tests**: + +- Tests written in bulk test _imagined_ behavior, not _actual_ behavior +- You end up testing the _shape_ of things (data structures, function signatures) rather than user-facing behavior +- Tests become insensitive to real changes - they pass when behavior breaks, fail when behavior is fine +- You outrun your headlights, committing to test structure before understanding the implementation + +**Correct approach**: Vertical slices via tracer bullets. One test → one implementation → repeat. Each test responds to what you learned from the previous cycle. Because you just wrote the code, you know exactly what behavior matters and how to verify it. + +``` +WRONG (horizontal): + RED: test1, test2, test3, test4, test5 + GREEN: impl1, impl2, impl3, impl4, impl5 + +RIGHT (vertical): + RED→GREEN: test1→impl1 + RED→GREEN: test2→impl2 + RED→GREEN: test3→impl3 + ... +``` + +## Workflow + +### 1. Planning + +When exploring the codebase, use the project's domain glossary so that test names and interface vocabulary match the project's language, and respect ADRs in the area you're touching. 
+ +Before writing any code: + +- [ ] Confirm with user what interface changes are needed +- [ ] Confirm with user which behaviors to test (prioritize) +- [ ] Identify opportunities for [deep modules](deep-modules.md) (small interface, deep implementation) +- [ ] Design interfaces for [testability](interface-design.md) +- [ ] List the behaviors to test (not implementation steps) +- [ ] Get user approval on the plan + +Ask: "What should the public interface look like? Which behaviors are most important to test?" + +**You can't test everything.** Confirm with the user exactly which behaviors matter most. Focus testing effort on critical paths and complex logic, not every possible edge case. + +### 2. Tracer Bullet + +Write ONE test that confirms ONE thing about the system: + +``` +RED: Write test for first behavior → test fails +GREEN: Write minimal code to pass → test passes +``` + +This is your tracer bullet - proves the path works end-to-end. + +### 3. Incremental Loop + +For each remaining behavior: + +``` +RED: Write next test → fails +GREEN: Minimal code to pass → passes +``` + +Rules: + +- One test at a time +- Only enough code to pass current test +- Don't anticipate future tests +- Keep tests focused on observable behavior + +### 4. Refactor + +After all tests pass, look for [refactor candidates](refactoring.md): + +- [ ] Extract duplication +- [ ] Deepen modules (move complexity behind simple interfaces) +- [ ] Apply SOLID principles where natural +- [ ] Consider what new code reveals about existing code +- [ ] Run tests after each refactor step + +**Never refactor while RED.** Get to GREEN first. 
+ +## Checklist Per Cycle + +``` +[ ] Test describes behavior, not implementation +[ ] Test uses public interface only +[ ] Test would survive internal refactor +[ ] Code is minimal for this test +[ ] No speculative features added +``` diff --git a/.pi/skills/tdd/deep-modules.md b/.pi/skills/tdd/deep-modules.md new file mode 100644 index 0000000..0d9720c --- /dev/null +++ b/.pi/skills/tdd/deep-modules.md @@ -0,0 +1,33 @@ +# Deep Modules + +From "A Philosophy of Software Design": + +**Deep module** = small interface + lots of implementation + +``` +┌─────────────────────┐ +│ Small Interface │ ← Few methods, simple params +├─────────────────────┤ +│ │ +│ │ +│ Deep Implementation│ ← Complex logic hidden +│ │ +│ │ +└─────────────────────┘ +``` + +**Shallow module** = large interface + little implementation (avoid) + +``` +┌─────────────────────────────────┐ +│ Large Interface │ ← Many methods, complex params +├─────────────────────────────────┤ +│ Thin Implementation │ ← Just passes through +└─────────────────────────────────┘ +``` + +When designing interfaces, ask: + +- Can I reduce the number of methods? +- Can I simplify the parameters? +- Can I hide more complexity inside? diff --git a/.pi/skills/tdd/interface-design.md b/.pi/skills/tdd/interface-design.md new file mode 100644 index 0000000..a0a20ca --- /dev/null +++ b/.pi/skills/tdd/interface-design.md @@ -0,0 +1,31 @@ +# Interface Design for Testability + +Good interfaces make testing natural: + +1. **Accept dependencies, don't create them** + + ```typescript + // Testable + function processOrder(order, paymentGateway) {} + + // Hard to test + function processOrder(order) { + const gateway = new StripeGateway(); + } + ``` + +2. **Return results, don't produce side effects** + + ```typescript + // Testable + function calculateDiscount(cart): Discount {} + + // Hard to test + function applyDiscount(cart): void { + cart.total -= discount; + } + ``` + +3. 
**Small surface area** + - Fewer methods = fewer tests needed + - Fewer params = simpler test setup diff --git a/.pi/skills/tdd/mocking.md b/.pi/skills/tdd/mocking.md new file mode 100644 index 0000000..71cbfee --- /dev/null +++ b/.pi/skills/tdd/mocking.md @@ -0,0 +1,59 @@ +# When to Mock + +Mock at **system boundaries** only: + +- External APIs (payment, email, etc.) +- Databases (sometimes - prefer test DB) +- Time/randomness +- File system (sometimes) + +Don't mock: + +- Your own classes/modules +- Internal collaborators +- Anything you control + +## Designing for Mockability + +At system boundaries, design interfaces that are easy to mock: + +**1. Use dependency injection** + +Pass external dependencies in rather than creating them internally: + +```typescript +// Easy to mock +function processPayment(order, paymentClient) { + return paymentClient.charge(order.total); +} + +// Hard to mock +function processPayment(order) { + const client = new StripeClient(process.env.STRIPE_KEY); + return client.charge(order.total); +} +``` + +**2. 
Prefer SDK-style interfaces over generic fetchers** + +Create specific functions for each external operation instead of one generic function with conditional logic: + +```typescript +// GOOD: Each function is independently mockable +const api = { + getUser: (id) => fetch(`/users/${id}`), + getOrders: (userId) => fetch(`/users/${userId}/orders`), + createOrder: (data) => fetch('/orders', { method: 'POST', body: data }), +}; + +// BAD: Mocking requires conditional logic inside the mock +const api = { + fetch: (endpoint, options) => fetch(endpoint, options), +}; +``` + +The SDK approach means: +- Each mock returns one specific shape +- No conditional logic in test setup +- Easier to see which endpoints a test exercises +- Type safety per endpoint diff --git a/.pi/skills/tdd/refactoring.md b/.pi/skills/tdd/refactoring.md new file mode 100644 index 0000000..8a44439 --- /dev/null +++ b/.pi/skills/tdd/refactoring.md @@ -0,0 +1,10 @@ +# Refactor Candidates + +After TDD cycle, look for: + +- **Duplication** → Extract function/class +- **Long methods** → Break into private helpers (keep tests on public interface) +- **Shallow modules** → Combine or deepen +- **Feature envy** → Move logic to where data lives +- **Primitive obsession** → Introduce value objects +- **Existing code** the new code reveals as problematic diff --git a/.pi/skills/tdd/tests.md b/.pi/skills/tdd/tests.md new file mode 100644 index 0000000..ff22f80 --- /dev/null +++ b/.pi/skills/tdd/tests.md @@ -0,0 +1,61 @@ +# Good and Bad Tests + +## Good Tests + +**Integration-style**: Test through real interfaces, not mocks of internal parts. 
+ +```typescript +// GOOD: Tests observable behavior +test("user can checkout with valid cart", async () => { + const cart = createCart(); + cart.add(product); + const result = await checkout(cart, paymentMethod); + expect(result.status).toBe("confirmed"); +}); +``` + +Characteristics: + +- Tests behavior users/callers care about +- Uses public API only +- Survives internal refactors +- Describes WHAT, not HOW +- One logical assertion per test + +## Bad Tests + +**Implementation-detail tests**: Coupled to internal structure. + +```typescript +// BAD: Tests implementation details +test("checkout calls paymentService.process", async () => { + const mockPayment = jest.mock(paymentService); + await checkout(cart, payment); + expect(mockPayment.process).toHaveBeenCalledWith(cart.total); +}); +``` + +Red flags: + +- Mocking internal collaborators +- Testing private methods +- Asserting on call counts/order +- Test breaks when refactoring without behavior change +- Test name describes HOW not WHAT +- Verifying through external means instead of interface + +```typescript +// BAD: Bypasses interface to verify +test("createUser saves to database", async () => { + await createUser({ name: "Alice" }); + const row = await db.query("SELECT * FROM users WHERE name = ?", ["Alice"]); + expect(row).toBeDefined(); +}); + +// GOOD: Verifies through interface +test("createUser makes user retrievable", async () => { + const user = await createUser({ name: "Alice" }); + const retrieved = await getUser(user.id); + expect(retrieved.name).toBe("Alice"); +}); +``` diff --git a/.pi/skills/to-issues/SKILL.md b/.pi/skills/to-issues/SKILL.md new file mode 100644 index 0000000..5a40716 --- /dev/null +++ b/.pi/skills/to-issues/SKILL.md @@ -0,0 +1,81 @@ +--- +name: to-issues +description: Break a plan, spec, or PRD into independently-grabbable issues on the project issue tracker using tracer-bullet vertical slices. 
Use when user wants to convert a plan into issues, create implementation tickets, or break down work into issues. +--- + +# To Issues + +Break a plan into independently-grabbable issues using vertical slices (tracer bullets). + +The issue tracker and triage label vocabulary should have been provided to you — run `/setup-matt-pocock-skills` if not. + +## Process + +### 1. Gather context + +Work from whatever is already in the conversation context. If the user passes an issue reference (issue number, URL, or path) as an argument, fetch it from the issue tracker and read its full body and comments. + +### 2. Explore the codebase (optional) + +If you have not already explored the codebase, do so to understand the current state of the code. Issue titles and descriptions should use the project's domain glossary vocabulary, and respect ADRs in the area you're touching. + +### 3. Draft vertical slices + +Break the plan into **tracer bullet** issues. Each issue is a thin vertical slice that cuts through ALL integration layers end-to-end, NOT a horizontal slice of one layer. + +Slices may be 'HITL' or 'AFK'. HITL slices require human interaction, such as an architectural decision or a design review. AFK slices can be implemented and merged without human interaction. Prefer AFK over HITL where possible. + + +- Each slice delivers a narrow but COMPLETE path through every layer (schema, API, UI, tests) +- A completed slice is demoable or verifiable on its own +- Prefer many thin slices over few thick ones + + +### 4. Quiz the user + +Present the proposed breakdown as a numbered list. For each slice, show: + +- **Title**: short descriptive name +- **Type**: HITL / AFK +- **Blocked by**: which other slices (if any) must complete first +- **User stories covered**: which user stories this addresses (if the source material has them) + +Ask the user: + +- Does the granularity feel right? (too coarse / too fine) +- Are the dependency relationships correct? 
+- Should any slices be merged or split further? +- Are the correct slices marked as HITL and AFK? + +Iterate until the user approves the breakdown. + +### 5. Publish the issues to the issue tracker + +For each approved slice, publish a new issue to the issue tracker. Use the issue body template below. Apply the `needs-triage` triage label so each issue enters the normal triage flow. + +Publish issues in dependency order (blockers first) so you can reference real issue identifiers in the "Blocked by" field. + + +## Parent + +A reference to the parent issue on the issue tracker (if the source was an existing issue, otherwise omit this section). + +## What to build + +A concise description of this vertical slice. Describe the end-to-end behavior, not layer-by-layer implementation. + +## Acceptance criteria + +- [ ] Criterion 1 +- [ ] Criterion 2 +- [ ] Criterion 3 + +## Blocked by + +- A reference to the blocking ticket (if any) + +Or "None - can start immediately" if no blockers. + + + +Do NOT close or modify any parent issue. diff --git a/.pi/skills/to-prd/SKILL.md b/.pi/skills/to-prd/SKILL.md new file mode 100644 index 0000000..7bdc82a --- /dev/null +++ b/.pi/skills/to-prd/SKILL.md @@ -0,0 +1,74 @@ +--- +name: to-prd +description: Turn the current conversation context into a PRD and publish it to the project issue tracker. Use when user wants to create a PRD from the current context. +--- + +This skill takes the current conversation context and codebase understanding and produces a PRD. Do NOT interview the user — just synthesize what you already know. + +The issue tracker and triage label vocabulary should have been provided to you — run `/setup-matt-pocock-skills` if not. + +## Process + +1. Explore the repo to understand the current state of the codebase, if you haven't already. Use the project's domain glossary vocabulary throughout the PRD, and respect any ADRs in the area you're touching. + +2. 
Sketch out the major modules you will need to build or modify to complete the implementation. Actively look for opportunities to extract deep modules that can be tested in isolation. + +A deep module (as opposed to a shallow module) is one which encapsulates a lot of functionality in a simple, testable interface which rarely changes. + +Check with the user that these modules match their expectations. Check with the user which modules they want tests written for. + +3. Write the PRD using the template below, then publish it to the project issue tracker. Apply the `needs-triage` triage label so it enters the normal triage flow. + + + +## Problem Statement + +The problem that the user is facing, from the user's perspective. + +## Solution + +The solution to the problem, from the user's perspective. + +## User Stories + +A LONG, numbered list of user stories. Each user story should be in the format of: + +1. As an `<actor>`, I want a `<capability>`, so that `<benefit>` + +Example: + +1. As a mobile bank customer, I want to see balance on my accounts, so that I can make better informed decisions about my spending + + +This list of user stories should be extremely extensive and cover all aspects of the feature. + +## Implementation Decisions + +A list of implementation decisions that were made. This can include: + +- The modules that will be built/modified +- The interfaces of those modules that will be modified +- Technical clarifications from the developer +- Architectural decisions +- Schema changes +- API contracts +- Specific interactions + +Do NOT include specific file paths or code snippets. They may end up being outdated very quickly. + +## Testing Decisions + +A list of testing decisions that were made. Include: + +- A description of what makes a good test (only test external behavior, not implementation details) +- Which modules will be tested +- Prior art for the tests (i.e. similar types of tests in the codebase) + +## Out of Scope + +A description of the things that are out of scope for this PRD. 
+ +## Further Notes + +Any further notes about the feature. + + diff --git a/.sandcastle/main.ts b/.sandcastle/main.ts index 4017084..820f385 100644 --- a/.sandcastle/main.ts +++ b/.sandcastle/main.ts @@ -41,6 +41,15 @@ const corepackHomePath = `${sandcastleRuntimePath}/corepack`; const npmCachePath = `${sandcastleRuntimePath}/npm-cache`; const pnpmStorePath = `${sandcastleRuntimePath}/pnpm-store`; const hostCodexHome = path.join(os.homedir(), ".codex"); +const hostGhConfigPath = [ + process.env.GH_CONFIG_DIR, + process.env.APPDATA + ? path.join(process.env.APPDATA, "GitHub CLI") + : undefined, + path.join(os.homedir(), ".config", "gh"), +].find((candidate): candidate is string => + Boolean(candidate && existsSync(candidate)), +); mkdirSync(sandcastleRuntimePath, { recursive: true }); mkdirSync(corepackHomePath, { recursive: true }); @@ -83,11 +92,15 @@ const sandboxConfig = { npm_config_cache: `/home/agent/workspace/${npmCachePath}`, }, mounts: [ - { - hostPath: "~/.config/gh", - sandboxPath: "/home/agent/.config/gh", - readonly: true, - }, + ...(hostGhConfigPath + ? [ + { + hostPath: hostGhConfigPath, + sandboxPath: "/home/agent/.config/gh", + readonly: true, + }, + ] + : []), { hostPath: hostCodexHome, sandboxPath: "/mnt/host-codex", diff --git a/.specforge/.gitignore b/.specforge/.gitignore new file mode 100644 index 0000000..933e0f0 --- /dev/null +++ b/.specforge/.gitignore @@ -0,0 +1,3 @@ +# Local SpecForge workspace state +previews/ +session.json diff --git a/CONTEXT.md b/CONTEXT.md new file mode 100644 index 0000000..6d96f38 --- /dev/null +++ b/CONTEXT.md @@ -0,0 +1,114 @@ +# SpecForge + +SpecForge is a desktop workspace for spec-driven development that keeps project context local while delegating AI-assisted planning and execution to configurable agent providers. + +## Language + +**Sandcastle Runtime**: +The orchestration layer SpecForge uses for all AI agent work. 
+_Avoid_: Cursor runtime, generic sidecar runtime + +**Agent Provider**: +The AI backend selected by the user for the Sandcastle Runtime to run. +_Avoid_: AI vendor, model provider, sidecar provider + +**Codex Provider**: +The first Agent Provider SpecForge should support through the Sandcastle Runtime. +_Avoid_: Default Cursor provider + +**Provider Auth Mode**: +The way an Agent Provider authenticates for a runtime turn. +_Avoid_: Provider type + +**Sandcastle Batch Runner**: +The existing issue-driven Sandcastle workflow that plans, implements, reviews, and merges GitHub issues in Docker worktrees. +_Avoid_: App runtime, chat runtime + +**App Sandcastle Runtime**: +The product runtime entrypoint that serves user-triggered PRD, spec, chat, and execution turns inside SpecForge. +_Avoid_: Personal Sandcastle runner + +**Runtime Sandbox**: +A Docker-backed isolated environment where the App Sandcastle Runtime runs agent work. +_Avoid_: Host workspace execution + +**Runtime Readiness**: +The configuration state that tells the user whether the App Sandcastle Runtime can run agent work. +_Avoid_: Provider status, environment scan + +**Runtime Event**: +A streamed update from the App Sandcastle Runtime that keeps the user informed during an agent turn. +_Avoid_: Final response only + +**Approval Gate**: +A user decision point before sandboxed agent changes are applied to the project workspace. +_Avoid_: Fully autonomous apply + +**Sandbox Result**: +The branch, patch, generated text, or diff produced by a Runtime Sandbox for review by SpecForge. +_Avoid_: Direct workspace mutation + +**Document Preview**: +A generated PRD or spec draft shown to the user before it is saved to the configured project path. +_Avoid_: Direct document overwrite + +**Provider Settings**: +The SpecForge settings area where users choose and configure the Agent Provider used by the Sandcastle Runtime. 
+_Avoid_: Cursor API key settings + +**Local Provider Configuration**: +Machine-local provider setup such as authentication state, installed CLIs, and subscription-backed access. +_Avoid_: Project credentials + +**Project Provider Defaults**: +Project-scoped defaults for Agent Provider, model, and reasoning profile. +_Avoid_: Local auth settings + +**Model Discovery**: +The runtime capability that asks the user's installed provider tooling which models are available. +_Avoid_: Hard-coded model catalog + +**Agent Description**: +Workflow-specific instructions that shape how the selected Agent Provider performs a SpecForge task. +_Avoid_: Provider prompt, system prompt + +## Relationships + +- The **Sandcastle Runtime** runs exactly one selected **Agent Provider** for each user-initiated AI agent turn. +- The **Codex Provider** is the first **Agent Provider** targeted for Sandcastle Runtime integration. +- The **Codex Provider** supports subscription-backed local auth and API-key auth as distinct **Provider Auth Mode** options. +- The user chooses the **Provider Auth Mode** during configuration. +- The **Sandcastle Batch Runner** proves Codex can run through Sandcastle, but it is distinct from the app-facing **Sandcastle Runtime**. +- The **App Sandcastle Runtime** must be separate from the personal **Sandcastle Batch Runner** in `.sandcastle/main.ts`. +- Product runtime code belongs to the app source tree, while `.sandcastle/` remains personal development automation. +- The **App Sandcastle Runtime** uses a **Runtime Sandbox** rather than running agent work directly on the host workspace. +- If Docker is unavailable, **Runtime Readiness** is not satisfied and real AI agent work is unavailable. +- **Runtime Readiness** must be visible during initial project configuration and in ongoing settings. +- **Runtime Readiness** checks local prerequisites and launchability, not a live model call. 
+- The **App Sandcastle Runtime** emits **Runtime Events** during each agent turn. +- Code-changing execution through the **App Sandcastle Runtime** must preserve **Approval Gates** and emergency stop behavior. +- Code-changing **Sandbox Results** are applied to the project workspace only after an **Approval Gate**. +- PRD and spec generation produce a **Document Preview** before saving to project files. +- A **Document Preview** remains available until the user saves, discards, or replaces it. +- A **Document Preview** persists across app restarts separately from the canonical PRD and spec files. +- **Document Previews** are ignored local workspace state under `.specforge/`. +- `.specforge/settings.json` is project configuration, while previews and session state are local workspace state. +- **Provider Settings** configure which **Agent Provider** the **Sandcastle Runtime** should use. +- An **Agent Description** is selected by workflow, independent of the chosen **Agent Provider**. +- **Local Provider Configuration** belongs to the user's machine, while **Project Provider Defaults** belong to the project. +- **Provider Auth Mode** is a project choice, but credentials and subscription state are local-only. +- Cursor SDK paths may remain temporarily as legacy migration code, but they are not the primary product runtime. +- Claude, Cursor, and OpenCode are future **Agent Providers**, not part of the first Codex Provider slice. +- The **Codex Provider** uses **Model Discovery** through the user's installed Codex tooling rather than a fixed model catalog. +- The first **Model Discovery** path uses the Codex CLI model catalog; Codex app-server IPC is a future integration option. +- **Runtime Readiness** and **Model Discovery** run on the host, while agent turns run inside the **Runtime Sandbox**. + +## Example Dialogue + +> **Dev:** "Should PRD generation call the Cursor SDK directly?" +> **Domain expert:** "No. 
PRD generation should go through the **Sandcastle Runtime**, and the selected **Agent Provider** determines whether Codex, Claude, Cursor, or another backend handles it." + +## Flagged Ambiguities + +- "sandscale" was used once to mean **Sandcastle**; resolved: the canonical term is **Sandcastle**. +- `.sandcastle/main.ts` is a personal development runner, not the product runtime. diff --git a/docs/PRD-sandcastle-runtime.md b/docs/PRD-sandcastle-runtime.md new file mode 100644 index 0000000..5471dd8 --- /dev/null +++ b/docs/PRD-sandcastle-runtime.md @@ -0,0 +1,169 @@ +# Product Requirements Document: Sandcastle Runtime + +## 1. Product Overview + +SpecForge will move all AI agent work to a Docker-backed Sandcastle Runtime. Users choose the Agent Provider that Sandcastle runs, with Codex as the first supported provider and Claude, Cursor, and OpenCode reserved for future provider implementations. + +This replaces the current Cursor-centered product direction. Cursor SDK code may remain temporarily as migration scaffolding, but the primary product runtime, setup flow, and documentation must treat Sandcastle as the runtime for PRD generation, spec generation, chat, and execution. + +## 2. Goals + +* Use Sandcastle as the required runtime for all real AI agent work. +* Support Codex through Sandcastle first. +* Let users choose Codex authentication mode during configuration: local subscription auth or API-key auth. +* Run agent turns inside a Docker-backed Runtime Sandbox. +* Show Runtime Readiness on both the initial Configuration screen and the Settings screen. +* Stream runtime events into the existing SpecForge UI during agent turns. +* Generate PRD/spec drafts as persisted Document Previews before saving to canonical project files. +* Preserve approval gates and emergency stop semantics for code-changing execution. + +## 3. Non-Goals + +* Claude Provider implementation. +* Cursor Provider implementation. +* OpenCode Provider implementation. 
+* Replacing the personal `.sandcastle/main.ts` development runner. +* Using Codex app-server IPC for the first model discovery slice. +* Running real agent work directly on the host workspace as a fallback when Docker is unavailable. +* Making generated document previews part of tracked project output. + +## 4. Domain Language + +* **Sandcastle Runtime:** The orchestration layer SpecForge uses for all AI agent work. +* **App Sandcastle Runtime:** The product runtime entrypoint that serves user-triggered PRD, spec, chat, and execution turns inside SpecForge. +* **Sandcastle Batch Runner:** The personal issue-driven workflow in `.sandcastle/main.ts`; it must remain separate from product runtime code. +* **Agent Provider:** The AI backend selected by the user for the Sandcastle Runtime to run. +* **Codex Provider:** The first Agent Provider supported through the Sandcastle Runtime. +* **Provider Auth Mode:** The way an Agent Provider authenticates for a runtime turn. +* **Runtime Sandbox:** A Docker-backed isolated environment where agent work runs. +* **Runtime Readiness:** The configuration state showing whether the App Sandcastle Runtime can run agent work. +* **Runtime Event:** A streamed update from the App Sandcastle Runtime. +* **Sandbox Result:** A branch, patch, generated text, or diff produced by a Runtime Sandbox for review. +* **Document Preview:** A generated PRD or spec draft shown before saving to the configured project path. +* **Approval Gate:** A user decision point before sandboxed code changes are applied to the project workspace. + +## 5. Primary User Flow + +1. The user opens SpecForge and chooses a project. +2. The Configuration screen asks the user to configure the Sandcastle Runtime. +3. The user chooses Codex as the Agent Provider. +4. The user chooses a Provider Auth Mode: + * local subscription auth through the user's installed Codex tooling + * API-key auth stored through the OS credential store +5. 
SpecForge checks Runtime Readiness: + * Docker CLI is installed + * Docker daemon is reachable + * Codex CLI is available + * selected Codex auth mode is satisfied + * the project root is readable + * runtime working directories can be created + * the app can launch the runtime runner process +6. The user selects a discovered Codex model and reasoning effort. +7. The user generates or refines a PRD/spec, chats, or starts execution. +8. SpecForge launches an App Sandcastle Runtime turn inside Docker. +9. Runtime Events stream into the UI while the turn runs. +10. PRD/spec generation returns a Document Preview. +11. Code-changing execution returns a Sandbox Result for approval before the host workspace is changed. + +## 6. Functional Requirements + +### 6.1. Runtime Architecture + +* Product runtime code must live in the app source tree, not under `.sandcastle/`. +* `.sandcastle/main.ts` must remain personal development automation and must not be wired into the product runtime. +* The app-facing runner should live under `src/`, with Rust process control under `src-tauri/src/`. +* React must continue to communicate with the desktop runtime through `src/lib/runtime.ts`; React must not execute shell commands or write workspace files directly. +* Tauri/Rust must own process control, filesystem access, credential access, Docker/Codex readiness checks, and stop handling. + +### 6.2. Runtime Sandbox + +* Real agent turns must run inside Docker through Sandcastle. +* If Docker is unavailable, the Sandcastle Runtime is unavailable. +* SpecForge must not silently fall back to direct host execution for real AI agent work. +* Runtime Sandbox output must return as a Sandbox Result rather than mutating the host workspace directly. + +### 6.3. Codex Provider + +* Codex is the first supported Agent Provider. +* Codex must run through Sandcastle. +* The user must choose the Provider Auth Mode during configuration. +* Provider Auth Mode is project configuration. 
+* Credentials and subscription state are local-only. +* API keys must be stored through the OS credential store and never written to `.specforge/settings.json`. +* Subscription auth should use the user's local Codex authentication material, such as `~/.codex`, when preparing the Runtime Sandbox. + +### 6.4. Model Discovery + +* Codex model discovery must be dynamic. +* The first implementation should use the installed Codex CLI model catalog: + * `codex debug models` + * `codex debug models --bundled` as a fallback if live refresh fails +* Codex app-server IPC is a future option, not required for issue #4. +* Model discovery and readiness checks run on the host. +* Agent turns run inside the Runtime Sandbox. +* Project settings persist the selected model id and reasoning effort. + +### 6.5. Configuration And Settings + +* Runtime Readiness must be visible on the initial Configuration screen. +* Runtime Readiness must also be visible in Settings. +* Settings must move away from a Cursor API key-centered experience. +* Settings must expose Provider Settings for: + * Agent Provider + * Provider Auth Mode + * Codex API key entry when API-key auth is selected + * local Codex auth detection when subscription auth is selected + * Docker readiness + * model discovery status + * selected model and reasoning effort +* Runtime actions must still guard on readiness because Docker or auth state can change after configuration. + +### 6.6. PRD And Spec Generation + +* PRD generation must run through the Sandcastle Runtime. +* Spec generation must run through the Sandcastle Runtime. +* PRD/spec generation must keep separate PRD Agent and Spec Agent descriptions. +* Provider choice must not collapse workflow-specific Agent Descriptions. +* Generation must produce a Document Preview before saving to `docs/PRD.md`, `docs/SPEC.md`, or configured paths. +* The user must be able to save, edit then save, discard, or replace a Document Preview. 
+* Document Previews must persist across app restarts separately from canonical PRD/spec files. +* Document Previews are ignored local workspace state under `.specforge/`. + +### 6.7. Chat And Execution + +* Chat must run through the Sandcastle Runtime. +* Execution must run through the Sandcastle Runtime. +* Runtime Events must stream during each turn. +* Code-changing execution must preserve Approval Gates and emergency stop behavior. +* Sandboxed code changes must produce a diff, patch, or branch for review. +* Approved changes may be applied or merged into the host workspace. +* Rejected changes must leave the host workspace untouched. + +### 6.8. Persistence + +* `.specforge/settings.json` is project configuration and may be tracked. +* `.specforge/previews/` is ignored local workspace state. +* `.specforge/session.json`, if used for preview/session recovery, is ignored local workspace state. +* `.specforge/.gitignore` must ignore preview/session state while allowing project settings to remain trackable. + +## 7. Acceptance Criteria + +* Configuration shows Sandcastle/Codex/Docker readiness before the app is considered fully usable. +* Settings shows the same readiness status and lets the user refresh diagnostics. +* User can choose Codex auth mode as subscription or API key. +* API-key auth stores the secret only in the OS credential store. +* Subscription auth detects local Codex auth without writing credentials to project settings. +* Codex models are discovered from the installed Codex CLI. +* PRD generation uses Sandcastle and returns a persisted Document Preview. +* Spec generation uses Sandcastle and returns a persisted Document Preview. +* Chat turns use Sandcastle and stream Runtime Events. +* Code-changing execution returns a Sandbox Result and requires approval before host workspace changes. +* Stop cancels the active Sandcastle runtime turn and leaves the UI in a halted state. +* Cursor is no longer presented as required for SpecForge to be useful. 
+ +## 8. Migration Notes + +* Existing Cursor SDK generation code may remain temporarily as legacy migration code. +* Product UX and docs must no longer center Cursor API key setup. +* The main `docs/PRD.md` and `docs/SPEC.md` should be updated when the implementation ships. +* The current personal `.sandcastle/main.ts` runner demonstrates Codex-through-Sandcastle patterns but must not become the product runtime. diff --git a/docs/PRD.md b/docs/PRD.md index 47b7e21..9c2b95b 100644 --- a/docs/PRD.md +++ b/docs/PRD.md @@ -1,98 +1,85 @@ -# Product Requirements Document: SpecForge +# Product Requirements Document: Sandcastle Runtime ## 1. Product Overview -**SpecForge** is a setup-first desktop workspace for spec-driven development. After a project is configured, the primary product flow helps users generate, review, and refine PRD and technical spec documents with Cursor SDK agents while keeping local project data under the desktop app's control. - -The product combines five responsibilities in one desktop shell: - -* project setup and saved workspace defaults -* Cursor SDK-backed PRD/spec generation -* PRD/spec review and editing -* secure local Cursor API key storage -* approval-aware diff and terminal visibility - -## 2. Target Audience - -* **Solo engineers:** Wanting a desktop-native workspace for turning rough product intent into usable PRD/spec artifacts. -* **Technical leads and PMs:** Wanting editable agent descriptions and repeatable PRD/spec generation without leaking secrets into project settings. -* **AI-assisted developers:** Wanting Cursor SDK agents for product/spec planning while preserving local review, diff, and approval visibility. - -## 3. Primary User Flow - -1. **Open Projects:** The app starts on the Projects / Workspace Initialization screen until a workspace is chosen. The Projects screen is limited to workspace selection, recent projects, and the disabled clone placeholder. -2. 
**Choose the project folder:** SpecForge scans the workspace, creates `.specforge/settings.json` from defaults when it is missing, restores saved project settings, restores the most recent chat topic when available, and opens the review workspace. -3. **Review Settings when needed:** If default project settings were created, SpecForge prompts the user to open Settings. Cursor API key, model/reasoning defaults, editable PRD/spec/execution agent descriptions, PRD/spec paths, and optional supporting documents are configured from `/settings`. -4. **Connect Cursor:** The user saves a Cursor API key through the desktop runtime. The key is stored in the OS credential store and never in `.specforge/settings.json`. -5. **Refine or generate a PRD:** The user can run the built-in Grill PRD action to ask one focused follow-up question with a recommended answer, or generate the PRD directly. PRD generation sends the PRD agent description plus the user prompt to Cursor SDK, then asks Rust to save the generated Markdown. -6. **Refine or generate a spec:** The user can run the built-in Grill Spec action against the current PRD and spec brief to ask one focused follow-up question with a recommended answer, or generate the spec directly. Spec generation sends the spec agent description, user prompt, and chosen PRD content to Cursor SDK, then asks Rust to save the generated Markdown. -7. **Review output:** The `/review` screen remains available for PRD/spec/file editing and diff visibility. -8. **Continue in chat when needed:** `/chat` remains available below review in navigation, but chat execution is outside the current Cursor SDK refactor scope. - -## 4. Functional Requirements - -### 4.1. Project Setup And Persistence - -* **Project-scoped settings:** Opening a workspace without `.specforge/settings.json` must create that file from default settings, and saving setup must update it inside the selected workspace. 
-* **Secret separation:** Cursor API keys must be stored through the OS credential store and must not be written to `.specforge/settings.json`. -* **Editable agent descriptions:** Settings must persist user-editable descriptions for PRD, spec, and execution agents. -* **Cursor defaults:** Model and reasoning defaults must use Cursor SDK-compatible options. -* **Project-scoped sessions:** Chat metadata must be stored in `.specforge/sessions/index.json`. -* **Per-topic snapshots:** Each topic must be persisted in `.specforge/sessions/.json`. -* **Last-active restore:** Reopening the app should restore the last active project and the last active topic when available. -* **Recent projects:** The Projects screen must show recently opened project folders from browser `localStorage` and allow reopening them through the desktop runtime. Reopening a project without `.specforge/settings.json` must create default settings and prompt the user to review Settings. -* **Git clone placeholder:** Setup may show a repository URL clone option as a disabled/presentational control. It must not invoke Git or write files until the desktop clone flow is implemented. - -### 4.2. PRD And Spec Generation - -* **Cursor SDK runtime:** PRD/spec generation must run through `@cursor/sdk` from the TypeScript side. -* **Rust desktop boundary:** Rust must not call Codex or Claude ACP for PRD/spec generation; it reads local inputs, stores secrets, delegates Cursor SDK execution to the Bun TypeScript runner, and saves generated documents. -* **Existing UX preservation:** The user flow for choosing a PRD and generating a spec must remain unchanged except for the underlying Cursor SDK runtime. -* **PRD agent:** PRD generation must send the editable PRD agent description and the user's PRD prompt. -* **Spec agent:** Spec generation must send the editable spec agent description, the user's spec prompt, and the selected PRD content. 
-* **Built-in Grill Me:** PRD and spec empty states must expose secondary Grill actions that run the same Cursor SDK model with SpecForge's built-in grill-me instruction, ask exactly one next question, include a recommended answer, and append the response back into the generation prompt for user editing. -* **Execution agent description:** Settings must expose the execution agent description now, even though execution migration is not part of this slice. - -### 4.3. Chat Workspace - -* **Secondary route:** `/chat` must remain available after setup, but review is the primary destination while document review is the active focus. -* **Three-zone desktop layout:** The chat screen must provide a topic list, transcript/composer workspace, and context/artifacts panel. -* **Topic management:** Users must be able to create, search, select, rename, and delete topics. -* **Per-topic isolation:** Messages, context items, runtime state, pending approvals, pending diff, and terminal output must remain scoped to a single topic. -* **Per-topic drafts:** Composer drafts must be preserved per topic while switching between topics. -* **Context seeding:** New topics must start with PRD, SPEC, supporting docs, and a workspace tree summary. -* **Explicit file attachment:** Workspace files can be attached manually from the chat UI and only affect the active topic. -* **Inline controls:** Send, approve, and stop actions must live directly in the chat composer area rather than in modal flows. - -### 4.4. Runtime And Approval Semantics - -* **Chat runtime scope:** Chat turns still run through the legacy desktop backend path in this release and are not part of the current Cursor SDK PRD/spec migration. -* **Stepped mode:** The first pass must be proposal-first or read-only, then require explicit approval before a write-capable rerun. -* **Milestone mode:** One assistant turn may make changes, but it must pause on the resulting real git diff before the next turn. 
-* **God mode:** The assistant may complete the turn without an approval pause while still surfacing output and diff history afterward. -* **Session-scoped stop behavior:** Stop requests must only affect the active topic run and preserve existing emergency-stop semantics. -* **Visible artifacts:** Terminal output and diff history must remain visible for each topic after the run. - -### 4.5. Caveman Requirement - -* **Always-on mode:** Chat must always apply Caveman-style response guidance automatically for every topic. -* **No chat-entry verification:** Entering `/chat` must not trigger a blocking install or verification step. -* **Built-in prompt behavior:** The Caveman behavior must be injected by SpecForge's own system prompt so users do not need to spend turn tokens enabling it. -* **Never gate navigation or settings:** Caveman activation must not stop topic switching, route changes, or model/autonomy edits. - -### 4.6. Review And Settings - -* **Review remains available:** `/review` must still support PRD/spec/file editing and document generation. -* **Primary post-setup route:** `/review` is the default destination after setup and is ordered above chat in the main sidebar. -* **Projects remains available:** `/` must remain accessible after a project is configured and must not auto-redirect away during last-project restore. -* **Top-bar model controls:** Review must expose model, reasoning, and approval mode controls in the top app bar beside the File/Edit/Selection/Terminal/Help menu, not in a separate left control column. -* **Read-only execution mirror:** The execute panel in review must reflect the active chat topic runtime and diff, but must not start, approve, or stop a separate execution engine. -* **Projects is selection-only:** Workspace configuration controls must live in Settings, not on the Projects screen. 
-* **Settings remain project-scoped:** Model/reasoning defaults, agent descriptions, document paths, and supporting docs remain editable from Settings. - -## 5. Non-Goals - -* Multi-user collaboration or cloud sync -* Browser-only chat execution without the desktop runtime -* Automatic saving of edited workspace file tabs back to disk -* OpenCode runtime integration as a provider; it is only a UX reference for this release +SpecForge uses a Docker-backed Sandcastle Runtime for real AI agent work. Users choose the Agent Provider that Sandcastle runs, with Codex as the first supported provider and Claude, Cursor, and OpenCode reserved for future provider implementations. + +This replaces the previous Cursor-centered product direction. Legacy Cursor SDK code may remain temporarily as migration scaffolding, but the primary product runtime, setup flow, and documentation treat Sandcastle as the runtime for PRD generation, spec generation, chat, and execution. + +## 2. Goals + +* Use Sandcastle as the required runtime for real AI agent work. +* Support Codex through Sandcastle first. +* Let users choose Codex authentication mode during configuration: local subscription auth or API-key auth. +* Run PRD and spec generation inside a Docker-backed Runtime Sandbox. +* Show Runtime Readiness on both the initial Configuration screen and the Settings screen. +* Discover Codex models from the installed Codex CLI. +* Persist generated PRD/spec drafts as Document Previews before saving canonical project files. +* Preserve approval gates and emergency stop semantics for code-changing execution. + +## 3. Non-Goals + +* Claude Provider implementation. +* Cursor Provider implementation. +* OpenCode Provider implementation. +* Replacing the personal `.sandcastle/main.ts` development runner. +* Using Codex app-server IPC for the first model discovery slice. +* Running real agent work directly on the host workspace as a fallback when Docker is unavailable. + +## 4. 
Domain Language + +* **Sandcastle Runtime:** The orchestration layer SpecForge uses for AI agent work. +* **App Sandcastle Runtime:** The product runtime entrypoint that serves user-triggered PRD, spec, chat, and execution turns inside SpecForge. +* **Sandcastle Batch Runner:** The personal issue-driven workflow in `.sandcastle/main.ts`; it remains separate from product runtime code. +* **Agent Provider:** The AI backend selected by the user for the Sandcastle Runtime to run. +* **Codex Provider:** The first Agent Provider supported through the Sandcastle Runtime. +* **Provider Auth Mode:** The way an Agent Provider authenticates for a runtime turn. +* **Runtime Sandbox:** A Docker-backed isolated environment where agent work runs. +* **Runtime Readiness:** The configuration state showing whether the App Sandcastle Runtime can run agent work. +* **Runtime Event:** A streamed update from the App Sandcastle Runtime. +* **Sandbox Result:** A branch, patch, generated text, or diff produced by a Runtime Sandbox for review. +* **Document Preview:** A generated PRD or spec draft shown before saving to the configured project path. +* **Approval Gate:** A user decision point before sandboxed code changes are applied to the project workspace. + +## 5. Primary User Flow + +1. The user opens SpecForge and chooses a project. +2. The Configuration screen shows Sandcastle Runtime Readiness. +3. The user chooses Codex as the Agent Provider. +4. The user chooses a Provider Auth Mode: local subscription auth or API-key auth stored through the OS credential store. +5. SpecForge checks Docker CLI, Docker daemon, Codex CLI, Codex auth, Git, project readability, runtime working directories, and runtime launch capability. On Windows, host Docker is preferred, but a healthy WSL Docker integration is accepted when the host Docker pipe is unavailable. +6. The user selects a discovered Codex model and reasoning effort. +7. The user generates or refines a PRD/spec, chats, or starts execution. 
+8. SpecForge launches an App Sandcastle Runtime turn inside Docker. +9. Runtime Events stream into the UI while the turn runs. +10. PRD/spec generation returns a persisted Document Preview under `.specforge/previews/`. +11. Code-changing execution returns a Sandbox Result for approval before the host workspace is changed. + +## 6. Functional Requirements + +* Product runtime code must live in the app source tree, not under `.sandcastle/`. +* `.sandcastle/main.ts` remains personal development automation and is not wired into product runtime. +* React communicates with the desktop runtime through `src/lib/runtime.ts`; React does not execute shell commands or write workspace files directly. +* Tauri/Rust owns process control, filesystem access, credential access, Docker/Codex readiness checks, and stop handling. +* Real PRD/spec agent turns run inside Docker through Sandcastle. +* If Docker is unavailable, the Sandcastle Runtime is unavailable. +* On Windows, Docker readiness and Sandcastle launches may use Docker from WSL when host Docker is unavailable and a WSL distro can reach the daemon. +* SpecForge must not silently fall back to direct host execution for real AI agent work. +* API keys are stored through the OS credential store and never written to `.specforge/settings.json`. +* Subscription auth uses local Codex authentication material such as `~/.codex` when preparing the Runtime Sandbox. +* Codex model discovery uses `codex debug models`, with `codex debug models --bundled` as fallback. +* Generated PRD/spec previews can be saved to the configured document path, edited before saving, discarded, or replaced by another generation run. +* Settings expose Agent Provider, Provider Auth Mode, Codex API key entry, local Codex auth detection, Docker readiness, model discovery status, selected model, and reasoning effort. + +## 7. Acceptance Criteria + +* Configuration shows Sandcastle/Codex/Docker readiness before the app is considered fully usable. 
+* Settings shows the same readiness status and lets the user refresh diagnostics. +* User can choose Codex auth mode as subscription or API key. +* API-key auth stores the secret only in the OS credential store. +* Subscription auth detects local Codex auth without writing credentials to project settings. +* Codex models are discovered from the installed Codex CLI. +* PRD generation uses Sandcastle and returns a persisted Document Preview. +* Spec generation uses Sandcastle and returns a persisted Document Preview. +* Code-changing execution returns a Sandcastle Sandbox Result and preserves approval gates and emergency stop behavior. +* Cursor is no longer presented as required for SpecForge to be useful. diff --git a/docs/SPEC.md b/docs/SPEC.md index 02a4436..d34b5b5 100644 --- a/docs/SPEC.md +++ b/docs/SPEC.md @@ -1,179 +1,70 @@ -# Technical Specification: SpecForge +# Sandcastle Runtime Technical Specification -## 1. Architecture +## Architecture -SpecForge is a split desktop application: +SpecForge is a Tauri desktop app with a React webview. The webview never performs shell execution, filesystem writes, credential access, Docker checks, or Codex process control directly. All desktop work flows through `src/lib/runtime.ts` into Tauri commands exposed from `src-tauri/src/lib.rs`. -* **React webview:** Owns routing, topic/session management UI, PRD/spec editing, workspace browsing, settings, passive rendering of runtime output, and PRD/spec prompt orchestration. -* **Bun TypeScript runner:** Owns `@cursor/sdk` execution for PRD/spec generation because the SDK local runtime requires a Node-compatible process and cannot be bundled into the browser webview. -* **Tauri/Rust backend:** Owns filesystem access, workspace scanning, session persistence, git diffing, native dialogs, PDF parsing, OS credential storage for the Cursor API key, generated document saving, Bun runner delegation, and chat event streaming. 
+The product Sandcastle runtime lives in the app source tree. `src/sandcastle/Dockerfile` defines the Docker-backed Codex runtime image. The personal `.sandcastle/main.ts` batch runner remains separate development automation and is not used by product UI flows. -The webview never writes workspace files directly. All desktop data access continues to flow through `src/lib/runtime.ts` into Tauri commands exposed from `src-tauri/src/lib.rs`. PRD/spec model calls run through `src/cursorAgentRunner.ts` using Bun and `@cursor/sdk`; Rust does not implement provider-specific prompt logic. +## Runtime Ownership -## 2. Routes +* React owns view state, prompt composition, and rendering runtime events. +* `src/lib/runtime.ts` owns all Tauri `invoke` calls and event subscriptions. +* `src-tauri/src/environment.rs` reports Runtime Readiness for Codex auth, Codex CLI, Docker CLI/daemon, and Git. Docker readiness delegates to `src-tauri/src/docker.rs`, which prefers host Docker and falls back to WSL Docker integration on Windows when a WSL distro can reach the daemon. +* `src-tauri/src/secrets.rs` stores Codex API keys in the OS credential store and detects local Codex subscription auth from `CODEX_HOME`, `%USERPROFILE%\.codex`, or `$HOME/.codex`. +* `src-tauri/src/cursor_agent.rs` is legacy-named migration scaffolding; its PRD/spec command now builds and runs the Docker-backed Sandcastle Codex runtime. +* `src-tauri/src/project.rs` reads and writes `.specforge/settings.json`. -* `/` is the Projects / Workspace Initialization screen for local workspace selection, recent project reopening, and the disabled clone placeholder. -* `/review` is the primary post-setup document and file editing workspace. -* `/chat` is the secondary agent conversation workspace. -* `/settings` holds project-scoped and local runtime configuration. +## Project Settings -After setup completion or a manual project open from Projects, the app routes to `/review` by default. 
During automatic last-project restore, the app does not redirect away from `/`, so Projects remains available with the active project loaded. Chat remains available from the sidebar below Review. +`.specforge/settings.json` is project configuration. It stores: -## 3. State Model - -### 3.1. Frontend stores - -* **`useProjectStore`:** PRD/spec content, document paths, editable agent descriptions, selected project defaults, annotations, and open workspace file tabs. -* **`useChatStore`:** Chat session summaries, `activeSessionId`, loaded per-topic snapshots, per-topic drafts, and Caveman readiness state. -* **`useAgentStore`:** A lightweight runtime mirror used by review and shared execution UI. In chat-first flows it mirrors the active chat topic runtime rather than owning an independent executor. -* **`useSettingsStore`:** Theme, in-memory Cursor API key input, last opened project path, environment scan results, and workspace entries. - -### 3.2. Persistence - -Project settings live in: - -* `.specforge/settings.json` - -The Cursor API key is not part of project settings. It is stored by Rust through the OS credential store and exposed to the frontend only when Cursor SDK generation needs it. - -Chat data lives in: - -* `.specforge/sessions/index.json` -* `.specforge/sessions/.json` - -`index.json` stores topic summaries plus `lastActiveSessionId`. Each session snapshot stores: - -* `id` -* `title` -* `createdAt` -* `updatedAt` +* `agentProvider`, currently normalized to `codex` +* `providerAuthMode`, either `subscription` or `api_key` * `selectedModel` * `selectedReasoning` -* `autonomyMode` -* `status` -* `contextItems` -* `messages` -* `runtime` -* `lastError` - -## 4. Chat Session Behavior - -### 4.1. Default context +* PRD, spec, and execution agent descriptions +* PRD/spec output paths +* supporting document paths -When a new topic is created, the backend seeds the session with: +Provider secrets are not written to project settings. 
`.specforge/.gitignore` ignores local preview/session state while allowing settings to remain trackable.
-* the configured PRD document
-* the configured SPEC document
-* any configured supporting documents
-* a workspace tree summary
+## Runtime Readiness
-Additional workspace files are attached explicitly per topic from the chat UI. Session context does not bleed across topics.
+Configuration and Settings both display:
-### 4.2. Runtime orchestration
+* Codex Provider authentication status
+* Codex CLI status
+* Docker CLI and daemon status, including Windows WSL Docker fallback status
+* Git status
-Chat turns are still executed in Rust as the legacy headless CLI path:
+Model discovery runs on the host through `codex debug models`. If live discovery returns no models, the backend tries `codex debug models --bundled`.
-* **Codex provider:** mapped to suggest, auto-edit, or full-auto style permissions depending on the selected autonomy mode
-* **Claude provider:** mapped to default, accept-edits, or bypass-permissions style permissions
+## PRD And Spec Generation
-Rust keeps a session-keyed runtime map so the following remain isolated by `sessionId`:
+PRD and spec generation keep separate workflow-specific agent descriptions. The frontend composes the prompt, then calls the desktop runtime command. Rust resolves a Docker runtime through `src-tauri/src/docker.rs`, builds the `specforge-sandcastle-runtime:latest` Docker image from `src/sandcastle/Dockerfile`, mounts the selected workspace read-only at `/home/agent/workspace`, mounts a temporary output directory, passes Codex auth through either `OPENAI_API_KEY` or a read-only `.codex` mount, and runs `codex exec` inside the container. When the Windows WSL Docker fallback is selected, Windows host paths are converted to `/mnt/<drive>/...` before being passed to Docker. The fallback tries the default WSL distro first, then other user distros while skipping Docker Desktop's internal `docker-desktop` distros.
-* current status -* terminal output -* pending approval state -* pending diff -* stop requests +Generated markdown is written first to `.specforge/previews/prd.md` or `.specforge/previews/spec.md`. Project context loads previews separately from canonical PRD/spec documents and the review pane shows a preview action group. Save writes the current preview content to the configured canonical document path, then deletes the preview. Edit switches the preview into the existing edit mode before saving. Discard deletes the preview and restores the canonical document from the workspace. Running generation again replaces the persisted preview. -### 4.3. Approval semantics +## Chat And Execution -* **`stepped`:** first run in proposal/read-only mode, then require explicit approval before the write-capable pass -* **`milestone`:** run one assistant turn, capture the real git diff, and pause before the next turn -* **`god_mode`:** allow a full-permission turn without an approval pause +Codex chat turns run through the same Docker-backed Sandcastle image while preserving the existing stepped and milestone approval gates. The current Sandcastle chat mount is read-only, so write-capable turns return assistant output and blockers without directly mutating the host workspace. -Review mode does not expose these controls directly; it only mirrors the active topic state. +The review execution command launches a read-only Sandcastle Codex turn from the approved spec and returns a Sandbox Result into the execution diff artifact. Stepped mode gates before the runtime turn, milestone mode gates after the result is available, and stop requests force-remove the active Docker container before marking the UI halted. Applying sandbox patches back into the host workspace remains a follow-up integration step. -## 5. 
PRD/Spec Generation +## Tauri Commands -PRD/spec generation now runs in the TypeScript layer with `@cursor/sdk`, isolated in a Bun runner so Vite does not bundle Node-only SDK dependencies into the webview. - -* `src/lib/cursorAgentRuntime.ts` composes Cursor prompts and invokes the desktop Cursor runner command. -* `src/cursorAgentRunner.ts` creates the local Cursor SDK agent, streams run events, waits for completion, and extracts final Markdown. -* `src/hooks/useDocumentHandlers.ts` keeps the existing PRD/spec button and prompt flow, but fetches the Cursor API key from Rust before invoking the Cursor generation path. -* Rust does not call Codex or Claude ACP for PRD/spec generation. -* Rust launches the Bun runner, validates the workspace root, and parses the runner's structured JSON-line output. -* Rust validates the workspace root and Markdown output path through `save_workspace_document`, strips wrapping Markdown code fences, creates parent directories, and writes the generated document. -* The PRD agent receives the editable PRD agent description plus the user's PRD prompt. -* The spec agent receives the editable spec agent description, the user's spec prompt, and the selected PRD content. -* `buildCursorPrdGrillPrompt` and `buildCursorSpecGrillPrompt` prepend SpecForge's built-in grill-me workflow to the editable PRD/spec agent descriptions. These prompts ask exactly one next question, include a recommended answer, infer answers already present in supplied context, and explicitly avoid drafting the final document. -* `useDocumentHandlers` exposes Grill PRD and Grill Spec handlers. The handlers reuse the Cursor SDK runner, stream events into the review terminal, and append the returned grill question block into the relevant generation textarea for the user to answer or edit before generating the document. -* The execution agent description is persisted in project settings for the upcoming execution migration. - -## 6. 
Tauri Command Surface - -The desktop runtime currently exposes: +Current command names remain stable for frontend compatibility: * `run_environment_scan` -* `run_cursor_agent_prompt` -* `get_cursor_api_key` -* `save_cursor_api_key` -* `delete_cursor_api_key` -* `pick_document` -* `pick_project_folder` -* `load_project_context` +* `list_cursor_models` (legacy name; returns Codex CLI models) +* `run_cursor_agent_prompt` (legacy name; runs Sandcastle Codex for PRD/spec turns) +* `save_cursor_api_key` / `delete_cursor_api_key` (legacy names; store Codex API keys) * `save_project_settings` -* `read_workspace_file` -* `get_workspace_snapshot` -* `git_get_diff` * `save_workspace_document` -* `create_chat_session` -* `load_chat_session` -* `save_chat_session` -* `rename_chat_session` -* `delete_chat_session` -* `send_chat_message` -* `approve_chat_session` -* `stop_chat_session` - -Chat runtime updates are streamed through a typed `chat-session-event` payload carrying the session id plus the current session snapshot or summary update. - -## 7. Caveman Integration - -SpecForge now treats Caveman as a built-in chat response mode instead of a runtime-installed dependency. - -Each outgoing chat turn prepends a compact Caveman-style instruction before the normal SpecForge system prompt, so the behavior stays active without making the user spend tokens enabling it manually. - -There is no chat-entry verification or installation path tied to navigation, and Caveman state must never block topic changes, route changes, or session configuration edits. - -## 8. Review Workspace - -The main sidebar follows the full-height review-screen pattern from the Stitch design: fixed-width desktop navigation, Projects, Review, Chat, and Settings ordering, Dracula Enterprise colors, and Review above Chat. The review screen also has a top app bar with File/Edit/Selection/Terminal/Help menu labels plus compact model, reasoning, and approval-mode controls. 
The review screen still provides: - -* PRD/spec editing -* workspace file browsing -* PRD/spec generation -* PRD/spec grill-me refinement before generation - -Its execute panel is now a read-only mirror of the active chat topic: - -* terminal output mirrors the active topic runtime -* diff output mirrors the active topic pending diff -* approval and stop controls are hidden - -This prevents review from launching a second execution engine that could diverge from chat state. - -## 9. Setup Clone Placeholder - -The setup screen includes a presentational Git clone card beside the local folder picker. The repository URL input and Clone button are disabled and must not call Git, Tauri commands, filesystem writes, or network operations until a dedicated clone implementation is added. - -The Projects screen also persists up to eight recently opened project folders in the `recentProjects` field of the `specforge.settings` `localStorage` record. Opening a recent project calls the existing `load_project_context` Tauri command for that saved path; React still does not perform filesystem access directly. - -When a picked or recent project has no `.specforge/settings.json`, the React handler immediately calls `save_project_settings` with the default settings returned by `load_project_context`, reloads the project context, and shows a HeroUI modal asking the user to review project defaults in Settings. - -The Projects screen does not render workspace configuration controls. Cursor API key management, model/reasoning defaults, agent descriptions, document paths, and supporting document configuration are owned by `/settings`. - -## 10. Known Limits +* `spawn_cli_agent` +* `approve_action` +* `kill_agent_process` +* chat session commands -* Opened workspace file tabs remain in-memory only; there is still no save-to-disk flow. -* The desktop runtime is required for real project persistence, chat sessions, and CLI-backed turns. 
-* Chat execution still uses the legacy Codex CLI and Claude Code path; the current Cursor SDK refactor is limited to PRD/spec generation. -* **Planned dependencies not yet installed:** `react-markdown`, `react-syntax-highlighter`, and `tauri-plugin-store` are referenced in design documents but are not currently in `package.json` or `Cargo.toml`. Features that depend on them (rich markdown rendering, syntax-highlighted code blocks, native key-value persistence) are aspirational and should not be assumed functional until the dependencies are added. +Legacy names should be renamed in a follow-up compatibility migration. diff --git a/skills-lock.json b/skills-lock.json index ad2a06d..d9f9d92 100644 --- a/skills-lock.json +++ b/skills-lock.json @@ -25,6 +25,36 @@ "source": "JuliusBrussee/caveman", "sourceType": "github", "computedHash": "1dc59e7e896bc9dd449e43116c4c8b2e656b88c3370df91848330c17347af3d0" + }, + "grill-with-docs": { + "source": "mattpocock/skills", + "sourceType": "github", + "skillPath": "skills/engineering/grill-with-docs/SKILL.md", + "computedHash": "31a5b1ae116558bf7d3f633f442835f54bd7645923d4f45c7823e52a97317666" + }, + "improve-codebase-architecture": { + "source": "mattpocock/skills", + "sourceType": "github", + "skillPath": "skills/engineering/improve-codebase-architecture/SKILL.md", + "computedHash": "c77b86b4332919499608f9af1880074e1fec65a59b95c70c27a9f39cd137865e" + }, + "tdd": { + "source": "mattpocock/skills", + "sourceType": "github", + "skillPath": "skills/engineering/tdd/SKILL.md", + "computedHash": "15a7b5e36383ebadb2dec5e586679e55e9663d292da418926b8da6fc0ef27d84" + }, + "to-issues": { + "source": "mattpocock/skills", + "sourceType": "github", + "skillPath": "skills/engineering/to-issues/SKILL.md", + "computedHash": "73a91f30784523aa59ec9b02769576ebfc738e2cd5ad8f6441076031f0a5d5ac" + }, + "to-prd": { + "source": "mattpocock/skills", + "sourceType": "github", + "skillPath": "skills/engineering/to-prd/SKILL.md", + "computedHash": 
"fd8c259f9c44eff08e29a1a2fc71a806a3568d279a55387a361f78620b10f2aa" } } } diff --git a/src-tauri/src/agent.rs b/src-tauri/src/agent.rs index d5ac04f..97c1eb6 100644 --- a/src-tauri/src/agent.rs +++ b/src-tauri/src/agent.rs @@ -1,9 +1,22 @@ -use crate::constants::SAMPLE_DIFF; -use crate::models::{AgentStateEvent, ApprovalWaitOutcome, SimulatedStep, StopState}; -use crate::state::{ExecutionRuntime, SharedState}; -use std::sync::Arc; -use std::thread; -use std::time::Duration; +use crate::models::{AgentStateEvent, ApprovalWaitOutcome}; +use crate::state::{ExecutionRuntime, SharedState, WorkspaceContext}; +use crate::{ + constants::SAMPLE_DIFF, + docker::{DockerRuntime, docker_mount_arg, sandcastle_build_args}, + generation::{ + create_spec_generation_temp_dir, format_process_failure, map_codex_reasoning, + run_command_with_stdin_and_stream, + }, + git::git_get_diff_for_root, + secrets::read_cursor_api_key, +}; +use std::{ + fs, + path::{Path, PathBuf}, + process::{Command, Stdio}, + sync::Arc, + thread, +}; use tauri::{AppHandle, Emitter, State}; #[tauri::command] @@ -24,11 +37,27 @@ pub(crate) fn spawn_cli_agent( control.run_id = control.run_id.wrapping_add(1); control.awaiting_approval = false; control.stop_requested = false; + control.active_container = None; control.run_id }; + let workspace = state + .workspace + .lock() + .map_err(|_| String::from("Workspace lock was poisoned."))? 
+ .clone(); + thread::spawn(move || { - run_simulated_agent(app, runtime, run_id, spec_payload, mode, model, reasoning); + run_sandcastle_agent( + app, + runtime, + workspace, + run_id, + spec_payload, + mode, + model, + reasoning, + ); }); Ok(()) @@ -55,211 +84,404 @@ pub(crate) fn kill_agent_process(state: State) -> Result<(), String .map_err(|_| String::from("Execution lock was poisoned."))?; control.stop_requested = true; control.awaiting_approval = false; + let active_container = control.active_container.clone(); state.runtime.signal.notify_all(); + drop(control); + + if let Some(container_name) = active_container { + let _ = force_remove_container(&container_name); + } + Ok(()) } -pub(crate) fn run_simulated_agent( +pub(crate) fn run_sandcastle_agent( app: AppHandle, runtime: Arc, + workspace: Option, run_id: u64, spec_payload: String, mode: String, model: String, reasoning: String, ) { - let heading_count = spec_payload - .lines() - .filter(|line| line.trim_start().starts_with('#')) - .count(); - let steps = build_simulated_steps(heading_count, &mode, &model, &reasoning); - emit_state(&app, "executing", Some("Pre-flight Check"), None, None); - - for step in steps { - match stop_state(&runtime, run_id) { - StopState::Continue => {} - StopState::StopRequested => { - emit_line( - &app, - "Execution interrupted before the next step could run.", - ); + let Some(workspace) = workspace else { + emit_line( + &app, + "Choose a project workspace before starting Sandcastle execution.", + ); + emit_state( + &app, + "error", + Some("Workspace Required"), + None, + Some("Choose a project workspace before starting execution."), + ); + return; + }; + + emit_state(&app, "executing", Some("Sandcastle Pre-flight"), None, None); + emit_line( + &app, + "Preparing the Sandcastle Runtime sandbox for execution.", + ); + + if mode == "stepped" { + match wait_for_approval( + &app, + &runtime, + run_id, + "Stepped Approval", + "Approve this Sandcastle execution turn before the sandbox 
runs.", + ) { + Ok(ApprovalWaitOutcome::Approved) => {} + Ok(ApprovalWaitOutcome::StopRequested) => { + emit_line(&app, "Execution interrupted during approval gate."); emit_state( &app, "halted", - Some(step.milestone), + Some("Stepped Approval"), None, Some("Execution interrupted by the operator."), ); return; } - StopState::Replaced => return, - } - - thread::sleep(Duration::from_millis(step.delay_ms)); - match stop_state(&runtime, run_id) { - StopState::Continue => {} - StopState::StopRequested => { - emit_line( + Ok(ApprovalWaitOutcome::Replaced) => return, + Err(message) => { + emit_line(&app, &message); + emit_state( &app, - "Execution interrupted before the next step could run.", + "error", + Some("Stepped Approval"), + None, + Some(&message), ); + return; + } + } + } + + let container_name = format!("specforge-exec-{run_id}"); + + match run_sandcastle_execution_turn( + &app, + &runtime, + run_id, + &workspace.root, + &spec_payload, + &model, + &reasoning, + &container_name, + ) { + Ok(result) => { + if stop_was_requested(&runtime, run_id) { + emit_line(&app, "Execution stopped while Sandcastle was running."); emit_state( &app, "halted", - Some(step.milestone), + Some("Sandcastle Runtime"), None, - Some("Execution interrupted by the operator."), + Some("Execution stopped by the operator."), ); return; } - StopState::Replaced => return, - } - emit_state(&app, "executing", Some(step.milestone), None, None); - emit_line(&app, &step.line); - - if step.gate { - let summary = if mode == "stepped" { - "Stepped approval required before the next write action." - } else { - "Milestone boundary reached. Review the diff before execution resumes." 
- }; - - match wait_for_approval(&app, &runtime, run_id, step.milestone, summary) { - Ok(ApprovalWaitOutcome::Approved) => {} - Ok(ApprovalWaitOutcome::StopRequested) => { - emit_line(&app, "Execution interrupted during approval gate."); - emit_state( - &app, - "halted", - Some(step.milestone), - None, - Some("Execution interrupted by the operator."), - ); - return; - } - Ok(ApprovalWaitOutcome::Replaced) => return, - Err(message) => { - emit_line(&app, &message); - emit_state( - &app, - "error", - Some(step.milestone), - None, - Some("Approval synchronization failed."), - ); - return; + + let host_diff = + git_get_diff_for_root(&workspace.root).unwrap_or_else(|_| SAMPLE_DIFF.to_string()); + let sandbox_result = format!( + "{}\n\n--- Sandcastle Result ---\n{}", + if host_diff.trim().is_empty() { + SAMPLE_DIFF + } else { + host_diff.trim() + }, + result.trim() + ); + emit_line( + &app, + "Sandcastle execution turn completed. Review the sandbox result.", + ); + + if mode == "milestone" { + match wait_for_approval( + &app, + &runtime, + run_id, + "Milestone Approval", + "Milestone boundary reached. Review the Sandcastle result before continuing.", + ) { + Ok(ApprovalWaitOutcome::Approved) => {} + Ok(ApprovalWaitOutcome::StopRequested) => { + emit_line(&app, "Execution interrupted during milestone approval."); + emit_state( + &app, + "halted", + Some("Milestone Approval"), + None, + Some("Execution interrupted by the operator."), + ); + return; + } + Ok(ApprovalWaitOutcome::Replaced) => return, + Err(message) => { + emit_line(&app, &message); + emit_state( + &app, + "error", + Some("Milestone Approval"), + None, + Some(&message), + ); + return; + } } } - emit_line(&app, "Approval received. Resuming the agent loop."); + emit_state( + &app, + "completed", + Some("Execution Complete"), + Some(&sandbox_result), + Some("Sandcastle execution completed. 
The sandbox result is ready for review."), + ); } - } + Err(error) => { + if stop_was_requested(&runtime, run_id) { + emit_line(&app, "Execution stopped while Sandcastle was running."); + emit_state( + &app, + "halted", + Some("Sandcastle Runtime"), + None, + Some("Execution stopped by the operator."), + ); + return; + } - if !matches!(stop_state(&runtime, run_id), StopState::Continue) { - return; + emit_line(&app, &error); + emit_state( + &app, + "error", + Some("Sandcastle Runtime"), + None, + Some(&error), + ); + } } - - emit_line( - &app, - "Execution complete. Final diff is ready for inspection.", - ); - emit_state( - &app, - "completed", - Some("Execution Complete"), - Some(SAMPLE_DIFF), - Some("Simulated agent execution completed successfully."), - ); } -pub(crate) fn build_simulated_steps( - heading_count: usize, - mode: &str, +fn run_sandcastle_execution_turn( + app: &AppHandle, + runtime: &Arc, + run_id: u64, + workspace_root: &Path, + spec_payload: &str, model: &str, reasoning: &str, -) -> Vec { - let mut steps = vec![ - SimulatedStep { - delay_ms: 450, - line: format!( - "Loaded approved specification with {heading_count} markdown headings into {model} using the {reasoning} reasoning profile." 
- ), - milestone: "Pre-flight Check", - gate: false, - }, - SimulatedStep { - delay_ms: 650, - line: String::from( - "Scanning CLI availability and staging the current repository diff.", - ), - milestone: "Pre-flight Check", - gate: false, - }, - SimulatedStep { - delay_ms: 750, - line: String::from( - "Mapping milestones for review UI, Zustand stores, and Tauri commands.", - ), - milestone: "Milestone Planning", - gate: false, - }, - ]; + container_name: &str, +) -> Result { + let temp_dir = create_spec_generation_temp_dir("sandcastle-execution")?; + let output_path = temp_dir.join("assistant-message.md"); + let diff_path = temp_dir.join("sandbox.diff"); + let docker = DockerRuntime::detect()?; + let image = ensure_sandcastle_image(&docker)?; + let prompt = format!( + "Execute from this approved technical specification inside the sandbox. Do not mutate the host workspace directly. Return a concise Sandbox Result with the intended patch, commands, or blockers.\n\n{}", + spec_payload + ); + let mut command = docker.command(); + command + .arg("run") + .arg("--rm") + .arg("-i") + .arg("--name") + .arg(container_name) + .arg("-v") + .arg(docker_mount_arg( + &docker, + workspace_root, + "/home/agent/input", + "ro", + )) + .arg("-v") + .arg(docker_mount_arg( + &docker, + &temp_dir, + "/home/agent/output", + "rw", + )) + .arg("-e") + .arg(format!("SPECFORGE_CODEX_MODEL={model}")) + .arg("-e") + .arg(format!( + "SPECFORGE_CODEX_REASONING={}", + map_codex_reasoning(reasoning) + )) + .arg("-e") + .arg("SPECFORGE_CODEX_SANDBOX=workspace-write") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); - if mode == "stepped" { - steps.push(SimulatedStep { - delay_ms: 650, - line: String::from( - "A write action is ready to execute against the approved specification.", - ), - milestone: "Stepped Approval", - gate: true, + configure_codex_auth(&mut command, &docker)?; + + command + .arg(image) + .arg("sh") + .arg("-lc") + .arg(sandcastle_codex_script()); + + 
set_active_container(runtime, run_id, Some(container_name.to_string()))?; + let output = + run_command_with_stdin_and_stream(&mut command, "Sandcastle Runtime", &prompt, |line| { + emit_line(app, line); }); + let clear_result = set_active_container(runtime, run_id, None); + clear_result?; + let output = output?; + + if !output.status.success() { + let _ = fs::remove_dir_all(&temp_dir); + return Err(format_process_failure("Sandcastle Runtime", &output)); } - steps.extend([ - SimulatedStep { - delay_ms: 700, - line: String::from( - "Applying Dracula theme tokens and composing the review workspace shell.", - ), - milestone: "Compose Review Workspace", - gate: false, - }, - SimulatedStep { - delay_ms: 650, - line: String::from( - "Wiring project, settings, and agent stores into the execution dashboard.", - ), - milestone: "Compose Review Workspace", - gate: false, - }, - ]); - - if mode == "milestone" { - steps.push(SimulatedStep { - delay_ms: 650, - line: String::from("The first milestone is complete and ready for diff review."), - milestone: "Milestone Approval", - gate: true, - }); + let result = fs::read_to_string(&output_path).or_else(|_| { + let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); + + if stdout.is_empty() { + Err(std::io::Error::other( + "The Sandcastle Runtime returned no sandbox result.", + )) + } else { + Ok(stdout) + } + }); + let mut result = match result { + Ok(result) => result, + Err(error) => { + let _ = fs::remove_dir_all(&temp_dir); + return Err(format!("Unable to read the Sandcastle result: {error}")); + } + }; + if let Ok(diff) = fs::read_to_string(&diff_path) + && !diff.trim().is_empty() + { + result.push_str("\n\n--- Sandbox Diff ---\n"); + result.push_str(diff.trim()); + } + let _ = fs::remove_dir_all(&temp_dir); + + Ok(result) +} + +fn sandcastle_codex_script() -> &'static str { + r#"set -eu +mkdir -p /home/agent/workspace +cp -a /home/agent/input/. 
/home/agent/workspace/ +cd /home/agent/workspace +codex exec --color never --skip-git-repo-check --sandbox "$SPECFORGE_CODEX_SANDBOX" --model "$SPECFORGE_CODEX_MODEL" --config "model_reasoning_effort=\"$SPECFORGE_CODEX_REASONING\"" --output-last-message /home/agent/output/assistant-message.md +git diff --no-ext-diff > /home/agent/output/sandbox.diff || true +"# +} + +fn set_active_container( + runtime: &Arc, + run_id: u64, + active_container: Option, +) -> Result<(), String> { + let mut control = runtime + .control + .lock() + .map_err(|_| String::from("Execution lock was poisoned."))?; + + if control.run_id == run_id { + control.active_container = active_container; } - steps.extend([ - SimulatedStep { - delay_ms: 650, - line: String::from("Streaming terminal telemetry and enabling approval controls."), - milestone: "Execution Dashboard", - gate: false, - }, - SimulatedStep { - delay_ms: 550, - line: String::from("Packaging a final summary for IDE handoff."), - milestone: "Execution Dashboard", - gate: false, - }, - ]); - - steps + Ok(()) +} + +fn stop_was_requested(runtime: &Arc, run_id: u64) -> bool { + runtime + .control + .lock() + .map(|control| control.run_id == run_id && control.stop_requested) + .unwrap_or(true) +} + +fn force_remove_container(container_name: &str) -> Result<(), String> { + let mut command = DockerRuntime::detect()?.command(); + let output = command + .arg("rm") + .arg("-f") + .arg(container_name) + .output() + .map_err(|error| { + format!("Unable to stop Sandcastle container {container_name}: {error}") + })?; + + if output.status.success() { + Ok(()) + } else { + Err(format_process_failure("Docker stop", &output)) + } +} + +fn ensure_sandcastle_image(docker: &DockerRuntime) -> Result { + let app_root = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .map(Path::to_path_buf) + .ok_or_else(|| String::from("Unable to resolve the SpecForge application root."))?; + let dockerfile = app_root.join("src").join("sandcastle").join("Dockerfile"); 
+ + if !dockerfile.exists() { + return Err(format!( + "Sandcastle Dockerfile was not found at {}.", + dockerfile.display() + )); + } + + let image = "specforge-sandcastle-runtime:latest"; + let mut command = docker.command(); + let output = command + .args(sandcastle_build_args(docker, image, &dockerfile, &app_root)) + .env("NO_COLOR", "1") + .output() + .map_err(|error| format!("Unable to build the Sandcastle runtime image: {error}"))?; + + if !output.status.success() { + return Err(format_process_failure("Sandcastle image build", &output)); + } + + Ok(String::from(image)) +} + +fn configure_codex_auth(command: &mut Command, docker: &DockerRuntime) -> Result<(), String> { + if let Some(api_key) = read_cursor_api_key()?.filter(|value| !value.trim().is_empty()) { + command.arg("-e").arg(format!("OPENAI_API_KEY={api_key}")); + return Ok(()); + } + + let codex_home = local_codex_auth_dir() + .filter(|path| path.exists()) + .ok_or_else(|| { + String::from("Codex authentication is required before running Sandcastle.") + })?; + command.arg("-v").arg(docker_mount_arg( + docker, + &codex_home, + "/home/agent/.codex", + "ro", + )); + + Ok(()) +} + +fn local_codex_auth_dir() -> Option { + std::env::var_os("CODEX_HOME") + .map(PathBuf::from) + .or_else(|| std::env::var_os("USERPROFILE").map(|home| PathBuf::from(home).join(".codex"))) + .or_else(|| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(".codex"))) } pub(crate) fn wait_for_approval( @@ -302,22 +524,6 @@ pub(crate) fn wait_for_approval( Ok(ApprovalWaitOutcome::Approved) } -pub(crate) fn stop_state(runtime: &Arc, run_id: u64) -> StopState { - runtime - .control - .lock() - .map(|control| { - if control.stop_requested { - StopState::StopRequested - } else if control.run_id != run_id { - StopState::Replaced - } else { - StopState::Continue - } - }) - .unwrap_or(StopState::StopRequested) -} - pub(crate) fn emit_line(app: &AppHandle, line: &str) { let _ = app.emit("cli-output", line.to_string()); } diff --git 
a/src-tauri/src/chat/commands.rs b/src-tauri/src/chat/commands.rs index 8ae8184..86c1824 100644 --- a/src-tauri/src/chat/commands.rs +++ b/src-tauri/src/chat/commands.rs @@ -1,4 +1,5 @@ use crate::{ + docker::DockerRuntime, environment::current_timestamp, models::{ AutonomyMode, ChatContextItem, ChatRuntimeState, ChatSessionIndexPayload, @@ -181,7 +182,21 @@ pub(crate) fn stop_chat_session( let control = controls.entry(session_id).or_default(); control.stop_requested = true; control.awaiting_approval = false; + let active_container = control.active_container.clone(); state.chat_runtime.signal.notify_all(); + drop(controls); + + if let Some(container_name) = active_container + && let Ok(docker) = DockerRuntime::detect() + { + let _ = docker + .command() + .arg("rm") + .arg("-f") + .arg(container_name) + .output(); + } + Ok(()) } @@ -219,6 +234,7 @@ pub(crate) fn send_chat_message( control.run_id = control.run_id.wrapping_add(1); control.stop_requested = false; control.awaiting_approval = false; + control.active_container = None; control.run_id }; diff --git a/src-tauri/src/chat/execution.rs b/src-tauri/src/chat/execution.rs index 7c51f4b..0ca7965 100644 --- a/src-tauri/src/chat/execution.rs +++ b/src-tauri/src/chat/execution.rs @@ -1,19 +1,21 @@ use crate::{ + docker::{DockerRuntime, docker_mount_arg, sandcastle_build_args}, environment::{current_timestamp, resolve_cli_binary}, generation::{ create_spec_generation_temp_dir, format_process_failure, map_claude_reasoning, - map_codex_reasoning, run_command_with_stdin, + map_codex_reasoning, run_command_with_stdin, run_command_with_stdin_and_stream, }, git::git_get_diff_for_root, models::{ AutonomyMode, ChatEventPayload, ChatMessage, ChatRuntimeState, ChatSessionSnapshot, MessageRole, SessionStatus, }, + secrets::read_cursor_api_key, state::{ChatExecutionRuntime, WorkspaceContext}, }; use std::{ fs, - path::Path, + path::{Path, PathBuf}, process::{Command, Stdio}, sync::Arc, }; @@ -387,15 +389,54 @@ fn 
execute_chat_phase( let context_blocks = build_context_blocks(workspace, snapshot)?; let prompt_payload = build_chat_prompt(snapshot, &context_blocks, user_message, phase); - let assistant_content = run_chat_provider_request( + let selected_model = snapshot.selected_model.clone(); + let selected_reasoning = snapshot.selected_reasoning.clone(); + let assistant_content = match run_chat_provider_request( + app, + runtime, + session_id, + run_id, + snapshot, &workspace.root, - &snapshot.selected_model, - &snapshot.selected_reasoning, + &selected_model, + &selected_reasoning, phase, &prompt_payload, claude_path.as_deref(), codex_path.as_deref(), - )?; + ) { + Ok(content) => content, + Err(_) + if matches!( + stop_state(runtime, session_id, run_id), + ChatStopState::StopRequested + ) => + { + halt_session( + app, + &workspace.root, + session_id, + snapshot, + "Turn stopped while Sandcastle was running.", + )?; + return Ok(()); + } + Err(error) => return Err(error), + }; + + if matches!( + stop_state(runtime, session_id, run_id), + ChatStopState::StopRequested + ) { + halt_session( + app, + &workspace.root, + session_id, + snapshot, + "Turn stopped while Sandcastle was running.", + )?; + return Ok(()); + } let assistant_message = ChatMessage { id: create_chat_entity_id("msg"), @@ -435,6 +476,11 @@ fn execute_chat_phase( } fn run_chat_provider_request( + app: &AppHandle, + runtime: &Arc, + session_id: &str, + run_id: u64, + snapshot: &mut ChatSessionSnapshot, workspace_root: &Path, model: &str, reasoning: &str, @@ -454,8 +500,16 @@ fn run_chat_provider_request( ) } else { run_codex_chat_request( + app, + Some((runtime, session_id, run_id)), + snapshot, workspace_root, &resolve_cli_binary("codex", codex_path)?, + Some(&format!( + "specforge-chat-{}-{run_id}-{}", + sanitize_container_segment(session_id), + phase.label() + )), model, reasoning, phase, @@ -465,8 +519,12 @@ fn run_chat_provider_request( } fn run_codex_chat_request( + app: &AppHandle, + runtime: Option<(&Arc, 
&str, u64)>, + snapshot: &mut ChatSessionSnapshot, workspace_root: &Path, - binary_path: &Path, + _binary_path: &Path, + container_name: Option<&str>, model: &str, reasoning: &str, phase: ChatExecutionPhase, @@ -474,50 +532,112 @@ fn run_codex_chat_request( ) -> Result { let temp_dir = create_spec_generation_temp_dir("codex-chat")?; let output_path = temp_dir.join("assistant-message.md"); - let mut command = Command::new(binary_path); + let diff_path = temp_dir.join("sandbox.diff"); + let docker = DockerRuntime::detect()?; + let image = ensure_sandcastle_image(&docker)?; + let owned_container_name = container_name + .map(str::to_string) + .unwrap_or_else(|| format!("specforge-chat-{}", current_timestamp())); + let mut command = docker.command(); command - .current_dir(workspace_root) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .arg("exec") - .arg("--color") - .arg("never") - .arg("--skip-git-repo-check") - .arg("--sandbox") - .arg(phase.codex_sandbox()) - .arg("--model") - .arg(model) - .arg("--config") + .arg("run") + .arg("--rm") + .arg("-i") + .arg("--name") + .arg(&owned_container_name) + .arg("-v") + .arg(docker_mount_arg( + &docker, + workspace_root, + "/home/agent/input", + "ro", + )) + .arg("-v") + .arg(docker_mount_arg( + &docker, + &temp_dir, + "/home/agent/output", + "rw", + )) + .arg("-e") + .arg(format!("SPECFORGE_CODEX_MODEL={model}")) + .arg("-e") .arg(format!( - "model_reasoning_effort=\"{}\"", + "SPECFORGE_CODEX_REASONING={}", map_codex_reasoning(reasoning) )) - .arg("--output-last-message") - .arg(&output_path); + .arg("-e") + .arg(format!("SPECFORGE_CODEX_SANDBOX={}", phase.codex_sandbox())) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + configure_codex_auth(&mut command, &docker)?; - let output = run_command_with_stdin(&mut command, "Codex CLI", prompt_payload)?; + command + .arg(image) + .arg("sh") + .arg("-lc") + .arg(sandcastle_codex_script()); + + if let Some((runtime, 
session_id, run_id)) = runtime { + set_active_chat_container( + runtime, + session_id, + run_id, + Some(owned_container_name.clone()), + )?; + } + let output = run_command_with_stdin_and_stream( + &mut command, + "Sandcastle Runtime", + prompt_payload, + |line| { + if let Some((_, session_id, _)) = runtime { + append_terminal_line(app, session_id, snapshot, line); + } + }, + ); + if let Some((runtime, session_id, run_id)) = runtime { + set_active_chat_container(runtime, session_id, run_id, None)?; + } + let output = output?; if !output.status.success() { let _ = fs::remove_dir_all(&temp_dir); - return Err(format_process_failure("Codex CLI", &output)); + return Err(format_process_failure("Sandcastle Runtime", &output)); } let result = fs::read_to_string(&output_path).or_else(|_| { let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); if stdout.is_empty() { - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "The Codex CLI returned no assistant content.", + Err(std::io::Error::other( + "The Sandcastle Runtime returned no assistant content.", )) } else { Ok(stdout) } }); + + let mut result = match result { + Ok(result) => result, + Err(error) => { + let _ = fs::remove_dir_all(&temp_dir); + return Err(format!( + "Unable to read the Sandcastle assistant output: {error}" + )); + } + }; + if let Ok(diff) = fs::read_to_string(&diff_path) + && !diff.trim().is_empty() + { + result.push_str("\n\n--- Sandbox Diff ---\n"); + result.push_str(diff.trim()); + } let _ = fs::remove_dir_all(&temp_dir); - result.map_err(|error| format!("Unable to read the Codex assistant output: {error}")) + Ok(result) } fn run_claude_chat_request( @@ -556,6 +676,105 @@ fn run_claude_chat_request( Ok(String::from_utf8_lossy(&output.stdout).trim().to_string()) } +fn sandcastle_codex_script() -> &'static str { + r#"set -eu +mkdir -p /home/agent/workspace +cp -a /home/agent/input/. 
/home/agent/workspace/ +cd /home/agent/workspace +codex exec --color never --skip-git-repo-check --sandbox "$SPECFORGE_CODEX_SANDBOX" --model "$SPECFORGE_CODEX_MODEL" --config "model_reasoning_effort=\"$SPECFORGE_CODEX_REASONING\"" --output-last-message /home/agent/output/assistant-message.md +git diff --no-ext-diff > /home/agent/output/sandbox.diff || true +"# +} + +fn set_active_chat_container( + runtime: &Arc, + session_id: &str, + run_id: u64, + active_container: Option, +) -> Result<(), String> { + let mut controls = runtime + .control + .lock() + .map_err(|_| String::from("Chat execution lock was poisoned."))?; + let control = controls.entry(session_id.to_string()).or_default(); + + if control.run_id == run_id { + control.active_container = active_container; + } + + Ok(()) +} + +fn sanitize_container_segment(value: &str) -> String { + value + .chars() + .map(|character| { + if character.is_ascii_alphanumeric() || character == '-' || character == '_' { + character + } else { + '-' + } + }) + .collect() +} + +fn ensure_sandcastle_image(docker: &DockerRuntime) -> Result { + let app_root = Path::new(env!("CARGO_MANIFEST_DIR")) + .parent() + .map(Path::to_path_buf) + .ok_or_else(|| String::from("Unable to resolve the SpecForge application root."))?; + let dockerfile = app_root.join("src").join("sandcastle").join("Dockerfile"); + + if !dockerfile.exists() { + return Err(format!( + "Sandcastle Dockerfile was not found at {}.", + dockerfile.display() + )); + } + + let image = "specforge-sandcastle-runtime:latest"; + let mut command = docker.command(); + let output = command + .args(sandcastle_build_args(docker, image, &dockerfile, &app_root)) + .env("NO_COLOR", "1") + .output() + .map_err(|error| format!("Unable to build the Sandcastle runtime image: {error}"))?; + + if !output.status.success() { + return Err(format_process_failure("Sandcastle image build", &output)); + } + + Ok(String::from(image)) +} + +fn configure_codex_auth(command: &mut Command, docker: 
&DockerRuntime) -> Result<(), String> { + if let Some(api_key) = read_cursor_api_key()?.filter(|value| !value.trim().is_empty()) { + command.arg("-e").arg(format!("OPENAI_API_KEY={api_key}")); + return Ok(()); + } + + let codex_home = local_codex_auth_dir() + .filter(|path| path.exists()) + .ok_or_else(|| { + String::from("Codex authentication is required before running Sandcastle.") + })?; + command.arg("-v").arg(docker_mount_arg( + docker, + &codex_home, + "/home/agent/.codex", + "ro", + )); + + Ok(()) +} + +fn local_codex_auth_dir() -> Option { + std::env::var_os("CODEX_HOME") + .map(PathBuf::from) + .or_else(|| std::env::var_os("USERPROFILE").map(|home| PathBuf::from(home).join(".codex"))) + .or_else(|| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(".codex"))) +} + fn append_terminal_line( app: &AppHandle, session_id: &str, diff --git a/src-tauri/src/cursor_agent.rs b/src-tauri/src/cursor_agent.rs index 7126058..5d7a545 100644 --- a/src-tauri/src/cursor_agent.rs +++ b/src-tauri/src/cursor_agent.rs @@ -1,7 +1,15 @@ -use crate::{models::CursorModel, paths::canonicalize_existing_path, secrets::read_cursor_api_key}; +use crate::{ + docker::{DockerRuntime, docker_mount_arg, sandcastle_build_args}, + generation::{ + create_spec_generation_temp_dir, format_process_failure as format_command_failure, + map_codex_reasoning, run_command_with_stdin, + }, + models::CursorModel, + paths::canonicalize_existing_path, + secrets::read_cursor_api_key, +}; use serde::{Deserialize, Serialize}; use std::{ - io::Write, path::{Path, PathBuf}, process::{Command, Stdio}, }; @@ -15,16 +23,6 @@ pub(crate) struct CursorAgentPromptRequest { prompt: String, } -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct CursorAgentRunnerRequest { - api_key: String, - workspace_root: String, - model: String, - reasoning: String, - prompt: String, -} - #[derive(Serialize)] #[serde(rename_all = "camelCase")] pub(crate) struct CursorAgentPromptResponse { @@ -32,18 +30,7 @@ 
pub(crate) struct CursorAgentPromptResponse { events: Vec, } -#[derive(Deserialize)] -#[serde(tag = "type", rename_all = "lowercase")] -enum CursorAgentRunnerLine { - Event { text: String }, - Result { content: String }, -} - -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct CursorModelsRunnerRequest { - api_key: Option, -} +const CODEX_REASONING_PARAMETER_ID: &str = "reasoning"; #[tauri::command] pub(crate) async fn run_cursor_agent_prompt( @@ -51,133 +38,196 @@ pub(crate) async fn run_cursor_agent_prompt( ) -> Result { tauri::async_runtime::spawn_blocking(move || run_cursor_agent_prompt_sync(payload)) .await - .map_err(|error| format!("Unable to join Cursor SDK runner task: {error}"))? + .map_err(|error| format!("Unable to join Sandcastle runtime task: {error}"))? } fn run_cursor_agent_prompt_sync( payload: CursorAgentPromptRequest, ) -> Result { if payload.prompt.trim().is_empty() { - return Err(String::from("Cursor prompt is required.")); + return Err(String::from("Sandcastle prompt is required.")); } - let api_key = read_cursor_api_key()? 
- .filter(|value| !value.trim().is_empty()) - .ok_or_else(|| String::from("Cursor API key is required."))?; - let workspace_root = canonicalize_existing_path(&PathBuf::from(payload.workspace_root.trim())) .map_err(|error| format!("Unable to resolve workspace root: {error}"))?; let app_root = resolve_app_root()?; - let runner_path = app_root.join("src").join("cursorAgentRunner.ts"); + let docker = DockerRuntime::detect()?; + let image = ensure_sandcastle_image(&app_root, &docker)?; + let temp_dir = create_spec_generation_temp_dir("sandcastle-document")?; + let output_path = temp_dir.join("assistant-message.md"); + let mut command = docker.command(); + command + .arg("run") + .arg("--rm") + .arg("-i") + .arg("-v") + .arg(docker_mount_arg( + &docker, + &workspace_root, + "/home/agent/workspace", + "ro", + )) + .arg("-v") + .arg(docker_mount_arg( + &docker, + &temp_dir, + "/home/agent/output", + "rw", + )) + .arg("-w") + .arg("/home/agent/workspace") + .env("NO_COLOR", "1") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + configure_codex_auth(&mut command, &docker)?; + + command + .arg(image) + .arg("codex") + .arg("exec") + .arg("--color") + .arg("never") + .arg("--skip-git-repo-check") + .arg("--sandbox") + .arg("read-only") + .arg("--model") + .arg(payload.model) + .arg("--config") + .arg(format!( + "model_reasoning_effort=\"{}\"", + map_codex_reasoning(&payload.reasoning) + )) + .arg("--output-last-message") + .arg("/home/agent/output/assistant-message.md"); + + let output = run_command_with_stdin(&mut command, "Sandcastle Runtime", &payload.prompt)?; + + if !output.status.success() { + let _ = std::fs::remove_dir_all(&temp_dir); + return Err(format_command_failure("Sandcastle Runtime", &output)); + } + + let content = std::fs::read_to_string(&output_path).or_else(|_| { + let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); + + if stdout.is_empty() { + Err(std::io::Error::other( + "The Sandcastle Runtime returned 
no assistant content.", + )) + } else { + Ok(stdout) + } + }); + let _ = std::fs::remove_dir_all(&temp_dir); - if !runner_path.exists() { + Ok(CursorAgentPromptResponse { + content: content.map_err(|error| format!("Unable to read Sandcastle output: {error}"))?, + events: vec![String::from("Sandcastle Runtime completed the Codex turn.")], + }) +} + +fn ensure_sandcastle_image(app_root: &Path, docker: &DockerRuntime) -> Result { + let dockerfile = app_root.join("src").join("sandcastle").join("Dockerfile"); + + if !dockerfile.exists() { return Err(format!( - "Cursor SDK runner was not found at {}.", - runner_path.display() + "Sandcastle Dockerfile was not found at {}.", + dockerfile.display() )); } - let bun_path = - which::which("bun").map_err(|error| format!("Unable to find Bun on PATH: {error}"))?; - let request = CursorAgentRunnerRequest { - api_key, - workspace_root: workspace_root.display().to_string(), - model: payload.model, - reasoning: payload.reasoning, - prompt: payload.prompt, - }; - let request_json = serde_json::to_vec(&request) - .map_err(|error| format!("Unable to prepare Cursor SDK request: {error}"))?; - let mut child = Command::new(bun_path) - .arg(&runner_path) - .current_dir(&app_root) + let image = "specforge-sandcastle-runtime:latest"; + let mut command = docker.command(); + let output = command + .args(sandcastle_build_args(docker, image, &dockerfile, app_root)) .env("NO_COLOR", "1") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .map_err(|error| format!("Unable to start the Bun Cursor SDK runner: {error}"))?; - - let mut stdin = child - .stdin - .take() - .ok_or_else(|| String::from("Unable to open the Cursor SDK runner input."))?; - stdin - .write_all(&request_json) - .map_err(|error| format!("Unable to send the Cursor SDK request: {error}"))?; - drop(stdin); - - let output = child - .wait_with_output() - .map_err(|error| format!("Unable to read Cursor SDK runner output: {error}"))?; - let stdout = 
String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); + .output() + .map_err(|error| format!("Unable to build the Sandcastle runtime image: {error}"))?; if !output.status.success() { - return Err(format_process_failure(&stderr, &stdout)); + return Err(format_command_failure("Sandcastle image build", &output)); + } + + Ok(String::from(image)) +} + +fn configure_codex_auth(command: &mut Command, docker: &DockerRuntime) -> Result<(), String> { + if let Some(api_key) = read_cursor_api_key()?.filter(|value| !value.trim().is_empty()) { + command.arg("-e").arg(format!("OPENAI_API_KEY={api_key}")); + return Ok(()); } - parse_runner_output(&stdout) + let codex_home = local_codex_auth_dir() + .filter(|path| path.exists()) + .ok_or_else(|| { + String::from("Codex authentication is required before running Sandcastle.") + })?; + command.arg("-v").arg(docker_mount_arg( + docker, + &codex_home, + "/home/agent/.codex", + "ro", + )); + + Ok(()) +} + +fn local_codex_auth_dir() -> Option { + std::env::var_os("CODEX_HOME") + .map(PathBuf::from) + .or_else(|| std::env::var_os("USERPROFILE").map(|home| PathBuf::from(home).join(".codex"))) + .or_else(|| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(".codex"))) } #[tauri::command] pub(crate) async fn list_cursor_models() -> Result, String> { tauri::async_runtime::spawn_blocking(list_cursor_models_sync) .await - .map_err(|error| format!("Unable to join Cursor model runner task: {error}"))? + .map_err(|error| format!("Unable to join Codex model discovery task: {error}"))? 
} fn list_cursor_models_sync() -> Result, String> { - let api_key = read_cursor_api_key()?; - let app_root = resolve_app_root()?; - let runner_path = app_root.join("src").join("cursorModelsRunner.ts"); + let codex_path = which::which("codex") + .map_err(|error| format!("Unable to find Codex CLI on PATH: {error}"))?; + let live_output = Command::new(&codex_path) + .args(["debug", "models"]) + .env("NO_COLOR", "1") + .output() + .map_err(|error| format!("Unable to start Codex model discovery: {error}"))?; - if !runner_path.exists() { - return Err(format!( - "Cursor SDK model runner was not found at {}.", - runner_path.display() - )); + if live_output.status.success() { + let stdout = String::from_utf8_lossy(&live_output.stdout); + let models = parse_codex_models_output(&stdout); + + if !models.is_empty() { + return Ok(models); + } } - let bun_path = - which::which("bun").map_err(|error| format!("Unable to find Bun on PATH: {error}"))?; - let request_json = serde_json::to_vec(&CursorModelsRunnerRequest { api_key }) - .map_err(|error| format!("Unable to prepare Cursor model request: {error}"))?; - let mut child = Command::new(bun_path) - .arg(&runner_path) - .current_dir(&app_root) + let bundled_output = Command::new(&codex_path) + .args(["debug", "models", "--bundled"]) .env("NO_COLOR", "1") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .map_err(|error| format!("Unable to start the Bun Cursor model runner: {error}"))?; - - let mut stdin = child - .stdin - .take() - .ok_or_else(|| String::from("Unable to open the Cursor model runner input."))?; - stdin - .write_all(&request_json) - .map_err(|error| format!("Unable to send the Cursor model request: {error}"))?; - drop(stdin); - - let output = child - .wait_with_output() - .map_err(|error| format!("Unable to read Cursor model runner output: {error}"))?; - let stdout = String::from_utf8_lossy(&output.stdout); - let stderr = String::from_utf8_lossy(&output.stderr); + .output() + 
.map_err(|error| format!("Unable to start bundled Codex model discovery: {error}"))?; + let bundled_stdout = String::from_utf8_lossy(&bundled_output.stdout); - if !output.status.success() { - return Err(format_process_failure(&stderr, &stdout)); + if bundled_output.status.success() { + let models = parse_codex_models_output(&bundled_stdout); + + if !models.is_empty() { + return Ok(models); + } } - serde_json::from_str(stdout.trim()).map_err(|error| { - format!( - "Cursor SDK model runner returned malformed output: {error}. Output: {}", - stdout.trim() - ) - }) + let live_stderr = String::from_utf8_lossy(&live_output.stderr); + let bundled_stderr = String::from_utf8_lossy(&bundled_output.stderr); + Err(format!( + "Codex model discovery returned no models. live: {} bundled: {}", + live_stderr.trim(), + bundled_stderr.trim() + )) } fn resolve_app_root() -> Result { @@ -187,38 +237,246 @@ fn resolve_app_root() -> Result { .ok_or_else(|| String::from("Unable to resolve the SpecForge application root.")) } -fn parse_runner_output(stdout: &str) -> Result { - let mut events = Vec::new(); - let mut content = None; +fn parse_codex_models_output(stdout: &str) -> Vec { + let trimmed = stdout.trim(); - for line in stdout.lines().filter(|line| !line.trim().is_empty()) { - let parsed: CursorAgentRunnerLine = serde_json::from_str(line).map_err(|error| { - format!("Cursor SDK runner returned malformed output: {error}. 
Output line: {line}") - })?; + if trimmed.is_empty() { + return Vec::new(); + } - match parsed { - CursorAgentRunnerLine::Event { text } => events.push(text), - CursorAgentRunnerLine::Result { content: result } => content = Some(result), - } + if let Ok(value) = serde_json::from_str::(trimmed) { + return parse_codex_models_json(&value); + } + + parse_codex_models_lines(trimmed) +} + +fn parse_codex_models_json(value: &serde_json::Value) -> Vec { + let model_values = value + .as_array() + .cloned() + .or_else(|| { + value + .get("models") + .and_then(|models| models.as_array().cloned()) + }) + .or_else(|| { + value + .get("availableModels") + .and_then(|models| models.as_array().cloned()) + }) + .unwrap_or_default(); + + if model_values.is_empty() { + return parse_codex_models_object(value); + } + + model_values + .iter() + .filter_map(|entry| { + if let Some(id) = entry.as_str() { + return Some(build_codex_model(id, None)); + } + + let id = entry + .get("id") + .or_else(|| entry.get("model")) + .or_else(|| entry.get("modelId")) + .or_else(|| entry.get("name")) + .and_then(|value| value.as_str())?; + let label = entry + .get("label") + .or_else(|| entry.get("name")) + .and_then(|value| value.as_str()); + + Some(build_codex_model(id, label)) + }) + .collect() +} + +fn parse_codex_models_object(value: &serde_json::Value) -> Vec { + let Some(object) = value.as_object() else { + return Vec::new(); + }; + + object + .iter() + .filter_map(|(id, entry)| { + if !looks_like_codex_model_id(id) { + return None; + } + + let label = entry + .get("label") + .or_else(|| entry.get("displayName")) + .and_then(|value| value.as_str()); + + Some(build_codex_model(id, label)) + }) + .collect() +} + +fn looks_like_codex_model_id(value: &str) -> bool { + let lower = value.to_ascii_lowercase(); + lower.starts_with("gpt-") || lower.starts_with("o3") || lower.starts_with("codex") +} + +fn parse_codex_models_lines(stdout: &str) -> Vec { + stdout + .lines() + .filter_map(|line| { + let id = 
line + .split_whitespace() + .find(|part| { + part.chars() + .any(|character| character.is_ascii_alphanumeric()) + })? + .trim_matches(|character: char| matches!(character, '-' | '*' | ',' | ':')); + + if id.is_empty() || id.eq_ignore_ascii_case("model") || id.eq_ignore_ascii_case("id") { + return None; + } + + Some(build_codex_model(id, None)) + }) + .collect() +} + +fn build_codex_model(id: &str, label: Option<&str>) -> CursorModel { + CursorModel { + id: id.to_string(), + label: label + .filter(|value| !value.trim().is_empty()) + .map(str::to_string) + .unwrap_or_else(|| format_codex_model_label(id)), + description: Some(String::from( + "Codex CLI model available through Sandcastle.", + )), + parameters: Some(vec![crate::models::CursorModelParameter { + id: String::from(CODEX_REASONING_PARAMETER_ID), + label: String::from("Reasoning"), + values: ["low", "medium", "high", "xhigh"] + .iter() + .map(|value| crate::models::CursorModelParameterValue { + value: (*value).to_string(), + label: format_reasoning_label(value), + }) + .collect(), + }]), + } +} + +fn format_codex_model_label(id: &str) -> String { + let normalized = id.trim(); + + if let Some(label) = format_gpt_model_label(normalized) { + return label; + } + + normalized + .split(['-', '_']) + .filter(|part| !part.is_empty()) + .map(format_model_label_part) + .collect::>() + .join(" ") +} + +fn format_gpt_model_label(id: &str) -> Option { + let mut parts = id.split(['-', '_']).filter(|part| !part.is_empty()); + let prefix = parts.next()?; + + if !prefix.eq_ignore_ascii_case("gpt") { + return None; } - let content = content - .filter(|result| !result.trim().is_empty()) - .ok_or_else(|| String::from("Cursor SDK runner returned no generated content."))?; + let version = parts.next()?; + let mut label = format!("GPT-{version}"); + let suffix = parts + .map(format_model_label_part) + .collect::>() + .join(" "); + + if !suffix.is_empty() { + label.push(' '); + label.push_str(&suffix); + } - 
Ok(CursorAgentPromptResponse { content, events }) + Some(label) } -fn format_process_failure(stderr: &str, stdout: &str) -> String { - let mut details = stderr.trim().to_string(); +fn format_model_label_part(part: &str) -> String { + let upper = part.to_ascii_uppercase(); + if matches!(upper.as_str(), "GPT" | "API") + || part + .chars() + .all(|character| character.is_ascii_digit() || character == '.') + { + upper + } else { + let mut characters = part.chars(); + match characters.next() { + Some(first) => format!("{}{}", first.to_uppercase(), characters.as_str()), + None => String::new(), + } + } +} - if details.is_empty() { - details = stdout.trim().to_string(); +fn format_reasoning_label(value: &str) -> String { + match value { + "xhigh" => String::from("Extra High"), + "high" => String::from("High"), + "medium" => String::from("Medium"), + "low" => String::from("Low"), + _ => value.to_string(), } +} + +#[cfg(test)] +mod tests { + use super::parse_codex_models_output; - if details.is_empty() { - details = String::from("The Bun process exited without an error message."); + #[test] + fn parses_json_model_array() { + let models = parse_codex_models_output(r#"[{"id":"gpt-5.2","label":"GPT-5.2"}]"#); + + assert_eq!(models.len(), 1); + assert_eq!(models[0].id, "gpt-5.2"); + assert_eq!(models[0].label, "GPT-5.2"); } - format!("Cursor SDK runner failed: {details}") + #[test] + fn parses_models_property() { + let models = parse_codex_models_output(r#"{"models":["gpt-5.4-mini"]}"#); + + assert_eq!(models.len(), 1); + assert_eq!(models[0].id, "gpt-5.4-mini"); + } + + #[test] + fn parses_keyed_model_object() { + let models = parse_codex_models_output( + r#"{"gpt-5.4-mini":{"description":"fast"},"gpt-5.2":{"label":"GPT-5.2"}}"#, + ); + + assert_eq!(models.len(), 2); + let mini_model = models + .iter() + .find(|model| model.id == "gpt-5.4-mini") + .expect("expected gpt-5.4-mini"); + let default_model = models + .iter() + .find(|model| model.id == "gpt-5.2") + .expect("expected 
gpt-5.2"); + + assert_eq!(mini_model.label, "GPT-5.4 Mini"); + assert_eq!(default_model.label, "GPT-5.2"); + } + + #[test] + fn parses_line_output() { + let models = parse_codex_models_output("gpt-5.2\n- gpt-5.4"); + + assert_eq!(models.len(), 2); + assert_eq!(models[1].id, "gpt-5.4"); + } } diff --git a/src-tauri/src/docker.rs b/src-tauri/src/docker.rs new file mode 100644 index 0000000..6a064ea --- /dev/null +++ b/src-tauri/src/docker.rs @@ -0,0 +1,342 @@ +use std::{ + path::{Path, PathBuf}, + process::{Command, Stdio}, + time::{Duration, Instant}, +}; + +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) enum DockerRuntime { + Host { binary: PathBuf }, + Wsl { distribution: Option }, +} + +impl DockerRuntime { + pub(crate) fn detect() -> Result { + let host_result = detect_host_docker(); + + if let Ok(runtime) = host_result { + return Ok(runtime); + } + + #[cfg(windows)] + { + let wsl_result = detect_wsl_docker(); + + if let Ok(runtime) = wsl_result { + return Ok(runtime); + } + + Err(format!( + "{} WSL Docker probe also failed: {}", + host_result + .err() + .unwrap_or_else(|| String::from("Host Docker probe failed.")), + wsl_result + .err() + .unwrap_or_else(|| String::from("unknown error")) + )) + } + + #[cfg(not(windows))] + { + Err(host_result + .err() + .unwrap_or_else(|| String::from("Docker is unavailable."))) + } + } + + pub(crate) fn command(&self) -> Command { + match self { + Self::Host { binary } => Command::new(binary), + Self::Wsl { distribution } => { + let mut command = Command::new("wsl"); + if let Some(distribution) = distribution { + command.args(["-d", distribution]); + } + command.args(["--", "docker"]); + command + } + } + } + + pub(crate) fn label(&self) -> String { + match self { + Self::Host { .. 
} => String::from("host Docker"), + Self::Wsl { + distribution: Some(distribution), + } => format!("WSL Docker ({distribution})"), + Self::Wsl { distribution: None } => String::from("default WSL Docker"), + } + } +} + +fn detect_host_docker() -> Result { + let binary = which::which("docker") + .map_err(|error| format!("Docker CLI was not found on the host PATH: {error}"))?; + let status = run_status_with_timeout( + Command::new(&binary) + .arg("info") + .stdout(Stdio::null()) + .stderr(Stdio::null()), + Duration::from_secs(5), + )?; + + if status.success() { + Ok(DockerRuntime::Host { binary }) + } else { + Err(format!( + "Host Docker CLI was found at {}, but the daemon probe exited with status {status}.", + binary.display() + )) + } +} + +#[cfg(windows)] +fn detect_wsl_docker() -> Result { + let default_result = probe_wsl_docker(None); + + if default_result.is_ok() { + return Ok(DockerRuntime::Wsl { distribution: None }); + } + + for distribution in wsl_distributions()? { + if probe_wsl_docker(Some(&distribution)).is_ok() { + return Ok(DockerRuntime::Wsl { + distribution: Some(distribution), + }); + } + } + + Err(format!( + "default WSL Docker probe failed: {}", + default_result + .err() + .unwrap_or_else(|| String::from("unknown error")) + )) +} + +#[cfg(windows)] +fn probe_wsl_docker(distribution: Option<&str>) -> Result<(), String> { + let mut command = Command::new("wsl"); + if let Some(distribution) = distribution { + command.args(["-d", distribution]); + } + command + .args(["--", "docker", "info"]) + .stdout(Stdio::null()) + .stderr(Stdio::null()); + + let status = run_status_with_timeout(&mut command, Duration::from_secs(10))?; + + if status.success() { + Ok(()) + } else { + Err(format!("probe exited with status {status}.")) + } +} + +#[cfg(windows)] +fn wsl_distributions() -> Result, String> { + let output = Command::new("wsl") + .args(["-l", "-q"]) + .output() + .map_err(|error| format!("Unable to list WSL distributions: {error}"))?; + + if 
!output.status.success() { + return Err(format!( + "Unable to list WSL distributions: {}", + output.status + )); + } + + Ok(parse_wsl_distribution_output(&output.stdout)) +} + +fn parse_wsl_distribution_output(output: &[u8]) -> Vec { + decode_wsl_output(output) + .lines() + .map(|line| line.trim_matches(['\0', '\r', '\n', ' ', '\t'])) + .filter(|line| !line.is_empty()) + .filter(|line| { + !line.eq_ignore_ascii_case("docker-desktop") + && !line.eq_ignore_ascii_case("docker-desktop-data") + }) + .map(str::to_string) + .collect() +} + +fn decode_wsl_output(output: &[u8]) -> String { + if output.len() >= 2 + && output + .chunks(2) + .all(|chunk| chunk.get(1).copied() == Some(0)) + { + let units = output + .chunks_exact(2) + .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) + .collect::>(); + String::from_utf16_lossy(&units) + } else { + String::from_utf8_lossy(output).to_string() + } +} + +pub(crate) fn docker_path_arg(runtime: &DockerRuntime, path: &Path) -> String { + match runtime { + DockerRuntime::Host { .. } => path.display().to_string(), + DockerRuntime::Wsl { .. 
} => windows_path_to_wsl(path) + .unwrap_or_else(|| path.display().to_string().replace('\\', "/")), + } +} + +pub(crate) fn docker_mount_arg( + runtime: &DockerRuntime, + host_path: &Path, + container_path: &str, + mode: &str, +) -> String { + format!( + "{}:{container_path}:{mode}", + docker_path_arg(runtime, host_path) + ) +} + +pub(crate) fn sandcastle_build_args( + runtime: &DockerRuntime, + image: &str, + dockerfile: &Path, + app_root: &Path, +) -> Vec { + vec![ + String::from("build"), + String::from("-t"), + image.to_string(), + String::from("-f"), + docker_path_arg(runtime, dockerfile), + docker_path_arg(runtime, app_root), + ] +} + +fn windows_path_to_wsl(path: &Path) -> Option { + let raw = path.display().to_string().replace('\\', "/"); + let mut chars = raw.chars(); + let drive = chars.next()?; + let colon = chars.next()?; + let slash = chars.next()?; + + if !drive.is_ascii_alphabetic() || colon != ':' || slash != '/' { + return None; + } + + Some(format!( + "/mnt/{}/{}", + drive.to_ascii_lowercase(), + chars.as_str() + )) +} + +fn run_status_with_timeout( + command: &mut Command, + timeout: Duration, +) -> Result { + let mut child = command.spawn().map_err(|error| error.to_string())?; + let started_at = Instant::now(); + + loop { + match child.try_wait().map_err(|error| error.to_string())? 
{ + Some(status) => return Ok(status), + None if started_at.elapsed() >= timeout => { + let _ = child.kill(); + let _ = child.wait(); + return Err(format!("process timed out after {}s", timeout.as_secs())); + } + None => std::thread::sleep(Duration::from_millis(50)), + } + } +} + +#[cfg(test)] +mod tests { + use super::{DockerRuntime, docker_mount_arg, docker_path_arg, sandcastle_build_args}; + use std::path::{Path, PathBuf}; + + #[test] + fn host_build_args_include_explicit_context() { + let args = sandcastle_build_args( + &DockerRuntime::Host { + binary: PathBuf::from("docker"), + }, + "specforge-sandcastle-runtime:latest", + Path::new(r"C:\repo\src\sandcastle\Dockerfile"), + Path::new(r"C:\repo"), + ); + + assert_eq!(args.last().map(String::as_str), Some(r"C:\repo")); + } + + #[test] + fn host_paths_keep_unix_paths_for_macos_and_linux() { + let path = docker_path_arg( + &DockerRuntime::Host { + binary: PathBuf::from("docker"), + }, + Path::new("/Users/brehm/SpecForge"), + ); + + assert_eq!(path, "/Users/brehm/SpecForge"); + } + + #[test] + fn wsl_paths_convert_windows_drive_paths_to_mnt_paths() { + let converted = docker_path_arg( + &DockerRuntime::Wsl { distribution: None }, + Path::new(r"C:\Users\brehm\Project"), + ); + + assert_eq!(converted, "/mnt/c/Users/brehm/Project"); + } + + #[test] + fn wsl_mount_args_use_converted_host_paths() { + let mount = docker_mount_arg( + &DockerRuntime::Wsl { + distribution: Some(String::from("Ubuntu")), + }, + Path::new(r"D:\Work Space\SpecForge"), + "/home/agent/workspace", + "ro", + ); + + assert_eq!( + mount, + "/mnt/d/Work Space/SpecForge:/home/agent/workspace:ro" + ); + } + + #[test] + fn distro_wsl_runtime_keeps_wsl_path_conversion() { + let converted = docker_path_arg( + &DockerRuntime::Wsl { + distribution: Some(String::from("Ubuntu-24.04")), + }, + Path::new(r"C:\repo"), + ); + + assert_eq!(converted, "/mnt/c/repo"); + } + + #[test] + fn parses_wsl_distribution_output_and_skips_docker_internal_distros() { + let 
output = + "Ubuntu\0\r\0\n\0docker-desktop\0\r\0\n\0docker-desktop-data\0\r\0\n\0Debian\0\r\0\n\0"; + let bytes = output + .encode_utf16() + .flat_map(u16::to_le_bytes) + .collect::>(); + + assert_eq!( + super::parse_wsl_distribution_output(&bytes), + vec![String::from("Ubuntu"), String::from("Debian")] + ); + } +} diff --git a/src-tauri/src/documents.rs b/src-tauri/src/documents.rs index d9ee542..1ab416f 100644 --- a/src-tauri/src/documents.rs +++ b/src-tauri/src/documents.rs @@ -51,6 +51,34 @@ pub(crate) fn save_workspace_document( write_generated_workspace_document(&workspace_root, &output_path, content, &field_name) } +#[tauri::command] +pub(crate) fn save_document_preview( + workspace_root: String, + target: String, + content: String, +) -> Result { + write_document_preview(&workspace_root, &target, content) +} + +#[tauri::command] +pub(crate) fn delete_document_preview( + workspace_root: String, + target: String, +) -> Result<(), String> { + let preview_path = resolve_document_preview_path(&workspace_root, &target)?; + + if preview_path.exists() { + fs::remove_file(&preview_path).map_err(|error| { + format!( + "Unable to delete document preview {}: {error}", + preview_path.display() + ) + })?; + } + + Ok(()) +} + pub(crate) fn load_configured_workspace_document( workspace_root: &Path, relative_path: &str, @@ -75,6 +103,37 @@ pub(crate) fn load_configured_workspace_document( })) } +pub(crate) fn load_document_preview( + workspace_root: &Path, + target: &str, +) -> Result, String> { + let preview_path = workspace_root + .join(".specforge") + .join("previews") + .join(preview_file_name(target)?); + + if !preview_path.exists() { + return Ok(None); + } + + let content = fs::read_to_string(&preview_path).map_err(|error| { + format!( + "Unable to read document preview {}: {error}", + preview_path.display() + ) + })?; + + Ok(Some(WorkspaceDocument { + content, + source_path: preview_path.display().to_string(), + file_name: preview_path + .file_name() + 
.and_then(|value| value.to_str()) + .unwrap_or("preview.md") + .to_string(), + })) +} + pub(crate) fn write_generated_workspace_document( workspace_root: &str, output_path: &str, @@ -140,6 +199,76 @@ pub(crate) fn write_generated_workspace_document( }) } +pub(crate) fn write_document_preview( + workspace_root: &str, + target: &str, + generated_content: String, +) -> Result { + let preview_path = resolve_document_preview_path(workspace_root, target)?; + let rendered_document = format!( + "{}\n", + strip_wrapping_code_fence(generated_content.trim()).trim() + ); + + if rendered_document.trim().is_empty() { + return Err(String::from( + "The AI returned an empty document preview. Adjust the prompt and try again.", + )); + } + + if let Some(parent_directory) = preview_path.parent() { + fs::create_dir_all(parent_directory).map_err(|error| { + format!( + "Unable to create the document preview folder {}: {error}", + parent_directory.display() + ) + })?; + } + + fs::write(&preview_path, rendered_document.as_bytes()).map_err(|error| { + format!( + "Unable to save the document preview to {}: {error}", + preview_path.display() + ) + })?; + + Ok(WorkspaceDocument { + content: rendered_document, + source_path: preview_path.display().to_string(), + file_name: preview_path + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or("preview.md") + .to_string(), + }) +} + +fn resolve_document_preview_path(workspace_root: &str, target: &str) -> Result { + let trimmed_root = workspace_root.trim(); + + if trimmed_root.is_empty() { + return Err(String::from("A workspace root is required.")); + } + + let canonical_root = canonicalize_existing_path(&PathBuf::from(trimmed_root)) + .map_err(|error| format!("Unable to resolve workspace root {}: {error}", trimmed_root))?; + + Ok(canonical_root + .join(".specforge") + .join("previews") + .join(preview_file_name(target)?)) +} + +fn preview_file_name(target: &str) -> Result<&'static str, String> { + match target { + "prd" => Ok("prd.md"), + 
"spec" => Ok("spec.md"), + _ => Err(String::from( + "Document preview target must be `prd` or `spec`.", + )), + } +} + pub(crate) fn parse_workspace_document(path: &Path) -> Result { match path .extension() diff --git a/src-tauri/src/environment.rs b/src-tauri/src/environment.rs index 0e0b4f8..47a0248 100644 --- a/src-tauri/src/environment.rs +++ b/src-tauri/src/environment.rs @@ -1,8 +1,17 @@ +use crate::docker::DockerRuntime; use crate::models::{CliStatus, EnvironmentStatus}; use crate::paths::resolve_override_path; use crate::secrets::cursor_key_status; use std::path::Path; use std::process::Command; +#[cfg(windows)] +use std::process::Output; +#[cfg(windows)] +use std::process::Stdio; +#[cfg(windows)] +use std::thread; +#[cfg(windows)] +use std::time::{Duration, Instant}; use std::time::{SystemTime, UNIX_EPOCH}; #[tauri::command] @@ -10,6 +19,8 @@ pub(crate) fn run_environment_scan() -> Result { Ok(EnvironmentStatus { scanned_at: current_timestamp(), cursor: cursor_key_status(), + codex: inspect_binary("Codex CLI", "codex", None), + docker: inspect_docker(), git: inspect_binary("Git", "git", None), }) } @@ -116,3 +127,110 @@ fn probe_binary_version(path: &Path) -> Result { "Binary detected. Version probe returned no output.", )) } + +fn inspect_docker() -> CliStatus { + match DockerRuntime::detect() { + Ok(runtime) => { + let label = runtime.label(); + CliStatus { + name: String::from("Docker"), + status: String::from("found"), + path: Some(label.clone()), + detail: format!("Docker daemon is reachable through {label}."), + } + } + Err(error) => CliStatus { + name: String::from("Docker"), + status: String::from("unavailable"), + path: which::which("docker") + .ok() + .map(|path| path.display().to_string()), + detail: docker_unavailable_detail(&error, windows_docker_service_hint()), + }, + } +} + +fn docker_unavailable_detail(error: &str, service_hint: Option) -> String { + let mut detail = format!( + "Docker CLI was found, but the Docker engine did not respond. 
Docker Desktop may still be starting or its WSL engine may be unhealthy. Probe failed: {error}" + ); + + if let Some(hint) = service_hint { + detail.push(' '); + detail.push_str(&hint); + } + + detail +} + +#[cfg(windows)] +fn windows_docker_service_hint() -> Option { + let output = run_output_with_timeout( + Command::new("sc") + .arg("query") + .arg("com.docker.service") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()), + Duration::from_secs(2), + ) + .ok()?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{stdout}\n{stderr}"); + + if combined.contains("STOPPED") { + return Some(String::from( + "Windows also reports com.docker.service is stopped. Restart Docker Desktop as an administrator or start the Docker Desktop Service, then refresh the scan.", + )); + } + + if combined.contains("RUNNING") { + return Some(String::from( + "Windows reports com.docker.service is running, so the issue is likely in Docker Desktop's WSL engine or daemon startup.", + )); + } + + None +} + +#[cfg(not(windows))] +fn windows_docker_service_hint() -> Option { + None +} + +#[cfg(windows)] +fn run_output_with_timeout(command: &mut Command, timeout: Duration) -> Result { + let mut child = command.spawn().map_err(|error| error.to_string())?; + let started_at = Instant::now(); + + loop { + match child.try_wait().map_err(|error| error.to_string())? 
{ + Some(_) => return child.wait_with_output().map_err(|error| error.to_string()), + None if started_at.elapsed() >= timeout => { + let _ = child.kill(); + let _ = child.wait(); + return Err(format!("process timed out after {}s", timeout.as_secs())); + } + None => thread::sleep(Duration::from_millis(50)), + } + } +} + +#[cfg(test)] +mod tests { + use super::docker_unavailable_detail; + + #[test] + fn docker_unavailable_detail_includes_windows_service_hint_when_present() { + let detail = docker_unavailable_detail( + "process timed out after 5s", + Some(String::from( + "Windows reports com.docker.service is stopped.", + )), + ); + + assert!(detail.contains("process timed out after 5s")); + assert!(detail.contains("com.docker.service is stopped")); + } +} diff --git a/src-tauri/src/generation.rs b/src-tauri/src/generation.rs index 85b9815..59db3fd 100644 --- a/src-tauri/src/generation.rs +++ b/src-tauri/src/generation.rs @@ -1,7 +1,10 @@ use std::fs; -use std::io::Write; +use std::io::{BufRead, BufReader, Write}; use std::path::PathBuf; use std::process::Command; +use std::sync::mpsc; +use std::thread; +use std::time::Duration; use std::time::{SystemTime, UNIX_EPOCH}; pub(crate) fn create_spec_generation_temp_dir(prefix: &str) -> Result { @@ -25,6 +28,18 @@ pub(crate) fn run_command_with_stdin( display_name: &str, stdin_payload: &str, ) -> Result { + run_command_with_stdin_and_stream(command, display_name, stdin_payload, |_| {}) +} + +pub(crate) fn run_command_with_stdin_and_stream( + command: &mut Command, + display_name: &str, + stdin_payload: &str, + mut on_line: F, +) -> Result +where + F: FnMut(&str), +{ let mut child = command .spawn() .map_err(|error| format!("Unable to start {display_name}: {error}"))?; @@ -38,9 +53,76 @@ pub(crate) fn run_command_with_stdin( .map_err(|error| format!("Unable to send the prompt to {display_name}: {error}"))?; drop(stdin); - child - .wait_with_output() - .map_err(|error| format!("{display_name} exited unexpectedly: {error}")) + 
let stdout = child + .stdout + .take() + .ok_or_else(|| format!("{display_name} did not expose stdout."))?; + let stderr = child + .stderr + .take() + .ok_or_else(|| format!("{display_name} did not expose stderr."))?; + let (line_sender, line_receiver) = mpsc::channel::(); + let stdout_reader = spawn_stream_reader(stdout, line_sender.clone()); + let stderr_reader = spawn_stream_reader(stderr, line_sender); + + let status = loop { + while let Ok(line) = line_receiver.try_recv() { + on_line(&line); + } + + match child + .try_wait() + .map_err(|error| format!("{display_name} exited unexpectedly: {error}"))? + { + Some(status) => break status, + None => thread::sleep(Duration::from_millis(50)), + } + }; + + while let Ok(line) = line_receiver.try_recv() { + on_line(&line); + } + + let stdout = stdout_reader + .join() + .map_err(|_| format!("{display_name} stdout reader panicked."))?; + let stderr = stderr_reader + .join() + .map_err(|_| format!("{display_name} stderr reader panicked."))?; + + Ok(std::process::Output { + status, + stdout, + stderr, + }) +} + +fn spawn_stream_reader( + stream: R, + line_sender: mpsc::Sender, +) -> thread::JoinHandle> { + thread::spawn(move || { + let mut reader = BufReader::new(stream); + let mut collected = Vec::new(); + let mut buffer = Vec::new(); + + loop { + buffer.clear(); + match reader.read_until(b'\n', &mut buffer) { + Ok(0) => break, + Ok(_) => { + collected.extend_from_slice(&buffer); + let line = String::from_utf8_lossy(&buffer).trim_end().to_string(); + if !line.is_empty() { + let _ = line_sender.send(line); + } + } + Err(_) => break, + } + } + + collected + }) } pub(crate) fn format_process_failure(display_name: &str, output: &std::process::Output) -> String { diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index fddfdde..821cadb 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -2,6 +2,7 @@ mod agent; mod chat; mod constants; mod cursor_agent; +mod docker; mod documents; mod environment; mod 
external_editors; @@ -20,7 +21,10 @@ use chat::{ rename_chat_session, save_chat_session, send_chat_message, stop_chat_session, }; use cursor_agent::{list_cursor_models, run_cursor_agent_prompt}; -use documents::{parse_document, pick_document, save_workspace_document}; +use documents::{ + delete_document_preview, parse_document, pick_document, save_document_preview, + save_workspace_document, +}; use environment::run_environment_scan; use external_editors::{list_external_editors, open_workspace_file_in_editor}; use git::git_get_diff; @@ -41,6 +45,8 @@ pub fn run() { parse_document, pick_document, save_workspace_document, + save_document_preview, + delete_document_preview, pick_project_folder, load_project_context, save_project_settings, diff --git a/src-tauri/src/models.rs b/src-tauri/src/models.rs index f05f532..7c6ee3b 100644 --- a/src-tauri/src/models.rs +++ b/src-tauri/src/models.rs @@ -61,6 +61,8 @@ pub(crate) struct CliStatus { pub(crate) struct EnvironmentStatus { pub(crate) scanned_at: String, pub(crate) cursor: CliStatus, + pub(crate) codex: CliStatus, + pub(crate) docker: CliStatus, pub(crate) git: CliStatus, } @@ -116,6 +118,10 @@ pub(crate) struct CursorModel { #[derive(Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ProjectSettings { + #[serde(default = "default_agent_provider")] + pub(crate) agent_provider: String, + #[serde(default = "default_provider_auth_mode")] + pub(crate) provider_auth_mode: String, pub(crate) selected_model: String, pub(crate) selected_reasoning: String, #[serde(default, alias = "prdPrompt")] @@ -129,6 +135,14 @@ pub(crate) struct ProjectSettings { pub(crate) supporting_document_paths: Vec, } +fn default_agent_provider() -> String { + String::from("codex") +} + +fn default_provider_auth_mode() -> String { + String::from("subscription") +} + #[derive(Clone, Serialize)] #[serde(rename_all = "camelCase")] pub(crate) struct ProjectContextPayload { @@ -141,6 +155,8 @@ pub(crate) struct 
ProjectContextPayload { pub(crate) ignored_file_count: usize, pub(crate) prd_document: Option, pub(crate) spec_document: Option, + pub(crate) prd_preview: Option, + pub(crate) spec_preview: Option, pub(crate) chat_sessions: Vec, pub(crate) last_active_session_id: Option, } @@ -264,21 +280,6 @@ pub(crate) struct ChatEventPayload { pub(crate) summary: Option, } -#[derive(Clone)] -pub(crate) struct SimulatedStep { - pub(crate) delay_ms: u64, - pub(crate) line: String, - pub(crate) milestone: &'static str, - pub(crate) gate: bool, -} - -#[derive(Clone, Copy, PartialEq, Eq)] -pub(crate) enum StopState { - Continue, - StopRequested, - Replaced, -} - #[derive(Clone, Copy, PartialEq, Eq)] pub(crate) enum ApprovalWaitOutcome { Approved, diff --git a/src-tauri/src/project.rs b/src-tauri/src/project.rs index 9157319..cf1b37d 100644 --- a/src-tauri/src/project.rs +++ b/src-tauri/src/project.rs @@ -5,7 +5,7 @@ use crate::{ DEFAULT_PROJECT_PRD_PATH, DEFAULT_PROJECT_SPEC_PATH, DEFAULT_SPEC_AGENT_DESCRIPTION, SPECFORGE_SETTINGS_RELATIVE_PATH, }, - documents::load_configured_workspace_document, + documents::{load_configured_workspace_document, load_document_preview}, models::{ProjectContextPayload, ProjectSettings}, paths::{canonicalize_existing_path, normalize_relative_path}, state::{ScannedWorkspace, SharedState}, @@ -103,6 +103,8 @@ pub(crate) fn load_project_context_from_folder( load_project_settings_from_workspace_root(&context.root, default_settings)?; let prd_document = load_configured_workspace_document(&context.root, &settings.prd_path)?; let spec_document = load_configured_workspace_document(&context.root, &settings.spec_path)?; + let prd_preview = load_document_preview(&context.root, "prd")?; + let spec_preview = load_document_preview(&context.root, "spec")?; let chat_index = load_chat_session_index(&context.root)?; let mut active_workspace = state .workspace @@ -123,6 +125,8 @@ pub(crate) fn load_project_context_from_folder( ignored_file_count: 
result.ignored_file_count, prd_document, spec_document, + prd_preview, + spec_preview, chat_sessions: chat_index.sessions, last_active_session_id: chat_index.last_active_session_id, }) @@ -134,7 +138,9 @@ pub(crate) fn build_default_project_settings( spec_document: Option<&crate::models::WorkspaceDocument>, ) -> ProjectSettings { ProjectSettings { - selected_model: String::from("composer-2"), + agent_provider: String::from("codex"), + provider_auth_mode: String::from("subscription"), + selected_model: String::from("gpt-5.2"), selected_reasoning: String::from("medium"), prd_agent_description: String::from(DEFAULT_PRD_AGENT_DESCRIPTION), spec_agent_description: String::from(DEFAULT_SPEC_AGENT_DESCRIPTION), @@ -166,6 +172,8 @@ pub(crate) fn normalize_project_settings( normalize_project_model(&provided.selected_model, &defaults.selected_model)?; let selected_reasoning = normalize_project_reasoning(&provided.selected_reasoning, &defaults.selected_reasoning)?; + let agent_provider = normalize_agent_provider(&provided.agent_provider); + let provider_auth_mode = normalize_provider_auth_mode(&provided.provider_auth_mode); let normalized_prd_path = normalize_project_path_or_default(workspace_root, &provided.prd_path, &defaults.prd_path)?; let normalized_spec_path = normalize_project_path_or_default( @@ -180,6 +188,8 @@ pub(crate) fn normalize_project_settings( .collect::>(); Ok(ProjectSettings { + agent_provider, + provider_auth_mode, selected_model, selected_reasoning, prd_agent_description: if provided.prd_agent_description.trim().is_empty() { @@ -203,6 +213,18 @@ pub(crate) fn normalize_project_settings( }) } +pub(crate) fn normalize_agent_provider(_value: &str) -> String { + String::from("codex") +} + +pub(crate) fn normalize_provider_auth_mode(value: &str) -> String { + if value == "api_key" { + String::from("api_key") + } else { + String::from("subscription") + } +} + pub(crate) fn normalize_project_path_or_default( workspace_root: &Path, value: &str, diff --git 
a/src-tauri/src/secrets.rs b/src-tauri/src/secrets.rs index d659167..51666ed 100644 --- a/src-tauri/src/secrets.rs +++ b/src-tauri/src/secrets.rs @@ -1,46 +1,56 @@ use crate::models::CliStatus; use keyring_core::{Entry, Error as KeyringError}; +use std::path::PathBuf; -const CURSOR_KEY_SERVICE: &str = "SpecForge"; -const CURSOR_KEY_USER: &str = "cursor-api-key"; +const CODEX_KEY_SERVICE: &str = "SpecForge"; +const CODEX_KEY_USER: &str = "codex-api-key"; #[tauri::command] pub(crate) fn save_cursor_api_key(api_key: String) -> Result<(), String> { let trimmed_key = api_key.trim(); if trimmed_key.is_empty() { - return Err(String::from("Enter a Cursor API key before saving.")); + return Err(String::from("Enter a Codex API key before saving.")); } - cursor_key_entry()? + codex_key_entry()? .set_password(trimmed_key) - .map_err(|error| format!("Unable to save the Cursor API key: {error}")) + .map_err(|error| format!("Unable to save the Codex API key: {error}")) } #[tauri::command] pub(crate) fn delete_cursor_api_key() -> Result<(), String> { - match cursor_key_entry()?.delete_credential() { + match codex_key_entry()?.delete_credential() { Ok(()) | Err(KeyringError::NoEntry) => Ok(()), Err(error) => Err(format!("Unable to delete the Cursor API key: {error}")), } } pub(crate) fn cursor_key_status() -> CliStatus { + if local_codex_auth_dir().is_some_and(|path| path.exists()) { + return CliStatus { + name: String::from("Codex Provider"), + status: String::from("found"), + path: None, + detail: String::from("Local Codex subscription authentication was detected."), + }; + } + match read_cursor_api_key() { Ok(Some(_)) => CliStatus { - name: String::from("Cursor SDK"), + name: String::from("Codex Provider"), status: String::from("found"), path: None, - detail: String::from("Cursor API key is stored in the OS credential store."), + detail: String::from("Codex API key is stored in the OS credential store."), }, Ok(None) => CliStatus { - name: String::from("Cursor SDK"), + name: 
String::from("Codex Provider"), status: String::from("missing"), path: None, - detail: String::from("No Cursor API key is saved yet."), + detail: String::from("No local Codex subscription auth or Codex API key is available."), }, Err(error) => CliStatus { - name: String::from("Cursor SDK"), + name: String::from("Codex Provider"), status: String::from("unauthorized"), path: None, detail: error, @@ -49,17 +59,24 @@ pub(crate) fn cursor_key_status() -> CliStatus { } pub(crate) fn read_cursor_api_key() -> Result, String> { - match cursor_key_entry()?.get_password() { + match codex_key_entry()?.get_password() { Ok(value) if value.trim().is_empty() => Ok(None), Ok(value) => Ok(Some(value)), Err(KeyringError::NoEntry) => Ok(None), - Err(error) => Err(format!("Unable to read the Cursor API key: {error}")), + Err(error) => Err(format!("Unable to read the Codex API key: {error}")), } } -fn cursor_key_entry() -> Result { +fn codex_key_entry() -> Result { keyring::use_native_store(false) .map_err(|error| format!("Unable to open the OS credential store: {error}"))?; - Entry::new(CURSOR_KEY_SERVICE, CURSOR_KEY_USER) + Entry::new(CODEX_KEY_SERVICE, CODEX_KEY_USER) .map_err(|error| format!("Unable to open the OS credential store: {error}")) } + +fn local_codex_auth_dir() -> Option { + std::env::var_os("CODEX_HOME") + .map(PathBuf::from) + .or_else(|| std::env::var_os("USERPROFILE").map(|home| PathBuf::from(home).join(".codex"))) + .or_else(|| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(".codex"))) +} diff --git a/src-tauri/src/state.rs b/src-tauri/src/state.rs index 460f089..607bcbd 100644 --- a/src-tauri/src/state.rs +++ b/src-tauri/src/state.rs @@ -23,6 +23,7 @@ pub(crate) struct ExecutionControl { pub(crate) run_id: u64, pub(crate) awaiting_approval: bool, pub(crate) stop_requested: bool, + pub(crate) active_container: Option, } #[derive(Default)] @@ -36,6 +37,7 @@ pub(crate) struct ChatExecutionControl { pub(crate) run_id: u64, pub(crate) awaiting_approval: 
bool, pub(crate) stop_requested: bool, + pub(crate) active_container: Option, } #[derive(Clone)] diff --git a/src/App.tsx b/src/App.tsx index 5434079..bf680ec 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -112,7 +112,7 @@ async function refreshCursorModelsForEnvironment( models: [], projectErrorMessage: error instanceof Error ? error.message - : "Unable to load Cursor SDK models." + : "Unable to load Codex models." }; } } @@ -254,7 +254,11 @@ function App() { handleGrillPrd, handleGeneratePrd, handleGrillSpec, - handleGenerateSpec + handleGenerateSpec, + handleSavePrdPreview, + handleDiscardPrdPreview, + handleSaveSpecPreview, + handleDiscardSpecPreview } = useDocumentHandlers({ agentState, derivedState, @@ -296,6 +300,7 @@ function App() { setConfiguredPrdPath: projectState.setConfiguredPrdPath, setConfiguredSpecPath: projectState.setConfiguredSpecPath, setExecutionAgentDescription: projectState.setExecutionAgentDescription, + setProviderAuthMode: projectState.setProviderAuthMode, setPrdPromptTemplate: projectState.setPrdPromptTemplate, setReasoningProfile: projectState.setReasoningProfile, setSelectedModel: projectState.setSelectedModel, @@ -405,6 +410,25 @@ function App() { return; } + if ( + desktopRuntime && + (settingsState.environment.cursor.status !== "found" || + settingsState.environment.codex.status !== "found" || + settingsState.environment.docker.status !== "found") + ) { + agentState.setStatus("error"); + agentState.setExecutionSummary("Sandcastle Runtime is not ready."); + agentState.appendTerminalOutput( + stampLog( + "error", + settingsState.environment.docker.status === "unavailable" + ? "Docker Desktop is open, but the Docker engine is unavailable. Restart Docker Desktop or run wsl --shutdown before starting execution." + : "Configure Codex authentication, Codex CLI, and Docker before starting execution." 
+ ) + ); + return; + } + const modelLabel = getModelLabel(projectState.selectedModel); const reasoningLabel = getReasoningLabel( projectState.selectedModel, @@ -433,12 +457,17 @@ function App() { ); return; } catch (error) { + agentState.setStatus("error"); + agentState.setExecutionSummary( + error instanceof Error ? error.message : "Agent startup failed." + ); agentState.appendTerminalOutput( stampLog( "error", - `${error instanceof Error ? error.message : "Agent startup failed."} Falling back to the local simulator.` + error instanceof Error ? error.message : "Agent startup failed." ) ); + return; } } @@ -455,7 +484,7 @@ function App() { fallbackTimerRef, workspaceUiState.setLatestDiff ); - }, [agentState, desktopRuntime, projectState, workspaceUiState.setLatestDiff]); + }, [agentState, desktopRuntime, projectState, settingsState.environment, workspaceUiState.setLatestDiff]); const handleApproveExecutionGate = useCallback(async () => { if (agentState.status !== "awaiting_approval") { @@ -537,6 +566,10 @@ function App() { handleGenerateSpec, handleGrillPrd, handleGrillSpec, + handleSavePrdPreview, + handleDiscardPrdPreview, + handleSaveSpecPreview, + handleDiscardSpecPreview, handleOpenImportFile, handleStartBuild, handleWorkspaceFileOpen, diff --git a/src/components/CliHealthCard.test.tsx b/src/components/CliHealthCard.test.tsx new file mode 100644 index 0000000..2774f30 --- /dev/null +++ b/src/components/CliHealthCard.test.tsx @@ -0,0 +1,23 @@ +import { render, screen } from "@testing-library/react"; +import { describe, expect, it } from "vitest"; + +import { CliHealthCard } from "./CliHealthCard"; + +describe("CliHealthCard", () => { + it("shows tool readiness without exposing executable paths", () => { + render( + + ); + + expect(screen.getByText("Codex CLI")).toBeInTheDocument(); + expect(screen.getByText("Ready")).toBeInTheDocument(); + expect(screen.queryByText(/codex\.cmd/i)).not.toBeInTheDocument(); + }); +}); diff --git a/src/components/CliHealthCard.tsx 
b/src/components/CliHealthCard.tsx index f18a373..ac58008 100644 --- a/src/components/CliHealthCard.tsx +++ b/src/components/CliHealthCard.tsx @@ -9,24 +9,21 @@ interface CliHealthCardProps { export const CliHealthCard = memo(function CliHealthCard({ entry }: CliHealthCardProps) { return ( -
+
{entry.status === "found" ? ( - + ) : ( - + )} -
-

{entry.name}

-

{formatCliHealth(entry.status)}

+
+

+ {entry.name} +

+

{formatCliHealth(entry.status)}

-

{entry.detail}

- {entry.path ? ( - - {entry.path} - - ) : null} +

{entry.detail}

); }); @@ -40,5 +37,9 @@ function formatCliHealth(status: CliStatus["status"]) { return "Needs authentication"; } + if (status === "unavailable") { + return "Unavailable"; + } + return "Missing"; } diff --git a/src/components/ControlColumn.tsx b/src/components/ControlColumn.tsx index f7c6f31..a978d64 100644 --- a/src/components/ControlColumn.tsx +++ b/src/components/ControlColumn.tsx @@ -378,6 +378,8 @@ function formatMcpStatus(status: string) { return "Ready"; case "unauthorized": return "Check"; + case "unavailable": + return "Unavailable"; default: return status; } @@ -389,6 +391,8 @@ function getMcpBadgeClassName(status: string) { return "shrink-0 rounded-full border border-emerald-400/30 bg-emerald-400/12 px-3 py-1 text-xs font-medium uppercase tracking-[0.08em] text-emerald-100"; case "unauthorized": return "shrink-0 rounded-full border border-amber-300/30 bg-amber-300/12 px-3 py-1 text-xs font-medium uppercase tracking-[0.08em] text-amber-100"; + case "unavailable": + return "shrink-0 rounded-full border border-amber-300/30 bg-amber-300/12 px-3 py-1 text-xs font-medium uppercase tracking-[0.08em] text-amber-100"; default: return "shrink-0 rounded-full border border-[var(--border-soft)] bg-white/5 px-3 py-1 text-xs font-medium uppercase tracking-[0.08em] text-[var(--text-subtle)]"; } diff --git a/src/components/MainWorkspace.tsx b/src/components/MainWorkspace.tsx index d9add09..0d63854 100644 --- a/src/components/MainWorkspace.tsx +++ b/src/components/MainWorkspace.tsx @@ -39,6 +39,8 @@ interface MainWorkspaceProps { specContent: string; prdPaneMode: PaneMode; specPaneMode: PaneMode; + hasPrdPreview: boolean; + hasSpecPreview: boolean; isSpecApproved: boolean; canGeneratePrd: boolean; canGrillPrd: boolean; @@ -72,9 +74,13 @@ interface MainWorkspaceProps { onPrdGenerationPromptChange: (value: string) => void; onGeneratePrd: () => void; onGrillPrd: () => void; + onSavePrdPreview: () => void; + onDiscardPrdPreview: () => void; onSpecGenerationPromptChange: 
(value: string) => void; onGenerateSpec: () => void; onGrillSpec: () => void; + onSaveSpecPreview: () => void; + onDiscardSpecPreview: () => void; onSpecSelect: (event: ChangeEvent) => void; onEditorTabClose: (path: string) => void; onOpenEditorTabExternally: (path: string, editorId: string) => void; @@ -92,6 +98,8 @@ export const MainWorkspace = memo(function MainWorkspace({ specContent, prdPaneMode, specPaneMode, + hasPrdPreview, + hasSpecPreview, isSpecApproved, canGeneratePrd, canGrillPrd, @@ -125,9 +133,13 @@ export const MainWorkspace = memo(function MainWorkspace({ onPrdGenerationPromptChange, onGeneratePrd, onGrillPrd, + onSavePrdPreview, + onDiscardPrdPreview, onSpecGenerationPromptChange, onGenerateSpec, onGrillSpec, + onSaveSpecPreview, + onDiscardSpecPreview, onSpecSelect, onEditorTabClose, onOpenEditorTabExternally, @@ -217,6 +229,13 @@ export const MainWorkspace = memo(function MainWorkspace({ onModeChange={onPrdPaneModeChange} showModeButtons={hasPrdContent} /> + {hasPrdPreview ? ( + onPrdPaneModeChange("edit")} + onSave={onSavePrdPreview} + /> + ) : null} {showPrdEmptyState ? ( + {hasSpecPreview ? ( + onSpecPaneModeChange("edit")} + onSave={onSaveSpecPreview} + /> + ) : null} {approveSpecButton} @@ -375,6 +401,30 @@ function ExternalEditorMenu({ ); } +function PreviewActions({ + onSave, + onEdit, + onDiscard +}: { + onSave: () => void; + onEdit: () => void; + onDiscard: () => void; +}) { + return ( +
+ + + +
+ ); +} + const HEADER_ACTION_BUTTON_CLASS = "inline-flex items-center justify-center gap-2 rounded-lg border border-[var(--border-soft)] bg-[var(--bg-panel-strong)] px-4 py-3 font-medium text-[var(--text-main)] transition hover:border-[var(--accent)] hover:text-[var(--accent)]"; @@ -383,3 +433,6 @@ const APPROVED_ACTION_BUTTON_CLASS = const OPEN_EDITOR_BUTTON_CLASS = "rounded border border-[var(--border-soft)] bg-[var(--bg-panel-strong)] px-3 py-2 text-sm font-medium text-[var(--text-main)] transition hover:border-[var(--accent)] hover:text-[var(--accent)] disabled:cursor-not-allowed disabled:opacity-50"; + +const PREVIEW_ACTION_BUTTON_CLASS = + "rounded px-3 py-2 text-sm font-medium text-[var(--text-main)] transition hover:bg-[var(--bg-nav-active)] hover:text-[var(--accent)]"; diff --git a/src/components/ProjectAiSettingsCard.tsx b/src/components/ProjectAiSettingsCard.tsx index 4584a9b..3ea27ba 100644 --- a/src/components/ProjectAiSettingsCard.tsx +++ b/src/components/ProjectAiSettingsCard.tsx @@ -6,7 +6,7 @@ import { Brain, Spark } from "iconoir-react"; import { memo } from "react"; import { getModelOptions, getReasoningOptions } from "../lib/agentConfig"; -import type { CursorModel, ModelId, ReasoningProfileId } from "../types"; +import type { CursorModel, ModelId, ProviderAuthMode, ReasoningProfileId } from "../types"; import { FIELD_LABEL_CLASS, ScopedPathReference, @@ -23,12 +23,14 @@ interface ProjectAiSettingsCardProps { configPath: string; workspaceRootName: string; cursorModels?: CursorModel[]; + providerAuthMode: ProviderAuthMode; selectedModel: ModelId; selectedReasoning: ReasoningProfileId; prdPrompt: string; specPrompt: string; executionAgentDescription: string; onModelChange: (model: ModelId) => void; + onProviderAuthModeChange: (mode: ProviderAuthMode) => void; onReasoningChange: (reasoning: ReasoningProfileId) => void; onPrdPromptChange: (value: string) => void; onSpecPromptChange: (value: string) => void; @@ -39,12 +41,14 @@ export const 
ProjectAiSettingsCard = memo(function ProjectAiSettingsCard({ configPath, workspaceRootName, cursorModels = [], + providerAuthMode, selectedModel, selectedReasoning, prdPrompt, specPrompt, executionAgentDescription, onModelChange, + onProviderAuthModeChange, onReasoningChange, onPrdPromptChange, onSpecPromptChange, @@ -52,6 +56,18 @@ export const ProjectAiSettingsCard = memo(function ProjectAiSettingsCard({ }: ProjectAiSettingsCardProps) { const modelOptions = getModelOptions(undefined, cursorModels); const reasoningOptions = getReasoningOptions(selectedModel, cursorModels); + const authModeOptions = [ + { + value: "subscription" as const, + label: "Local subscription", + hint: "Use local Codex authentication material when preparing Sandcastle." + }, + { + value: "api_key" as const, + label: "API key", + hint: "Use the Codex API key stored in the OS credential store." + } + ]; return ( @@ -66,6 +82,20 @@ export const ProjectAiSettingsCard = memo(function ProjectAiSettingsCard({ />
+ undefined} + options={[{ value: "codex", label: "Codex", hint: "Runs through Sandcastle." }]} + selectedKey="codex" + /> + + +

- The empty-state prompt fields append the user note after these saved Cursor agent - descriptions before the SDK run starts. + The empty-state prompt fields append the user note after these saved workflow agent + descriptions before the Sandcastle turn starts.

diff --git a/src/components/SettingsView.tsx b/src/components/SettingsView.tsx index 3e1c907..b66fc94 100644 --- a/src/components/SettingsView.tsx +++ b/src/components/SettingsView.tsx @@ -14,6 +14,7 @@ import type { CursorModel, EnvironmentStatus, ModelId, + ProviderAuthMode, ReasoningProfileId, SpecAnnotation, ThemeMode @@ -41,6 +42,7 @@ interface SettingsViewProps { workspaceRootName: string; cursorModels: CursorModel[]; selectedModel: ModelId; + providerAuthMode: ProviderAuthMode; selectedReasoning: ReasoningProfileId; prdPrompt: string; specPrompt: string; @@ -55,6 +57,7 @@ interface SettingsViewProps { onSaveCursorApiKey: () => void; onDeleteCursorApiKey: () => void; onModelChange: (model: ModelId) => void; + onProviderAuthModeChange: (mode: ProviderAuthMode) => void; onReasoningChange: (reasoning: ReasoningProfileId) => void; onPrdPromptChange: (value: string) => void; onSpecPromptChange: (value: string) => void; @@ -85,6 +88,7 @@ export const SettingsView = memo(function SettingsView({ workspaceRootName, cursorModels, selectedModel, + providerAuthMode, selectedReasoning, prdPrompt, specPrompt, @@ -98,6 +102,7 @@ export const SettingsView = memo(function SettingsView({ onSaveCursorApiKey, onDeleteCursorApiKey, onModelChange, + onProviderAuthModeChange, onReasoningChange, onPrdPromptChange, onSpecPromptChange, @@ -160,37 +165,47 @@ export const SettingsView = memo(function SettingsView({ } title="General" /> -
+

- Cursor SDK + Sandcastle Runtime

- - -
- - +
+ + + +
+ {providerAuthMode === "api_key" ? ( + <> + +
+ + +
+ + ) : ( +

+ Local subscription mode uses the Codex authentication available on this + machine. No provider secret is written to project settings. +

+ )}
-
-

Git

- -
@@ -201,12 +216,14 @@ export const SettingsView = memo(function SettingsView({ configPath={configPath} cursorModels={cursorModels} onModelChange={onModelChange} + onProviderAuthModeChange={onProviderAuthModeChange} onPrdPromptChange={onPrdPromptChange} onReasoningChange={onReasoningChange} onExecutionAgentDescriptionChange={onExecutionAgentDescriptionChange} onSpecPromptChange={onSpecPromptChange} executionAgentDescription={executionAgentDescription} prdPrompt={prdPrompt} + providerAuthMode={providerAuthMode} selectedModel={selectedModel} selectedReasoning={selectedReasoning} specPrompt={specPrompt} diff --git a/src/hooks/useAppScreenProps.ts b/src/hooks/useAppScreenProps.ts index 363459c..e1d9818 100644 --- a/src/hooks/useAppScreenProps.ts +++ b/src/hooks/useAppScreenProps.ts @@ -76,6 +76,8 @@ export function useAppScreenProps({ isGeneratingPrd: derivedState.isGeneratingPrd, isGeneratingSpec: derivedState.isGeneratingSpec, isSpecApproved: projectState.isSpecApproved, + hasPrdPreview: projectState.hasPrdPreview, + hasSpecPreview: projectState.hasSpecPreview, executionControlsEnabled: false, onActiveTabChange: projectState.setActiveTab, onApproveExecutionGate: uiHandlers.handleApproveExecutionGateClick, @@ -89,6 +91,10 @@ export function useAppScreenProps({ onGenerateSpec: uiHandlers.handleGenerateSpecClick, onGrillPrd: uiHandlers.handleGrillPrdClick, onGrillSpec: uiHandlers.handleGrillSpecClick, + onSavePrdPreview: uiHandlers.handleSavePrdPreviewClick, + onDiscardPrdPreview: uiHandlers.handleDiscardPrdPreviewClick, + onSaveSpecPreview: uiHandlers.handleSaveSpecPreviewClick, + onDiscardSpecPreview: uiHandlers.handleDiscardSpecPreviewClick, onLoadPrd: uiHandlers.handleOpenPrdImportClick, onLoadSpec: uiHandlers.handleOpenSpecImportClick, onPrdContentChange: uiHandlers.handlePrdContentChange, @@ -188,6 +194,7 @@ export function useAppScreenProps({ onExecutionAgentDescriptionChange: projectSettingsHandlers.handleExecutionAgentDescriptionChange, onModelChange: 
projectSettingsHandlers.handleProjectModelChange, + onProviderAuthModeChange: projectSettingsHandlers.handleProviderAuthModeChange, onPrdPathChange: projectSettingsHandlers.handleConfiguredPrdPathChange, onPrdPromptChange: projectSettingsHandlers.handlePrdPromptTemplateChange, onReasoningChange: projectSettingsHandlers.handleProjectReasoningChange, diff --git a/src/hooks/useAppStoreSlices.ts b/src/hooks/useAppStoreSlices.ts index 1c11374..a45df41 100644 --- a/src/hooks/useAppStoreSlices.ts +++ b/src/hooks/useAppStoreSlices.ts @@ -35,12 +35,15 @@ export function useProjectStoreSlice() { autonomyMode: state.autonomyMode, configuredPrdPath: state.configuredPrdPath, configuredSpecPath: state.configuredSpecPath, + hasPrdPreview: state.hasPrdPreview, + hasSpecPreview: state.hasSpecPreview, isSpecApproved: state.isSpecApproved, openEditorTabs: state.openEditorTabs, prdContent: state.prdContent, prdPaneMode: state.prdPaneMode, prdPath: state.prdPath, prdPromptTemplate: state.prdPromptTemplate, + providerAuthMode: state.providerAuthMode, selectedModel: state.selectedModel, selectedReasoning: state.selectedReasoning, specContent: state.specContent, @@ -61,10 +64,13 @@ export function useProjectStoreSlice() { setPrdPaneMode: state.setPrdPaneMode, setPrdPromptTemplate: state.setPrdPromptTemplate, setProjectSettings: state.setProjectSettings, + setProviderAuthMode: state.setProviderAuthMode, setReasoningProfile: state.setReasoningProfile, setSelectedModel: state.setSelectedModel, setSelectedSpecRange: state.setSelectedSpecRange, setSpecContent: state.setSpecContent, + setPrdPreviewState: state.setPrdPreviewState, + setSpecPreviewState: state.setSpecPreviewState, setSpecPaneMode: state.setSpecPaneMode, setSpecPromptTemplate: state.setSpecPromptTemplate, setExecutionAgentDescription: state.setExecutionAgentDescription, diff --git a/src/hooks/useAppUiHandlers.ts b/src/hooks/useAppUiHandlers.ts index 561b138..c2af3d4 100644 --- a/src/hooks/useAppUiHandlers.ts +++ 
b/src/hooks/useAppUiHandlers.ts @@ -24,6 +24,10 @@ interface UseAppUiHandlersOptions { handleGenerateSpec: () => Promise<void>; handleGrillPrd: () => Promise<void>; handleGrillSpec: () => Promise<void>; + handleSavePrdPreview: () => Promise<void>; + handleDiscardPrdPreview: () => Promise<void>; + handleSaveSpecPreview: () => Promise<void>; + handleDiscardSpecPreview: () => Promise<void>; handleOpenImportFile: (target: DocumentTarget) => Promise<void>; handleStartBuild: () => Promise<void>; handleWorkspaceFileOpen: (path: string) => Promise<void>; @@ -41,6 +45,10 @@ export function useAppUiHandlers({ handleGenerateSpec, handleGrillPrd, handleGrillSpec, + handleSavePrdPreview, + handleDiscardPrdPreview, + handleSaveSpecPreview, + handleDiscardSpecPreview, handleOpenImportFile, handleStartBuild, handleWorkspaceFileOpen, @@ -149,7 +157,7 @@ export function useAppUiHandlers({ await refreshDiagnostics(); } catch (error) { workspaceUiState.setPrdGenerationError( - error instanceof Error ? error.message : "Unable to save the Cursor API key." + error instanceof Error ? error.message : "Unable to save the Codex API key." ); } }, [refreshDiagnostics, settingsState, workspaceUiState]); @@ -161,7 +169,7 @@ export function useAppUiHandlers({ await refreshDiagnostics(); } catch (error) { workspaceUiState.setPrdGenerationError( - error instanceof Error ? error.message : "Unable to delete the Cursor API key." + error instanceof Error ? error.message : "Unable to delete the Codex API key." 
); } }, [refreshDiagnostics, settingsState, workspaceUiState]); @@ -214,6 +222,22 @@ export function useAppUiHandlers({ void handleGrillSpec(); }, [handleGrillSpec]); + const handleSavePrdPreviewClick = useCallback(() => { + void handleSavePrdPreview(); + }, [handleSavePrdPreview]); + + const handleDiscardPrdPreviewClick = useCallback(() => { + void handleDiscardPrdPreview(); + }, [handleDiscardPrdPreview]); + + const handleSaveSpecPreviewClick = useCallback(() => { + void handleSaveSpecPreview(); + }, [handleSaveSpecPreview]); + + const handleDiscardSpecPreviewClick = useCallback(() => { + void handleDiscardSpecPreview(); + }, [handleDiscardSpecPreview]); + return { handlePrdContentChange, handleSpecContentChange, @@ -235,7 +259,11 @@ export function useAppUiHandlers({ handleGeneratePrdClick, handleGenerateSpecClick, handleGrillPrdClick, - handleGrillSpecClick + handleGrillSpecClick, + handleSavePrdPreviewClick, + handleDiscardPrdPreviewClick, + handleSaveSpecPreviewClick, + handleDiscardSpecPreviewClick }; } diff --git a/src/hooks/useAppView.ts b/src/hooks/useAppView.ts index 71c8a46..d798696 100644 --- a/src/hooks/useAppView.ts +++ b/src/hooks/useAppView.ts @@ -77,6 +77,7 @@ export function useAppDerivedState({ configuredPrdPath: projectState.configuredPrdPath, configuredSpecPath: projectState.configuredSpecPath, prdAgentDescription: projectState.prdPromptTemplate, + providerAuthMode: projectState.providerAuthMode, selectedModel: projectState.selectedModel, selectedReasoning: projectState.selectedReasoning, specAgentDescription: projectState.specPromptTemplate, @@ -88,6 +89,7 @@ export function useAppDerivedState({ projectState.configuredSpecPath, projectState.executionAgentDescription, projectState.prdPromptTemplate, + projectState.providerAuthMode, projectState.selectedModel, projectState.selectedReasoning, projectState.specPromptTemplate, @@ -111,6 +113,8 @@ export function useAppDerivedState({ desktopRuntime && !isGeneratingPrd && 
settingsState.environment.cursor.status === "found" && + settingsState.environment.codex.status === "found" && + settingsState.environment.docker.status === "found" && workspaceUiState.projectRootPath.trim().length > 0 && projectState.configuredPrdPath.trim().length > 0 && workspaceUiState.prdGenerationPrompt.trim().length > 0, @@ -119,6 +123,8 @@ export function useAppDerivedState({ isGeneratingPrd, projectState.configuredPrdPath, settingsState.environment.cursor.status, + settingsState.environment.codex.status, + settingsState.environment.docker.status, workspaceUiState.prdGenerationPrompt, workspaceUiState.projectRootPath ] @@ -128,6 +134,8 @@ export function useAppDerivedState({ desktopRuntime && !isGeneratingSpec && settingsState.environment.cursor.status === "found" && + settingsState.environment.codex.status === "found" && + settingsState.environment.docker.status === "found" && workspaceUiState.projectRootPath.trim().length > 0 && projectState.prdContent.trim().length > 0 && projectState.configuredSpecPath.trim().length > 0 && @@ -138,6 +146,8 @@ export function useAppDerivedState({ projectState.configuredSpecPath, projectState.prdContent, settingsState.environment.cursor.status, + settingsState.environment.codex.status, + settingsState.environment.docker.status, workspaceUiState.projectRootPath, workspaceUiState.specGenerationPrompt ] @@ -147,6 +157,8 @@ export function useAppDerivedState({ desktopRuntime && !isGeneratingSpec && settingsState.environment.cursor.status === "found" && + settingsState.environment.codex.status === "found" && + settingsState.environment.docker.status === "found" && workspaceUiState.projectRootPath.trim().length > 0 && projectState.prdContent.trim().length > 0 && projectState.configuredSpecPath.trim().length > 0, @@ -156,6 +168,8 @@ export function useAppDerivedState({ projectState.configuredSpecPath, projectState.prdContent, settingsState.environment.cursor.status, + settingsState.environment.codex.status, + 
settingsState.environment.docker.status, workspaceUiState.projectRootPath ] ); diff --git a/src/hooks/useDocumentHandlers.ts b/src/hooks/useDocumentHandlers.ts index ee90690..c0a999c 100644 --- a/src/hooks/useDocumentHandlers.ts +++ b/src/hooks/useDocumentHandlers.ts @@ -15,9 +15,12 @@ import { runCursorAgentPrompt } from "../lib/cursorAgentRuntime"; import { + deleteDocumentPreview, generatePrdDocument, generateSpecDocument, - pickDocument + loadProjectContext, + pickDocument, + saveDocumentPreview } from "../lib/runtime"; import { type ImportableFile, @@ -56,14 +59,16 @@ export function useDocumentHandlers({ const assignDocument = useCallback( (target: DocumentTarget, content: string, path: string) => { startTransition(() => { - if (target === "prd") { - projectState.setPrdContent(content, path); - projectState.setPrdPaneMode("preview"); - return; - } + if (target === "prd") { + projectState.setPrdContent(content, path); + projectState.setPrdPaneMode("preview"); + projectState.setPrdPreviewState(false); + return; + } - projectState.setSpecContent(content, path); - projectState.setSpecPaneMode("preview"); + projectState.setSpecContent(content, path); + projectState.setSpecPaneMode("preview"); + projectState.setSpecPreviewState(false); }); if (target === "prd") { @@ -129,7 +134,9 @@ export function useDocumentHandlers({ const validationError = getPrdGenerationValidationError({ currentProjectSettings: derivedState.currentProjectSettings, desktopRuntime, + environmentCodexStatus: settingsState.environment.codex.status, environmentCursorStatus: settingsState.environment.cursor.status, + environmentDockerStatus: settingsState.environment.docker.status, projectRootPath: workspaceUiState.projectRootPath, trimmedPrompt }); @@ -158,25 +165,26 @@ export function useDocumentHandlers({ agentDescription: derivedState.currentProjectSettings.prdAgentDescription, userPrompt: trimmedPrompt }), - onEvent: (line) => agentState.appendTerminalOutput(stampLog("cursor", line)) + 
onEvent: (line) => agentState.appendTerminalOutput(stampLog("sandcastle", line)) }); - const generatedPrd = await generatePrdDocument({ + const generatedPrd = await saveDocumentPreview({ workspaceRoot: workspaceUiState.projectRootPath, - outputPath: derivedState.currentProjectSettings.prdPath, + target: "prd", content: generatedContent }); startTransition(() => { projectState.setPrdContent(generatedPrd.content, generatedPrd.sourcePath); projectState.setPrdPaneMode("preview"); + projectState.setPrdPreviewState(true); }); workspaceUiState.setPrdGenerationPrompt(""); agentState.setStatus("idle"); agentState.appendTerminalOutput( stampLog( "prd", - `PRD draft generated, saved to ${generatedPrd.fileName}, and loaded into the review pane.` + `PRD preview generated, saved to ${generatedPrd.fileName}, and loaded into the review pane.` ) ); } catch (error) { @@ -199,7 +207,9 @@ export function useDocumentHandlers({ const validationError = getPrdGenerationValidationError({ currentProjectSettings: derivedState.currentProjectSettings, desktopRuntime, + environmentCodexStatus: settingsState.environment.codex.status, environmentCursorStatus: settingsState.environment.cursor.status, + environmentDockerStatus: settingsState.environment.docker.status, projectRootPath: workspaceUiState.projectRootPath, trimmedPrompt }); @@ -223,7 +233,7 @@ export function useDocumentHandlers({ agentDescription: derivedState.currentProjectSettings.prdAgentDescription, userPrompt: trimmedPrompt }), - onEvent: (line) => agentState.appendTerminalOutput(stampLog("cursor", line)) + onEvent: (line) => agentState.appendTerminalOutput(stampLog("sandcastle", line)) }); workspaceUiState.setPrdGenerationPrompt(appendGrillResponse(trimmedPrompt, grillResponse)); @@ -249,7 +259,9 @@ export function useDocumentHandlers({ const validationError = getSpecGenerationValidationError({ currentProjectSettings: derivedState.currentProjectSettings, desktopRuntime, + environmentCodexStatus: 
settingsState.environment.codex.status, environmentCursorStatus: settingsState.environment.cursor.status, + environmentDockerStatus: settingsState.environment.docker.status, prdContent: projectState.prdContent, projectRootPath: workspaceUiState.projectRootPath, trimmedPrompt @@ -280,25 +292,26 @@ export function useDocumentHandlers({ userPrompt: trimmedPrompt, prdContent: projectState.prdContent }), - onEvent: (line) => agentState.appendTerminalOutput(stampLog("cursor", line)) + onEvent: (line) => agentState.appendTerminalOutput(stampLog("sandcastle", line)) }); - const generatedSpec = await generateSpecDocument({ + const generatedSpec = await saveDocumentPreview({ workspaceRoot: workspaceUiState.projectRootPath, - outputPath: derivedState.currentProjectSettings.specPath, + target: "spec", content: generatedContent }); startTransition(() => { projectState.setSpecContent(generatedSpec.content, generatedSpec.sourcePath); projectState.setSpecPaneMode("preview"); + projectState.setSpecPreviewState(true); }); workspaceUiState.setSpecGenerationPrompt(""); agentState.setStatus("idle"); agentState.appendTerminalOutput( stampLog( "spec", - `Specification draft generated, saved to ${generatedSpec.fileName}, and loaded into the review pane.` + `Specification preview generated, saved to ${generatedSpec.fileName}, and loaded into the review pane.` ) ); } catch (error) { @@ -322,7 +335,9 @@ export function useDocumentHandlers({ const validationError = getSpecGenerationValidationError({ currentProjectSettings: derivedState.currentProjectSettings, desktopRuntime, + environmentCodexStatus: settingsState.environment.codex.status, environmentCursorStatus: settingsState.environment.cursor.status, + environmentDockerStatus: settingsState.environment.docker.status, prdContent: projectState.prdContent, projectRootPath: workspaceUiState.projectRootPath, requirePrompt: false, @@ -349,7 +364,7 @@ export function useDocumentHandlers({ userPrompt: trimmedPrompt, prdContent: 
projectState.prdContent }), - onEvent: (line) => agentState.appendTerminalOutput(stampLog("cursor", line)) + onEvent: (line) => agentState.appendTerminalOutput(stampLog("sandcastle", line)) }); workspaceUiState.setSpecGenerationPrompt(appendGrillResponse(trimmedPrompt, grillResponse)); @@ -370,6 +385,56 @@ export function useDocumentHandlers({ workspaceUiState ]); + const handleSavePrdPreview = useCallback(async () => { + await savePreviewDocument({ + target: "prd", + content: projectState.prdContent, + outputPath: derivedState.currentProjectSettings.prdPath, + projectRootPath: workspaceUiState.projectRootPath, + setContent: projectState.setPrdContent, + setPreviewState: projectState.setPrdPreviewState, + setPaneMode: projectState.setPrdPaneMode, + appendTerminalOutput: agentState.appendTerminalOutput + }); + }, [agentState, derivedState.currentProjectSettings.prdPath, projectState, workspaceUiState.projectRootPath]); + + const handleDiscardPrdPreview = useCallback(async () => { + await discardPreviewDocument({ + target: "prd", + projectRootPath: workspaceUiState.projectRootPath, + fallbackPath: derivedState.currentProjectSettings.prdPath, + setContent: projectState.setPrdContent, + setPreviewState: projectState.setPrdPreviewState, + setPaneMode: projectState.setPrdPaneMode, + appendTerminalOutput: agentState.appendTerminalOutput + }); + }, [agentState, derivedState.currentProjectSettings.prdPath, projectState, workspaceUiState.projectRootPath]); + + const handleSaveSpecPreview = useCallback(async () => { + await savePreviewDocument({ + target: "spec", + content: projectState.specContent, + outputPath: derivedState.currentProjectSettings.specPath, + projectRootPath: workspaceUiState.projectRootPath, + setContent: projectState.setSpecContent, + setPreviewState: projectState.setSpecPreviewState, + setPaneMode: projectState.setSpecPaneMode, + appendTerminalOutput: agentState.appendTerminalOutput + }); + }, [agentState, derivedState.currentProjectSettings.specPath, 
projectState, workspaceUiState.projectRootPath]); + + const handleDiscardSpecPreview = useCallback(async () => { + await discardPreviewDocument({ + target: "spec", + projectRootPath: workspaceUiState.projectRootPath, + fallbackPath: derivedState.currentProjectSettings.specPath, + setContent: projectState.setSpecContent, + setPreviewState: projectState.setSpecPreviewState, + setPaneMode: projectState.setSpecPaneMode, + appendTerminalOutput: agentState.appendTerminalOutput + }); + }, [agentState, derivedState.currentProjectSettings.specPath, projectState, workspaceUiState.projectRootPath]); + return { assignDocument, handleOpenImportFile, @@ -377,7 +442,11 @@ export function useDocumentHandlers({ handleGrillPrd, handleGeneratePrd, handleGrillSpec, - handleGenerateSpec + handleGenerateSpec, + handleSavePrdPreview, + handleDiscardPrdPreview, + handleSaveSpecPreview, + handleDiscardSpecPreview }; } @@ -400,18 +469,22 @@ function reportImportError( function getPrdGenerationValidationError({ currentProjectSettings, desktopRuntime, + environmentCodexStatus, environmentCursorStatus, + environmentDockerStatus, projectRootPath, trimmedPrompt }: { currentProjectSettings: ProjectSettings; desktopRuntime: boolean; environmentCursorStatus: CliHealth; + environmentCodexStatus: CliHealth; + environmentDockerStatus: CliHealth; projectRootPath: string; trimmedPrompt: string; }) { if (!desktopRuntime) { - return "Cursor key access and document saving require the desktop runtime."; + return "Sandcastle access and document saving require the desktop runtime."; } if (!projectRootPath.trim()) { @@ -426,15 +499,20 @@ function getPrdGenerationValidationError({ return "Add the product context you want the AI to consider."; } - return environmentCursorStatus === "found" - ? 
"" - : "Save a Cursor API key in Settings before generating a PRD."; + return getRuntimeReadinessError({ + environmentCodexStatus, + environmentCursorStatus, + environmentDockerStatus, + target: "PRD" + }); } function getSpecGenerationValidationError({ currentProjectSettings, desktopRuntime, + environmentCodexStatus, environmentCursorStatus, + environmentDockerStatus, prdContent, projectRootPath, requirePrompt = true, @@ -443,13 +521,15 @@ function getSpecGenerationValidationError({ currentProjectSettings: ProjectSettings; desktopRuntime: boolean; environmentCursorStatus: CliHealth; + environmentCodexStatus: CliHealth; + environmentDockerStatus: CliHealth; prdContent: string; projectRootPath: string; requirePrompt?: boolean; trimmedPrompt: string; }) { if (!desktopRuntime) { - return "Cursor key access and document saving require the desktop runtime."; + return "Sandcastle access and document saving require the desktop runtime."; } if (!projectRootPath.trim()) { @@ -468,9 +548,46 @@ function getSpecGenerationValidationError({ return "Add the technical guidance you want the AI to consider."; } - return environmentCursorStatus === "found" - ? 
"" - : "Save a Cursor API key in Settings before generating a spec."; + return getRuntimeReadinessError({ + environmentCodexStatus, + environmentCursorStatus, + environmentDockerStatus, + target: "spec" + }); +} + +function getRuntimeReadinessError({ + environmentCodexStatus, + environmentCursorStatus, + environmentDockerStatus, + target +}: { + environmentCursorStatus: CliHealth; + environmentCodexStatus: CliHealth; + environmentDockerStatus: CliHealth; + target: "PRD" | "spec"; +}) { + if (environmentCursorStatus !== "found") { + return `Configure Codex authentication in Settings before generating a ${target}.`; + } + + if (environmentCodexStatus !== "found") { + return `Install or repair Codex CLI before generating a ${target}.`; + } + + if (environmentDockerStatus === "missing") { + return `Start Docker before generating a ${target}.`; + } + + if (environmentDockerStatus === "unavailable") { + return `Docker Desktop is open, but the Docker engine is unavailable. Restart Docker Desktop or run wsl --shutdown before generating a ${target}.`; + } + + if (environmentDockerStatus !== "found") { + return `Repair Docker before generating a ${target}.`; + } + + return ""; } function appendGrillResponse(currentPrompt: string, grillResponse: string) { @@ -486,3 +603,82 @@ My answer: ? `${trimmedCurrentPrompt}\n\n${nextBlock}` : nextBlock; } + +async function savePreviewDocument({ + target, + content, + outputPath, + projectRootPath, + setContent, + setPreviewState, + setPaneMode, + appendTerminalOutput +}: { + target: DocumentTarget; + content: string; + outputPath: string; + projectRootPath: string; + setContent: (content: string, path?: string) => void; + setPreviewState: (hasPreview: boolean) => void; + setPaneMode: (mode: "preview") => void; + appendTerminalOutput: (line: string) => void; +}) { + try { + const savedDocument = target === "prd" + ? 
await generatePrdDocument({ workspaceRoot: projectRootPath, outputPath, content }) + : await generateSpecDocument({ workspaceRoot: projectRootPath, outputPath, content }); + + await deleteDocumentPreview({ workspaceRoot: projectRootPath, target }); + setContent(savedDocument.content, savedDocument.sourcePath); + setPreviewState(false); + setPaneMode("preview"); + appendTerminalOutput( + stampLog(target, `Saved ${target.toUpperCase()} preview to ${savedDocument.fileName}.`) + ); + } catch (error) { + appendTerminalOutput( + stampLog( + "error", + error instanceof Error ? error.message : `Unable to save the ${target.toUpperCase()} preview.` + ) + ); + } +} + +async function discardPreviewDocument({ + target, + projectRootPath, + fallbackPath, + setContent, + setPreviewState, + setPaneMode, + appendTerminalOutput +}: { + target: DocumentTarget; + projectRootPath: string; + fallbackPath: string; + setContent: (content: string, path?: string) => void; + setPreviewState: (hasPreview: boolean) => void; + setPaneMode: (mode: "preview") => void; + appendTerminalOutput: (line: string) => void; +}) { + try { + await deleteDocumentPreview({ workspaceRoot: projectRootPath, target }); + const context = await loadProjectContext(projectRootPath); + const canonicalDocument = target === "prd" ? context.prdDocument : context.specDocument; + + setContent(canonicalDocument?.content ?? "", canonicalDocument?.sourcePath ?? fallbackPath); + setPreviewState(false); + setPaneMode("preview"); + appendTerminalOutput(stampLog(target, `Discarded ${target.toUpperCase()} preview.`)); + } catch (error) { + appendTerminalOutput( + stampLog( + "error", + error instanceof Error + ? 
error.message + : `Unable to discard the ${target.toUpperCase()} preview.` + ) + ); + } +} diff --git a/src/hooks/useProjectHandlers.ts b/src/hooks/useProjectHandlers.ts index e916ed6..5bfae81 100644 --- a/src/hooks/useProjectHandlers.ts +++ b/src/hooks/useProjectHandlers.ts @@ -11,6 +11,7 @@ import { buildCurrentProjectSettings, buildWorkspaceNotice } from "../lib/appState"; +import { getActiveDocumentFromPreview, hasDocumentPreview } from "../lib/documentPreview"; import { loadProjectContext, pickProjectFolder, @@ -92,10 +93,16 @@ export function useProjectHandlers({ const isSameProject = normalizedCurrentProjectPath.length > 0 && normalizedCurrentProjectPath === normalizedNextProjectPath; - const nextPrdSourcePath = - context.prdDocument?.sourcePath ?? context.settings.prdPath; - const nextSpecSourcePath = - context.specDocument?.sourcePath ?? context.settings.specPath; + const activePrdDocument = getActiveDocumentFromPreview( + context.prdDocument, + context.prdPreview + ); + const activeSpecDocument = getActiveDocumentFromPreview( + context.specDocument, + context.specPreview + ); + const nextPrdSourcePath = activePrdDocument?.sourcePath ?? context.settings.prdPath; + const nextSpecSourcePath = activeSpecDocument?.sourcePath ?? context.settings.specPath; const preserveEditingPrd = isSameProject && ps.prdPaneMode === "edit" && @@ -138,6 +145,8 @@ export function useProjectHandlers({ path: context.rootPath }); ps.setProjectSettings(context.settings); + ps.setPrdPreviewState(hasDocumentPreview(context.prdPreview)); + ps.setSpecPreviewState(hasDocumentPreview(context.specPreview)); uiState.clearGenerationState(); setChatSessions(context.chatSessions); setActiveSessionId(context.lastActiveSessionId ?? context.chatSessions[0]?.id ?? null); @@ -156,7 +165,7 @@ export function useProjectHandlers({ startTransition(() => { if (!preserveEditingPrd) { ps.setPrdContent( - context.prdDocument?.content ?? "", + activePrdDocument?.content ?? 
"", nextPrdSourcePath ); ps.setPrdPaneMode("preview"); @@ -164,7 +173,7 @@ export function useProjectHandlers({ if (!preserveEditingSpec) { ps.setSpecContent( - context.specDocument?.content ?? "", + activeSpecDocument?.content ?? "", nextSpecSourcePath ); ps.setSpecPaneMode("preview"); @@ -206,6 +215,7 @@ export function useProjectHandlers({ configuredPrdPath: latestProjectState.configuredPrdPath, configuredSpecPath: latestProjectState.configuredSpecPath, prdAgentDescription: latestProjectState.prdPromptTemplate, + providerAuthMode: latestProjectState.providerAuthMode, selectedModel: latestProjectState.selectedModel, selectedReasoning: latestProjectState.selectedReasoning, specAgentDescription: latestProjectState.specPromptTemplate, diff --git a/src/hooks/useProjectSettingsHandlers.ts b/src/hooks/useProjectSettingsHandlers.ts index 0fde2d8..458914f 100644 --- a/src/hooks/useProjectSettingsHandlers.ts +++ b/src/hooks/useProjectSettingsHandlers.ts @@ -19,6 +19,7 @@ interface UseProjectSettingsHandlersOptions { setConfiguredSpecPath: ProjectStoreSlice["setConfiguredSpecPath"]; setPrdPromptTemplate: ProjectStoreSlice["setPrdPromptTemplate"]; setExecutionAgentDescription: ProjectStoreSlice["setExecutionAgentDescription"]; + setProviderAuthMode: ProjectStoreSlice["setProviderAuthMode"]; setReasoningProfile: ProjectStoreSlice["setReasoningProfile"]; setSelectedModel: ProjectStoreSlice["setSelectedModel"]; setSpecPromptTemplate: ProjectStoreSlice["setSpecPromptTemplate"]; @@ -32,6 +33,7 @@ export function useProjectSettingsHandlers({ setConfiguredSpecPath, setPrdPromptTemplate, setExecutionAgentDescription, + setProviderAuthMode, setReasoningProfile, setSelectedModel, setSpecPromptTemplate, @@ -53,6 +55,14 @@ export function useProjectSettingsHandlers({ [scheduleProjectSettingsSave, setReasoningProfile] ); + const handleProviderAuthModeChange = useCallback( + (mode: Parameters[0]) => { + setProviderAuthMode(mode); + scheduleProjectSettingsSave(false); + }, + 
[scheduleProjectSettingsSave, setProviderAuthMode] + ); + const handlePrdPromptTemplateChange = useCallback( (value: string) => { setPrdPromptTemplate(value); @@ -108,6 +118,7 @@ export function useProjectSettingsHandlers({ return { handleProjectModelChange, handleProjectReasoningChange, + handleProviderAuthModeChange, handlePrdPromptTemplateChange, handleSpecPromptTemplateChange, handleExecutionAgentDescriptionChange, diff --git a/src/lib/agentConfig.test.ts b/src/lib/agentConfig.test.ts index b195521..9301b57 100644 --- a/src/lib/agentConfig.test.ts +++ b/src/lib/agentConfig.test.ts @@ -15,8 +15,8 @@ import { } from "./agentConfig"; describe("getModelLabel", () => { - it("returns the label for a known Cursor model", () => { - expect(getModelLabel("composer-2")).toBe("Composer 2"); + it("returns the label for the default Codex model", () => { + expect(getModelLabel("gpt-5.2")).toBe("GPT-5.2"); }); it("returns the label for auto", () => { @@ -25,31 +25,31 @@ describe("getModelLabel", () => { it("falls back to the first model for an unknown model id", () => { const label = getModelLabel("nonexistent-model"); - expect(label).toBe("Composer 2"); + expect(label).toBe("GPT-5.2"); }); }); describe("getModelProvider", () => { - it("returns cursor for Cursor models", () => { - expect(getModelProvider("composer-2")).toBe("cursor"); + it("returns codex for Codex models", () => { + expect(getModelProvider("gpt-5.2")).toBe("codex"); }); it("falls back to the first model's provider for unknown id", () => { - expect(getModelProvider("unknown")).toBe("cursor"); + expect(getModelProvider("unknown")).toBe("codex"); }); }); describe("getModelOption", () => { it("returns the full model option for a valid id", () => { - const option = getModelOption("composer-2"); - expect(option.id).toBe("composer-2"); - expect(option.provider).toBe("cursor"); - expect(option.label).toBe("Composer 2"); + const option = getModelOption("gpt-5.2"); + expect(option.id).toBe("gpt-5.2"); + 
expect(option.provider).toBe("codex"); + expect(option.label).toBe("GPT-5.2"); }); it("returns the first model as fallback for unknown id", () => { const option = getModelOption("fake"); - expect(option.id).toBe("composer-2"); + expect(option.id).toBe("gpt-5.2"); }); }); @@ -58,15 +58,15 @@ describe("getModelOptions", () => { const options = getModelOptions(); expect(options.length).toBeGreaterThan(0); const ids = options.map((o) => o.value); - expect(ids).toContain("composer-2"); + expect(ids).toContain("gpt-5.2"); expect(ids).toContain("auto"); }); - it("returns only Cursor models when filtered by cursor", () => { - const options = getModelOptions("cursor"); + it("returns only Codex models when filtered by codex", () => { + const options = getModelOptions("codex"); expect(options.length).toBeGreaterThan(0); for (const opt of options) { - expect(opt.hint).toMatch(/^Cursor/); + expect(opt.hint).toMatch(/^Codex/); } }); @@ -77,6 +77,18 @@ describe("getModelOptions", () => { expect(opt.hint).toBeTruthy(); } }); + + it("formats discovered Codex model labels like the static model list", () => { + const options = getModelOptions(undefined, [ + { + id: "gpt-5.4-mini", + label: "", + parameters: [] + } + ]); + + expect(options[0]?.label).toBe("GPT-5.4 Mini"); + }); }); describe("getReasoningLabel", () => { @@ -84,8 +96,8 @@ describe("getReasoningLabel", () => { expect(getReasoningLabel("composer-2", "medium")).toBe("Medium"); }); - it("returns 'Max' for Cursor model with max profile", () => { - expect(getReasoningLabel("composer-2", "max")).toBe("Max"); + it("returns 'Extra High' for Codex model with xhigh profile", () => { + expect(getReasoningLabel("gpt-5.2", "xhigh")).toBe("Extra High"); }); it("returns 'Low' for low profile", () => { @@ -94,18 +106,18 @@ describe("getReasoningLabel", () => { }); describe("getReasoningHint", () => { - it("returns description-based hint for Cursor model", () => { - const hint = getReasoningHint("composer-2", "medium"); - 
expect(hint).toContain("Cursor"); + it("returns description-based hint for Codex model", () => { + const hint = getReasoningHint("gpt-5.2", "medium"); + expect(hint).toContain("Codex"); expect(hint).toContain("Balanced"); }); }); describe("getReasoningOptions", () => { - it("returns full range for a Cursor model", () => { - const options = getReasoningOptions("composer-2"); + it("returns full range for a Codex model", () => { + const options = getReasoningOptions("gpt-5.2"); expect(options).toHaveLength(4); - expect(options.map((o) => o.value)).toEqual(["low", "medium", "high", "max"]); + expect(options.map((o) => o.value)).toEqual(["low", "medium", "high", "xhigh"]); }); it("each option has a label and hint", () => { @@ -119,11 +131,11 @@ describe("getReasoningOptions", () => { describe("normalizeReasoningProfile", () => { it("returns the profile when it is valid for the model", () => { - expect(normalizeReasoningProfile("composer-2", "high")).toBe("high"); + expect(normalizeReasoningProfile("gpt-5.2", "high")).toBe("high"); }); it("returns the default profile for invalid profile values", () => { - expect(normalizeReasoningProfile("composer-2", "invalid")).toBe("medium"); + expect(normalizeReasoningProfile("gpt-5.2", "invalid")).toBe("medium"); }); it("preserves account-specific profile values for dynamic model ids", () => { @@ -136,7 +148,7 @@ describe("normalizeReasoningProfile", () => { }); describe("normalizeModelId", () => { - it("preserves account-specific Cursor model ids", () => { + it("preserves account-specific Codex model ids", () => { expect(normalizeModelId("account-model")).toBe("account-model"); }); @@ -145,20 +157,20 @@ describe("normalizeModelId", () => { }); it("uses the default model for empty values", () => { - expect(normalizeModelId(" ")).toBe("composer-2"); - expect(normalizeModelId(null)).toBe("composer-2"); + expect(normalizeModelId(" ")).toBe("gpt-5.2"); + expect(normalizeModelId(null)).toBe("gpt-5.2"); }); }); 
describe("getProviderLabel", () => { - it("returns 'Cursor' for cursor provider", () => { - expect(getProviderLabel("cursor")).toBe("Cursor"); + it("returns 'Codex' for codex provider", () => { + expect(getProviderLabel("codex")).toBe("Codex"); }); }); describe("DEFAULT exports", () => { - it("DEFAULT_MODEL_ID is composer-2", () => { - expect(DEFAULT_MODEL_ID).toBe("composer-2"); + it("DEFAULT_MODEL_ID is gpt-5.2", () => { + expect(DEFAULT_MODEL_ID).toBe("gpt-5.2"); }); it("DEFAULT_REASONING_PROFILE is medium", () => { diff --git a/src/lib/agentConfig.ts b/src/lib/agentConfig.ts index 23c7a9c..aa2b76d 100644 --- a/src/lib/agentConfig.ts +++ b/src/lib/agentConfig.ts @@ -37,6 +37,10 @@ const REASONING_COPY: Partial entry.value) ?? FULL_REASONING_RANGE }; } @@ -256,16 +204,31 @@ function getCursorReasoningParameter(modelId: ModelId, cursorModels: CursorModel } function formatModelLabel(modelId: string) { - return modelId + const normalized = modelId.trim(); + const gptMatch = normalized.match(/^gpt[-_](\d+(?:\.\d+)?)(.*)$/i); + + if (gptMatch) { + const suffix = gptMatch[2] + ?.split(/[-_]/) + .filter(Boolean) + .map(capitalizeModelLabelPart) + .join(" "); + + return suffix ? `GPT-${gptMatch[1]} ${suffix}` : `GPT-${gptMatch[1]}`; + } + + return normalized .split("-") .filter(Boolean) - .map((part) => { - const upper = part.toUpperCase(); - return upper === "GPT" || upper === "O3" ? upper : `${part[0]?.toUpperCase() ?? ""}${part.slice(1)}`; - }) + .map(capitalizeModelLabelPart) .join(" "); } +function capitalizeModelLabelPart(part: string) { + const upper = part.toUpperCase(); + return upper === "GPT" || upper === "O3" ? upper : `${part[0]?.toUpperCase() ?? ""}${part.slice(1)}`; +} + function formatReasoningValue(value: string) { return value .split(/[-_]/) @@ -275,11 +238,11 @@ function formatReasoningValue(value: string) { } function getReasoningDescription(value: string) { - return REASONING_COPY[value]?.description ?? 
"Cursor model parameter value for this account."; + return REASONING_COPY[value]?.description ?? "Codex model parameter value for this account."; } function formatProvider(_provider: ModelProvider) { - return "Cursor"; + return "Codex"; } export function getProviderLabel(provider: ModelProvider) { diff --git a/src/lib/appState.test.ts b/src/lib/appState.test.ts index 73893eb..adf2052 100644 --- a/src/lib/appState.test.ts +++ b/src/lib/appState.test.ts @@ -8,16 +8,30 @@ import { buildWorkspaceNotice } from "./appState"; -function makeEnvironment(overrides?: Partial>>): EnvironmentStatus { +function makeEnvironment(overrides?: Partial>>): EnvironmentStatus { return { scannedAt: new Date().toISOString(), cursor: { - name: "Cursor SDK", + name: "Codex Provider", status: "found", path: null, - detail: "Cursor API key is configured", + detail: "Codex authentication is configured", ...overrides?.cursor }, + codex: { + name: "Codex CLI", + status: "found", + path: "/usr/bin/codex", + detail: "Codex CLI is available", + ...overrides?.codex + }, + docker: { + name: "Docker", + status: "found", + path: "/usr/bin/docker", + detail: "Docker daemon is reachable", + ...overrides?.docker + }, git: { name: "Git", status: "found", @@ -34,14 +48,15 @@ describe("buildCurrentProjectSettings", () => { configuredPrdPath: "docs/PRD.md", configuredSpecPath: "docs/SPEC.md", prdAgentDescription: "Generate PRD", - selectedModel: "composer-2", + providerAuthMode: "subscription", + selectedModel: "gpt-5.2", selectedReasoning: "medium", specAgentDescription: "Generate Spec", executionAgentDescription: "Execute Spec", supportingDocumentPaths: ["docs/api.md"] }); - expect(result.selectedModel).toBe("composer-2"); + expect(result.selectedModel).toBe("gpt-5.2"); expect(result.selectedReasoning).toBe("medium"); expect(result.prdPath).toBe("docs/PRD.md"); expect(result.specPath).toBe("docs/SPEC.md"); @@ -53,7 +68,8 @@ describe("buildCurrentProjectSettings", () => { configuredPrdPath: "", 
configuredSpecPath: "", prdAgentDescription: "prompt", - selectedModel: "composer-2", + providerAuthMode: "subscription", + selectedModel: "gpt-5.2", selectedReasoning: "medium", specAgentDescription: "prompt", executionAgentDescription: "prompt", @@ -69,7 +85,8 @@ describe("buildCurrentProjectSettings", () => { configuredPrdPath: "docs/PRD.md", configuredSpecPath: "docs/SPEC.md", prdAgentDescription: "prompt", - selectedModel: "composer-2", + providerAuthMode: "subscription", + selectedModel: "gpt-5.2", selectedReasoning: "high", specAgentDescription: "prompt", executionAgentDescription: "prompt", @@ -110,7 +127,9 @@ describe("buildWorkspaceNotice", () => { settingsPath: "/Users/me/my-project/.specforge/settings.json", hasSavedSettings: true, settings: { - selectedModel: "composer-2", + agentProvider: "codex", + providerAuthMode: "subscription", + selectedModel: "gpt-5.2", selectedReasoning: "medium", prdAgentDescription: "prompt", specAgentDescription: "prompt", @@ -123,6 +142,8 @@ describe("buildWorkspaceNotice", () => { ignoredFileCount: 0, prdDocument: null, specDocument: null, + prdPreview: null, + specPreview: null, chatSessions: [], lastActiveSessionId: null, ...overrides @@ -154,14 +175,14 @@ describe("buildWorkspaceNotice", () => { }); describe("buildConfiguredModelProviders", () => { - it("returns cursor when the Cursor API key is configured", () => { + it("returns codex when Codex authentication is configured", () => { const providers = buildConfiguredModelProviders( makeEnvironment({ cursor: { status: "found" } }) ); - expect(providers).toEqual(["cursor"]); + expect(providers).toEqual(["codex"]); }); - it("returns empty array when Cursor API key is missing", () => { + it("returns empty array when Codex authentication is missing", () => { const providers = buildConfiguredModelProviders( makeEnvironment({ cursor: { status: "missing" } }) ); @@ -170,11 +191,13 @@ describe("buildConfiguredModelProviders", () => { }); describe("buildMcpItems", () => { - 
it("returns two items for Cursor and git", () => { + it("returns readiness items for Codex, Docker, and git", () => { const items = buildMcpItems(makeEnvironment()); - expect(items).toHaveLength(2); - expect(items[0].name).toBe("Cursor SDK"); - expect(items[1].name).toBe("Git"); + expect(items).toHaveLength(4); + expect(items[0].name).toBe("Codex Provider"); + expect(items[1].name).toBe("Codex CLI"); + expect(items[2].name).toBe("Docker"); + expect(items[3].name).toBe("Git"); }); it("includes status and detail in each item", () => { @@ -187,6 +210,11 @@ describe("buildMcpItems", () => { it("reflects missing status", () => { const items = buildMcpItems(makeEnvironment({ git: { status: "missing" } })); - expect(items[1].status).toBe("missing"); + expect(items[3].status).toBe("missing"); + }); + + it("reflects unavailable status for installed tools that are not reachable", () => { + const items = buildMcpItems(makeEnvironment({ docker: { status: "unavailable" } })); + expect(items[2].status).toBe("unavailable"); }); }); diff --git a/src/lib/appState.ts b/src/lib/appState.ts index e715d0c..87883f1 100644 --- a/src/lib/appState.ts +++ b/src/lib/appState.ts @@ -3,6 +3,7 @@ import type { ModelId, ModelProvider, ProjectContext, + ProviderAuthMode, ReasoningProfileId } from "../types"; import { getModelLabel } from "./agentConfig"; @@ -10,7 +11,7 @@ import { DEFAULT_PROJECT_PRD_PATH, DEFAULT_PROJECT_SPEC_PATH, getWorkspaceDisplayPath, - normalizeProjectSettings, + normalizeProjectSettings, SPECFORGE_SETTINGS_RELATIVE_PATH } from "./projectConfig"; @@ -18,6 +19,7 @@ interface BuildCurrentProjectSettingsOptions { configuredPrdPath: string; configuredSpecPath: string; prdAgentDescription: string; + providerAuthMode: ProviderAuthMode; selectedModel: ModelId; selectedReasoning: ReasoningProfileId; specAgentDescription: string; @@ -51,7 +53,7 @@ export function buildConfiguredModelProviders( const providers: ModelProvider[] = []; if (environment.cursor.status === "found") { - 
providers.push("cursor"); + providers.push("codex"); } return providers; @@ -64,6 +66,16 @@ export function buildMcpItems(environment: EnvironmentStatus): McpListItem[] { detail: environment.cursor.detail, status: environment.cursor.status }, + { + name: environment.codex.name, + detail: environment.codex.detail, + status: environment.codex.status + }, + { + name: environment.docker.name, + detail: environment.docker.detail, + status: environment.docker.status + }, { name: environment.git.name, detail: environment.git.detail, @@ -76,6 +88,7 @@ export function buildCurrentProjectSettings({ configuredPrdPath, configuredSpecPath, prdAgentDescription, + providerAuthMode, selectedModel, selectedReasoning, specAgentDescription, @@ -85,6 +98,7 @@ export function buildCurrentProjectSettings({ return normalizeProjectSettings({ selectedModel, selectedReasoning, + providerAuthMode, prdAgentDescription, specAgentDescription, executionAgentDescription, @@ -135,10 +149,10 @@ export function getPrdGenerationHelperText({ } if (selectedProviderStatus.status !== "found") { - return "Save a Cursor API key in Settings before generating a PRD."; + return "Configure Codex authentication in Settings before generating a PRD."; } - return `This appends your note after the saved PRD agent description from ${configPathDisplay}, runs ${getModelLabel(selectedModel)} through Cursor SDK, and writes markdown to ${configuredDocumentPath}.`; + return `This appends your note after the saved PRD agent description from ${configPathDisplay}, runs ${getModelLabel(selectedModel)} through Sandcastle, and writes markdown to ${configuredDocumentPath}.`; } export function getSpecGenerationHelperText({ @@ -175,7 +189,7 @@ export function getSpecGenerationHelperText({ } if (selectedProviderStatus.status !== "found") { - return "Save a Cursor API key in Settings before generating a spec."; + return "Configure Codex authentication in Settings before generating a spec."; } return `This appends your note after the 
saved spec agent description from ${configPathDisplay}, includes the current PRD content, and writes markdown to ${configuredDocumentPath}.`; diff --git a/src/lib/cursorAgentRuntime.ts b/src/lib/cursorAgentRuntime.ts index 2aece3c..5ac5a56 100644 --- a/src/lib/cursorAgentRuntime.ts +++ b/src/lib/cursorAgentRuntime.ts @@ -95,7 +95,7 @@ export async function runCursorAgentPrompt({ } if (!result.content.trim()) { - throw new Error("Cursor SDK returned an empty response."); + throw new Error("Sandcastle Runtime returned an empty response."); } return stripWrappingCodeFence(result.content); diff --git a/src/lib/documentPreview.test.ts b/src/lib/documentPreview.test.ts new file mode 100644 index 0000000..fdedc1a --- /dev/null +++ b/src/lib/documentPreview.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from "vitest"; +import type { WorkspaceDocument } from "../types"; +import { + getActiveDocumentFromPreview, + getDocumentPreviewFileName, + hasDocumentPreview +} from "./documentPreview"; + +describe("documentPreview", () => { + const canonical: WorkspaceDocument = { + content: "# Canonical", + fileName: "PRD.md", + sourcePath: "docs/PRD.md" + }; + const preview: WorkspaceDocument = { + content: "# Preview", + fileName: "prd.md", + sourcePath: ".specforge/previews/prd.md" + }; + + it("uses the preview document when one exists", () => { + expect(getActiveDocumentFromPreview(canonical, preview)).toBe(preview); + }); + + it("falls back to the canonical document when preview is missing", () => { + expect(getActiveDocumentFromPreview(canonical, null)).toBe(canonical); + }); + + it("reports preview presence from document content", () => { + expect(hasDocumentPreview(preview)).toBe(true); + expect(hasDocumentPreview(null)).toBe(false); + }); + + it("uses stable preview filenames per target", () => { + expect(getDocumentPreviewFileName("prd")).toBe("prd.md"); + expect(getDocumentPreviewFileName("spec")).toBe("spec.md"); + }); +}); diff --git a/src/lib/documentPreview.ts 
b/src/lib/documentPreview.ts new file mode 100644 index 0000000..43cd66e --- /dev/null +++ b/src/lib/documentPreview.ts @@ -0,0 +1,17 @@ +import type { WorkspaceDocument } from "../types"; +import type { DocumentTarget } from "./appShell"; + +export function getActiveDocumentFromPreview( + canonicalDocument: WorkspaceDocument | null, + previewDocument: WorkspaceDocument | null +) { + return previewDocument ?? canonicalDocument; +} + +export function hasDocumentPreview(previewDocument: WorkspaceDocument | null) { + return previewDocument !== null && previewDocument.content.trim().length > 0; +} + +export function getDocumentPreviewFileName(target: DocumentTarget) { + return `${target}.md`; +} diff --git a/src/lib/projectConfig.test.ts b/src/lib/projectConfig.test.ts index 7cd29ff..86071f3 100644 --- a/src/lib/projectConfig.test.ts +++ b/src/lib/projectConfig.test.ts @@ -106,7 +106,9 @@ describe("parseSupportingDocumentPaths", () => { describe("buildDefaultProjectSettings", () => { it("returns expected default values", () => { const settings = buildDefaultProjectSettings(); - expect(settings.selectedModel).toBe("composer-2"); + expect(settings.agentProvider).toBe("codex"); + expect(settings.providerAuthMode).toBe("subscription"); + expect(settings.selectedModel).toBe("gpt-5.2"); expect(settings.selectedReasoning).toBe("medium"); expect(settings.prdPath).toBe("docs/PRD.md"); expect(settings.specPath).toBe("docs/SPEC.md"); @@ -136,12 +138,15 @@ describe("normalizeProjectSettings", () => { it("preserves valid overrides", () => { const result = normalizeProjectSettings({ - selectedModel: "composer-2", + selectedModel: "gpt-5.2", selectedReasoning: "high", + providerAuthMode: "api_key", prdPath: "custom/PRD.md", specPath: "custom/SPEC.md" }); - expect(result.selectedModel).toBe("composer-2"); + expect(result.agentProvider).toBe("codex"); + expect(result.providerAuthMode).toBe("api_key"); + expect(result.selectedModel).toBe("gpt-5.2"); 
expect(result.selectedReasoning).toBe("high"); expect(result.prdPath).toBe("custom/PRD.md"); expect(result.specPath).toBe("custom/SPEC.md"); @@ -152,6 +157,16 @@ expect(result.selectedModel).toBe("account-model"); }); + it("normalizes unsupported providers and auth modes to Codex subscription", () => { + const result = normalizeProjectSettings({ + agentProvider: "cursor", + providerAuthMode: "oauth" + } as unknown as Partial>); + + expect(result.agentProvider).toBe("codex"); + expect(result.providerAuthMode).toBe("subscription"); + }); + it("preserves account-specific reasoning values for dynamic Cursor models", () => { const result = normalizeProjectSettings({ selectedModel: "account-model", diff --git a/src/lib/projectConfig.ts b/src/lib/projectConfig.ts index a19c13c..d565c39 100644 --- a/src/lib/projectConfig.ts +++ b/src/lib/projectConfig.ts @@ -1,4 +1,4 @@ -import type { ProjectSettings, ReasoningProfileId } from "../types"; +import type { ModelProvider, ProjectSettings, ProviderAuthMode, ReasoningProfileId } from "../types"; import { DEFAULT_MODEL_ID, DEFAULT_REASONING_PROFILE, @@ -52,6 +52,8 @@ export const DEFAULT_SPEC_PROMPT = DEFAULT_SPEC_AGENT_DESCRIPTION; export function buildDefaultProjectSettings(): ProjectSettings { return { + agentProvider: "codex", + providerAuthMode: "subscription", selectedModel: DEFAULT_MODEL_ID, selectedReasoning: DEFAULT_REASONING_PROFILE, prdAgentDescription: DEFAULT_PRD_AGENT_DESCRIPTION, @@ -76,6 +78,8 @@ export function normalizeProjectSettings( ); return { + agentProvider: normalizeAgentProvider(value?.agentProvider), + providerAuthMode: normalizeProviderAuthMode(value?.providerAuthMode), selectedModel, selectedReasoning, prdAgentDescription: @@ -90,6 +94,14 @@ }; } +export function normalizeAgentProvider(_value?: string | null): ModelProvider { + return "codex";
+} + +export function normalizeProviderAuthMode(value?: string | null): ProviderAuthMode { + return value === "api_key" ? "api_key" : "subscription"; +} + export function normalizeProjectRelativePath(value?: string | null) { return value?.trim().replace(/\\/g, "/").replace(/^\/+/, "") ?? ""; } diff --git a/src/lib/runtime.ts b/src/lib/runtime.ts index bc7e49a..e402e21 100644 --- a/src/lib/runtime.ts +++ b/src/lib/runtime.ts @@ -60,7 +60,9 @@ export async function runEnvironmentScan(): Promise { if (!isTauriRuntime()) { return { scannedAt: new Date().toISOString(), - cursor: fallbackStatus("Cursor SDK", "Desktop runtime not detected. Start Tauri to read the saved Cursor key."), + cursor: fallbackStatus("Codex Provider", "Desktop runtime not detected. Start Tauri to read Codex authentication."), + codex: fallbackStatus("Codex CLI", "Desktop runtime not detected. Start Tauri to inspect Codex."), + docker: fallbackStatus("Docker", "Desktop runtime not detected. Start Tauri to inspect Docker."), git: fallbackStatus("Git", "Desktop runtime not detected. 
Diff output falls back to the sample review.") }; } @@ -211,6 +213,36 @@ export async function generatePrdDocument(payload: { }); } +export async function saveDocumentPreview(payload: { + workspaceRoot: string; + target: "prd" | "spec"; + content: string; +}): Promise { + if (!isTauriRuntime()) { + throw new Error("Document previews require the desktop runtime."); + } + + return invoke("save_document_preview", { + workspaceRoot: payload.workspaceRoot, + target: payload.target, + content: payload.content + }); +} + +export async function deleteDocumentPreview(payload: { + workspaceRoot: string; + target: "prd" | "spec"; +}): Promise { + if (!isTauriRuntime()) { + throw new Error("Document previews require the desktop runtime."); + } + + await invoke("delete_document_preview", { + workspaceRoot: payload.workspaceRoot, + target: payload.target + }); +} + export async function generateSpecDocument(payload: { workspaceRoot: string; outputPath: string; @@ -235,7 +267,7 @@ export async function executeCursorAgentPrompt(payload: { prompt: string; }): Promise<{ content: string; events: string[] }> { if (!isTauriRuntime()) { - throw new Error("Cursor SDK generation requires the desktop runtime."); + throw new Error("Sandcastle generation requires the desktop runtime."); } return invoke<{ content: string; events: string[] }>("run_cursor_agent_prompt", { payload }); @@ -243,7 +275,7 @@ export async function executeCursorAgentPrompt(payload: { export async function saveCursorApiKey(apiKey: string): Promise { if (!isTauriRuntime()) { - throw new Error("Cursor API key storage requires the desktop runtime."); + throw new Error("Codex API key storage requires the desktop runtime."); } await invoke("save_cursor_api_key", { apiKey }); @@ -251,7 +283,7 @@ export async function saveCursorApiKey(apiKey: string): Promise { export async function deleteCursorApiKey(): Promise { if (!isTauriRuntime()) { - throw new Error("Cursor API key storage requires the desktop runtime."); + throw new 
Error("Codex API key storage requires the desktop runtime."); } await invoke("delete_cursor_api_key"); diff --git a/src/sandcastle/Dockerfile b/src/sandcastle/Dockerfile new file mode 100644 index 0000000..53b3a36 --- /dev/null +++ b/src/sandcastle/Dockerfile @@ -0,0 +1,12 @@ +FROM node:22-bookworm + +RUN apt-get update && apt-get install -y \ + git \ + && rm -rf /var/lib/apt/lists/* + +RUN npm install -g @openai/codex + +RUN usermod -d /home/agent -m -l agent node + +USER agent +WORKDIR /home/agent/workspace diff --git a/src/screens/ConfigurationScreen.tsx b/src/screens/ConfigurationScreen.tsx index 101dc63..34bb514 100644 --- a/src/screens/ConfigurationScreen.tsx +++ b/src/screens/ConfigurationScreen.tsx @@ -3,6 +3,7 @@ import { Folder, GitSolid, NavArrowRight, Refresh } from "iconoir-react"; import { memo } from "react"; import { useShallow } from "zustand/react/shallow"; +import { CliHealthCard } from "../components/CliHealthCard"; import { FIELD_LABEL_CLASS, INPUT_CLASS, @@ -25,8 +26,9 @@ export const ConfigurationScreen = memo(function ConfigurationScreen({ onOpenRecentProject, onRefresh }: ConfigurationScreenProps) { - const { recentProjects } = useSettingsStore( + const { environment, recentProjects } = useSettingsStore( useShallow((state) => ({ + environment: state.environment, recentProjects: state.recentProjects })) ); @@ -144,6 +146,23 @@ export const ConfigurationScreen = memo(function ConfigurationScreen({
+
+
+

+ Runtime Readiness +

+ + Sandcastle / Codex / Docker + +
+
+ + + + +
+
+

diff --git a/src/screens/SettingsScreen.tsx b/src/screens/SettingsScreen.tsx index 8bdf504..748d6ea 100644 --- a/src/screens/SettingsScreen.tsx +++ b/src/screens/SettingsScreen.tsx @@ -10,7 +10,7 @@ import { useAgentStore } from "../store/useAgentStore"; import { useProjectStore } from "../store/useProjectStore"; import { useSettingsStore } from "../store/useSettingsStore"; import { useWorkspaceUiStore } from "../store/useWorkspaceUiStore"; -import type { ModelId, ReasoningProfileId, ThemeMode } from "../types"; +import type { ModelId, ProviderAuthMode, ReasoningProfileId, ThemeMode } from "../types"; interface SettingsScreenProps { onRefresh: () => void; @@ -19,6 +19,7 @@ interface SettingsScreenProps { onSaveCursorApiKey: () => void; onDeleteCursorApiKey: () => void; onModelChange: (model: ModelId) => void; + onProviderAuthModeChange: (mode: ProviderAuthMode) => void; onReasoningChange: (reasoning: ReasoningProfileId) => void; onPrdPromptChange: (value: string) => void; onSpecPromptChange: (value: string) => void; @@ -35,6 +36,7 @@ export const SettingsScreen = memo(function SettingsScreen({ onSaveCursorApiKey, onDeleteCursorApiKey, onModelChange, + onProviderAuthModeChange, onReasoningChange, onPrdPromptChange, onSpecPromptChange, @@ -49,6 +51,7 @@ export const SettingsScreen = memo(function SettingsScreen({ executionAgentDescription, prdPath, prdPrompt, + providerAuthMode, selectedModel, selectedReasoning, specPath, @@ -60,6 +63,7 @@ export const SettingsScreen = memo(function SettingsScreen({ executionAgentDescription: state.executionAgentDescription, prdPath: state.configuredPrdPath, prdPrompt: state.prdPromptTemplate, + providerAuthMode: state.providerAuthMode, selectedModel: state.selectedModel, selectedReasoning: state.selectedReasoning, specPath: state.configuredSpecPath, @@ -117,6 +121,7 @@ export const SettingsScreen = memo(function SettingsScreen({ onDeleteCursorApiKey={onDeleteCursorApiKey} 
onExecutionAgentDescriptionChange={onExecutionAgentDescriptionChange} onModelChange={onModelChange} + onProviderAuthModeChange={onProviderAuthModeChange} onPrdPathChange={onPrdPathChange} onPrdPromptChange={onPrdPromptChange} onReasoningChange={onReasoningChange} @@ -127,6 +132,7 @@ export const SettingsScreen = memo(function SettingsScreen({ onThemeChange={onThemeChange} prdPath={prdPath} prdPrompt={prdPrompt} + providerAuthMode={providerAuthMode} projectErrorMessage={projectErrorMessage} projectStatusMessage={projectStatusMessage} selectedModel={selectedModel} diff --git a/src/store/useProjectStore.ts b/src/store/useProjectStore.ts index d947ada..bcf7f0e 100644 --- a/src/store/useProjectStore.ts +++ b/src/store/useProjectStore.ts @@ -14,6 +14,7 @@ import type { ModelId, PaneMode, ProjectSettings, + ProviderAuthMode, ReasoningProfileId, SelectionRange, SpecAnnotation, @@ -25,12 +26,15 @@ interface ProjectState { specContent: string; prdPath: string; specPath: string; + hasPrdPreview: boolean; + hasSpecPreview: boolean; configuredPrdPath: string; configuredSpecPath: string; supportingDocumentPaths: string[]; prdPromptTemplate: string; specPromptTemplate: string; executionAgentDescription: string; + providerAuthMode: ProviderAuthMode; selectedModel: ModelId; selectedReasoning: ReasoningProfileId; autonomyMode: AutonomyMode; @@ -49,8 +53,11 @@ interface ProjectState { setPrdPromptTemplate: (prompt: string) => void; setSpecPromptTemplate: (prompt: string) => void; setExecutionAgentDescription: (description: string) => void; + setProviderAuthMode: (mode: ProviderAuthMode) => void; setPrdContent: (content: string, path?: string) => void; setSpecContent: (content: string, path?: string) => void; + setPrdPreviewState: (hasPreview: boolean) => void; + setSpecPreviewState: (hasPreview: boolean) => void; setSelectedModel: (model: ModelId) => void; setReasoningProfile: (profile: ReasoningProfileId) => void; setAutonomyMode: (mode: AutonomyMode) => void; @@ -132,6 +139,8 @@ 
export const useProjectStore = create((set, get) => ({ activeTab: "review", prdPaneMode: "preview", specPaneMode: "preview", + hasPrdPreview: false, + hasSpecPreview: false, reviewPrompt: "", selectedSpecRange: null, annotations: buildInitialAnnotations(), @@ -145,6 +154,7 @@ export const useProjectStore = create((set, get) => ({ prdAgentDescription: state.prdPromptTemplate, specAgentDescription: state.specPromptTemplate, executionAgentDescription: state.executionAgentDescription, + providerAuthMode: state.providerAuthMode, prdPath: state.configuredPrdPath, specPath: state.configuredSpecPath, supportingDocumentPaths: state.supportingDocumentPaths, @@ -157,6 +167,7 @@ export const useProjectStore = create((set, get) => ({ prdPromptTemplate: nextSettings.prdAgentDescription, specPromptTemplate: nextSettings.specAgentDescription, executionAgentDescription: nextSettings.executionAgentDescription, + providerAuthMode: nextSettings.providerAuthMode, selectedModel: nextSettings.selectedModel, selectedReasoning: nextSettings.selectedReasoning, supportingDocumentPaths: nextSettings.supportingDocumentPaths @@ -168,6 +179,7 @@ export const useProjectStore = create((set, get) => ({ setPrdPromptTemplate: (prdPromptTemplate) => set({ prdPromptTemplate }), setSpecPromptTemplate: (specPromptTemplate) => set({ specPromptTemplate }), setExecutionAgentDescription: (executionAgentDescription) => set({ executionAgentDescription }), + setProviderAuthMode: (providerAuthMode) => set({ providerAuthMode }), setPrdContent: (prdContent, path) => set({ prdContent, @@ -179,6 +191,8 @@ export const useProjectStore = create((set, get) => ({ specPath: path ?? 
get().specPath, isSpecApproved: false }), + setPrdPreviewState: (hasPrdPreview) => set({ hasPrdPreview }), + setSpecPreviewState: (hasSpecPreview) => set({ hasSpecPreview }), setSelectedModel: (selectedModel) => set((state) => ({ selectedModel, @@ -291,6 +305,7 @@ function buildInitialProjectState() { prdPromptTemplate: defaults.prdAgentDescription, specPromptTemplate: defaults.specAgentDescription, executionAgentDescription: defaults.executionAgentDescription, + providerAuthMode: defaults.providerAuthMode, selectedModel: DEFAULT_MODEL_ID, selectedReasoning: DEFAULT_REASONING_PROFILE, supportingDocumentPaths: defaults.supportingDocumentPaths diff --git a/src/store/useSettingsStore.ts b/src/store/useSettingsStore.ts index 3ac6634..e601ea2 100644 --- a/src/store/useSettingsStore.ts +++ b/src/store/useSettingsStore.ts @@ -35,10 +35,22 @@ function createEnvironmentPlaceholder(): EnvironmentStatus { return { scannedAt: "", cursor: { - name: "Cursor SDK", + name: "Codex Provider", status: "missing", path: null, - detail: "Save a Cursor API key to enable PRD and spec generation." + detail: "Configure Codex authentication to enable Sandcastle turns." + }, + codex: { + name: "Codex CLI", + status: "missing", + path: null, + detail: "Install Codex CLI to discover models and prepare runtime turns." + }, + docker: { + name: "Docker", + status: "missing", + path: null, + detail: "Install Docker and start the daemon to enable the runtime sandbox." 
}, git: { name: "Git", diff --git a/src/types.ts b/src/types.ts index ab30b7b..a87c338 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1,5 +1,6 @@ export type ModelId = string; -export type ModelProvider = "cursor"; +export type ModelProvider = "codex"; +export type ProviderAuthMode = "subscription" | "api_key"; export type ReasoningProfileId = string; export type AutonomyMode = "stepped" | "milestone" | "god_mode"; export type ThemeMode = "dracula" | "light" | "system"; @@ -15,7 +16,7 @@ export type AgentStatus = | "halted" | "error" | "completed"; -export type CliHealth = "found" | "missing" | "unauthorized"; +export type CliHealth = "found" | "missing" | "unauthorized" | "unavailable"; export type AnnotationTone = "info" | "warning" | "success"; export type ChatContextKind = | "prd" @@ -57,6 +58,8 @@ export interface CliStatus { export interface EnvironmentStatus { scannedAt: string; cursor: CliStatus; + codex: CliStatus; + docker: CliStatus; git: CliStatus; } @@ -98,6 +101,8 @@ export interface CursorModel { } export interface ProjectSettings { + agentProvider: ModelProvider; + providerAuthMode: ProviderAuthMode; selectedModel: ModelId; selectedReasoning: ReasoningProfileId; prdAgentDescription: string; @@ -118,6 +123,8 @@ export interface ProjectContext { ignoredFileCount: number; prdDocument: WorkspaceDocument | null; specDocument: WorkspaceDocument | null; + prdPreview: WorkspaceDocument | null; + specPreview: WorkspaceDocument | null; chatSessions: ChatSessionSummary[]; lastActiveSessionId: string | null; }