diff --git a/design/003-ai-aiken-vulnerability-scaffolding.md b/design/003-ai-aiken-vulnerability-scaffolding.md new file mode 100644 index 0000000..0d91260 --- /dev/null +++ b/design/003-ai-aiken-vulnerability-scaffolding.md @@ -0,0 +1,237 @@ +# AI Vulnerability Scaffolding + +## Overview + +This document defines the **Milestone 1 scaffolding** for an AI-assisted vulnerability analysis command in Trix, initially focused on Aiken smart contracts. + +Scope for this milestone is intentionally limited to: +- CLI wiring for a new scoped command: `trix audit` +- Contracts for iterative skill-by-skill analysis state (JSON) +- Contracts for permission prompt generation and final vulnerability report generation +- Local-first execution boundaries and security assumptions +- C4 architecture diagrams in PlantUML + +Out of scope for this milestone: +- Real LLM integration implementation +- Actual command execution orchestration against an AI provider +- Deep prompt engineering and remediation automation + +## Goals + +1. Establish a stable command surface for future implementation. +2. Define the analysis loop model that processes vulnerability skills one by one. +3. Persist progress incrementally in JSON after each skill iteration. +4. Produce a final vulnerability document contract (Markdown output path + structure). +5. Define a local command permission prompt contract (e.g. `grep`, `cat`) for constrained auto-execution. + +## CLI Surface (Scaffolding) + +`trix audit` is a **scoped** command and requires a project context (`trix.toml`). + +**⚠️ EXPERIMENTAL**: This command requires the `unstable` feature to be enabled. 
Build with: +```bash +cargo build --features unstable +``` + +### Command Structure + +The command is currently focused on Aiken smart contracts but designed for future extensibility to other languages: + +```bash +trix audit [options] +``` + +### Command (Milestone 1) + +#### `trix audit` + +Audits smart contract code for vulnerabilities using AI-assisted detection. Currently focused on Aiken (`.ak` files). + +**Arguments:** +- `--state-out` (default: `.tx3/audit/state.json`) - Path where the incremental analysis state JSON will be written +- `--report-out` (default: `.tx3/audit/vulnerabilities.md`) - Path where the final vulnerability report markdown will be written +- `--skills-dir` (default: `skills/vulnerabilities`) - Path to vulnerability skill definitions +- `--provider` (default: `scaffold`) - Analysis provider: `scaffold` | `openai` | `anthropic` | `ollama` + +**Example:** +```bash +trix audit +trix audit --state-out ./custom/state.json +trix audit --provider openai +``` + +## Skill-by-skill Loop Contract + +The analysis process is modeled as an iterative loop: + +1. Load one vulnerability skill definition. +2. Build a focused mini-prompt for that single skill. +3. Execute analysis (future milestone, provider-backed). +4. Append iteration result to JSON state. +5. Continue with next skill until all skills are processed. +6. Render final vulnerability report document from aggregate findings. + +This loop enables narrow prompts per skill, improving precision and traceability. 
+ +## State and Output Contracts + +### Incremental JSON state + +Defined by `AnalysisStateJson` and related structures in: +- `src/commands/audit/model.rs` + +Key sections: +- Source metadata (multi-file) and provider spec +- Permission prompt spec (allowed local commands, scope rules) +- Ordered list of `SkillIterationResult` + +Example (simplified): +```json +{ + "version": "1", + "source_files": [ + "onchain/validators/spend.ak", + "onchain/validators/mint.ak" + ], + "provider": { + "name": "openai-compatible", + "model": "gpt-4.1-mini", + "notes": "Endpoint: https://api.openai.com/v1/chat/completions" + }, + "permission_prompt": { + "shell": "bash", + "allowed_commands": ["grep", "cat", "find", "ls"], + "scope_rules": [ + "Only execute commands within the current project root.", + "Do not write outside designated output artifacts." + ], + "read_scope": "workspace", + "interactive_permissions": false, + "allowed_paths": [] + }, + "iterations": [ + { + "skill_id": "strict-value-equality-001", + "status": "completed", + "findings": [ + { + "title": "Strict equality on full value", + "severity": "high", + "summary": "Strict value equality can reject valid transactions.", + "evidence": ["validators/spend.ak:42"], + "recommendation": "Compare lovelace and assets separately.", + "file": "validators/spend.ak", + "line": 42 + } + ], + "next_prompt": null + } + ] +} +``` + +### Final report + +Defined by `VulnerabilityReportSpec` and a Markdown template scaffold: +- `templates/aiken/report.md` + +Note: the report no longer includes a single `target` path because analysis is performed over a set of source files. + +### Permission prompt + +Template scaffold: +- `templates/aiken/permission_prompt.md` + +This prompt is intended to grant **explicit, bounded, local** command execution rights to the analysis agent. + +## External AI Service Note + +Future milestones may integrate external AI services (for example, **Anthropic**) behind a provider adapter boundary. 
+ +For this milestone, provider integration is represented as a contract only (`ProviderSpec`) and no network behavior is implemented. + +## Local Execution and Safety Boundaries + +The architecture assumes: +- Analysis runs locally from the developer machine. +- Only an allowlist of read-oriented commands is permitted by policy prompt (e.g. `grep`, `cat`, `find`, `ls`). +- Writes are limited to designated output artifacts (state JSON and report document). +- Scope rules constrain path access to project roots. + +## C4 Architecture Diagrams + +C4 diagrams are maintained as separate PlantUML files in [`003-assets/`](003-assets/) for easier image generation and version control. + +### Generating Diagrams + +To generate PNG/SVG images from PlantUML source: + +```bash +# Using PlantUML CLI +plantuml design/003-assets/*.puml + +# Or using Docker +docker run --rm -v $(pwd)/design/003-assets:/data plantuml/plantuml:latest *.puml +``` + +### C4 - Context Diagram + +**Source:** [c4-context.puml](003-assets/c4-context.puml) + +![C4 Context Diagram](https://www.plantuml.com/plantuml/svg/LP71SXen343l-nKg9peDxA4vfPSmj9b9XpGCQNibnXP1nMjx97c1lw-zcsc7qrhMqvEblHSrhBdpiBpTBcDGFEjsGKSClxCFpGSArcU7S51DSjUsR4xpDz93tcL1jhKWwDp6hatUX2gQYJfFktPvErlNgrzFgxOpeiZj_nRpLCYcMIDB35E7_GrClcAFFYRaIGasEGZyP3e31J3WepKU4iS_Q7NoiNcv564trG8KUE2MgyT9FPy_GpBsQDuGEXFAX__nsszddHegL3aWXw9SFCAQOqzkRFkSb6AztsVDZ93USo3P7i08B88UE2QorzAzbEBLyCW7yWXZgXhNuvj1OcQBDo17yhxGCMlAAaNJRD2FAnZ76MT_hG6Ox4XV2rIPCJsqsg1n0ZOwX4_GPn-GpOuywUMzmHSuCIaAV7zv_W6YHX5CSF1a-EZYAG1ZYgnNvc4p3yFWTo0Od8oocIIqj2TRrz4bbS74Q95wh87n5w5XbFjILV_iNm00) + +*System context showing developer interaction with Trix CLI, local filesystem, and future external AI provider integration.* + +### C4 - Container Diagram + +**Source:** [c4-container.puml](003-assets/c4-container.puml) + +![C4 Container 
Diagram](https://www.plantuml.com/plantuml/svg/RLHDKnin3Btlhr1p2jD2Bfmu4LAOIIUqJA3jaN5sHT34QxkM7CXq-jyhhzbj4dg9hG-zPqalUybYegJMmkpySUQT678O7wUqGVSZMLTz85VRr20yYmI-c4oYUJbRapodLMACjPQWaxFQjvDWZSjGfooDfTLaTdvwbrtVRnUJrh1WdEoJd0NDhQexZEpTkC7j9nXznYrQ7p2EJghxdPQqZrS-kSR4tLWYlMhAWnumMyn79_2x0XZWnhKb3KzJGwBUjZMk6QbZcLZW8yoi2TdKIgXB5D19t0LFJY2R9gvJYseygsN9hVKH-mJM-w08znko5XPgmBLRQwHdoUTSC1f1XgqEJwyhiYGxu5p37KiAJPV0eiSHGuY-3Q8mTzqbjn4yOzphum7RO3D0zbqbiWr3IuvEIstX21XROYRleBYFU3hkt4e-uMSaEU3uSl_jQpRpQeWLKpKGCO_6MCKtTiVq_mOY1Xyb8tMNzz2t9cuQ_-8E-z1scrGZmnCf-j7i2kRxIMsfmTWLbojC6nD4p_5DaXJnPA2LQ4XWzIQax9wkM9srCE2hbcJIwHmOuvRgtDEVgfVVzQkCdrXB5hxMh1ko22MaQNiv9ellUWziU5R_7CslbcYBqejOvsxhDyWQoO7ErujCJCpBp6f-2vV8894b_ah8maLR1neuAFYtQn-HWJg1PcMebUnHSfdreAooZ_pRa5jp2H-_PrX4wVC236-qqiT1AgMWLrqtpKuORnegUYivXU4mgUqrSq3VzTfdBj4q_IIUo6Bv6qYliQ6aVwkXuRtWK5vyt82kr-RCwX9TapzDVm40) + +*Container-level view of Trix CLI internals: command entrypoint, skill loop engine, prompt composer, state/report writers, and provider adapter boundary.* + +### C4 - Component Diagram + +**Source:** [c4-component.puml](003-assets/c4-component.puml) + +![C4 Component Diagram](https://www.plantuml.com/plantuml/svg/TPFFRjim3CRlVWekfnPhuqjFFJNDQKMp1Issgw58J29KVpIAQXeCU_T9jc4dNdeBAVBtCUJdXyY2E5a3oxjcwMtm3Xqt2_s6xbSohlgYdJH98UoPGxolQnA7-KxpFcrIH9BUSMwOIs_hO8GhgDl8okXDlRoxNPVKpzLpRKLafFfKP1voRLKjCq5eXzGy-kIKeEELAEetC5iafHboWnLEmSghP71s3NygW85o9QtAfPJSbQ1Tl07ftLRlRY2Gtsg3rxFyQhsQKoo_NgKFg43sp0oWO_3QyUpwZ1hVj36DyK8XbROImdCR5imsSI84susK4-KfGkw5zsGSajXee_a8BGS4Wm3M5YSgLZlT1CenPyOgHx1k8tXCsdocrRv5s7gP8Yhe-dD881DHvdQg4ws0P7MCshjHSJtaBkWosc048vGRav1yecsy56ROueZkXvakapEciLwXARii4QnArDy6JN-TXHFc67LiIvvYvdq5pKY2CtkO8rx4H_Q40_wrH1Utzu92nVf0cHj1EqbeCqGOU7hzyHrQreHF63nnG-WK4bBBA1buwJEVRgihY1DFM45loX_Sz43attKkRuGIWLkTTP-2HKcTsI5hQEvsndloGSlQynu9UWo1TeoxVdMR_jFAad-hwWLDzzbCN4xuEs7a20u_1XEDrUTktNHE24YtRULm-h5XzCMqhHi85U07obH6lD1-RfiKCOuMcDQDzy-YKPS_WKBkmNWVHWkTTlr_LUgljTLEcFxzj6DweE4Oz_tVG5Xy0ocCAUDL3ncNhAd-0G00) + +*Component-level 
detail of the Aiken command module: skill loader, prompt builders, state model, storage, report renderer, and provider adapter.* + +## Skills Source Convention + +Vulnerability skills live under: +- `skills/vulnerabilities/` + +One file per skill, designed for 1:1 loop processing. + +Each skill file uses YAML frontmatter plus optional markdown guidance body: + +Required frontmatter fields: +- `id` +- `name` +- `severity` (`low` | `medium` | `high` | `critical`) +- `description` +- `prompt_fragment` + +Optional frontmatter fields: +- `examples` (list) +- `false_positives` (list) +- `references` (list) +- `tags` (list) +- `confidence_hint` + +The markdown body can include richer instructions, rationale, and examples and is passed as guidance context to the prompt builder. + +## Milestone 1 Acceptance Criteria + +- `trix audit` command exists as a top-level scoped command. +- Command is gated behind `unstable` feature flag (following `publish` pattern). +- `trix audit` is implemented as scaffold with Aiken-focused analysis. +- `src/commands/audit/mod.rs` provides the public command interface. +- `src/commands/audit/mod.rs` contains the core audit implementation. +- `src/commands/audit/model.rs` defines scaffolding contracts for state, findings, and prompts. +- Templates for report and permission prompt exist in `templates/aiken/`. +- `skills/vulnerabilities/` exists with seed skill files. +- This design document includes C4 diagrams as separate PlantUML files in `003-assets/`. +- E2E scaffold tests verify command visibility and baseline behavior for `audit` (tests run with `--features unstable`). diff --git a/design/003-assets/README.md b/design/003-assets/README.md new file mode 100644 index 0000000..0f3e61b --- /dev/null +++ b/design/003-assets/README.md @@ -0,0 +1,44 @@ +# Design 003 Assets + +This folder contains PlantUML C4 architecture diagrams for the AI Aiken Vulnerability Scaffolding design. 
+ +## Files + +- `c4-context.puml` - System context diagram +- `c4-container.puml` - Container-level architecture +- `c4-component.puml` - Component-level details + +## Generating Images + +### Using PlantUML CLI + +Install PlantUML and run: + +```bash +plantuml c4-*.puml +``` + +This will generate PNG files in the same directory. + +### Using Docker + +```bash +docker run --rm -v $(pwd):/data plantuml/plantuml:latest c4-*.puml +``` + +### Using Online Editor + +1. Copy the content of any `.puml` file +2. Go to https://www.plantuml.com/plantuml/uml/ +3. Paste and generate + +### Using VS Code + +Install the PlantUML extension: +- Extension ID: `jebbs.plantuml` +- Right-click on `.puml` file → "Preview Current Diagram" +- Export as PNG/SVG + +## Output + +Generated images (`*.png`, `*.svg`) should be committed to this directory so they render in the markdown document on GitHub and other viewers. diff --git a/design/003-assets/c4-component.puml b/design/003-assets/c4-component.puml new file mode 100644 index 0000000..056dc07 --- /dev/null +++ b/design/003-assets/c4-component.puml @@ -0,0 +1,23 @@ +@startuml C4_Component_AikenVuln +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Component.puml + +Container_Boundary(aiken, "Audit Command Module") { + Component(cmd, "run(args, config, profile)", "mod.rs", "Scoped command entrypoint") + Component(skill_loader, "Skill Loader", "future module", "Loads one vulnerability skill at a time") + Component(mini_prompt, "Mini Prompt Builder", "future module", "Builds focused prompt for current skill") + Component(permission_prompt, "Permission Prompt Builder", "template contract", "Builds local command permission prompt") + Component(state_model, "State Model", "model.rs", "AnalysisStateJson + iteration contracts") + Component(state_store, "State Store", "future module", "Reads/writes incremental JSON state") + Component(report_renderer, "Report Renderer", "template contract", "Renders vulnerability 
markdown") + Component(provider_adapter, "Provider Adapter", "future trait", "Anthropic/other provider integration boundary") +} + +Rel(cmd, skill_loader, "requests next skill") +Rel(cmd, mini_prompt, "builds per-skill prompt") +Rel(cmd, permission_prompt, "builds bounded execution prompt") +Rel(cmd, state_model, "uses contracts") +Rel(cmd, state_store, "persists each loop iteration") +Rel(cmd, report_renderer, "renders final report") +Rel(cmd, provider_adapter, "future: execute AI calls") + +@enduml diff --git a/design/003-assets/c4-container.puml b/design/003-assets/c4-container.puml new file mode 100644 index 0000000..f39a8ed --- /dev/null +++ b/design/003-assets/c4-container.puml @@ -0,0 +1,28 @@ +@startuml C4_Container_AikenVuln +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Container.puml + +Person(dev, "Developer") +System_Boundary(trix, "Trix CLI") { + Container(cli, "Audit Command", "Rust + Clap", "CLI command entrypoint and argument handling") + Container(loop, "Skill Loop Engine", "Rust", "Iterates vulnerability skills and updates state") + Container(prompt, "Prompt Composer", "Rust + Templates", "Builds mini-prompts and permission prompt") + Container(state, "State Writer", "Rust + JSON", "Persists incremental analysis state") + Container(report, "Report Writer", "Rust + Markdown Templates", "Produces final vulnerability report") + Container(provider, "Provider Adapter (Future)", "Rust trait boundary", "Abstracts external AI service") +} + +System_Ext(fs, "Local File System") +System_Ext(ai, "External AI Provider (Future)") + +Rel(dev, cli, "Invokes") +Rel(cli, loop, "Starts audit") +Rel(loop, prompt, "Requests skill mini-prompts") +Rel(loop, state, "Stores iteration results") +Rel(loop, report, "Builds final findings report") +Rel(loop, provider, "Future: asks for analysis") +Rel(state, fs, "Writes state JSON") +Rel(report, fs, "Writes markdown report") +Rel(prompt, fs, "Reads skill files and templates") 
+Rel(provider, ai, "Future network calls") + +@enduml diff --git a/design/003-assets/c4-context.puml b/design/003-assets/c4-context.puml new file mode 100644 index 0000000..64b5f6b --- /dev/null +++ b/design/003-assets/c4-context.puml @@ -0,0 +1,13 @@ +@startuml C4_Context_AikenVuln +!include https://raw.githubusercontent.com/plantuml-stdlib/C4-PlantUML/master/C4_Context.puml + +Person(dev, "Developer", "Runs Trix in a local project") +System(trix, "Trix CLI", "Tx3 package manager") +System_Ext(ai, "External AI Provider", "Optional future provider such as Anthropic") +System_Ext(fs, "Local File System", "Project source, skills, outputs") + +Rel(dev, trix, "Runs `trix audit`") +Rel(trix, fs, "Reads code + vulnerability skills; writes JSON state and Markdown report") +Rel(trix, ai, "Future: sends skill-specific prompts and receives analysis") + +@enduml diff --git a/design/004-audit-implementation-spec.md b/design/004-audit-implementation-spec.md new file mode 100644 index 0000000..aba1412 --- /dev/null +++ b/design/004-audit-implementation-spec.md @@ -0,0 +1,420 @@ +# Audit Command Implementation Spec + +## Status + +This document captures the **currently implemented behavior** of `trix audit` as an implementation-spec companion to [003-ai-aiken-vulnerability-scaffolding.md](003-ai-aiken-vulnerability-scaffolding.md). + +## Scope + +In-scope: +- Full CLI contract currently accepted by `trix audit` +- Runtime behavior of the skill loop +- State/report output contracts as implemented +- Provider behavior (`scaffold`, `openai`, `anthropic`, `ollama`) +- Local read-tool permission and scope enforcement +- Current test-backed acceptance behavior + +Out-of-scope: +- Future UX redesigns +- Non-Aiken source language support + +## Command Surface + +`trix audit` is a **scoped command** (requires `trix.toml` in cwd). 
+ +It is **hidden + unstable-gated**: +- Hidden in clap command listing (`#[command(hide = true)]`) +- Returns an error unless compiled with `--features unstable` + +### CLI Arguments (current) + +```bash +trix audit \ + [--state-out PATH] \ + [--report-out PATH] \ + [--skills-dir DIR] \ + [--provider scaffold|openai|anthropic|ollama] \ + [--endpoint URL] \ + [--model MODEL] \ + [--api-key-env ENV_VAR] \ + [--ai-logs] \ + [--read-scope workspace|strict] \ + [--interactive-permissions] +``` + +Defaults: +- `--state-out`: `.tx3/audit/state.json` +- `--report-out`: `.tx3/audit/vulnerabilities.md` +- `--skills-dir`: `skills/vulnerabilities` +- `--provider`: `scaffold` +- `--read-scope`: `workspace` +- `--ai-logs`: `false` +- `--interactive-permissions`: `false` + +### Provider arguments (required behavior) + +The following arguments are interpreted with provider-specific defaults: + +- `--provider` + - Supported values: `scaffold`, `openai`, `anthropic`, `ollama` + - Any other value must fail with an unsupported provider error + +- `--endpoint` + - Optional override for provider API URL + - Default when omitted: + - `openai`: `https://api.openai.com/v1/chat/completions` + - `anthropic`: `https://api.anthropic.com/v1/messages` + - `ollama`: `http://localhost:11434/v1/chat/completions` + - `scaffold`: not used + +- `--model` + - Optional model override + - Default when omitted: + - `openai`: `gpt-4.1-mini` + - `anthropic`: `claude-3-5-haiku-latest` + - `ollama`: `llama3.1` + - `scaffold`: not used + +- `--api-key-env` + - Optional environment-variable name override for API credentials + - Default when omitted: + - `openai`: `OPENAI_API_KEY` + - `anthropic`: `ANTHROPIC_API_KEY` + - `ollama`: not required (fixed placeholder token is used) + - `scaffold`: not required + - Runtime behavior: + - `openai` and `anthropic` must fail early if the resolved env var is not set + - `ollama` does not read env credentials and uses `ollama` as a fixed API key string + +- `--ai-logs` + - When enabled, prints iterative model/tool progress logs to stderr + - 
Logs include step counts, requested local actions, and (truncated) model/tool output + +Examples: + +```bash +# OpenAI with defaults +trix audit --provider openai + +# OpenAI with endpoint/model/api key env overrides +trix audit --provider openai \ + --endpoint https://example.com/v1/chat/completions \ + --model gpt-4.1 \ + --api-key-env MY_OPENAI_KEY + +# Anthropic default endpoint + model +trix audit --provider anthropic + +# Ollama local runtime +trix audit --provider ollama --ai-logs +``` + +## High-Level Execution Flow + +1. Build provider from args. +2. Determine `project_root = current_dir`. +3. Discover source files recursively under project root: + - Include: `*.ak` + - Skip directories: `.git`, `target`, `.tx3`, `build` +4. If no `.ak` files were found, fall back to `config.protocol.main` as a source reference. +5. Build `PermissionPromptSpec` based on `read_scope` and `interactive_permissions`. +6. Load skills from `--skills-dir`. + - If directory is missing and arg is default `skills/vulnerabilities`, load embedded seed skills. + - If directory is missing and arg is custom, fail. +7. Initialize `AnalysisStateJson` with empty iterations and write it immediately. +8. For each skill in sorted order: + - Compose mini-prompt from skill metadata/body. + - Call provider `analyze_skill(...)`. + - Append iteration to state. + - Persist full state JSON after each skill. +9. Build aggregated report from all findings. +10. Render markdown via template and write report file. +11. Print completion summary to stdout. + +## Data Contracts (Implemented) + +Defined in `src/commands/audit/model.rs`. 
+ +### `VulnerabilitySkill` +Required semantic fields: +- `id`, `name`, `severity`, `description`, `prompt_fragment` + +Optional/collection fields (default empty if missing): +- `examples`, `false_positives`, `references`, `tags` +- `confidence_hint` optional string +- `guidance_markdown` from markdown body (post-frontmatter) + +### `AnalysisStateJson` +```json +{ + "version": "1", + "source_files": ["..."], + "provider": { + "name": "...", + "model": "... or null", + "notes": "..." + }, + "permission_prompt": { + "shell": "bash", + "allowed_commands": ["grep", "cat", "find", "ls"], + "scope_rules": ["..."], + "read_scope": "workspace|strict", + "interactive_permissions": false, + "allowed_paths": ["..."] + }, + "iterations": [ + { + "skill_id": "...", + "status": "completed|scaffolded|...", + "findings": [ + { + "title": "...", + "severity": "...", + "summary": "...", + "evidence": ["..."], + "recommendation": "...", + "file": "optional", + "line": 42 + } + ], + "next_prompt": { + "skill_id": "...", + "text": "..." + } + } + ] +} +``` + +### `VulnerabilityReportSpec` +- `title` +- `generated_at` (UTC RFC3339) +- `findings` (flattened from all iterations) + +## Skill File Contract (Implemented Parser) + +Each skill file must be markdown with YAML frontmatter delimited by `---`. 
+ +Rules: +- Missing frontmatter delimiters => error +- Unknown frontmatter fields => error (`deny_unknown_fields`) +- Required string fields must be non-empty after trim +- `severity` must be one of: `low|medium|high|critical` (case-normalized to lowercase) +- Tabs in frontmatter are normalized to two spaces before YAML parse +- Markdown body after frontmatter is stored in `guidance_markdown` + +## Prompt Construction + +Per skill, a mini-prompt is composed from: +- `Skill ID` +- `Name` +- `Severity` +- `Description` +- `Prompt Fragment` +- Optional sections for tags/hint/examples/false positives/references/guidance markdown + +Provider initial prompt includes: +- Mini-prompt text +- Referenced source files list +- Allowed commands + scope rules from `PermissionPromptSpec` + +## Permission Model and Local Tooling + +Allowed tool actions requested by model: +- `read_file` +- `grep` +- `list_dir` +- `find_files` +- `final` + +Mapped local commands: +- `read_file` -> `cat` +- `grep` -> `grep -n -C NUM -- PATTERN PATH` +- `list_dir` -> `ls -la PATH` +- `find_files` -> `find PATH -type f [-name PATTERN]` + +Global safeguards: +- Requested path must canonicalize successfully +- Canonical path must remain under project root +- Command must be in `allowed_commands` +- Output truncation at 30,000 chars + +### Read scope modes + +`workspace`: +- Reads/searches over any path under project root + +`strict`: +- Denies `list_dir` and `find_files` +- Allows reads/searches only on regular files listed in `permission_prompt.allowed_paths` +- `allowed_paths` is populated from discovered source files (displayed relative paths) + +### Interactive permissions + +If enabled: +- Each local read request prompts `Allow this request? 
[y/N]:` +- Non-yes response denies request with an explicit error + +## Providers (Current) + +### `scaffold` +- No network calls +- Returns one iteration with: + - `status = scaffolded` + - empty findings + - placeholder `next_prompt` + +### `openai` +- Provider spec: + - `name = openai-compatible` + - `notes = Endpoint: {endpoint}` +- Defaults: + - endpoint: `https://api.openai.com/v1/chat/completions` + - model: `gpt-4.1-mini` + - api key env: `OPENAI_API_KEY` +- Request shape: + - `model`, `messages`, `response_format: { type: json_object }` + - auth: Bearer API key +- Response extraction: + - `/choices/0/message/content` (string JSON) +- Iterative loop: + - max 25 steps (`MAX_AGENT_STEPS`) + - parse model output as action (`read request` or `final`) + - execute local read request and feed output back as user message + +### `anthropic` +- Provider spec: + - `name = anthropic` + - `notes = Endpoint: {endpoint}` +- Defaults: + - endpoint: `https://api.anthropic.com/v1/messages` + - model: `claude-3-5-haiku-latest` + - api key env: `ANTHROPIC_API_KEY` + - version header: `2023-06-01` +- Request shape: + - `model`, `max_tokens`, `system`, `messages` + - headers: `x-api-key`, `anthropic-version` +- Response extraction: + - `/content/0/text` (string JSON) +- Same 25-step interactive read loop as `openai` + +### `ollama` +- Implemented via `OpenAiProvider` compatibility +- Defaults: + - endpoint: `http://localhost:11434/v1/chat/completions` + - model: `llama3.1` + - api key literal: `ollama` + +## Parsing of AI Output + +Accepted model output forms: +- Raw JSON object +- JSON inside fenced blocks (```json ... ``` or ``` ... 
```) + +Action interpretation: +- If `action` missing but payload has `findings` or `status` => treated as `final` +- `final` payload is converted into `SkillIterationResult` +- `findings[*].line` can be number or numeric string +- Also supports nested fallback location fields: + - `location.file` + - `location.line` + +Defaults when missing: +- iteration status: `completed` +- finding title: `Untitled finding` +- finding severity: skill severity +- other finding text fields default to empty string + +## Output Rendering + +Report template: `templates/aiken/report.md` + +Findings markdown rendering: +- Empty findings => `- *(none)*` +- Per finding include title, severity, summary, recommendation +- Include `Location` line when `file` and/or `line` available + +Permission template file exists (`templates/aiken/permission_prompt.md`) but current runtime behavior constructs prompt data directly from `PermissionPromptSpec` and does not render this template for provider calls. + +## Embedded Seed Skills + +When using default `--skills-dir` and path is absent, embedded content is loaded from: +- `skills/vulnerabilities/001-strict-value-equality.md` + +## Current Acceptance Signals (Tests) + +E2E tests assert: +- `audit --help` works with unstable feature +- `audit` fails without `trix.toml` (scoped command requirement) +- `audit` fails for missing custom skills dir +- `audit` succeeds after `init --yes` +- Outputs are created: + - `.tx3/audit/state.json` + - `.tx3/audit/vulnerabilities.md` +- State contract basics: + - `version == "1"` + - `iterations.len() == 3` for seed skills + +Unit tests assert: +- Skill parser behavior and validation errors +- Source discovery recursion and ignored directories +- Strict read scope allows known file and rejects directory listing +- Report markdown includes location formatting + +## Specification Evolution Notes + +The following items represent milestone evolution from initial scaffolding to current implementation: + +1. 
**Real provider integrations now exist** (`openai`, `anthropic`, `ollama`), not contract-only. +2. **Interactive read tool loop is implemented** with bounded local command execution. +3. **Additional CLI controls exist** (`endpoint`, `model`, `api_key_env`, `ai_logs`, `read_scope`, `interactive_permissions`). +4. **Strict/workspace read scopes are enforced in code**. +5. **Seed skill fallback is embedded** when default skills directory is not found. +6. **Permission prompt template is currently not part of runtime rendering path**. + +## Spec-Driven Viability Assessment + +Using this document for spec-driven development of the current `audit` behavior is **viable**. + +This section upgrades the contract into strict spec-first form via: +- normative requirement levels (`MUST`/`SHOULD`) +- requirement-to-test traceability +- canonical golden fixtures + +## Normative Requirements + +### MUST (behavior compatibility) + +- Same CLI flags, defaults, and unstable gating behavior. +- Same provider selection and provider-specific defaults/env handling. +- Same `.ak` discovery semantics and skipped directories. +- Same skills parsing rules (frontmatter, required fields, severity enum, unknown-field rejection). +- Same iterative per-skill persistence to state JSON. +- Same read-request action schema and local command mapping. +- Same path confinement and strict/workspace enforcement. +- Same max step guard (`25`) and command output truncation (`30_000` chars). +- Same report generation shape and findings rendering. +- Same seed-skill fallback behavior and baseline test outcomes. + +### SHOULD (implementation quality) + +- Keep provider/network and local-tooling boundaries separated behind provider adapter interfaces. +- Preserve deterministic ordering where current implementation sorts inputs/paths. +- Preserve error messages close to current wording when feasible, to reduce e2e churn. +- Keep state/report writes atomic at logical checkpoints (initial state + post-iteration). 
+ +## Requirement-to-Test Traceability + +| Requirement | Test anchors | +|---|---| +| CLI visibility and unstable behavior | `tests/e2e/smoke.rs::audit_help_runs_without_error`, `tests/e2e/smoke.rs::audit_help_displays_provider_options` | +| Scoped command requirement (`trix.toml`) | `tests/e2e/edge_cases.rs::aiken_audit_fails_without_trix_config` | +| Missing custom skills dir failure | `tests/e2e/edge_cases.rs::aiken_audit_fails_with_missing_skills_dir` | +| Baseline success path + output artifacts | `tests/e2e/happy_path.rs::aiken_audit_runs_in_initialized_project` | +| State shape baseline (`version`, seed iterations) | `tests/e2e/happy_path.rs::aiken_audit_runs_in_initialized_project` | +| Skill parser frontmatter/body behavior | `src/commands/audit/mod.rs::parse_skill_content_reads_frontmatter_and_guidance` | +| Skill parser validation failures | `src/commands/audit/mod.rs::parse_skill_content_requires_frontmatter`, `src/commands/audit/mod.rs::parse_skill_content_rejects_invalid_severity` | +| Source discovery recursion and filtering | `src/commands/audit/mod.rs::discover_source_files_finds_ak_files_recursively`, `src/commands/audit/mod.rs::discover_source_files_skips_target_tx3_and_build_dirs` | +| Strict read-scope allows known file | `src/commands/audit/providers/shared.rs::execute_read_request_strict_allows_known_file` | +| Strict read-scope denies directory listing | `src/commands/audit/providers/shared.rs::execute_read_request_strict_rejects_list_dir` | +| Report location rendering contract | `src/commands/audit/mod.rs::render_findings_markdown_includes_location_when_available` | diff --git a/design/005-aiken-ast-validator-context.md b/design/005-aiken-ast-validator-context.md new file mode 100644 index 0000000..704366d --- /dev/null +++ b/design/005-aiken-ast-validator-context.md @@ -0,0 +1,280 @@ +# Aiken AST & Validator Context for Audit + +## Status + +Proposed implementation spec for extending `trix audit` with: +- **Phase 1**: on-demand Aiken AST 
generation +- **Phase 2**: `ValidatorContextMap` extraction from AST + +--- + +## Goals + +1. Ensure `trix audit` can obtain a **fresh structural view** of Aiken code without relying on pre-existing artifacts. +2. Build a deterministic `ValidatorContextMap` that can be injected into audit prompts. +3. Persist enough metadata in state to make runs reproducible and diagnosable. + +--- + +## Scope + +- New AST generation flow in `audit` execution path. +- New model contract for validator context. +- Prompt template/data-path extension to include validator context. +- State JSON extension to include AST/context metadata. +- Failure semantics for AST generation/parsing. +- Unit/e2e acceptance coverage for phase behavior. + +--- + +## High-Level Flow (Phase 1 + 2) + +Before skill loop execution: + +1. Discover `.ak` source files (existing behavior). +2. Generate Aiken AST on-demand (new behavior). +3. Parse AST into normalized internal structures. +4. Build `ValidatorContextMap` (validator-centric mapping). +5. Add this context to: + - initial prompt rendering payload + - persisted analysis state +6. Run existing skill loop unchanged, except prompts now include validator context block. + +--- + +## CLI Surface Changes + +No mandatory user-facing flags are required for baseline phase 1–2. + +Optional (recommended) additions: +- `--ast-out PATH` (default: `.tx3/audit/aiken-ast.json`) +- `--no-ast-cache` (default: false) + +If optional flags are deferred, the runtime should still write the AST snapshot to the default path. + +--- + +## Data Contracts + +### `AnalysisStateJson` extension + +Add fields: + +```json +{ + "ast": { + "path": ".tx3/audit/aiken-ast.json", + "fingerprint": "sha256:...", + "generated_at": "2026-02-26T12:00:00Z", + "tool": { + "name": "aiken", + "version": "vX.Y.Z" + } + }, + "validator_context": { + "validators": [ ... 
] + } +} +``` + +### `AstMetadata` + +- `path`: persisted AST snapshot path (workspace-relative in state) +- `fingerprint`: deterministic digest of AST content (or source-set digest) +- `generated_at`: RFC3339 UTC timestamp +- `tool.name`: fixed string `aiken` +- `tool.version`: resolved from CLI runtime + +### `ValidatorContextMap` + +```json +{ + "validators": [ + { + "id": "vesting.hello_world", + "module": "validators/vesting.ak", + "source_file": "onchain/validators/vesting.ak", + "source_span": { + "start_line": 13, + "end_line": 31 + }, + "handlers": [ + { + "name": "spend", + "parameters": [ + { "name": "datum", "type": "Option" }, + { "name": "redeemer", "type": "Redeemer" }, + { "name": "_own_ref", "type": "OutputReference" }, + { "name": "self", "type": "Transaction" } + ] + }, + { + "name": "else", + "parameters": [ + { "name": "_", "type": "Unknown" } + ] + } + ] + } + ] +} +``` + +Normalization rules: +- `validators` MUST be sorted deterministically by `id` then `source_file`. +- `handlers` MUST preserve source order when available. +- `parameters` MUST preserve declared order. +- If precise type text is unavailable, set type to `"Unknown"` (do not omit parameter). +- If source span is unavailable, omit `source_span`. + +--- + +## AST Generation Contract (Phase 1) + +`audit` MUST execute an on-demand AST generation step before skill analysis. + +Requirements: +- MUST run within current project root. +- MUST fail the audit run if AST generation fails. +- MUST persist raw AST output to `.tx3/audit/aiken-ast.json` (or configured path). +- MUST record Aiken tool version in state metadata. +- SHOULD avoid repeated generation in same run once AST is available. + +Failure behavior: +- Return explicit error category: + - Aiken CLI missing + - Aiken command failed + - AST output unreadable/invalid JSON + +No fallback behavior is defined in this phase. + +--- + +## Validator Context Extraction (Phase 2) + +Parser must transform AST into `ValidatorContextMap`. 
+ +Extraction requirements: +- MUST enumerate all validator definitions in analyzed source set. +- MUST extract handler names and ordered parameter lists. +- MUST include best-effort type display for each parameter. +- MUST include source file path linkage for each validator. +- SHOULD include source spans when present in AST. + +Validation requirements: +- If AST is valid but yields no validators, run continues with empty validator list. +- If AST schema is incompatible, fail with parse-contract error. + +--- + +## Prompt Integration + +Template update target: +- `templates/aiken/audit_agent_initial_user_prompt.md` + +Add new section after source references: + +```markdown +Validator context map: +{{VALIDATOR_CONTEXT_MAP}} +``` + +Rendering rules: +- Use concise markdown bullets (not raw JSON dump) for readability. +- Include: + - validator id + - source file + - handlers and parameter signatures +- If empty: render `- (none)`. + +Provider integration: +- Existing providers (`openai`, `anthropic`, `ollama`, `scaffold`) receive the same expanded prompt content via shared builder. + +--- + +## Implementation Notes (Code Placement) + +Likely code touchpoints: +- `src/commands/audit/mod.rs` + - orchestration: AST generation + context extraction prior to skill loop + - state population +- `src/commands/audit/model.rs` + - add `AstMetadata`, `ValidatorContextMap`, related structs +- `src/commands/audit/providers/shared.rs` + - extend `build_initial_user_prompt(...)` + - renderer for validator context markdown block +- `templates/aiken/audit_agent_initial_user_prompt.md` + - add `{{VALIDATOR_CONTEXT_MAP}}` placeholder + +Recommended internal modules: +- `src/commands/audit/ast.rs` + - command execution + AST load + - schema adapter/parser into internal normalized models + +--- + +## Determinism & Caching + +Minimum deterministic guarantees: +- Stable sort ordering for validator map. +- Stable markdown rendering order. +- State includes fingerprint for traceability. 
+ +Caching (optional in phase 1–2, but recommended): +- Reuse AST file if fingerprint of relevant sources unchanged. +- `--no-ast-cache` bypasses reuse. + +--- + +## Security & Permissions + +- AST generation is local and non-interactive. +- No additional AI read permissions are introduced by this phase. +- Generated AST artifact remains inside project `.tx3/` output scope. + +--- + +## Acceptance Criteria + +Phase 1 accepted when: +- `trix audit` generates AST snapshot on each run (or cache-hit behavior if enabled). +- Run fails clearly when Aiken CLI/AST generation fails. +- State JSON includes AST metadata block. + +Phase 2 accepted when: +- Validator context map is extracted and persisted in state. +- Initial provider prompt includes rendered validator context map. +- Map includes validator handlers and ordered parameter signatures. +- Deterministic ordering verified by tests. + +--- + +## Testing Plan + +Unit tests: +- AST parse adapter: + - parses validators/handlers/parameters + - handles missing type info with `Unknown` + - deterministic sorting +- Prompt renderer: + - renders non-empty context map + - renders `- (none)` for empty map + +Integration/e2e tests: +- `audit` produces `.tx3/audit/aiken-ast.json`. +- `state.json` contains `ast` and `validator_context` blocks. +- Prompt-building path includes `Validator context map:` section. + +Negative tests: +- Missing Aiken binary => explicit failure. +- Invalid AST JSON => explicit failure. + +--- + +## Open Questions + +1. Which exact Aiken command/output format is canonical for AST export in current supported versions? +2. Should type rendering preserve Aiken syntax verbatim or use normalized aliases? +3. Should `source_span` include columns now or lines only? + +These questions must be resolved before implementation starts, but do not change the phase scope. 
diff --git a/design/006-heuristic-audit-provider-minimal.md b/design/006-heuristic-audit-provider-minimal.md new file mode 100644 index 0000000..de618a7 --- /dev/null +++ b/design/006-heuristic-audit-provider-minimal.md @@ -0,0 +1,159 @@ +# Heuristic Audit Provider (No-LLM) — Minimal Milestone 2 Spec + +## Status + +Draft (spec-first). + +This document defines the minimal implementation scope to add a local, deterministic heuristic analysis provider for `trix audit` without using LLMs. + +## Goals + +- Provide a functioning heuristic analysis engine for common Aiken vulnerability patterns. +- Expose heuristic scanning via the existing `trix audit` CLI flow. +- Reuse current state/report contracts and avoid breaking compatibility. +- Keep implementation minimal and focused on Milestone 2 outputs. + +## Scope + +In-scope: +- New `heuristic` provider in the existing provider selector. +- Rule-based detection for the 3 currently embedded vulnerability skills: + - `strict-value-equality-001` + - `missing-address-validation-002` + - `unvalidated-datum-003` +- Deterministic, local-only analysis (no network calls, no LLM/tool loop). +- Continued use of current output files: + - `.tx3/audit/state.json` + - `.tx3/audit/vulnerabilities.md` + +Out-of-scope: +- Generic interpretation of arbitrary custom skills. +- Replacing existing LLM providers (`openai`, `anthropic`, `ollama`). +- New output formats or schema changes. +- Type-checked semantic analysis beyond untyped AST (future work). 
+ +## Current Architecture Anchors + +- Audit orchestration and skill loop: `src/commands/audit/mod.rs` +- Provider abstraction and factory: `src/commands/audit/providers/mod.rs` +- Heuristic provider adapter: `src/commands/audit/providers/heuristic.rs` +- Heuristic detector engine (AST-first): `src/commands/audit/providers/heuristic_detectors.rs` +- AST/cache and validator context: `src/commands/audit/ast.rs` +- Analysis/report data contracts: `src/commands/audit/model.rs` +- Existing seed skills: + - `skills/vulnerabilities/001-strict-value-equality.md` + - `skills/vulnerabilities/002-missing-address-validation.md` + - `skills/vulnerabilities/003-unvalidated-datum.md` + +## CLI Contract Delta + +`--provider` MUST accept `heuristic`. + +Defaults remain unchanged: +- Default provider stays `scaffold`. +- `heuristic` does not require `--endpoint`, `--model`, or `--api-key-env`. + +## High-Level Execution Flow (heuristic mode) + +1. Build provider from CLI args (`heuristic`). +2. Discover source files and load/reuse AST cache. +3. Load vulnerability skills from `--skills-dir` (or embedded seeds fallback). +4. For each skill, run deterministic local rule evaluation. +5. Persist incremental state after each skill. +6. Render final report with existing markdown template. + +## Heuristic Provider Requirements + +### Functional requirements + +- MUST implement `AnalysisProvider` and return `SkillIterationResult` for each skill. +- MUST run without network/API keys. +- MUST be deterministic in findings ordering and status values. +- MUST support only the 3 known embedded skill IDs in this milestone. +- MUST continue processing when a skill is not supported. + +### Unsupported skills + +If a skill ID is not supported by the heuristic provider: +- `status` MUST be `unsupported-skill`. +- `findings` MUST be empty. +- `next_prompt` MUST be `None`. +- Audit execution MUST continue. 
+ +## Detection Strategy (M2 minimal) + +The provider uses an **AST-first** approach: +- Parse each `.ak` source into Aiken `UntypedModule` (`aiken_lang` parser). +- Traverse validator handlers/fallback expressions and patterns (`UntypedExpr`, `UntypedPattern`). +- Apply deterministic rule checks from AST structure and operators. +- Use text matching only as fallback when AST parsing fails for a file. + +This keeps detection deterministic, local-only, and less fragile than string-only scanning. + +### Rule 1: strict-value-equality-001 + +Report when AST `BinOp::Eq` compares expressions that include ADA/value signals. + +Do NOT report when clear safe patterns are detected, e.g.: +- `without_lovelace(...)` +- minimum checks (`>=`) for lovelace/value constraints + +### Rule 2: missing-address-validation-002 + +Report when AST patterns extract script credentials from output addresses (e.g. `Script(hash_var)`) but no later equality/inequality validation references that extracted variable. + +Do NOT report when explicit address checks are present. + +### Rule 3: unvalidated-datum-003 + +Report when inline datum is extracted from output (e.g. `InlineDatum(x)`) but is not semantically validated, or is validated only partially (e.g. spread pattern `Datum { ..., .. }`). + +Do NOT report when evidence suggests complete datum extraction/validation. + +## Data Contract Compatibility + +- `AnalysisStateJson` schema remains unchanged. +- `VulnerabilityFinding` schema remains unchanged. +- Report rendering remains unchanged. +- Provider metadata SHOULD identify `heuristic` clearly in `state.json`. + +## Caching / Memory Requirements + +- The provider MUST reuse AST/context built by existing audit flow. +- Existing AST cache in `.tx3/audit/aiken-ast.json` remains the inter-run memory mechanism. +- `--no-ast-cache` MUST still force regeneration. +- Heuristic rule execution MUST be AST-first even when cache is present (parsing source modules directly for rule traversal). 
+ +## Security and Isolation + +- No outbound requests. +- No AI tool-loop execution path. +- Only local workspace file reads under existing audit orchestration. + +## Acceptance Criteria (Milestone 2 minimal) + +- A1: `trix audit --provider heuristic` produces a structured vulnerability report. +- B1: Rule behavior is consistent with the 3 public skill definition files. +- C1: Users can execute heuristic scans locally end-to-end from CLI. +- D1: Running against known vulnerable scripts yields non-zero findings. + +## Testing Plan + +- Unit tests for each heuristic rule: + - positive and negative scenarios + - unsupported-skill behavior +- E2E audit test for `--provider heuristic` in initialized project. +- Keep existing audit smoke/edge coverage passing. + +## Requirement-to-Test Traceability (initial) + +- Provider selection supports `heuristic` → audit provider validation tests. +- End-to-end execution and artifacts → `tests/e2e/happy_path.rs`. +- Unsupported skill non-fatal handling → heuristic provider unit test. +- Contract compatibility (`state.json`, report rendering) → existing audit happy-path assertions + heuristic additions. + +## Open Questions (deferred) + +- Should heuristic become default provider in a later milestone? +- Should custom external skills be supported beyond known IDs? +- Should future versions parse semantic expressions from typed AST for lower false positives? diff --git a/docs/superpowers/plans/2026-05-05-extract-audit-to-preflight.md b/docs/superpowers/plans/2026-05-05-extract-audit-to-preflight.md new file mode 100644 index 0000000..096c5f6 --- /dev/null +++ b/docs/superpowers/plans/2026-05-05-extract-audit-to-preflight.md @@ -0,0 +1,854 @@ +# Extract `trix audit` to `preflight` Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Replace the in-tree `trix audit` implementation with a thin spawn wrapper that delegates to the standalone `preflight` binary, drop the heavy `aiken-lang` and `serde_yaml_ng` dependencies, and add `preflight` to the `tx3up` toolchain manifests so users get the binary on their next refresh. + +**Architecture:** `commands::audit::run` becomes a thin dispatcher behind the existing `unstable` feature gate; when enabled it calls a new `spawn::preflight::run` that resolves the binary via `home::tool_path("preflight")` and forwards every preflight CLI flag plus injects `--main-source` from `RootConfig.protocol.main`. Pattern mirrors `spawn::tx3c` / `spawn::dolos` / `spawn::cshell`. + +**Tech Stack:** Rust 2024 edition, `clap` v4 derive macros, `miette` for diagnostics, `assert_cmd` + `tempfile` for integration tests, `std::process::Command` for spawning. + +**Repos involved:** +- `/Users/mduthey/Documents/Work/txpipe/tx3/trix` (this repo, on branch `feat/aiken-vulnerability-detection`) +- `/Users/mduthey/Documents/Work/txpipe/tx3/toolchain` (separate repo, `tx3-lang/toolchain`) + +**Spec:** `docs/superpowers/specs/2026-05-05-extract-audit-to-preflight-design.md` + +--- + +## File Structure + +### Files created + +- `src/commands/audit.rs` — flat file replacing the `src/commands/audit/` directory. Owns the clap `Args` struct (mirror of preflight's flags) and the `unstable` feature gate. Single responsibility: CLI surface for `trix audit`. +- `src/spawn/preflight.rs` — sibling of `spawn::tx3c`/`spawn::dolos`. Single responsibility: build the `Command::new(home::tool_path("preflight"))` invocation, forward flags, inject `--main-source`. +- `tests/e2e/audit_wrapper.rs` (new module under e2e) — integration test for the spawn forwarding contract using a mock preflight script. Single responsibility: verify the wrapper sends the right argv. 
+ +### Files modified + +- `src/spawn/mod.rs` — add `pub mod preflight;` +- `Cargo.toml` — remove `aiken-lang` and `serde_yaml_ng` from `[dependencies]` +- `tests/e2e/mod.rs` — register the new `audit_wrapper` test module +- `tests/e2e/happy_path.rs` — remove `aiken_audit_runs_in_initialized_project` and `aiken_audit_runs_with_heuristic_provider` (depend on internal `trix::commands::audit::model::*` types that no longer exist; replaced by the new spawn wrapper test plus preflight's own E2E tests) +- `tests/e2e/edge_cases.rs` — remove `aiken_audit_fails_with_missing_skills_dir` (asserts on a preflight-side error message we no longer own) + +### Files deleted + +- `src/commands/audit/` (entire directory: `mod.rs`, `ast.rs`, `model.rs`, and all of `providers/`) +- `skills/vulnerabilities/` (3 markdown files embedded by `audit/mod.rs` via `include_str!`) +- `templates/aiken/` (5 markdown files embedded by `audit/providers/shared.rs` and `audit/mod.rs` via `include_str!`) +- `tests/fixtures/audit/` (currently empty placeholder) + +### Files unchanged (verified) + +- `src/cli.rs` — `Audit(commands::audit::Args)` variant signature is preserved (Args struct keeps the same name and ReadScopeArg enum) +- `src/main.rs` — `audit::run(args, &config, &profile)` signature is preserved +- `src/commands/mod.rs` — `pub mod audit;` resolves to either the dir or the file; no edit needed +- All other commands and their dependencies + +--- + +## Phase 1: Toolchain manifest update + +This phase happens in a different repo (`tx3-lang/toolchain` at `/Users/mduthey/Documents/Work/txpipe/tx3/toolchain`) and must be merged before Phase 2's release reaches users. 
+ +### Task 1: Add `preflight` to all three toolchain manifests + +**Files:** +- Modify: `/Users/mduthey/Documents/Work/txpipe/tx3/toolchain/manifest-stable.json` +- Modify: `/Users/mduthey/Documents/Work/txpipe/tx3/toolchain/manifest-beta.json` +- Modify: `/Users/mduthey/Documents/Work/txpipe/tx3/toolchain/manifest-nightly.json` + +- [ ] **Step 1: Switch to the toolchain repo and create a feature branch** + +```bash +cd /Users/mduthey/Documents/Work/txpipe/tx3/toolchain +git checkout main +git pull +git checkout -b add-preflight +``` + +- [ ] **Step 2: Add `preflight` entry to `manifest-stable.json`** + +Open `manifest-stable.json`. Append a new object to the `tools` array (after the `cshell` entry). The full file should end with the entries below (preserving the existing 5 tools, only the `preflight` entry is new): + +```json + { + "name": "cshell", + "description": "A terminal wallet for Cardano", + "repo_name": "cshell", + "repo_owner": "txpipe", + "version": "^0.13.2" + }, + { + "name": "preflight", + "description": "Aiken smart contract vulnerability auditor", + "repo_name": "preflight", + "repo_owner": "tx3-lang", + "version": "^0.1" + } + ] +} +``` + +- [ ] **Step 3: Add identical `preflight` entry to `manifest-beta.json`** + +`manifest-beta.json` is byte-identical to `manifest-stable.json` today. Append the exact same `preflight` entry as Step 2. + +- [ ] **Step 4: Add `preflight` entry to `manifest-nightly.json` with `"version": "^0"`** + +Note: nightly uses `"^0"` for all tools, not pinned versions. 
After the existing `cshell` entry, append: + +```json + { + "name": "cshell", + "description": "A terminal wallet for Cardano", + "repo_name": "cshell", + "repo_owner": "txpipe", + "version": "^0" + }, + { + "name": "preflight", + "description": "Aiken smart contract vulnerability auditor", + "repo_name": "preflight", + "repo_owner": "tx3-lang", + "version": "^0" + } + ] +} +``` + +- [ ] **Step 5: Verify all three files are valid JSON** + +Run: +```bash +cd /Users/mduthey/Documents/Work/txpipe/tx3/toolchain +python3 -m json.tool manifest-stable.json > /dev/null && echo "stable OK" +python3 -m json.tool manifest-beta.json > /dev/null && echo "beta OK" +python3 -m json.tool manifest-nightly.json > /dev/null && echo "nightly OK" +``` + +Expected: three lines printing "OK". A non-zero exit means malformed JSON; re-open the file and fix the trailing comma / bracket issue before continuing. + +- [ ] **Step 6: Commit** + +```bash +cd /Users/mduthey/Documents/Work/txpipe/tx3/toolchain +git add manifest-stable.json manifest-beta.json manifest-nightly.json +git commit -m "feat: add preflight to toolchain manifests" +``` + +- [ ] **Step 7: Push and open PR** + +```bash +git push -u origin add-preflight +gh pr create --title "feat: add preflight to toolchain manifests" --body "$(cat <<'EOF' +## Summary +- Adds `preflight` (Aiken smart contract vulnerability auditor) to stable, beta, and nightly manifests. +- Stable + beta pin to `^0.1`; nightly uses `^0`. +- Required precondition for trix `feat/aiken-vulnerability-detection` to merge — trix will spawn the binary via tx3up's install path. + +## Test plan +- [ ] Validate JSON parses for all three manifests +- [ ] After merge, run `tx3up` against this manifest and confirm `~/.tx3/default/bin/preflight` is installed +EOF +)" +``` + +This PR must merge before Phase 2's release (Step 18) reaches end users. 
+ +--- + +## Phase 2: Trix wrapper + +All remaining tasks happen in `/Users/mduthey/Documents/Work/txpipe/tx3/trix` on branch `feat/aiken-vulnerability-detection` (the user has already merged main into this branch). + +### Task 2: Read the existing spawn pattern (prep, read-only) + +**Files:** +- Read: `src/spawn/tx3c.rs` +- Read: `src/spawn/dolos.rs` +- Read: `src/spawn/cshell.rs` +- Read: `src/home.rs` + +- [ ] **Step 1: Confirm the spawn pattern** + +Read each file. Confirm: +- All use `crate::home::tool_path("")` to resolve the binary. +- All use `Command::new(...)` and call `.status()` (not `.output()`) so stdio is inherited. +- All return `miette::Result<()>` and `bail!` on non-zero exit codes. +- Flag forwarding is straightforward `cmd.args(["--flag", value])` calls. + +The new `spawn/preflight.rs` should match this style exactly. No code changes in this task. + +### Task 3: Remove broken audit-internal test imports + +**Files:** +- Modify: `tests/e2e/happy_path.rs` +- Modify: `tests/e2e/edge_cases.rs` + +These tests reference `trix::commands::audit::model::AnalysisStateJson`, which will not exist after the audit module is replaced. Remove them now so the project keeps compiling at every step. + +- [ ] **Step 1: Remove the AnalysisStateJson import and the two audit happy-path tests** + +In `tests/e2e/happy_path.rs`: +- Delete line `use trix::commands::audit::model::AnalysisStateJson;` near the top +- Delete the entire test function `aiken_audit_runs_in_initialized_project` (lines ~186-216) +- Delete the entire test function `aiken_audit_runs_with_heuristic_provider` (lines ~218-242) + +- [ ] **Step 2: Remove the missing-skills-dir edge case** + +In `tests/e2e/edge_cases.rs`, delete the entire `aiken_audit_fails_with_missing_skills_dir` test function (lines ~63-81). The error message it asserts (`"Audit skills directory not found"`) is preflight's responsibility now. 
+ +Keep `aiken_audit_fails_without_trix_config` — that test exercises trix's `run_global_command` routing (see `src/main.rs:25-31`), which is unchanged by this work. + +- [ ] **Step 3: Verify the test file still compiles** + +Run: +```bash +cargo check --features unstable --tests +``` + +Expected: clean compile. If you see other references to `trix::commands::audit::*` types, grep them out: + +```bash +grep -rn "trix::commands::audit::" tests/ src/ +``` + +Only `src/main.rs:50` (the dispatch in `run_scoped_command`) and `src/cli.rs:59` (the `Audit` variant) should match — both use `commands::audit::Args` and `commands::audit::run`, which are preserved. + +- [ ] **Step 4: Commit** + +```bash +git add tests/e2e/happy_path.rs tests/e2e/edge_cases.rs +git commit -m "test: drop audit tests that depend on internal types being extracted" +``` + +### Task 4: Replace the audit module with a wrapper skeleton (stubbed spawn) + +This task does the structural replacement: deletes the directory, creates the new flat file, deletes the embedded assets, drops the heavy deps, and creates a stubbed `spawn::preflight`. The project compiles at the end. Functionality is broken — `trix audit` returns a "not implemented" error — but tests for help text and the global-config check still pass. 
+ +**Files:** +- Delete: `src/commands/audit/` (entire dir, 10 .rs files) +- Delete: `skills/vulnerabilities/` (3 .md files) +- Delete: `templates/aiken/` (5 .md files) +- Delete: `tests/fixtures/audit/` (empty placeholder) +- Create: `src/commands/audit.rs` +- Create: `src/spawn/preflight.rs` +- Modify: `src/spawn/mod.rs` +- Modify: `Cargo.toml` + +- [ ] **Step 1: Delete the audit directory and embedded assets** + +```bash +cd /Users/mduthey/Documents/Work/txpipe/tx3/trix +rm -rf src/commands/audit +rm -rf skills/vulnerabilities +rm -rf templates/aiken +rmdir tests/fixtures/audit 2>/dev/null || true +``` + +If `templates/` or `skills/` end up empty, leave them alone — they may have other contents: +```bash +ls templates/ skills/ +``` +`templates/` should still contain `tx3/`, `configs/`, `profile/` — and `skills/` is now gone (it only had `vulnerabilities/`). That is fine; the directory will be recreated if other skills are added later. + +At this point the project will NOT compile (`commands/mod.rs` and `cli.rs` reference `commands::audit::*`). Steps 2-4 fix that. + +- [ ] **Step 2: Create `src/commands/audit.rs` (the wrapper)** + +Create the file with this exact content: + +```rust +use clap::{Args as ClapArgs, ValueEnum}; +use miette::Result; + +use crate::config::{ProfileConfig, RootConfig}; + +#[derive(Debug, Clone, Copy, ValueEnum)] +pub enum ReadScopeArg { + Workspace, + Strict, +} + +impl ReadScopeArg { + pub fn as_str(self) -> &'static str { + match self { + Self::Workspace => "workspace", + Self::Strict => "strict", + } + } +} + +#[derive(ClapArgs)] +pub struct Args { + /// Path where the incremental analysis state JSON will be written. + #[arg(long, default_value = ".tx3/audit/state.json")] + pub state_out: String, + + /// Path where the final vulnerability report markdown will be written. + #[arg(long, default_value = ".tx3/audit/vulnerabilities.md")] + pub report_out: String, + + /// Path to vulnerability skill definitions. 
+ #[arg(long, default_value = "skills/vulnerabilities")] + pub skills_dir: String, + + /// Path where the Aiken AST snapshot JSON will be written. + #[arg(long, default_value = ".tx3/audit/aiken-ast.json")] + pub ast_out: String, + + /// Analysis provider: scaffold | heuristic | openai | anthropic | ollama + #[arg(long, default_value = "scaffold")] + pub provider: String, + + /// API endpoint override. Default depends on --provider. + #[arg(long)] + pub endpoint: Option<String>, + + /// Model override. Default depends on --provider. + #[arg(long)] + pub model: Option<String>, + + /// API key environment variable override. Default depends on --provider. + #[arg(long)] + pub api_key_env: Option<String>, + + /// Optional reasoning effort hint for OpenAI-compatible providers (e.g. low|medium|high). + #[arg(long)] + pub reasoning_effort: Option<String>, + + /// Print chat-style progress of model requests and local tool actions while auditing. + #[arg(long, default_value_t = false)] + pub ai_logs: bool, + + /// Regenerate AST even if an up-to-date snapshot is already available. + #[arg(long, default_value_t = false)] + pub no_ast_cache: bool, + + /// File read scope for AI-assisted local tool requests: workspace | strict. + #[arg(long, value_enum, default_value_t = ReadScopeArg::Workspace)] + pub read_scope: ReadScopeArg, + + /// Ask confirmation before executing each AI-requested local read action. + #[arg(long, default_value_t = false)] + pub interactive_permissions: bool, +} + +#[allow(unused_variables)] +pub fn run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<()> { + #[cfg(feature = "unstable")] + { + let _ = profile; + crate::spawn::preflight::run(args, config) + } + #[cfg(not(feature = "unstable"))] + { + let _ = (args, config, profile); + Err(miette::miette!( + "The audit command is currently unstable and requires the `unstable` feature to be enabled." + )) + } +} +``` + +- [ ] **Step 3: Create `src/spawn/preflight.rs` (stubbed for now)** + +Create the file. 
We start with a stub that returns an error, so the integration test in Task 5 can see "red" before we implement the real logic in Task 6. + +```rust +use miette::bail; + +use crate::commands::audit::Args; +use crate::config::RootConfig; + +#[allow(unused_variables)] +pub fn run(args: Args, config: &RootConfig) -> miette::Result<()> { + bail!("preflight spawn not implemented") +} +``` + +- [ ] **Step 4: Register the new `spawn::preflight` module** + +Edit `src/spawn/mod.rs`. Add `pub mod preflight;` so the file looks like: + +```rust +pub mod cshell; +pub mod dolos; +pub mod preflight; +pub mod tx3c; +``` + +- [ ] **Step 5: Drop `aiken-lang` and `serde_yaml_ng` from `Cargo.toml`** + +Open `Cargo.toml`. In the `[dependencies]` section: +- Delete the line `aiken-lang = "1.1.21"` +- Delete the line `serde_yaml_ng = "0.10"` + +Leave all other dependencies in place. The `[features] unstable = []` block stays. + +- [ ] **Step 6: Verify the project compiles cleanly with the unstable feature** + +Run: +```bash +cargo build --features unstable +``` + +Expected: clean compile. If it fails: +- `unresolved import trix::commands::audit::model` — there is still a leftover reference somewhere; grep with `grep -rn "audit::model\|audit::providers\|audit::ast" src/ tests/` and remove. +- `cannot find module audit` — make sure `src/commands/audit.rs` exists and `src/commands/audit/` was deleted. +- `unresolved import crate::spawn::preflight` — make sure `src/spawn/mod.rs` has the new `pub mod preflight;` line. + +- [ ] **Step 7: Verify the project compiles cleanly without the feature too** + +Run: +```bash +cargo build +``` + +Expected: clean compile. The `#[cfg(not(feature = "unstable"))]` branch in `audit.rs` returns the error path; the `spawn::preflight` module is still compiled (Rust modules don't get gated by feature unless explicitly annotated), but its only caller is in the gated block, so the unused-warning is suppressed by `#[allow(unused_variables)]`. 
+ +- [ ] **Step 8: Run the existing test suite to confirm no regressions** + +Run: +```bash +cargo test --features unstable +``` + +Expected: all tests pass. The two audit `--help` tests in `tests/e2e/smoke.rs` (`audit_help_runs_without_error`, `audit_help_displays_provider_options`) pass because the `Args` mirror in the new `audit.rs` reproduces the same flags. The `aiken_audit_fails_without_trix_config` test in `edge_cases.rs` passes because the global routing in `main.rs` is unchanged. + +If one of those three tests fails: +- `audit_help_runs_without_error` expects `"vulnerability"` in stdout — verify the command doc on `Audit(commands::audit::Args)` in `src/cli.rs:57-59` still contains the word "vulnerability". +- `audit_help_displays_provider_options` expects `"provider"` in stdout — verify the `provider` flag's `///` doc comment in the new `Args` includes the word. + +- [ ] **Step 9: Commit** + +```bash +git add -A +git commit -m "refactor(audit): replace in-tree audit with wrapper + stubbed spawn + +- Delete src/commands/audit/, skills/vulnerabilities/, templates/aiken/ +- Add src/commands/audit.rs as a thin clap wrapper (Args mirrors preflight) +- Add src/spawn/preflight.rs as a stub returning a 'not implemented' error +- Drop aiken-lang and serde_yaml_ng from Cargo.toml +- Remove tests that depended on internal audit types + +Spawn forwarding implemented in the next commit." +``` + +### Task 5: Write the failing integration test for spawn forwarding (TDD red) + +**Files:** +- Create: `tests/e2e/audit_wrapper.rs` +- Modify: `tests/e2e/mod.rs` + +- [ ] **Step 1: Add the `audit_wrapper` module to the e2e test harness** + +Edit `tests/e2e/mod.rs`. 
Find the section that lists test submodules (around line 209): + +```rust +pub mod edge_cases; +pub mod happy_path; +pub mod smoke; +``` + +Add `pub mod audit_wrapper;` so it reads: + +```rust +pub mod audit_wrapper; +pub mod edge_cases; +pub mod happy_path; +pub mod smoke; +``` + +- [ ] **Step 2: Write the integration test** + +Create `tests/e2e/audit_wrapper.rs` with this exact content: + +```rust +//! Integration tests for the `trix audit` spawn wrapper. +//! +//! Strategy: point `TX3_PREFLIGHT_PATH` at a small bash script that records +//! its argv to a file and exits 0. Then run `trix audit ...` and assert that +//! the recorded argv contains the flags we expect to forward. + +#![cfg(all(unix, feature = "unstable"))] + +use super::*; +use std::fs; +use std::os::unix::fs::PermissionsExt; + +fn install_mock_preflight(ctx: &TestContext, log_path: &str) -> std::path::PathBuf { + let mock_path = ctx.file_path("mock-preflight.sh"); + let log_full_path = ctx.file_path(log_path); + + let script = format!( + "#!/usr/bin/env bash\nprintf '%s\\n' \"$@\" > {log}\nexit 0\n", + log = log_full_path.display(), + ); + fs::write(&mock_path, script).expect("write mock script"); + + let mut perms = fs::metadata(&mock_path).expect("stat mock").permissions(); + perms.set_mode(0o755); + fs::set_permissions(&mock_path, perms).expect("chmod mock"); + + mock_path +} + +fn run_audit_with_mock(ctx: &TestContext, audit_args: &[&str]) -> (CommandResult, Vec<String>) { + let init_result = ctx.run_trix(&["init", "--yes"]); + assert_success(&init_result); + + let mock_path = install_mock_preflight(ctx, "argv.log"); + let mock_path_str = mock_path.to_string_lossy().to_string(); + + let result = ctx.run_trix_with_env( + audit_args, + &[("TX3_PREFLIGHT_PATH", mock_path_str.as_str())], + ); + + let recorded = fs::read_to_string(ctx.file_path("argv.log")) + .expect("mock should have written argv.log"); + let lines: Vec<String> = recorded.lines().map(str::to_string).collect(); + + (result, lines) +} + +fn 
flag_value<'a>(argv: &'a [String], flag: &str) -> Option<&'a str> { + argv.iter() + .position(|a| a == flag) + .and_then(|i| argv.get(i + 1)) + .map(String::as_str) +} + +#[test] +fn forwards_default_flags_and_injects_main_source() { + let ctx = TestContext::new(); + let (result, argv) = run_audit_with_mock(&ctx, &["audit"]); + + assert_success(&result); + + // Default flag values from src/commands/audit.rs are forwarded. + assert_eq!( + flag_value(&argv, "--provider"), + Some("scaffold"), + "argv: {:?}", + argv + ); + assert_eq!(flag_value(&argv, "--state-out"), Some(".tx3/audit/state.json")); + assert_eq!( + flag_value(&argv, "--report-out"), + Some(".tx3/audit/vulnerabilities.md") + ); + assert_eq!(flag_value(&argv, "--skills-dir"), Some("skills/vulnerabilities")); + assert_eq!(flag_value(&argv, "--ast-out"), Some(".tx3/audit/aiken-ast.json")); + assert_eq!(flag_value(&argv, "--read-scope"), Some("workspace")); + + // --main-source is injected from RootConfig.protocol.main, not from the + // user-facing CLI of `trix audit`. The init template uses "main.tx3". + assert_eq!(flag_value(&argv, "--main-source"), Some("main.tx3")); + + // Boolean flags default to off → not present in argv. 
+ assert!(!argv.iter().any(|a| a == "--ai-logs")); + assert!(!argv.iter().any(|a| a == "--no-ast-cache")); + assert!(!argv.iter().any(|a| a == "--interactive-permissions")); +} + +#[test] +fn forwards_provider_overrides_and_optional_flags() { + let ctx = TestContext::new(); + let (result, argv) = run_audit_with_mock( + &ctx, + &[ + "audit", + "--provider", "openai", + "--model", "gpt-test", + "--endpoint", "https://example/v1/responses", + "--api-key-env", "MY_KEY", + "--reasoning-effort", "high", + "--ai-logs", + "--no-ast-cache", + "--read-scope", "strict", + "--interactive-permissions", + ], + ); + + assert_success(&result); + + assert_eq!(flag_value(&argv, "--provider"), Some("openai")); + assert_eq!(flag_value(&argv, "--model"), Some("gpt-test")); + assert_eq!( + flag_value(&argv, "--endpoint"), + Some("https://example/v1/responses") + ); + assert_eq!(flag_value(&argv, "--api-key-env"), Some("MY_KEY")); + assert_eq!(flag_value(&argv, "--reasoning-effort"), Some("high")); + assert_eq!(flag_value(&argv, "--read-scope"), Some("strict")); + + assert!(argv.iter().any(|a| a == "--ai-logs")); + assert!(argv.iter().any(|a| a == "--no-ast-cache")); + assert!(argv.iter().any(|a| a == "--interactive-permissions")); +} + +#[test] +fn propagates_non_zero_exit_from_preflight() { + let ctx = TestContext::new(); + let init_result = ctx.run_trix(&["init", "--yes"]); + assert_success(&init_result); + + // Mock that exits non-zero. 
+ let mock_path = ctx.file_path("mock-fail.sh"); + fs::write(&mock_path, "#!/usr/bin/env bash\nexit 7\n").expect("write"); + let mut perms = fs::metadata(&mock_path).expect("stat").permissions(); + perms.set_mode(0o755); + fs::set_permissions(&mock_path, perms).expect("chmod"); + + let result = ctx.run_trix_with_env( + &["audit"], + &[( + "TX3_PREFLIGHT_PATH", + mock_path.to_string_lossy().to_string().as_str(), + )], + ); + + assert!( + !result.success(), + "trix audit should fail when preflight exits non-zero" + ); +} +``` + +- [ ] **Step 3: Run the new tests and confirm they fail** + +Run: +```bash +cargo test --features unstable --test e2e_tests audit_wrapper +``` + +Expected: `forwards_default_flags_and_injects_main_source` and `forwards_provider_overrides_and_optional_flags` FAIL. The stub returns its `"preflight spawn not implemented"` error before anything is spawned, so the mock never runs and both tests panic in `run_audit_with_mock` with `mock should have written argv.log`. This is the red state: the wrapper is wired in but doesn't yet do the work. `propagates_non_zero_exit_from_preflight` should already PASS at this stage, because it only asserts that `trix audit` exits unsuccessfully, which the stub's error also satisfies. + +If the two `forwards_*` tests pass instead, something's wrong — the stub should be returning an error without running the mock. If the tests skip, check the `#![cfg(all(unix, feature = "unstable"))]` line — `cargo test --features unstable` on a unix machine should run them. + +If the tests still panic because `argv.log` is unwritten after the real implementation lands (Task 6), check that the mock script's `printf` line is escaped correctly once interpolated by `format!`. The Rust string literal writes `\\n` so that the generated script contains a literal `\n` inside the `printf` format string — that's intentional.
+ +### Task 6: Implement the real spawn forwarding (TDD green) + +**Files:** +- Modify: `src/spawn/preflight.rs` + +- [ ] **Step 1: Replace the stub with the real implementation** + +Replace the entire contents of `src/spawn/preflight.rs` with: + +```rust +use std::process::Command; + +use miette::{Context as _, IntoDiagnostic as _, bail}; + +use crate::commands::audit::Args; +use crate::config::RootConfig; + +pub fn run(args: Args, config: &RootConfig) -> miette::Result<()> { + let tool_path = crate::home::tool_path("preflight")?; + + let mut cmd = Command::new(tool_path); + + // Always-present flags with default values. + cmd.args(["--state-out", &args.state_out]); + cmd.args(["--report-out", &args.report_out]); + cmd.args(["--skills-dir", &args.skills_dir]); + cmd.args(["--ast-out", &args.ast_out]); + cmd.args(["--provider", &args.provider]); + cmd.args(["--read-scope", args.read_scope.as_str()]); + + // Optional string flags. + if let Some(value) = &args.endpoint { + cmd.args(["--endpoint", value]); + } + if let Some(value) = &args.model { + cmd.args(["--model", value]); + } + if let Some(value) = &args.api_key_env { + cmd.args(["--api-key-env", value]); + } + if let Some(value) = &args.reasoning_effort { + cmd.args(["--reasoning-effort", value]); + } + + // Boolean flags. + if args.ai_logs { + cmd.arg("--ai-logs"); + } + if args.no_ast_cache { + cmd.arg("--no-ast-cache"); + } + if args.interactive_permissions { + cmd.arg("--interactive-permissions"); + } + + // --main-source is injected from RootConfig.protocol.main; preflight + // expects it as the fallback when its own .ak discovery returns empty. 
+ let main_source = config.protocol.main.to_string_lossy().to_string(); + cmd.args(["--main-source", &main_source]); + + let status = cmd + .status() + .into_diagnostic() + .context("running preflight")?; + + if !status.success() { + bail!("preflight exited with non-zero status"); + } + + Ok(()) +} +``` + +- [ ] **Step 2: Run the integration tests and confirm they pass** + +Run: +```bash +cargo test --features unstable --test e2e_tests audit_wrapper +``` + +Expected: all three tests in `audit_wrapper` PASS. + +If `forwards_default_flags_and_injects_main_source` fails on the `--main-source` assertion with something other than `"main.tx3"`: +- Inspect what `trix init --yes` writes for `protocol.main` (read `templates/tx3/trix.toml` or run `cat trix.toml` in the temp dir during the test). Update the assertion to match the init template's actual default. + +If `propagates_non_zero_exit_from_preflight` fails because trix returns success: confirm the new `spawn/preflight.rs` checks `!status.success()` and `bail!`s. The mock exits 7 → trix should bail. + +- [ ] **Step 3: Run the full test suite to confirm no regressions** + +Run: +```bash +cargo test --features unstable +``` + +Expected: all tests pass, including the existing `audit_help_*` smoke tests and `aiken_audit_fails_without_trix_config`. + +- [ ] **Step 4: Run a quick build without the feature to confirm gating still works** + +Run: +```bash +cargo build +``` + +Expected: clean compile. Then verify the unstable error message: +```bash +TRIX_BIN="$(pwd)/target/debug/trix" +SMOKE_DIR="$(mktemp -d)" +cd "$SMOKE_DIR" +"$TRIX_BIN" init --yes +"$TRIX_BIN" audit 2>&1 | head -5 +cd - > /dev/null +rm -rf "$SMOKE_DIR" +``` + +Expected: the audit invocation prints something containing `"requires the `unstable` feature to be enabled"`. 
+ +- [ ] **Step 5: Commit** + +```bash +cd "$(git rev-parse --show-toplevel)" +git add src/spawn/preflight.rs tests/e2e/mod.rs tests/e2e/audit_wrapper.rs +git commit -m "feat(audit): forward flags to preflight via Command::new + +- spawn::preflight::run resolves the binary via home::tool_path +- forwards every public preflight flag verbatim +- injects --main-source from RootConfig.protocol.main +- propagates non-zero exit codes via miette bail +- integration test uses TX3_PREFLIGHT_PATH + a bash mock to assert argv" +``` + +### Task 7: Final verification + +**Files:** none modified. + +- [ ] **Step 1: Run cargo fmt to confirm style is consistent** + +Run: +```bash +cargo fmt --check +``` + +Expected: no output (clean). If files are flagged, run `cargo fmt` and amend the previous commit: +```bash +cargo fmt +git add -u +git commit --amend --no-edit +``` + +- [ ] **Step 2: Run cargo clippy on both feature configurations** + +Run: +```bash +cargo clippy --features unstable -- -D warnings +cargo clippy -- -D warnings +``` + +Expected: no warnings. If the `#[allow(unused_variables)]` attribute that was on the stub is still present in `spawn/preflight.rs`, remove it now — the real implementation uses everything, so the attribute is no longer needed. + +- [ ] **Step 3: Run the full test suite once more on both feature configurations** + +Run: +```bash +cargo test --features unstable +cargo test +``` + +Expected: all tests pass in both runs. + +- [ ] **Step 4: Confirm dependency graph no longer contains aiken-lang** + +Run: +```bash +cargo tree --features unstable | grep -E "aiken-lang|serde_yaml_ng" || echo "clean" +``` + +Expected: prints `clean`. If it prints any aiken or serde_yaml_ng line, some other dependency is pulling them in transitively — investigate which crate (the line above the match in `cargo tree` shows the parent).
+ +- [ ] **Step 5: Push the branch** + +```bash +git push origin feat/aiken-vulnerability-detection +``` + +This phase is complete when the push succeeds and the branch is ready for PR review. + +--- + +## Phase 3: Release coordination (out of plan scope) + +The trix release that picks up these changes must wait until the toolchain PR from Phase 1 is merged. Once both are merged: + +1. Confirm Phase 1 is on `main` of `tx3-lang/toolchain`. +2. Open the trix PR from `feat/aiken-vulnerability-detection` to `main`. Merge. +3. Cut a trix release via the existing `cargo-release` flow (`release.toml` configured in repo root). +4. Validate end-to-end on a clean machine: `tx3up` → `trix audit --provider scaffold` produces `.tx3/audit/{state.json,vulnerabilities.md,aiken-ast.json}`. + +Release execution is a manual operator step, not part of this implementation plan. + +--- + +## Self-review + +**Spec coverage:** +- Architecture diagram → represented in `audit.rs` + `spawn/preflight.rs` design across Tasks 4 and 6. +- Trix-side files deleted/created/modified → Task 4 (file structure) + Tasks 3, 5, 6, 7. +- `aiken-lang` and `serde_yaml_ng` removal → Task 4 Step 5; verified Task 7 Step 4. +- Toolchain manifest changes → Task 1 (all three manifests). +- Sequencing (toolchain first, then trix) → enforced by Phase 1 preceding Phase 2; release dependency documented in Phase 3. +- Stdio inheritance via `.status()` → Task 6 Step 1. +- `home::tool_path("preflight")` lookup → Task 6 Step 1. +- `unstable` feature gate preserved → Task 4 Step 2 (the cfg block in `audit::run`). +- `#[command(hide = true)]` preserved → no edit to `cli.rs` (file structure section confirms this). +- Test strategy (mock preflight via `TX3_PREFLIGHT_PATH`) → Task 5 Step 2. +- Risk: flag drift → caught by Task 5 tests. +- Risk: `config.protocol.main` resolution → covered by Task 5 default-flags assertion plus Task 6 Step 1 implementation. +- Risk: transition window → handled by Phase 1 precedence. 
+ +**Placeholder scan:** searched for "TBD", "TODO", "implement later", "similar to" — none present in task code blocks. All steps include the actual code, exact commands, and expected outputs. + +**Type consistency:** `Args`, `ReadScopeArg`, and `ReadScopeArg::as_str(self) -> &'static str` are defined in Task 4 Step 2 and used unchanged in Task 6 Step 1. `crate::commands::audit::Args` and `crate::config::RootConfig` imports match across both files. diff --git a/docs/superpowers/specs/2026-05-05-extract-audit-to-preflight-design.md b/docs/superpowers/specs/2026-05-05-extract-audit-to-preflight-design.md new file mode 100644 index 0000000..b7073d1 --- /dev/null +++ b/docs/superpowers/specs/2026-05-05-extract-audit-to-preflight-design.md @@ -0,0 +1,208 @@ +# Extract `trix audit` to the `preflight` binary + +**Date:** 2026-05-05 +**Status:** Approved (pending implementation) +**Repos touched:** `tx3-lang/trix`, `tx3-lang/toolchain` +**Repo not touched:** `tx3-lang/preflight` (already at v0.1.0, no work required) + +## Context + +`trix audit` lives in `src/commands/audit/` (~6,000 lines) and pulls heavy dependencies into the `trix` binary: the full `aiken-lang` compiler, `serde_yaml_ng`, plus embedded vulnerability skills (`skills/vulnerabilities/*.md`) and Aiken prompt templates (`templates/aiken/*.md`) bundled via `include_str!`. + +The same code already exists as a standalone binary at [`tx3-lang/preflight`](https://github.com/tx3-lang/preflight) v0.1.0. Preflight is functionally equivalent (and slightly ahead — it has `--main-source`, `validate_anthropic_reasoning_effort` with tests, and full anthropic reasoning-effort wiring including adaptive thinking variants, none of which are in trix's current `feat/aiken-vulnerability-detection` branch). + +The goal is to mirror the pattern already used for `tx3c`, `dolos`, and `cshell`: trix invokes the external binary via `Command::new(home::tool_path(name))`, and `tx3up` distributes it.
+ +## Goals + +- Remove all audit/Aiken-specific code, skills, and templates from the trix binary. +- Drop `aiken-lang` and `serde_yaml_ng` from trix's `Cargo.toml`. +- Replace `commands::audit` with a thin wrapper that spawns `preflight`. +- Add `preflight` to the `tx3up` toolchain manifests so users get it on `tx3up` refresh. +- Preserve current UX: `trix audit --help`, flags, stdout/stderr behavior, and the `unstable`/`hide` gating. + +## Non-goals + +- No changes to preflight (it is the source of truth and is up to date). +- No new features in audit. UX changes deferred to a separate spec. +- No revisit of the `unstable` feature gate or `hide = true` decision. Both are kept as today. +- No work on `~/.tx3/default/bin` install layout, `home::tool_path` resolution, or `tx3up` itself. + +## Architecture + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ tx3-lang/trix │ +│ │ +│ cli.rs │ +│ └─ Audit(commands::audit::Args) (hide = true, marked UNSTABLE)│ +│ │ +│ commands/audit.rs (single file replacing commands/audit/ dir) │ +│ └─ run(args, &RootConfig, &ProfileConfig) -> Result<()> │ +│ ├─ #[cfg(feature = "unstable")] → spawn::preflight::run │ +│ └─ otherwise → Err("requires unstable feature") │ +│ │ +│ spawn/preflight.rs (new sibling of tx3c.rs / dolos.rs) │ +│ └─ run(args, &RootConfig) │ +│ Command::new(home::tool_path("preflight")) │ +│ .args(forwarded flags) │ +│ .args(["--main-source", &config.protocol.main]) │ +│ .status() → inherits stdin/stdout/stderr │ +└──────────────────────────────────────────────────────────────────┘ + │ spawn + ▼ +┌──────────────────────────────────────────────────────────────────┐ +│ tx3-lang/preflight (installed by tx3up) │ +│ ~/.tx3/default/bin/preflight │ +│ or $TX3_PREFLIGHT_PATH if set (per home::tool_path conventions)│ +└──────────────────────────────────────────────────────────────────┘ +``` + +The wrapper sits between `cli.rs` and `home::tool_path`. 
The contract with preflight is: trix forwards every public preflight flag verbatim, plus injects `--main-source` from `RootConfig.protocol.main` (which preflight already accepts as the fallback when `.ak` discovery returns empty). + +## Trix-side changes + +### Files deleted + +``` +src/commands/audit/ (10 .rs files, ~5,993 lines) + ├── mod.rs 786 + ├── ast.rs 462 + ├── model.rs 141 + └── providers/ + ├── mod.rs 124 + ├── anthropic.rs 594 + ├── heuristic.rs 59 + ├── heuristic_detectors.rs 1,919 + ├── openai.rs 854 + ├── scaffold.rs 44 + └── shared.rs 1,010 + +skills/vulnerabilities/ (3 markdown files; embedded in audit/mod.rs) + ├── 001-strict-value-equality.md + ├── 002-missing-address-validation.md + └── 003-unvalidated-datum.md + +templates/aiken/ (5 markdown files; embedded in providers/shared.rs) + ├── audit_agent_initial_user_prompt.md + ├── audit_agent_system_prompt.md + ├── audit_agent_tool_result_prompt.md + ├── permission_prompt.md + └── report.md +``` + +### Files created + +**`src/commands/audit.rs`** — clap `Args` (mirror of preflight's flags) + thin dispatcher that preserves the `unstable` feature gate: + +- Mirrors all preflight flags 1:1 EXCEPT `--main-source` (trix derives it from `RootConfig`). +- Keeps `ReadScopeArg` enum identical to today (so the `cli.rs` `Audit` variant signature does not change). +- `run(args, &RootConfig, &ProfileConfig)` keeps the same shape `main.rs` calls today. +- When `unstable` is on: `spawn::preflight::run(args, config)`. +- When `unstable` is off: returns the same miette error as today (`"The audit command is currently unstable and requires the `unstable` feature to be enabled."`). + +**`src/spawn/preflight.rs`** — new module following the `spawn::tx3c` / `spawn::dolos` pattern: + +- Resolves the binary via `home::tool_path("preflight")` (gives env-var override `TX3_PREFLIGHT_PATH` and the consistent "please run tx3up" error for free).
+- Builds `Command::new(...)` and forwards each flag from `Args` to the corresponding preflight CLI flag. +- Injects `--main-source` from `config.protocol.main` (converted via `to_string_lossy()`). +- Uses `.status()` (not `.output()`) so stdin/stdout/stderr are inherited from the parent process. This preserves: the `🧭` progress logs, the experimental warning print, interactive permission prompts, and final summary lines. +- On non-zero exit: `bail!("preflight exited with non-zero status")`. + +### Files modified + +| File | Change | +|---|---| +| `src/spawn/mod.rs` | Add `pub mod preflight;` | +| `src/cli.rs` | No structural change. `#[command(hide = true)]` and the `(UNSTABLE - ...)` doc comment stay. | +| `src/main.rs` | No change — `audit::run(args, &config, &profile)` signature is preserved. | +| `Cargo.toml` | Remove `aiken-lang = "1.1.21"` and `serde_yaml_ng = "0.10"`. Keep `[features] unstable = []`. | + +### Cargo.toml dependencies — verified retention + +Other "audit-looking" dependencies stay because they have non-audit usage in the trix codebase: + +- `cryptoxide` → used by `home.rs`, `wallet.rs` +- `chrono` → used by `commands/publish.rs` +- `reqwest` → used by `codegen.rs`, `codegen_legacy.rs`, `telemetry/client.rs` +- `tempfile` → used by `codegen.rs`, `codegen_legacy.rs` + +Only `aiken-lang` and `serde_yaml_ng` are exclusive to the audit module. + +### Tests + +The existing `#[cfg(test)] mod tests` blocks under `src/commands/audit/` already exist verbatim in preflight (verified for `mod.rs`, `heuristic_detectors.rs`, `shared.rs`, `anthropic.rs`, `openai.rs`). Nothing needs to be rescued back into trix. + +New trix tests: + +1. **Compile-time / clap parse smoke test.** Confirms `audit::Args` parses the same flags as before (regression guard for typos in the mirrored definitions). +2. **Spawn integration test (with `assert_cmd`).** Sets `TX3_PREFLIGHT_PATH` to a small bash script that writes its argv to a file and exits 0. 
Runs `trix audit --provider openai --model foo` from a fixture project. Asserts the recorded argv contains all forwarded flags plus `--main-source` followed by the project's `protocol.main` value. Standard pattern for testing spawn wrappers. + +## Toolchain manifest changes (`tx3-lang/toolchain`) + +A separate PR in a separate repo. Three manifests, one entry each. + +**`manifest-stable.json`** — append to `tools` array: + +```json +{ + "name": "preflight", + "description": "Aiken smart contract vulnerability auditor", + "repo_name": "preflight", + "repo_owner": "tx3-lang", + "version": "^0.1" +} +``` + +**`manifest-beta.json`** — same entry as stable (today both files are byte-identical except for self/header). + +**`manifest-nightly.json`** — same entry but `"version": "^0"` (nightly always tracks latest 0.x): + +```json +{ + "name": "preflight", + "description": "Aiken smart contract vulnerability auditor", + "repo_name": "preflight", + "repo_owner": "tx3-lang", + "version": "^0" +} +``` + +The binary on disk lands as `~/.tx3/default/bin/preflight`, which matches preflight's clap `name = "preflight"` and trix's `home::tool_path("preflight")` lookup. + +## Sequencing + +Three steps (two PRs and one release) across two repos, in this strict order. The order eliminates any window where a user could update trix and find `preflight` missing. + +1. **Toolchain PR.** Add the `preflight` entry to all three manifests in `tx3-lang/toolchain`. Merge to main. Effect: any `tx3up` run starts installing preflight to `~/.tx3/default/bin/preflight`. Risk: zero — adds a standalone binary that nothing currently consumes. +2. **Trix PR.** Strip audit, add wrapper, drop deps. Merge to main of `tx3-lang/trix`. Effect: trix binary shrinks, `commands::audit` now delegates to preflight. Behind `unstable` + `hide`, so end users on stable do not see the change. +3. **Trix release.** Standard `cargo-release` flow (`release.toml` + `cliff.toml` already configured).
Tag → cargo-dist publishes binaries → users running `tx3up` get the new trix and already have preflight from step 1. + +### Inverse order is unsafe + +If trix releases first, a user running `tx3up` between trix release and toolchain merge gets the new trix without preflight. Their `trix audit --provider ...` would fail with `tool preflight not found` (correct error, suggests `tx3up`, but that suggestion would not help until the toolchain PR lands). + +### Local verification per step + +| Step | Verification | +|---|---| +| Toolchain | Run `tx3up` against the local manifest path (or staging branch); confirm `~/.tx3/default/bin/preflight` exists and is executable. | +| Trix | `cargo build --features unstable` succeeds without `aiken-lang` or `serde_yaml_ng` in the dep graph. Smoke run with `TX3_PREFLIGHT_PATH=/tmp/mock-preflight.sh` script that records argv; assert all flags + `--main-source` are forwarded. | +| Release | From a clean machine: `tx3up` → `trix audit --provider scaffold` end-to-end. Confirm `.tx3/audit/{state.json,vulnerabilities.md,aiken-ast.json}` produced. | + +### Rollback + +- Trix: revert PR + patch release. Preflight stays installed but unused. No data loss, no user-facing breakage beyond the audit command. +- Toolchain: only revert if also reverting trix. A toolchain-only revert after a trix release pointing to preflight would break `trix audit` for new `tx3up` users. + +## Risks + +1. **Flag drift between trix and preflight.** Each new flag in preflight requires a manual mirror in trix's `Args`. Mitigation: low rate of change (preflight has ~12 flags), and CI integration test from the testing section catches forwarded-flag mismatches. +2. **`config.protocol.main` resolution.** It is typically a relative path. `Command::new` inherits the parent's cwd by default, so preflight resolves it from the same project root. Confirmed safe; documented as an implementation note. +3. **Transition window.** Mitigated by the toolchain-first sequencing. 
+ +## Out of scope (follow-ups) + +- Removing `unstable`/`hide` once the feature is declared GA (separate decision, separate spec). +- Reusing `preflight::Args` from trix as a Cargo library dependency (would re-introduce `aiken-lang` transitively; would only become viable after a preflight refactor that splits Args into a dependency-light sub-crate). +- Adding `preflight` to additional release channels or alternative installers. diff --git a/src/cli.rs b/src/cli.rs index d9b1c92..cb775db 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -54,6 +54,10 @@ pub enum Commands { /// Inspect and manage profiles Profile(commands::profile::Args), + /// Run vulnerability analysis scaffolding (UNSTABLE - This feature is experimental and may change) + #[command(hide = true)] + Audit(commands::audit::Args), + /// Publish a Tx3 package into the registry (UNSTABLE - This feature is experimental and may change) #[command(hide = true)] Publish(commands::publish::Args), diff --git a/src/commands/audit.rs b/src/commands/audit.rs new file mode 100644 index 0000000..ad91b80 --- /dev/null +++ b/src/commands/audit.rs @@ -0,0 +1,90 @@ +use clap::{Args as ClapArgs, ValueEnum}; +use miette::Result; + +use crate::config::{ProfileConfig, RootConfig}; + +#[derive(Debug, Clone, Copy, ValueEnum)] +pub enum ReadScopeArg { + Workspace, + Strict, +} + +impl ReadScopeArg { + pub fn as_str(self) -> &'static str { + match self { + Self::Workspace => "workspace", + Self::Strict => "strict", + } + } +} + +#[derive(ClapArgs)] +pub struct Args { + /// Path where the incremental analysis state JSON will be written. + #[arg(long, default_value = ".tx3/audit/state.json")] + pub state_out: String, + + /// Path where the final vulnerability report markdown will be written. + #[arg(long, default_value = ".tx3/audit/vulnerabilities.md")] + pub report_out: String, + + /// Path to vulnerability skill definitions. 
+ #[arg(long, default_value = "skills/vulnerabilities")] + pub skills_dir: String, + + /// Path where the Aiken AST snapshot JSON will be written. + #[arg(long, default_value = ".tx3/audit/aiken-ast.json")] + pub ast_out: String, + + /// Analysis provider: scaffold | heuristic | openai | anthropic | ollama + #[arg(long, default_value = "scaffold")] + pub provider: String, + + /// API endpoint override. Default depends on --provider. + #[arg(long)] + pub endpoint: Option, + + /// Model override. Default depends on --provider. + #[arg(long)] + pub model: Option, + + /// API key environment variable override. Default depends on --provider. + #[arg(long)] + pub api_key_env: Option, + + /// Optional reasoning effort hint for OpenAI-compatible providers (e.g. low|medium|high). + #[arg(long)] + pub reasoning_effort: Option, + + /// Print chat-style progress of model requests and local tool actions while auditing. + #[arg(long, default_value_t = false)] + pub ai_logs: bool, + + /// Regenerate AST even if an up-to-date snapshot is already available. + #[arg(long, default_value_t = false)] + pub no_ast_cache: bool, + + /// File read scope for AI-assisted local tool requests: workspace | strict. + #[arg(long, value_enum, default_value_t = ReadScopeArg::Workspace)] + pub read_scope: ReadScopeArg, + + /// Ask confirmation before executing each AI-requested local read action. + #[arg(long, default_value_t = false)] + pub interactive_permissions: bool, +} + +#[allow(unused_variables)] +pub fn run(args: Args, config: &RootConfig, profile: &ProfileConfig) -> Result<()> { + #[cfg(feature = "unstable")] + { + let _ = profile; + crate::spawn::preflight::run(args, config) + } + #[cfg(not(feature = "unstable"))] + { + let _ = (args, config, profile); + Err(miette::miette!( + "The audit command is currently unstable and requires the `unstable` feature to be enabled." 
+ )) + } +} diff --git a/src/commands/codegen.rs b/src/commands/codegen.rs index 9386002..4324e26 100644 --- a/src/commands/codegen.rs +++ b/src/commands/codegen.rs @@ -121,7 +121,8 @@ pub async fn run(_args: Args, config: &RootConfig, _profile: &ProfileConfig) -> }; let template_temp = TempDir::new().into_diagnostic()?; - let templates_dir = extract_github_templates(&github_url, &template_temp, &plugin.path).await?; + let templates_dir = + extract_github_templates(&github_url, &template_temp, &plugin.path).await?; crate::spawn::tx3c::codegen(&tii_path, &templates_dir, &output_dir)?; println!("Bindgen successful"); diff --git a/src/commands/devnet/copy.rs b/src/commands/devnet/copy.rs index 6c0eda6..3f26dd3 100644 --- a/src/commands/devnet/copy.rs +++ b/src/commands/devnet/copy.rs @@ -77,22 +77,23 @@ async fn fetch_utxo_deps( .into_diagnostic()?; if let Some(tx) = tx - && let Some(tx) = tx.parsed { - let utxos = client - .read_utxos( - tx.inputs - .iter() - .map(|r| TxoRef { - hash: r.tx_hash.clone(), - index: r.output_index, - }) - .collect(), - ) - .await - .into_diagnostic()?; - - return Ok(utxos); - } + && let Some(tx) = tx.parsed + { + let utxos = client + .read_utxos( + tx.inputs + .iter() + .map(|r| TxoRef { + hash: r.tx_hash.clone(), + index: r.output_index, + }) + .collect(), + ) + .await + .into_diagnostic()?; + + return Ok(utxos); + } Ok(vec![]) } diff --git a/src/commands/mod.rs b/src/commands/mod.rs index f351445..531bee9 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,3 +1,4 @@ +pub mod audit; pub mod build; pub mod check; pub mod devnet; diff --git a/src/commands/profile/list.rs b/src/commands/profile/list.rs index e0a0885..d60da5c 100644 --- a/src/commands/profile/list.rs +++ b/src/commands/profile/list.rs @@ -4,8 +4,8 @@ use termimad::MadSkin; use crate::config::RootConfig; use super::{ - resolve_network_source, resolve_profile_source, NetworkListItem, ProfileListItem, - ProfileListView, + NetworkListItem, ProfileListItem, 
ProfileListView, resolve_network_source, + resolve_profile_source, }; // ============================================================================ diff --git a/src/commands/profile/show.rs b/src/commands/profile/show.rs index ba2bdbb..76465e8 100644 --- a/src/commands/profile/show.rs +++ b/src/commands/profile/show.rs @@ -4,8 +4,8 @@ use termimad::MadSkin; use crate::config::{NetworkConfig, ProfileConfig, RootConfig}; use super::{ - load_and_mask_env_vars, mask_value, resolve_network_source, resolve_profile_source, ConfigSource, EndpointView, EnvFileStatus, EnvFileView, IdentityView, NetworkView, ProfileView, + load_and_mask_env_vars, mask_value, resolve_network_source, resolve_profile_source, }; // ============================================================================ diff --git a/src/commands/test.rs b/src/commands/test.rs index 0c75066..d53e16a 100644 --- a/src/commands/test.rs +++ b/src/commands/test.rs @@ -6,7 +6,7 @@ use std::{ }; use clap::Args as ClapArgs; -use miette::{bail, Context as _, IntoDiagnostic, Result}; +use miette::{Context as _, IntoDiagnostic, Result, bail}; use serde::{Deserialize, Serialize}; use crate::{ diff --git a/src/devnet/mod.rs b/src/devnet/mod.rs index b9ab7f4..fa498d5 100644 --- a/src/devnet/mod.rs +++ b/src/devnet/mod.rs @@ -9,7 +9,7 @@ use std::{ use miette::{Diagnostic, IntoDiagnostic as _}; use serde::{Deserialize, Serialize}; -use serde_with::{serde_as, DisplayFromStr}; +use serde_with::{DisplayFromStr, serde_as}; use thiserror::Error; use crate::wallet::WalletProxy; diff --git a/src/main.rs b/src/main.rs index b21dabd..a0dd3c1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,12 @@ use clap::Parser; +use miette::{IntoDiagnostic as _, Result}; use trix::{ - builder, cli::{Cli, Commands}, commands as cmds, config::RootConfig, - devnet, dirs, global, home, spawn, telemetry, updates, wallet, + global, telemetry, updates, }; -use miette::{IntoDiagnostic as _, Result}; pub fn load_config() -> Result> { let current_dir 
= std::env::current_dir().into_diagnostic()?; @@ -48,6 +47,7 @@ async fn run_scoped_command(cli: Cli, config: RootConfig) -> Result<()> { Commands::Build(args) => cmds::build::run(args, &config, &profile), Commands::Identities(args) => cmds::identities::run(args, &config, &profile), Commands::Profile(args) => cmds::profile::run(args, &config, &profile), + Commands::Audit(args) => cmds::audit::run(args, &config, &profile), Commands::Publish(args) => cmds::publish::run(args, &config), Commands::Telemetry(args) => cmds::telemetry::run(args), }; diff --git a/src/spawn/cshell.rs b/src/spawn/cshell.rs index 62261bf..7e9f6c5 100644 --- a/src/spawn/cshell.rs +++ b/src/spawn/cshell.rs @@ -6,8 +6,8 @@ use std::{ use askama::Template; -use miette::{bail, Context as _, IntoDiagnostic as _}; -use serde::{de, Deserialize, Deserializer, Serialize}; +use miette::{Context as _, IntoDiagnostic as _, bail}; +use serde::{Deserialize, Deserializer, Serialize, de}; use crate::config::{TrpConfig, U5cConfig}; diff --git a/src/spawn/mod.rs b/src/spawn/mod.rs index c244d86..1796ac1 100644 --- a/src/spawn/mod.rs +++ b/src/spawn/mod.rs @@ -1,3 +1,4 @@ pub mod cshell; pub mod dolos; +pub mod preflight; pub mod tx3c; diff --git a/src/spawn/preflight.rs b/src/spawn/preflight.rs new file mode 100644 index 0000000..7a73d0e --- /dev/null +++ b/src/spawn/preflight.rs @@ -0,0 +1,61 @@ +use std::process::Command; + +use miette::{Context as _, IntoDiagnostic as _, bail}; + +use crate::commands::audit::Args; +use crate::config::RootConfig; + +pub fn run(args: Args, config: &RootConfig) -> miette::Result<()> { + let tool_path = crate::home::tool_path("preflight")?; + + let mut cmd = Command::new(tool_path); + + // Always-present flags with default values. 
+ cmd.args(["--state-out", &args.state_out]); + cmd.args(["--report-out", &args.report_out]); + cmd.args(["--skills-dir", &args.skills_dir]); + cmd.args(["--ast-out", &args.ast_out]); + cmd.args(["--provider", &args.provider]); + cmd.args(["--read-scope", args.read_scope.as_str()]); + + // Optional string flags. + if let Some(value) = &args.endpoint { + cmd.args(["--endpoint", value]); + } + if let Some(value) = &args.model { + cmd.args(["--model", value]); + } + if let Some(value) = &args.api_key_env { + cmd.args(["--api-key-env", value]); + } + if let Some(value) = &args.reasoning_effort { + cmd.args(["--reasoning-effort", value]); + } + + // Boolean flags. + if args.ai_logs { + cmd.arg("--ai-logs"); + } + if args.no_ast_cache { + cmd.arg("--no-ast-cache"); + } + if args.interactive_permissions { + cmd.arg("--interactive-permissions"); + } + + // --main-source is injected from RootConfig.protocol.main; preflight + // expects it as the fallback when its own .ak discovery returns empty. 
+ let main_source = config.protocol.main.to_string_lossy().to_string(); + cmd.args(["--main-source", &main_source]); + + let status = cmd + .status() + .into_diagnostic() + .context("running preflight")?; + + if !status.success() { + bail!("preflight exited with non-zero status"); + } + + Ok(()) +} diff --git a/src/spawn/tx3c.rs b/src/spawn/tx3c.rs index 4c895ad..14901d6 100644 --- a/src/spawn/tx3c.rs +++ b/src/spawn/tx3c.rs @@ -1,6 +1,6 @@ use std::{path::Path, process::Command}; -use miette::{bail, Context as _, IntoDiagnostic as _}; +use miette::{Context as _, IntoDiagnostic as _, bail}; use crate::config::RootConfig; diff --git a/src/telemetry/mod.rs b/src/telemetry/mod.rs index c90ba7b..3ab5e1b 100644 --- a/src/telemetry/mod.rs +++ b/src/telemetry/mod.rs @@ -1,7 +1,10 @@ use tokio::{sync::OnceCell, task::JoinHandle}; use tracing::debug; -use crate::{cli::{Cli, Commands}, global::TelemetryConfig}; +use crate::{ + cli::{Cli, Commands}, + global::TelemetryConfig, +}; mod client; mod fingerprint; @@ -40,6 +43,7 @@ impl From<&Cli> for Option { Commands::Inspect(_) => Some(CommandMetric::new("inspect")), Commands::Test(_) => Some(CommandMetric::new("test")), Commands::Identities(_) => Some(CommandMetric::new("identities")), + Commands::Audit(_) => Some(CommandMetric::new("audit")), Commands::Publish(_) => Some(CommandMetric::new("publish")), _ => None, } diff --git a/src/wallet.rs b/src/wallet.rs index 3bc90b5..37ffc3d 100644 --- a/src/wallet.rs +++ b/src/wallet.rs @@ -6,7 +6,7 @@ use std::{ use askama::Template as _; use bip39::Mnemonic; use cryptoxide::{digest::Digest, sha2::Sha256}; -use miette::{bail, Context, IntoDiagnostic as _, Result}; +use miette::{Context, IntoDiagnostic as _, Result, bail}; use crate::{ config::{IdentityConfig, NetworkConfig, ProfileConfig, RootConfig}, diff --git a/tests/e2e/audit_wrapper.rs b/tests/e2e/audit_wrapper.rs new file mode 100644 index 0000000..d6db3b2 --- /dev/null +++ b/tests/e2e/audit_wrapper.rs @@ -0,0 +1,165 @@ +//! 
Integration tests for the `trix audit` spawn wrapper. +//! +//! Strategy: point `TX3_PREFLIGHT_PATH` at a small bash script that records +//! its argv to a file and exits 0. Then run `trix audit ...` and assert that +//! the recorded argv contains the flags we expect to forward. + +#![cfg(all(unix, feature = "unstable"))] + +use super::*; +use std::fs; +use std::os::unix::fs::PermissionsExt; + +fn install_mock_preflight(ctx: &TestContext, log_path: &str) -> std::path::PathBuf { + let mock_path = ctx.file_path("mock-preflight.sh"); + let log_full_path = ctx.file_path(log_path); + + let script = format!( + "#!/usr/bin/env bash\nprintf '%s\\n' \"$@\" > {log}\nexit 0\n", + log = log_full_path.display(), + ); + fs::write(&mock_path, script).expect("write mock script"); + + let mut perms = fs::metadata(&mock_path).expect("stat mock").permissions(); + perms.set_mode(0o755); + fs::set_permissions(&mock_path, perms).expect("chmod mock"); + + mock_path +} + +fn run_audit_with_mock(ctx: &TestContext, audit_args: &[&str]) -> (CommandResult, Vec) { + let init_result = ctx.run_trix(&["init", "--yes"]); + assert_success(&init_result); + + let mock_path = install_mock_preflight(ctx, "argv.log"); + let mock_path_str = mock_path.to_string_lossy().to_string(); + + let result = ctx.run_trix_with_env( + audit_args, + &[("TX3_PREFLIGHT_PATH", mock_path_str.as_str())], + ); + + let recorded = + fs::read_to_string(ctx.file_path("argv.log")).expect("mock should have written argv.log"); + let lines: Vec = recorded.lines().map(str::to_string).collect(); + + (result, lines) +} + +fn flag_value<'a>(argv: &'a [String], flag: &str) -> Option<&'a str> { + argv.iter() + .position(|a| a == flag) + .and_then(|i| argv.get(i + 1)) + .map(String::as_str) +} + +#[test] +fn forwards_default_flags_and_injects_main_source() { + let ctx = TestContext::new(); + let (result, argv) = run_audit_with_mock(&ctx, &["audit"]); + + assert_success(&result); + + // Default flag values from src/commands/audit.rs are 
forwarded. + assert_eq!( + flag_value(&argv, "--provider"), + Some("scaffold"), + "argv: {:?}", + argv + ); + assert_eq!( + flag_value(&argv, "--state-out"), + Some(".tx3/audit/state.json") + ); + assert_eq!( + flag_value(&argv, "--report-out"), + Some(".tx3/audit/vulnerabilities.md") + ); + assert_eq!( + flag_value(&argv, "--skills-dir"), + Some("skills/vulnerabilities") + ); + assert_eq!( + flag_value(&argv, "--ast-out"), + Some(".tx3/audit/aiken-ast.json") + ); + assert_eq!(flag_value(&argv, "--read-scope"), Some("workspace")); + + // --main-source is injected from RootConfig.protocol.main, not from the + // user-facing CLI of `trix audit`. The init template uses "main.tx3". + assert_eq!(flag_value(&argv, "--main-source"), Some("main.tx3")); + + // Boolean flags default to off → not present in argv. + assert!(!argv.iter().any(|a| a == "--ai-logs")); + assert!(!argv.iter().any(|a| a == "--no-ast-cache")); + assert!(!argv.iter().any(|a| a == "--interactive-permissions")); +} + +#[test] +fn forwards_provider_overrides_and_optional_flags() { + let ctx = TestContext::new(); + let (result, argv) = run_audit_with_mock( + &ctx, + &[ + "audit", + "--provider", + "openai", + "--model", + "gpt-test", + "--endpoint", + "https://example/v1/responses", + "--api-key-env", + "MY_KEY", + "--reasoning-effort", + "high", + "--ai-logs", + "--no-ast-cache", + "--read-scope", + "strict", + "--interactive-permissions", + ], + ); + + assert_success(&result); + + assert_eq!(flag_value(&argv, "--provider"), Some("openai")); + assert_eq!(flag_value(&argv, "--model"), Some("gpt-test")); + assert_eq!( + flag_value(&argv, "--endpoint"), + Some("https://example/v1/responses") + ); + assert_eq!(flag_value(&argv, "--api-key-env"), Some("MY_KEY")); + assert_eq!(flag_value(&argv, "--reasoning-effort"), Some("high")); + assert_eq!(flag_value(&argv, "--read-scope"), Some("strict")); + + assert!(argv.iter().any(|a| a == "--ai-logs")); + assert!(argv.iter().any(|a| a == "--no-ast-cache")); + 
assert!(argv.iter().any(|a| a == "--interactive-permissions")); +} + +#[test] +fn propagates_non_zero_exit_from_preflight() { + let ctx = TestContext::new(); + let init_result = ctx.run_trix(&["init", "--yes"]); + assert_success(&init_result); + + // Mock that exits non-zero. + let mock_path = ctx.file_path("mock-fail.sh"); + fs::write(&mock_path, "#!/usr/bin/env bash\nexit 7\n").expect("write"); + let mut perms = fs::metadata(&mock_path).expect("stat").permissions(); + perms.set_mode(0o755); + fs::set_permissions(&mock_path, perms).expect("chmod"); + + let result = ctx.run_trix_with_env( + &["audit"], + &[( + "TX3_PREFLIGHT_PATH", + mock_path.to_string_lossy().to_string().as_str(), + )], + ); + + assert!( + !result.success(), + "trix audit should fail when preflight exits non-zero" + ); +} diff --git a/tests/e2e/edge_cases.rs b/tests/e2e/edge_cases.rs index 58bdd8b..1ac7676 100644 --- a/tests/e2e/edge_cases.rs +++ b/tests/e2e/edge_cases.rs @@ -40,3 +40,22 @@ fn init_preserves_existing_test_file() { ctx.assert_file_contains("tests/basic.toml", "# Custom test file"); ctx.assert_file_contains("tests/basic.toml", "name = \"custom\""); } + +#[test] +#[cfg(feature = "unstable")] +fn aiken_audit_fails_without_trix_config() { + let ctx = TestContext::new(); + let result = ctx.run_trix(&["audit"]); + + assert!( + !result.success(), + "audit should fail outside scoped project" + ); + assert!( + result + .stderr + .contains("No trix.toml found in current directory"), + "Expected missing trix.toml error, got stderr: {}", + result.stderr + ); +} diff --git a/tests/e2e/mod.rs b/tests/e2e/mod.rs index 58e10cc..0e7f80d 100644 --- a/tests/e2e/mod.rs +++ b/tests/e2e/mod.rs @@ -206,6 +206,7 @@ pub fn is_process_running(_pid: u32) -> bool { true } +pub mod audit_wrapper; pub mod edge_cases; pub mod happy_path; pub mod smoke; diff --git a/tests/e2e/smoke.rs b/tests/e2e/smoke.rs index 374ab3d..62320b5 100644 --- a/tests/e2e/smoke.rs +++ b/tests/e2e/smoke.rs @@ -8,3 +8,23 @@ fn 
init_runs_without_error() { assert_success(&result); ctx.assert_file_exists("trix.toml"); } + +#[test] +#[cfg(feature = "unstable")] +fn audit_help_runs_without_error() { + let ctx = TestContext::new(); + let result = ctx.run_trix(&["audit", "--help"]); + + assert_success(&result); + assert_output_contains(&result, "vulnerability"); +} + +#[test] +#[cfg(feature = "unstable")] +fn audit_help_displays_provider_options() { + let ctx = TestContext::new(); + let result = ctx.run_trix(&["audit", "--help"]); + + assert_success(&result); + assert_output_contains(&result, "provider"); +}