diff --git a/CHANGELOG.md b/CHANGELOG.md index f0685fa..8685814 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - Added a Grok CLI provider for review, fix, revalidate, and doctor flows, thanks @ebastos. - Added `--since ` on `clawpatch review` and `clawpatch revalidate` to restrict runs to features whose owned or context files changed since the given git ref, thanks @mvanhorn. - Added Flask route feature mapping for Python projects, including `web/` source roots, common root entry files, non-list method literals, and Python framework detection. +- Added the `acpx` provider for routing review, fix, and revalidate through ACP-compatible coding agents, thanks @mvanhorn. - Added Next.js route mapping for `src/app` and `src/pages` layouts, thanks @obatried. - Added first-pass Python mapping for project metadata, console scripts, source groups, pytest suites, and conservative validation defaults, thanks @xiamx. - Added progress output for `clawpatch revalidate`, thanks @twidtwid. diff --git a/README.md b/README.md index 8c305ff..6b567e1 100644 --- a/README.md +++ b/README.md @@ -93,11 +93,11 @@ working tree during the explicit fix command. Supported provider names today: - `codex`: local Codex CLI +- `acpx`: any ACP-compatible coding agent (Codex / Claude / Pi / Gemini / ...) via openclaw/acpx +- `grok`: local Grok Build CLI - `mock`: deterministic test provider - `mock-fail`: failure test provider -Direct OpenAI, Claude, Gemini, and provider panels are not implemented yet. - ## Commands - `clawpatch init`: create `.clawpatch/`, detect project basics, write config diff --git a/docs/providers.md b/docs/providers.md index 5fffa60..6828a78 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -14,11 +14,12 @@ clawpatch doctor Provider names today: - `codex`: shells out to `codex exec` (default) +- `acpx`: routes through any ACP-compatible coding agent via `acpx` - `grok`: shells out to the xAI Grok Build CLI in headless mode (`grok --prompt-file`) - `mock`: deterministic provider for tests and fixtures - `mock-fail`: failure provider for tests -### Codex +## Codex Codex invocation: @@ -28,9 +29,47 @@ Codex invocation: - output: strict JSON schema via `--output-schema` - final message capture: `--output-last-message` -### Grok +Model selection: -The `grok` provider shells out to the local [Grok Build CLI](https://x.ai/cli) +```bash +clawpatch review --model +CLAWPATCH_MODEL= clawpatch review +``` + +## ACPX + +The `acpx` provider routes through `acpx exec`, where `` is any +ACP-compatible coding agent. + +- review / revalidate: `--approve-reads` plus an explicit read-only prompt directive +- fix: `--approve-all` +- output: `--format json --json-strict --suppress-reads`, parsed from known ACP NDJSON envelope kinds +- tested envelope shape: `acpx@^0.8.0` + +Permission caveat: `acpx --approve-all` is not the same as `codex --sandbox +workspace-write`. Codex's workspace-write mode is an enforced sandbox. ACPX +approval flags control ACP permission prompts; the underlying agent still has +whatever filesystem and network access its own runtime grants. For untrusted +code, run `clawpatch fix --provider acpx` inside an isolated checkout. For +review and revalidate, strict read-only behavior still depends on the underlying +agent honoring read-only permissions and the prompt directive. + +Agent selection uses `--model` as `` or `:`, split on the +first colon: + +- unset: agent `codex`, default model +- `codex`: agent `codex`, default model +- `claude`: agent `claude`, default model +- `claude:sonnet-4-5`: agent `claude`, model `sonnet-4-5` +- `ollama:llama3:70b`: agent `ollama`, model `llama3:70b` + +Migration note: `--provider codex --model gpt-5-codex` is not equivalent to +`--provider acpx --model gpt-5-codex`; the latter selects an ACP agent named +`gpt-5-codex`. Use `--provider acpx --model codex:gpt-5-codex`. + +## Grok + +The `grok` provider shells out to the local [Grok Build CLI](https://x.ai/cli). Install the Grok CLI: @@ -38,9 +77,10 @@ Install the Grok CLI: curl -fsSL https://x.ai/cli/install.sh | bash ``` -Then ensure `grok --version` works and authenticate using the flow supported by the local Grok CLI. +Then ensure `grok --version` works and authenticate using the flow supported by +the local Grok CLI. -**Provider selection:** +Provider selection: ```bash clawpatch review --provider grok @@ -49,21 +89,14 @@ clawpatch fix --finding --provider grok --model grok-build clawpatch doctor --provider grok ``` -**Model selection** (works for both `codex` and `grok`): - -```bash -clawpatch review --model -CLAWPATCH_MODEL= clawpatch review -``` - -#### How the grok provider works +How the Grok provider works: -- **Headless mode**: Uses `--prompt-file` + `--output-format json --always-approve --verbatim --cwd `. -- **Read-only operations** (`review`, `revalidate`): Adds `--disallowed-tools "search_replace,run_terminal_cmd,Agent"` so the agent cannot modify files or run shell commands. -- **Write operations** (`fix`): Uses full `--always-approve` so the agent can edit files and run validation commands. -- **Structured output**: The inner prompt includes clawpatch's JSON schema. The provider parses the text field of the JSON envelope returned by `--output-format json` and validates it with Zod (same schemas used for Codex). -- **Large prompts**: Review and fix prompts can be tens of kilobytes (they embed the full source of owned and context files). Passing such prompts via `-p`/`--single` is unreliable, so the provider always uses `--prompt-file` (the intended mechanism for automation and large inputs). +- Headless mode: `--prompt-file` plus `--output-format json --always-approve --verbatim --cwd ` +- Read-only operations: adds `--disallowed-tools "search_replace,run_terminal_cmd,Agent"` +- Write operations: uses full `--always-approve` so the agent can edit files and run validation commands +- Structured output: validates the returned JSON against the same Zod schemas used for Codex +- Large prompts: always uses `--prompt-file` instead of passing prompt text on the command line -Direct OpenAI, Claude, Gemini, local-model, and multi-model panel providers are -not implemented yet. The `grok` provider uses the local `grok` binary for the -best integration with the surrounding Grok Build environment. +Direct OpenAI API, local-model, and multi-model panel providers are not +implemented yet. The `acpx` provider is the generic route for ACP-compatible +agents; the `grok` provider is a direct integration for the local Grok CLI. diff --git a/docs/safety.md b/docs/safety.md index d974a9f..9b25fb5 100644 --- a/docs/safety.md +++ b/docs/safety.md @@ -12,7 +12,11 @@ Current safety rules: - `fix` requires explicit `--finding `. - `fix` refuses a dirty source worktree by default. - `.clawpatch/` state changes are allowed during runs. -- review and revalidate provider calls use a read-only sandbox. +- review and revalidate provider calls use a read-only sandbox for the `codex` + provider. The `acpx` provider relies on `acpx --approve-reads` plus an explicit + read-only prompt directive; underlying agents that bypass ACP permissions (e.g. + agents running in their own full-access mode) may not be strictly sandboxed. + See docs/providers.md. - provider output must pass runtime schema validation. - feature locks are stored in feature records and `.clawpatch/locks/`; `status` surfaces both, and `clean-locks` clears both. diff --git a/src/exec.ts b/src/exec.ts index cb514f8..1215482 100644 --- a/src/exec.ts +++ b/src/exec.ts @@ -8,6 +8,19 @@ export async function runCommand( cwd: string, input?: string, options: { trimOutput?: boolean } = {}, +): Promise { + const result = await runCommandRaw(command, cwd, input); + return { + ...result, + stdout: options.trimOutput === false ? result.stdout : trimOutput(result.stdout), + stderr: options.trimOutput === false ? result.stderr : trimOutput(result.stderr), + }; +} + +export async function runCommandRaw( + command: string, + cwd: string, + input?: string, ): Promise { const started = Date.now(); const child = spawn(command, { @@ -47,8 +60,8 @@ export async function runCommand( cwd, exitCode, durationMs: Date.now() - started, - stdout: options.trimOutput === false ? stdout : trimOutput(stdout), - stderr: options.trimOutput === false ? stderr : trimOutput(stderr), + stdout, + stderr, }; } diff --git a/src/provider.test.ts b/src/provider.test.ts index 02e2d80..e120e0a 100644 --- a/src/provider.test.ts +++ b/src/provider.test.ts @@ -1,5 +1,47 @@ import { describe, expect, it } from "vitest"; -import { extractJson, providerByName } from "./provider.js"; +import { ClawpatchError } from "./errors.js"; +import { __testing, extractJson, providerByName } from "./provider.js"; + +// eslint-disable-next-line no-underscore-dangle +const { acpxFailureMessage, extractAcpxJson, parseAcpxAgent } = __testing; + +function updateEnvelope(update: object): string { + return JSON.stringify({ + jsonrpc: "2.0", + method: "session/update", + params: { sessionId: "session-1", update }, + }); +} + +function textChunk( + sessionUpdate: "agent_message_chunk" | "agent_thought_chunk", + text: string, +): string { + return updateEnvelope({ + sessionUpdate, + content: { type: "text", text }, + }); +} + +function toolResult(output: string): string { + return updateEnvelope({ + sessionUpdate: "tool_call_result", + output, + }); +} + +function expectMalformed(fn: () => unknown, message: RegExp): void { + try { + fn(); + } catch (err) { + expect(err).toBeInstanceOf(ClawpatchError); + expect((err as ClawpatchError).code).toBe("malformed-output"); + expect((err as ClawpatchError).exitCode).toBe(8); + expect((err as Error).message).toMatch(message); + return; + } + throw new Error("expected malformed-output"); +} describe("extractJson", () => { it("parses strict JSON directly", () => { @@ -32,14 +74,178 @@ describe("extractJson", () => { }); }); +describe("parseAcpxAgent", () => { + it("defaults null model to codex/null", () => { + expect(parseAcpxAgent(null)).toEqual({ agent: "codex", agentModel: null }); + }); + + it("maps a bare agent name to agent/null", () => { + expect(parseAcpxAgent("claude")).toEqual({ agent: "claude", agentModel: null }); + }); + + it("splits agent and model on a single colon", () => { + expect(parseAcpxAgent("claude:sonnet-4-5")).toEqual({ + agent: "claude", + agentModel: "sonnet-4-5", + }); + }); + + it("splits on the first colon so model ids may contain colons", () => { + expect(parseAcpxAgent("ollama:llama3:70b")).toEqual({ + agent: "ollama", + agentModel: "llama3:70b", + }); + }); +}); + +describe("extractAcpxJson", () => { + it("reconstructs JSON from agent_message_chunk stream", () => { + const stdout = [ + textChunk("agent_message_chunk", '{"findings":'), + textChunk("agent_message_chunk", '[],"inspected":{"files":[],"symbols":[],"notes":[]}}'), + ].join("\n"); + + expect(extractAcpxJson(stdout)).toEqual({ + findings: [], + inspected: { files: [], symbols: [], notes: [] }, + }); + }); + + it("reconstructs JSON from agent_thought_chunk stream", () => { + const stdout = [ + textChunk("agent_thought_chunk", '{"outcome":"fixed",'), + textChunk("agent_thought_chunk", '"reasoning":"ok","commands":[]}'), + ].join("\n"); + + expect(extractAcpxJson(stdout)).toEqual({ + outcome: "fixed", + reasoning: "ok", + commands: [], + }); + }); + + it("reads tool_call_result output when chunks are absent", () => { + const stdout = toolResult( + '{"summary":"plan","findingIds":[],"plannedFiles":[],"risk":"low","steps":[],"validationCommands":[]}', + ); + + expect(extractAcpxJson(stdout)).toEqual({ + summary: "plan", + findingIds: [], + plannedFiles: [], + risk: "low", + steps: [], + validationCommands: [], + }); + }); + + it("prefers final message chunks over thought chunks", () => { + const stdout = [ + textChunk("agent_thought_chunk", '{"note":"not final"}'), + textChunk("agent_message_chunk", '{"ok":true}'), + ].join("\n"); + + expect(extractAcpxJson(stdout)).toEqual({ ok: true }); + }); + + it("strips json markdown fences", () => { + const stdout = textChunk("agent_message_chunk", '```json\n{"ok":true}\n```'); + + expect(extractAcpxJson(stdout)).toEqual({ ok: true }); + }); + + it("tolerates a prose preamble before the JSON object", () => { + const stdout = textChunk("agent_message_chunk", 'Here is the JSON:\n{"ok":true}'); + + expect(extractAcpxJson(stdout)).toEqual({ ok: true }); + }); + + it("throws malformed-output with observed envelope kinds when nothing is extractable", () => { + const stdout = updateEnvelope({ + sessionUpdate: "usage_update", + usage: { inputTokens: 1, outputTokens: 2 }, + }); + + expectMalformed(() => extractAcpxJson(stdout), /no extractable text.*usage_update.*\^0\.8\.0/u); + }); + + it("throws malformed-output on unparseable concatenation", () => { + const stdout = [ + textChunk("agent_message_chunk", '{"ok":'), + textChunk("agent_message_chunk", "not-json}"), + ].join("\n"); + + expectMalformed(() => extractAcpxJson(stdout), /unparseable JSON/u); + }); + + it("ignores initialize, session/new, and result envelopes", () => { + const stdout = [ + JSON.stringify({ jsonrpc: "2.0", method: "initialize", result: { output: '{"bad":true}' } }), + JSON.stringify({ jsonrpc: "2.0", method: "session/new", result: { output: '{"bad":true}' } }), + JSON.stringify({ jsonrpc: "2.0", id: 1, result: { output: '{"bad":true}' } }), + textChunk("agent_message_chunk", '{"ok":true}'), + ].join("\n"); + + expect(extractAcpxJson(stdout)).toEqual({ ok: true }); + }); + + it("survives a 256-line NDJSON fixture over 8KB", () => { + const filler = Array.from({ length: 255 }, (_, idx) => + updateEnvelope({ + sessionUpdate: "usage_update", + usage: { + inputTokens: idx, + outputTokens: idx + 1, + note: "x".repeat(80), + }, + }), + ); + const lines = [...filler, textChunk("agent_message_chunk", '{"large":true}')]; + const stdout = lines.join("\n"); + + expect(lines).toHaveLength(256); + expect(stdout.length).toBeGreaterThan(8_000); + expect(extractAcpxJson(stdout)).toEqual({ large: true }); + }); +}); + +describe("acpxFailureMessage", () => { + it("does not include raw prompt envelopes from ACPX stdout", () => { + const secretPrompt = "SOURCE_CONTEXT_SECRET"; + const stdout = [ + JSON.stringify({ + jsonrpc: "2.0", + id: 2, + method: "session/prompt", + params: { + prompt: [{ type: "text", text: secretPrompt }], + }, + }), + JSON.stringify({ + jsonrpc: "2.0", + id: null, + error: { + code: -32070, + message: "Timed out after 500ms", + data: { acpxCode: "TIMEOUT", origin: "cli", sessionId: "session-1" }, + }, + }), + ].join("\n"); + + const message = acpxFailureMessage(stdout, "", 3); + + expect(message).toContain("acpx provider failed"); + expect(message).toContain("acpxCode=TIMEOUT"); + expect(message).toContain("message=Timed out after 500ms"); + expect(message).not.toContain(secretPrompt); + expect(message).not.toContain("session/prompt"); + }); +}); + describe("providerByName", () => { - it("returns the grok provider for name 'grok'", () => { - const p = providerByName("grok"); - expect(p.name).toBe("grok"); - expect(typeof p.check).toBe("function"); - expect(typeof p.review).toBe("function"); - expect(typeof p.fix).toBe("function"); - expect(typeof p.revalidate).toBe("function"); + it("returns provider instances for optional CLI-backed providers", () => { + expect(providerByName("acpx").name).toBe("acpx"); + expect(providerByName("grok").name).toBe("grok"); }); it("still supports codex, mock, and mock-fail", () => { diff --git a/src/provider.ts b/src/provider.ts index feb1e6c..7a88014 100644 --- a/src/provider.ts +++ b/src/provider.ts @@ -24,6 +24,9 @@ export function providerByName(name: string): Provider { if (name === "codex") { return codexProvider; } + if (name === "acpx") { + return acpxProvider; + } if (name === "grok") { return grokProvider; } @@ -59,6 +62,36 @@ const codexProvider: Provider = { }, }; +const ACPX_TESTED_VERSIONS = "^0.8.0"; + +const acpxProvider: Provider = { + name: "acpx", + async check(root: string): Promise { + const result = await runCommandArgs("acpx", ["--version"], root); + if (result.exitCode !== 0) { + throw new ClawpatchError( + "acpx CLI not available. Install: npm install -g acpx@latest", + 4, + "provider-auth", + ); + } + const version = result.stdout.trim(); + return `${version} (tested against ${ACPX_TESTED_VERSIONS})`; + }, + async review(root: string, prompt: string, model: string | null): Promise { + const output = await runAcpxJson(root, prompt, model, reviewJsonSchema, "read"); + return reviewOutputSchema.parse(output); + }, + async fix(root: string, prompt: string, model: string | null): Promise { + const output = await runAcpxJson(root, prompt, model, fixPlanJsonSchema, "approve"); + return fixPlanOutputSchema.parse(output); + }, + async revalidate(root: string, prompt: string, model: string | null): Promise { + const output = await runAcpxJson(root, prompt, model, revalidateJsonSchema, "read"); + return revalidateOutputSchema.parse(output); + }, +}; + const grokProvider: Provider = { name: "grok", async check(root: string): Promise { @@ -204,6 +237,157 @@ async function runCodexJson( return JSON.parse(raw) as unknown; } +export function parseAcpxAgent(model: string | null): { + agent: string; + agentModel: string | null; +} { + if (model === null) { + return { agent: "codex", agentModel: null }; + } + const idx = model.indexOf(":"); + if (idx === -1) { + return { agent: model, agentModel: null }; + } + return { agent: model.slice(0, idx), agentModel: model.slice(idx + 1) }; +} + +async function runAcpxJson( + root: string, + prompt: string, + model: string | null, + schema: object, + permission: "read" | "approve", +): Promise { + const { agent, agentModel } = parseAcpxAgent(model); + const permFlag = permission === "read" ? "--approve-reads" : "--approve-all"; + const args = ["--cwd", root, permFlag, "--format", "json", "--json-strict", "--suppress-reads"]; + if (agentModel !== null) { + args.push("--model", agentModel); + } + args.push(agent, "exec", "--file", "-"); + const result = await runCommandArgs( + "acpx", + args, + root, + buildAcpxPrompt(prompt, schema, permission), + { trimOutput: false }, + ); + if (result.exitCode !== 0) { + throw new ClawpatchError( + acpxFailureMessage(result.stdout, result.stderr, result.exitCode), + acpxExitCode(result.stdout, result.stderr, result.exitCode), + "provider-failure", + ); + } + return extractAcpxJson(result.stdout); +} + +function buildAcpxPrompt(prompt: string, schema: object, permission: "read" | "approve"): string { + const promptBody = + permission === "read" + ? "READ-ONLY REVIEW MODE.\n" + + "Do not modify, create, or delete any files.\n" + + "Do not make any tool calls that write to the workspace.\n" + + "Only read files and report findings in the JSON output below.\n\n" + + prompt + : prompt; + + return ( + `${promptBody}\n\n` + + "Return ONLY a JSON object matching this schema. No prose preamble, no markdown fences, " + + "no thinking-out-loud text before the JSON. " + + `Schema:\n${JSON.stringify(schema)}\n` + ); +} + +export function extractAcpxJson(stdout: string): unknown { + const toolCandidates: string[] = []; + const messageChunks: string[] = []; + const thoughtChunks: string[] = []; + const observedKinds = new Set(); + for (const line of stdout.split("\n")) { + const trimmed = line.trim(); + if (trimmed.length === 0) { + continue; + } + let env: { + method?: string; + params?: { + update?: { + sessionUpdate?: string; + content?: { type?: string; text?: string }; + output?: unknown; + }; + }; + }; + try { + env = JSON.parse(trimmed); + } catch { + continue; + } + if (env.method !== "session/update") { + continue; + } + const update = env.params?.update; + if (update?.sessionUpdate === undefined) { + continue; + } + observedKinds.add(update.sessionUpdate); + if ( + update.sessionUpdate === "agent_message_chunk" && + update.content?.type === "text" && + typeof update.content.text === "string" + ) { + messageChunks.push(update.content.text); + } else if ( + update.sessionUpdate === "agent_thought_chunk" && + update.content?.type === "text" && + typeof update.content.text === "string" + ) { + thoughtChunks.push(update.content.text); + } else if (update.sessionUpdate === "tool_call_result" && typeof update.output === "string") { + toolCandidates.push(update.output); + } + } + const candidates = [ + ...(messageChunks.length > 0 ? [messageChunks.join("")] : []), + ...toolCandidates.toReversed(), + ...(thoughtChunks.length > 0 ? [thoughtChunks.join("")] : []), + ]; + if (candidates.length === 0) { + throw new ClawpatchError( + `acpx provider produced no extractable text. Observed envelope kinds: ` + + `[${[...observedKinds].join(", ")}]. ` + + `acpx envelope shape may have changed since clawpatch was tested ` + + `against ${ACPX_TESTED_VERSIONS}. Check the installed acpx version.`, + 8, + "malformed-output", + ); + } + + let lastErr: unknown; + for (const candidate of candidates) { + const text = candidate.trim(); + try { + const parsed = extractJson(text); + if (parsed !== null) { + return parsed; + } + throw new Error("no JSON object found"); + } catch (err) { + lastErr = err; + } + } + throw new ClawpatchError( + `acpx provider produced unparseable JSON: ${(lastErr as Error).message}. ` + + `Observed envelope kinds: [${[...observedKinds].join(", ")}]. ` + + `acpx envelope shape may have changed since clawpatch was tested ` + + `against ${ACPX_TESTED_VERSIONS}. Check the installed acpx version.`, + 8, + "malformed-output", + ); +} + async function runGrokJson( root: string, prompt: string, @@ -359,6 +543,87 @@ function providerExitCode(stderr: string): number { return 1; } +function acpxFailureMessage(stdout: string, stderr: string, exitCode: number | null): string { + const error = extractAcpxError(stdout); + if (error !== null) { + return `acpx provider failed: ${error}`; + } + const stderrPreview = safeProviderPreview(stderr); + if (stderrPreview.length > 0) { + return `acpx provider failed: ${stderrPreview}`; + } + return `acpx provider failed with exit code ${exitCode ?? "unknown"}`; +} + +function extractAcpxError(stdout: string): string | null { + for (const line of stdout.split("\n")) { + const trimmed = line.trim(); + if (trimmed.length === 0) { + continue; + } + let env: unknown; + try { + env = JSON.parse(trimmed) as unknown; + } catch { + continue; + } + if (typeof env !== "object" || env === null) { + continue; + } + const error = (env as Record)["error"]; + if (typeof error !== "object" || error === null) { + continue; + } + const errorRecord = error as Record; + const data = errorRecord["data"]; + const dataRecord = + typeof data === "object" && data !== null ? (data as Record) : {}; + const parts = [ + stringPart("code", errorRecord["code"]), + stringPart("acpxCode", dataRecord["acpxCode"]), + stringPart("detail", dataRecord["detailCode"]), + stringPart("origin", dataRecord["origin"]), + stringPart("message", errorRecord["message"], 160), + ].filter((part) => part.length > 0); + if (parts.length > 0) { + return parts.join("; "); + } + } + return null; +} + +function stringPart(label: string, value: unknown, maxLength = 80): string { + if (typeof value !== "string" && typeof value !== "number") { + return ""; + } + const preview = safeProviderPreview(String(value), maxLength); + return preview.length === 0 ? "" : `${label}=${preview}`; +} + +function safeProviderPreview(value: string, maxLength = 200): string { + return value.replace(/\s+/gu, " ").trim().slice(0, maxLength); +} + +function acpxExitCode(stdout: string, stderr: string, exitCode: number | null): number { + const combined = `${stderr}\n${extractAcpxError(stdout) ?? ""}`; + if (/auth|login|api key|not authenticated|AUTH_REQUIRED/iu.test(combined)) { + return 4; + } + if (/quota|rate.?limit/iu.test(combined)) { + return 5; + } + if (/acpx: command not found|spawn acpx ENOENT/iu.test(combined)) { + return 4; + } + if (exitCode === 3 || /TIMEOUT/iu.test(combined)) { + return 1; + } + return 1; +} + +// eslint-disable-next-line no-underscore-dangle +export const __testing = { acpxFailureMessage, extractAcpxJson, parseAcpxAgent }; + const reviewJsonSchema = { type: "object", additionalProperties: false,