From 817ee231b165bfaff111c99c5b120476177aa682 Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Fri, 8 May 2026 21:10:12 +0700 Subject: [PATCH 1/5] Harden ACP parity behavior --- crates/ai/src/acp/bridge_init.rs | 192 +++++++++-- docs/goals/athas-acp-zed-parity/goal.md | 82 +++++ .../notes/T001-acp-zed-map.md | 224 +++++++++++++ docs/goals/athas-acp-zed-parity/state.yaml | 304 ++++++++++++++++++ .../ai/services/acp-stream-handler.ts | 10 + .../ai/tests/acp-cancellation.test.ts | 157 +++++++++ 6 files changed, 935 insertions(+), 34 deletions(-) create mode 100644 docs/goals/athas-acp-zed-parity/goal.md create mode 100644 docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md create mode 100644 docs/goals/athas-acp-zed-parity/state.yaml create mode 100644 src/features/ai/tests/acp-cancellation.test.ts diff --git a/crates/ai/src/acp/bridge_init.rs b/crates/ai/src/acp/bridge_init.rs index 921ce4f43..00f4c1dda 100644 --- a/crates/ai/src/acp/bridge_init.rs +++ b/crates/ai/src/acp/bridge_init.rs @@ -326,7 +326,7 @@ async fn bootstrap_session( load_session(connection.clone(), cwd.clone(), existing_session_id.clone()).await; if let Ok(Err(err)) = &load_result - && matches!(err.code, acp::ErrorCode::AuthRequired) + && is_auth_required(err) { if let Err(e) = authenticate(connection.clone()).await { ctx.io_handle.abort(); @@ -342,16 +342,13 @@ async fn bootstrap_session( Ok(Ok(load_response)) => { log::info!("ACP session loaded: {}", existing_session_id); client.set_session_id(existing_session_id.clone()).await; - return Ok(SessionBootstrap { - session_id: Some(acp::SessionId::new(existing_session_id)), - initial_modes: load_response.modes.map(map_mode_state), - initial_config_options: load_response.config_options.map(&ctx.map_config_options), - }); + return Ok(bootstrap_from_loaded_session( + existing_session_id, + load_response, + &ctx.map_config_options, + )); } - Ok(Err(err)) - if matches!(err.code, acp::ErrorCode::MethodNotFound) - && ctx.supports_session_resume => - { + Ok(Err(err)) if should_try_resume_after_load(&err, ctx.supports_session_resume) => { log::warn!( "ACP session/load unavailable ({}), trying session/resume", err @@ -360,7 +357,7 @@ async fn bootstrap_session( resume_session(connection.clone(), cwd.clone(), existing_session_id.clone()).await; if let Ok(Err(err)) = &resume_result - && matches!(err.code, acp::ErrorCode::AuthRequired) + && is_auth_required(err) { if let Err(e) = authenticate(connection.clone()).await { ctx.io_handle.abort(); @@ -377,20 +374,13 @@ async fn bootstrap_session( Ok(Ok(resume_response)) => { log::info!("ACP session resumed: {}", existing_session_id); client.set_session_id(existing_session_id.clone()).await; - return Ok(SessionBootstrap { - session_id: Some(acp::SessionId::new(existing_session_id)), - initial_modes: resume_response.modes.map(map_mode_state), - initial_config_options: resume_response - .config_options - .map(&ctx.map_config_options), - }); + return Ok(bootstrap_from_resumed_session( + existing_session_id, + resume_response, + &ctx.map_config_options, + )); } - Ok(Err(err)) - if matches!( - err.code, - acp::ErrorCode::MethodNotFound | acp::ErrorCode::ResourceNotFound - ) => - { + Ok(Err(err)) if should_fallback_to_new_after_resume(&err) => { log::warn!( "ACP session/resume unavailable or session missing ({}), falling back to \ session/new", @@ -415,12 +405,7 @@ async fn bootstrap_session( } } } - Ok(Err(err)) - if matches!( - err.code, - acp::ErrorCode::MethodNotFound | acp::ErrorCode::ResourceNotFound - ) => - { + Ok(Err(err)) if should_fallback_to_new_after_load(&err) => { log::warn!( "ACP session/load unavailable or session missing ({}), falling back to session/new", err @@ -447,7 +432,7 @@ async fn bootstrap_session( let mut session_result = create_session(connection.clone(), cwd.clone()).await; if let Ok(Err(err)) = &session_result - && matches!(err.code, acp::ErrorCode::AuthRequired) + && is_auth_required(err) { if let Err(e) = authenticate(connection.clone()).await { ctx.io_handle.abort(); @@ -480,11 +465,64 @@ async fn bootstrap_session( log::info!("ACP session created: {}", session.session_id); client.set_session_id(session.session_id.to_string()).await; - Ok(SessionBootstrap { + Ok(bootstrap_from_new_session(session, &ctx.map_config_options)) +} + +fn is_auth_required(err: &acp::Error) -> bool { + matches!(err.code, acp::ErrorCode::AuthRequired) +} + +fn should_try_resume_after_load(err: &acp::Error, supports_session_resume: bool) -> bool { + supports_session_resume && matches!(err.code, acp::ErrorCode::MethodNotFound) +} + +fn should_fallback_to_new_after_load(err: &acp::Error) -> bool { + matches!( + err.code, + acp::ErrorCode::MethodNotFound | acp::ErrorCode::ResourceNotFound + ) +} + +fn should_fallback_to_new_after_resume(err: &acp::Error) -> bool { + matches!( + err.code, + acp::ErrorCode::MethodNotFound | acp::ErrorCode::ResourceNotFound + ) +} + +fn bootstrap_from_loaded_session( + existing_session_id: String, + load_response: acp::LoadSessionResponse, + map_config_options: &impl Fn(Vec) -> Vec, +) -> SessionBootstrap { + SessionBootstrap { + session_id: Some(acp::SessionId::new(existing_session_id)), + initial_modes: load_response.modes.map(map_mode_state), + initial_config_options: load_response.config_options.map(map_config_options), + } +} + +fn bootstrap_from_resumed_session( + existing_session_id: String, + resume_response: acp::ResumeSessionResponse, + map_config_options: &impl Fn(Vec) -> Vec, +) -> SessionBootstrap { + SessionBootstrap { + session_id: Some(acp::SessionId::new(existing_session_id)), + initial_modes: resume_response.modes.map(map_mode_state), + initial_config_options: resume_response.config_options.map(map_config_options), + } +} + +fn bootstrap_from_new_session( + session: acp::NewSessionResponse, + map_config_options: &impl Fn(Vec) -> Vec, +) -> SessionBootstrap { + SessionBootstrap { session_id: Some(session.session_id), initial_modes: session.modes.map(map_mode_state), - initial_config_options: session.config_options.map(ctx.map_config_options), - }) + initial_config_options: session.config_options.map(map_config_options), + } } async fn create_session( @@ -570,3 +608,89 @@ fn emit_initial_session_state( log::warn!("Failed to emit initial session config options: {}", e); } } + +#[cfg(test)] +mod tests { + use super::*; + + fn no_config_options(_: Vec) -> Vec { + Vec::new() + } + + #[test] + fn loaded_session_bootstrap_preserves_requested_session_id() { + let bootstrap = bootstrap_from_loaded_session( + "existing-session".to_string(), + acp::LoadSessionResponse::new(), + &no_config_options, + ); + + assert_eq!( + bootstrap.session_id.map(|id| id.to_string()), + Some("existing-session".to_string()) + ); + assert!(bootstrap.initial_modes.is_none()); + assert!(bootstrap.initial_config_options.is_none()); + } + + #[test] + fn method_not_found_load_uses_resume_only_when_supported() { + let err = acp::Error::method_not_found(); + + assert!(should_try_resume_after_load(&err, true)); + assert!(!should_try_resume_after_load(&err, false)); + } + + #[test] + fn missing_or_unsupported_load_falls_back_to_new_session() { + assert!(should_fallback_to_new_after_load( + &acp::Error::method_not_found() + )); + assert!(should_fallback_to_new_after_load( + &acp::Error::resource_not_found(None) + )); + assert!(!should_fallback_to_new_after_load( + &acp::Error::invalid_params() + )); + } + + #[test] + fn missing_or_unsupported_resume_falls_back_to_new_session() { + assert!(should_fallback_to_new_after_resume( + &acp::Error::method_not_found() + )); + assert!(should_fallback_to_new_after_resume( + &acp::Error::resource_not_found(None) + )); + assert!(!should_fallback_to_new_after_resume( + &acp::Error::internal_error() + )); + } + + #[test] + fn auth_required_errors_are_retriable_before_session_fallbacks() { + assert!(is_auth_required(&acp::Error::auth_required())); + assert!(!is_auth_required(&acp::Error::method_not_found())); + assert!(!should_fallback_to_new_after_load( + &acp::Error::auth_required() + )); + assert!(!should_fallback_to_new_after_resume( + &acp::Error::auth_required() + )); + } + + #[test] + fn new_session_bootstrap_uses_agent_created_session_id() { + let bootstrap = bootstrap_from_new_session( + acp::NewSessionResponse::new("created-session"), + &no_config_options, + ); + + assert_eq!( + bootstrap.session_id.map(|id| id.to_string()), + Some("created-session".to_string()) + ); + assert!(bootstrap.initial_modes.is_none()); + assert!(bootstrap.initial_config_options.is_none()); + } +} diff --git a/docs/goals/athas-acp-zed-parity/goal.md b/docs/goals/athas-acp-zed-parity/goal.md new file mode 100644 index 000000000..5867cb24d --- /dev/null +++ b/docs/goals/athas-acp-zed-parity/goal.md @@ -0,0 +1,82 @@ +# Athas ACP Zed Parity + +## Objective + +Harden Athas Agent Client Protocol behavior against Zed as the local reference implementation, then complete successive safe verified patches until the first behavioral parity tranche is proven. + +## Original Request + +Make sure Athas ACP is on par with Zed by studying the local Zed repo at `/Users/sw/Code/zed`, hardening Athas ACP, and adding providers or harnesses only where they are needed to patch weak angles. + +## Intake Summary + +- Input shape: `specific` +- Audience: Athas users and maintainers who need reliable ACP behavior. +- Authority: `approved` +- Proof type: `test` +- Completion proof: Zed-referenced ACP behavior gaps are mapped, the highest-leverage Athas ACP gaps are patched, and verification proves the selected core flows now behave correctly in Athas. +- Likely misfire: Producing a comparison document or broad provider wishlist without making and verifying concrete Athas ACP improvements. +- Blind spots considered: Whether "on par with Zed" means protocol correctness, provider breadth, UX polish, or harness maturity; whether provider expansion should be delayed until core behavior is hardened; whether proof should be behavioral rather than just architectural. +- Existing plan facts: Use `/Users/sw/Code/zed` as the source to study and understand; Athas already has a good foundation; patch the bad angles; avoid broad provider expansion in the first tranche unless parity-critical evidence requires it. + +## Goal Kind + +`specific` + +## Current Tranche + +Discover Zed's ACP behavior and Athas's current ACP implementation, identify the highest-leverage behavioral gaps, choose small safe implementation slices, patch Athas, verify with behavioral parity evidence, audit each slice, and continue until the first ACP hardening tranche is complete. + +## Non-Negotiable Constraints + +- Treat `/Users/sw/Code/zed` as the local reference implementation for ACP behavior. +- Cross-reference Zed before changing Athas ACP behavior. +- Preserve Athas's existing foundation; do not rewrite or bulldoze working ACP architecture without evidence. +- Do not broaden into provider expansion by default. +- Add or modify provider support only when Scout/Judge evidence shows it is required for a parity-critical ACP flow. +- Prefer behavioral proof over architecture notes. +- Keep implementation slices small, reviewable, and locally verifiable. +- Follow Athas repo rules: use Bun for repo scripts, update relevant specs when code changes, and run relevant checks before handoff. + +## Stop Rule + +Stop only when a final audit proves the full original outcome is complete. + +Do not stop after planning, discovery, or Judge selection if a safe Worker task can be activated. + +Do not stop after a single verified Worker slice when the broader ACP parity tranche still has safe local follow-up slices. After each slice audit, advance the board to the next highest-leverage safe Worker task and continue. + +Do not stop because a slice needs owner input, credentials, production access, destructive operations, or policy decisions. Mark that exact slice blocked with a receipt, create the smallest safe follow-up or workaround task, and continue all local, non-destructive work that can still move the goal toward the full outcome. + +## Canonical Board + +Machine truth lives at: + +`docs/goals/athas-acp-zed-parity/state.yaml` + +If this charter and `state.yaml` disagree, `state.yaml` wins for task status, active task, receipts, verification freshness, and completion truth. + +## Run Command + +```text +/goal Follow docs/goals/athas-acp-zed-parity/goal.md. +``` + +## PM Loop + +On every `/goal` continuation: + +1. Read this charter. +2. Read `state.yaml`. +3. Run the bundled GoalBuddy update checker when available and mention a newer version without blocking. +4. Re-check the intake: original request, input shape, authority, proof, blind spots, existing plan facts, and likely misfire. +5. Work only on the active board task. +6. Assign Scout, Judge, Worker, or PM according to the task. +7. Write a compact task receipt. +8. Update the board. +9. If Judge selected a safe Worker task with `allowed_files`, `verify`, and `stop_if`, activate it and continue unless blocked. +10. If a problem, suggestion, or follow-up should become a repo artifact, create an approved issue/PR or ask the operator whether to create one. +11. Treat a slice audit as a checkpoint, not completion, unless it explicitly proves the full original outcome is complete. +12. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`. + +Issue and PR handoffs are supporting artifacts. `state.yaml` remains authoritative, and every external artifact decision must be recorded in a task receipt. diff --git a/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md b/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md new file mode 100644 index 000000000..c0916930c --- /dev/null +++ b/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md @@ -0,0 +1,224 @@ +# T001 Scout Receipt: ACP Zed Map + +## Result + +Mapped. + +## Summary + +Athas has a functional ACP client bridge with process lifecycle, initialize/auth/session bootstrap, prompt/cancel, session list/load/resume, file and terminal client capabilities, permission UI, events, and frontend stream handling. Zed's reference is materially deeper in behavioral turn semantics and test coverage: it models ACP as an `AcpThread`, guards prompt turns, cancels/replaces in-flight turns, marks pending tools cancelled, buffers terminal output before terminal creation, tracks usage/cost, and has extensive authorization/cancellation tests. Highest-leverage first tranche should harden Athas core protocol behavior, not broaden providers. + +## Evidence Paths + +Board: + +- `/Users/sw/Code/athas/docs/goals/athas-acp-zed-parity/goal.md` +- `/Users/sw/Code/athas/docs/goals/athas-acp-zed-parity/state.yaml` + +Athas core: + +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_prompt.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/client.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/types.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/terminal_state.rs` +- `/Users/sw/Code/athas/crates/ai/src/acp/config.rs` +- `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs` + +Athas frontend: + +- `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts` +- `/Users/sw/Code/athas/src/features/ai/components/chat/ai-chat.tsx` +- `/Users/sw/Code/athas/src/features/ai/types/acp.ts` +- `/Users/sw/Code/athas/src/features/ai/tests/acp-activity-groups.test.ts` +- `/Users/sw/Code/athas/src/features/ai/tests/session-config-option-classifier.test.ts` +- `/Users/sw/Code/athas/src/features/ai/tests/acp-session-info.test.ts` + +Zed reference: + +- `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs` +- `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` +- `/Users/sw/Code/zed/crates/agent/src/thread.rs` +- `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs` +- `/Users/sw/Code/zed/crates/agent/src/tests/test_tools.rs` +- `/Users/sw/Code/zed/docs/src/ai/external-agents.md` +- `/Users/sw/Code/zed/crates/project/src/agent_registry_store.rs` +- `/Users/sw/Code/zed/crates/migrator/src/migrations/m_2026_02_25/settings.rs` + +## Athas ACP Implementation Map + +Lifecycle: + +- Tauri commands expose start/stop/send/status/respond/mode/config/list/cancel in `src-tauri/src/commands/ai/acp.rs`. +- `AcpAgentBridge` dispatches commands to a dedicated worker thread and emits status in `crates/ai/src/acp/bridge.rs`. +- Worker stops existing agent before initialize and tracks process/connection/session/client in `crates/ai/src/acp/bridge.rs`. +- Process death emits error/status and clears state in `crates/ai/src/acp/bridge.rs`. +- Stop optionally calls `session/close` when advertised, aborts IO, stops child tree, and clears state in `crates/ai/src/acp/bridge.rs`. + +Session bootstrap: + +- Initialize creates `ClientSideConnection`, sends `InitializeRequest` with fs read/write, terminal, and Athas ext-method metadata in `crates/ai/src/acp/bridge_init.rs`. +- `AuthRequired` retry exists for session load/resume/new in `crates/ai/src/acp/bridge_init.rs`. +- Requested session tries `session/load`, then `session/resume` only when resume capability exists, then `session/new` fallback on `MethodNotFound`/`ResourceNotFound` in `crates/ai/src/acp/bridge_init.rs`. +- Initial modes/config options are emitted as frontend events in `crates/ai/src/acp/bridge_init.rs`. + +Streaming and events: + +- Client maps `UserMessageChunk`, `AgentMessageChunk`, `AgentThoughtChunk`, `ToolCall`, `ToolCallUpdate`, modes, config, session info, commands, and plan to `AcpEvent` in `crates/ai/src/acp/client.rs`. +- Frontend `AcpStreamHandler` listens to `acp-event` and routes chunks/tools/permission/status/modes/config/plan/prompt_complete/ui_action in `src/features/ai/services/acp-stream-handler.ts`. +- Chat component keeps global ACP state in sync and appends visible plan/error/status events in `src/features/ai/components/chat/ai-chat.tsx`. + +Cancellation: + +- Backend sends ACP cancel notification in `crates/ai/src/acp/bridge.rs`. +- Frontend force-stops active handler before invoking cancel, then completes UI locally in `src/features/ai/services/acp-stream-handler.ts`. +- Chat stop cancels prompt and responds to queued permissions as cancelled in `src/features/ai/components/chat/ai-chat.tsx`. + +Tools and client capabilities: + +- Permission request emits options and waits up to 300s for response in `crates/ai/src/acp/client.rs`. +- `read_text_file`/`write_text_file` are direct filesystem operations with simple error mapping in `crates/ai/src/acp/client.rs`. +- Terminal create/output/release/wait/kill are implemented over Athas terminal manager in `crates/ai/src/acp/client.rs`. +- Terminal output buffer/truncation is isolated in `crates/ai/src/acp/terminal_state.rs`; tests exist only for buffer truncation, not full ACP ordering. + +Provider handoff: + +- Agent registry is dynamic/manifest-driven; installed detection and managed wrappers live in `crates/ai/src/acp/config.rs`. +- Codex has special direct `codex-acp` or `npx @zed-industries/codex-acp` detection in `crates/ai/src/acp/config.rs`. +- Athas has catalog install/uninstall wrappers in `src-tauri/src/commands/ai/acp.rs`. + +Test health: + +- Athas visible ACP-related tests are mostly frontend utility tests and `terminal_state` unit tests. +- Scout found no Rust ACP bridge/client behavioral tests for prompt/cancel/session/tool ordering. + +## Zed ACP Implementation Map + +Lifecycle and abstraction: + +- Zed centers ACP behind `AgentConnection`: new/load/resume/close/session_history/auth/prompt/retry/cancel/modes/config/list/model selector in `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs`. +- `AcpThread` handles session updates into thread events and UI state in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. + +Turn model: + +- `AcpThread::send` optimistically inserts user message, checkpoints git state, and calls `connection.prompt` with a `UserMessageId` in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. +- `run_turn` cancels any previous turn, tracks `turn_id`, only clears same turn, flushes streaming text, handles `MaxTokens`/`Cancelled`/`Refusal` in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. +- Cancelled responses mark pending tools cancelled and skip completed-plan snapshot in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. + +File and terminal tools: + +- Zed file reads use project buffers, resource-not-found/invalid-params handling, shared snapshots, and agent location in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. +- Zed writes diff against buffer snapshot, update action log, format on save, and save buffer in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. +- Zed terminal creation shells command with env/PAGER handling and terminal entities in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. + +Authorization: + +- Built-in tool permissions support allow/deny once/always, path/url/terminal patterns, and pipeline pattern dropdowns in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. +- Authorization loop watches settings so always allow/deny resolves sibling pending prompts in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. +- Third-party/MCP authorization is separated in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. + +Provider breadth: + +- Docs list Gemini CLI, Claude Agent, Codex, GitHub Copilot, registry/custom agents, and MCP forwarding in `/Users/sw/Code/zed/docs/src/ai/external-agents.md`. +- Registry migration maps `gemini`, `claude-acp`, and `codex-acp` in `/Users/sw/Code/zed/crates/migrator/src/migrations/m_2026_02_25/settings.rs`. + +Test health: + +- Zed has direct ACP tests for terminal buffering, cancellation, pending tools, usage/cost, and authorization in `crates/acp_thread/src/acp_thread.rs` and `crates/agent/src/tests/mod.rs`. + +## Behavioral Parity Matrix + +- Lifecycle: Athas has process spawn/init/status/stop/close. Zed has stronger trait separation and turn/thread state. Gap is not an architecture rewrite; harden Athas state transitions around stop/crash/cancel. +- Sessions: Athas supports new/load/resume/list/close capability checks. Zed exposes load/resume/close/list/history through trait plus UI thread. Gap: Athas lacks behavioral tests for load-to-resume fallback and `ResourceNotFound`/`MethodNotFound`/`AuthRequired` branches. +- Streaming: Athas forwards chunks/tools/plans/modes/config/session info. Zed additionally flushes buffered streaming text at turn completion and suppresses duplicate echoed user chunks. Gap: Athas should test event ordering and duplicate user echo behavior. +- Cancellation: Athas sends cancel but frontend marks complete immediately. Zed cancels previous in-flight turn before new send, returns cancelled stop, marks pending tools cancelled, and tests multiple terminal/subagent cases. This is the top protocol behavior gap. +- Tools: Athas maps tool start/update/complete and terminal/file callbacks. Zed has richer buffer-aware file operations, terminal output ordering/buffering, and permission-state propagation. First tranche should target terminal/tool lifecycle semantics, not the full Zed buffer model. +- Errors: Athas maps many backend failures to string errors and synthetic "Tool call failed"; unknown stop reasons become `EndTurn`. Zed distinguishes `MaxTokens`, `Refusal`, `Cancelled`, `ResourceNotFound`, `InvalidParams`, and error events. Gap: preserve stop/error semantics rather than collapse. +- Provider handoff: Athas has dynamic catalog plus Codex adapter fallback. Zed has registry/provider breadth and MCP forwarding. This is provider breadth, not first-tranche protocol hardening, unless a core flow is blocked by a missing adapter. +- Tests: Athas lacks ACP bridge/client behavioral harness; Zed's tests are the main reference. First tranche should add focused Rust or TypeScript tests around one selected behavior. + +## Ranked Gap Candidates + +1. Cancellation semantics and pending tool finalization + - Type: protocol behavior + - Risk: medium + - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`, `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs` + - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs`, `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts`, `/Users/sw/Code/athas/src/features/ai/components/chat/ai-chat.tsx` + - Likely files: `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/types/acp.ts` + - Verification options: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`, focused fake ACP adapter/unit harness if available or added. + +2. Missing ACP behavioral test harness for session load/resume/auth fallbacks + - Type: protocol behavior + - Risk: low-medium + - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs` + - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` + - Likely files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_commands.rs` only if needed for test seams. + - Verification options: `cargo test -p athas-ai acp::bridge_init`, `bun check:rust`. + +3. Tool update ordering and unknown/succeeded-after-cancel handling + - Type: protocol behavior + - Risk: medium + - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` + - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/client.rs`, `/Users/sw/Code/athas/src/features/ai/lib/tool-call-state.ts` + - Likely files: `crates/ai/src/acp/client.rs`, `src/features/ai/lib/tool-call-state.ts`, `src/features/ai/services/acp-stream-handler.ts` + - Verification options: frontend unit test for tool-call-state; Rust unit test for client event mapping if fake `AppHandle` is practical. + +4. UsageUpdate/token-cost events unsupported + - Type: protocol behavior + - Risk: low + - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` + - Athas evidence: no `UsageUpdate`/`TokenUsage` hits under `/Users/sw/Code/athas/crates/ai` or `src/features/ai` except unrelated provider usage. + - Likely files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts` + - Verification options: typecheck plus unit event mapping test. + +5. Provider breadth and registry/MCP forwarding + - Type: provider breadth + - Risk: high for first tranche + - Zed evidence: `/Users/sw/Code/zed/docs/src/ai/external-agents.md` + - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/config.rs`, `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs` + - Recommendation: defer unless Judge decides a missing provider blocks cancellation/session/tool protocol proof. + +## Candidate Worker Slices + +W1: + +- Objective: Add focused cancellation behavior proof and patch Athas so cancelled ACP turns do not leave stale running/tool state. +- Candidate files: `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/types/acp.ts`, focused tests under `crates/ai/src/acp` or `src/features/ai/tests`. +- Candidate verification: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`. +- Stop if: requires provider expansion or no fake/stub path can prove behavior locally. + +W2: + +- Objective: Add `bridge_init` session bootstrap tests for load/resume/new `AuthRequired`/`MethodNotFound`/`ResourceNotFound` decisions. +- Candidate files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_commands.rs` only if needed for test seams. +- Candidate verification: `cargo test -p athas-ai acp::bridge_init`, `bun check:rust`. +- Stop if: needs live external ACP adapters instead of local stubs. + +W3: + +- Objective: Map ACP `UsageUpdate` into Athas events/types and add event-routing test, without UI polish beyond state availability. +- Candidate files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/*`. +- Candidate verification: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`. +- Stop if: requires broad analytics/cost UI design. + +## Commands Run By Scout + +- `rg ACP/acp` across Athas board/src/crates/src-tauri. +- `sed`/`nl` reads of Athas ACP bridge/init/prompt/client/types/config/frontend files. +- `rg ACP/acp` across `/Users/sw/Code/zed`. +- `sed`/`nl` reads of Zed `acp_thread`, `connection`, agent thread, docs, tests. +- `jq '.scripts' package.json`. +- `git status --short`. + +## Health Signals + +- Athas repo had untracked `docs/` in `git status`; Scout did not edit or stage anything. +- No dependency installs, builds, tests, or destructive commands were run. +- Athas validation scripts available: `bun typecheck`, `bun check`, `bun check:rust`, `bunx vp test run`. + +## Ambiguity Requiring Judge + +- Whether first Worker should patch behavior immediately or first add a fake ACP harness. Scout recommends W1 only if Judge permits adding the minimal harness/test surface needed to prove cancellation locally. +- Whether `UsageUpdate` belongs in the first tranche. It is a clear Zed parity gap but lower leverage than cancellation/session/tool correctness. +- Whether provider breadth matters for this tranche. Evidence says defer broad provider expansion; only Codex adapter parity appears locally relevant today. diff --git a/docs/goals/athas-acp-zed-parity/state.yaml b/docs/goals/athas-acp-zed-parity/state.yaml new file mode 100644 index 000000000..d8acb3fcd --- /dev/null +++ b/docs/goals/athas-acp-zed-parity/state.yaml @@ -0,0 +1,304 @@ +# GoalBuddy v2 state.yaml +# Board truth lives here. goal.md is the editable charter; notes/ holds long receipts only. + +version: 2 + +goal: + title: "Athas ACP Zed Parity" + slug: "athas-acp-zed-parity" + kind: specific + tranche: "Zed-referenced ACP behavioral hardening: discover, patch, verify, audit, and continue through safe slices until the first parity tranche is complete." + status: done + intake: + original_request: "Make Athas ACP on par with Zed by studying /Users/sw/Code/zed, hardening ACP, and adding providers or harnesses where needed." + interpreted_outcome: "Athas ACP behavior is hardened against Zed's local implementation and verified with behavioral parity evidence for the selected core flows." + input_shape: specific + audience: "Athas users and maintainers" + authority: approved + proof_type: test + completion_proof: "Zed-referenced ACP gaps are mapped, selected Athas patches are implemented, relevant tests/checks pass, and a final audit confirms behavioral parity for the first tranche." + likely_misfire: "Stopping at comparison notes, provider brainstorming, or broad architecture commentary without verified Athas ACP behavior changes." + blind_spots_considered: + - "Parity could mean protocol correctness, provider breadth, UX behavior, or harness maturity." + - "Broad provider expansion can distract from hardening core ACP behavior." + - "Athas already has a foundation; the goal is targeted patching, not a rewrite." + - "Proof must be behavioral and verifiable, not just source-backed notes." + existing_plan_facts: + - "Use /Users/sw/Code/zed as the reference source." + - "Cross-reference Zed before changing Athas ACP." + - "Athas already has a good ACP foundation." + - "Patch weak angles in Athas rather than replacing everything." + - "Do not pursue broad provider expansion in the first tranche unless parity-critical evidence requires it." + +rules: + pm_owns_state: true + one_active_task: true + max_write_workers: 1 + no_implementation_without_worker_or_pm_task: true + no_completion_without_judge_or_pm_audit: true + planning_is_not_completion: true + queued_required_worker_blocks_completion: true + continuous_until_full_outcome: true + missing_input_or_credentials_do_not_stop_goal: true + preserve_and_validate_existing_plan: true + intake_misfire_must_be_audited: true + +agents: + scout: installed + worker: installed + judge: installed + +active_task: null + +tasks: + - id: T001 + type: scout + assignee: Scout + status: done + reasoning_hint: medium + objective: "Map Athas ACP against Zed ACP and identify the highest-leverage behavioral gaps for the first hardening tranche." + inputs: + - "/Users/sw/Code/athas" + - "/Users/sw/Code/zed" + - "Athas ACP-related crates, commands, frontend integration, tests, and docs discovered by search." + - "Zed ACP-related implementation, provider integration, lifecycle behavior, tests, and docs discovered by search." + constraints: + - "Read-only." + - "Do not edit implementation files." + - "Treat /Users/sw/Code/zed as the reference source." + - "Use concrete file paths, symbols, tests, and command evidence." + - "Separate protocol behavior gaps from provider breadth gaps." + - "Prefer targeted hardening candidates over rewrite proposals." + expected_output: + - "Athas ACP implementation map with file-path evidence." + - "Zed ACP implementation map with file-path evidence." + - "Behavioral parity matrix covering lifecycle, sessions, streaming, cancellation, tools, errors, and provider handoff where applicable." + - "Ranked Athas gap candidates with risk, likely files, and verification options." + - "Candidate Worker slices that avoid broad provider expansion unless required." + receipt: + result: done + note: "notes/T001-acp-zed-map.md" + summary: "Mapped Athas ACP against Zed. Top gap is cancellation semantics and pending tool finalization; session bootstrap tests, tool ordering, and usage events follow. Provider breadth should stay deferred unless parity-critical." + evidence: + - "/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" + - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts" + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/acp_thread/src/connection.rs" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" + candidate_worker_slices: + - "W1: cancellation behavior proof and stale running/tool state patch." + - "W2: bridge_init session bootstrap fallback tests." + - "W3: UsageUpdate event mapping and routing test." + + - id: T002 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Choose the first safe Athas ACP hardening slice from Scout evidence." + inputs: + - "T001 receipt or note." + - "Current dirty diff." + - "Athas project validation commands." + constraints: + - "Read-only." + - "Do not implement." + - "Require Zed evidence for any claimed parity gap." + - "Prefer the smallest slice with strong verification." + - "Do not choose broad provider expansion unless it is required for a parity-critical behavior." + expected_output: + - "Decision." + - "Why this slice is first." + - "Exact Worker objective." + - "allowed_files." + - "verify." + - "stop_if." + - "Deferred or blocked candidates." + receipt: + result: done + decision: "Select a narrowed W1 cancellation-state slice first: harden Athas frontend ACP cancellation so an active cancelled prompt finalizes visible running tool state instead of leaving stale running tool calls behind. Do not attempt the full backend prompt-turn rewrite yet." + why_this_slice_is_first: + - "Smallest parity-critical subset of Scout's top-ranked gap." + - "Zed cancels running turns and marks pending or in-progress tools cancelled; Athas currently force-stops frontend handling without visible tool finalization." + - "Implementable without provider expansion, live ACP adapters, Rust process lifecycle changes, or broad architecture work." + - "Verifiable with focused frontend tests plus typecheck." + evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs" + - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts" + - "/Users/sw/Code/athas/src/features/ai/lib/tool-call-state.ts" + - "/Users/sw/Code/athas/src/features/ai/types/acp.ts" + next_allowed_task: T003 + deferred_or_blocked_candidates: + - "Deferred: full backend prompt-turn lifecycle hardening." + - "Deferred: bridge_init session fallback tests." + - "Deferred: UsageUpdate/token-cost event mapping." + - "Blocked for this tranche unless Judge reopens: broad provider expansion, registry/MCP forwarding, or adding providers solely for breadth." + full_outcome_complete: false + + - id: T003 + type: worker + assignee: Worker + status: done + reasoning_hint: default + objective: "Add a focused frontend cancellation regression test and patch Athas ACP stream handling so cancelling an active ACP prompt finalizes any active visible tool calls and activity rows as cancelled/failed, prevents late events from resurrecting the cancelled turn, and still sends cancel_acp_prompt to the backend." + allowed_files: + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/lib/tool-call-state.ts" + - "src/features/ai/lib/acp-event-timeline.ts" + - "src/features/ai/components/chat/ai-chat.tsx" + - "src/features/ai/components/messages/tool-call-display.tsx" + - "src/features/ai/types/acp.ts" + - "src/features/ai/tests/acp-cancellation.test.ts" + verify: + - "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" + - "bun typecheck" + - "If the test runner cannot target both files reliably, run: bunx vp test run src/features/ai/tests" + stop_if: + - "The fix requires editing crates/ai/src/acp/* or src-tauri/*; escalate back to Judge for a backend cancellation slice." + - "The fix requires a live ACP provider, external adapter, or broad fake ACP harness instead of local frontend mocks." + - "The fix requires provider expansion, registry/MCP work, new dependencies, or test/build config changes." + - "The Worker cannot produce a deterministic regression test showing active tool state is finalized on cancel." + - "Late-event handling is ambiguous enough that implementing it would silently discard valid future-turn events." + - "Verification fails twice after scoped fixes." + receipt: + result: done + changed_files: + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/tests/acp-cancellation.test.ts" + commands: + - cmd: "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" + status: pass + - cmd: "bun typecheck" + status: pass + summary: "ACP cancellation now finalizes tracked active tools with a Cancelled error before cleanup, both for frontend force-stop cancellation and protocol prompt_complete(cancelled). Regression tests prove active tool finalization, backend cancel invocation, and late-event suppression after cancellation." + + - id: T004 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Audit the first implemented ACP slice against Zed-referenced behavioral parity and decide whether to continue with another slice." + inputs: + - "T001 receipt or note." + - "T002 receipt." + - "T003 receipt." + - "Current dirty diff." + - "Last verification." + constraints: + - "Read-only." + - "Reject completion if the slice is only documented but not verified." + - "Reject completion if required ACP hardening work remains safely actionable." + - "If not complete, define the next smallest safe Scout, Judge, or Worker task." + expected_output: + - "slice_complete: true | false." + - "full_outcome_complete: true | false." + - "Behavioral parity evidence." + - "Missing evidence." + - "Next task if not complete." + receipt: + result: done + decision: "Approve the first implemented ACP cancellation slice as complete, but do not complete the tranche. Continue with the next safe ACP hardening slice." + slice_complete: true + full_outcome_complete: false + behavioral_parity_evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs marks pending/in-progress tools canceled when cancelled." + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs includes cancellation tests." + - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts now finalizes active tools before cancelled completion and force-stop cleanup." + - "/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts verifies tool finalization, backend cancel invocation, and late-event suppression." + missing_evidence: + - "No Athas Rust/backend behavioral test yet proves session load/resume/new fallbacks or AuthRequired retry branches against the Zed connection/session model." + - "No Athas usage/token-cost ACP event mapping exists yet." + next_allowed_task: T005 + tranche_complete: false + + - id: T005 + type: worker + assignee: Worker + status: done + reasoning_hint: default + objective: "Add focused Rust behavioral tests for ACP session bootstrap fallback parity: requested session load success, session/load MethodNotFound falling through to session/resume when resume is supported, ResourceNotFound/MethodNotFound falling through to session/new, and AuthRequired retry behavior where a deterministic local test seam is practical." + allowed_files: + - "crates/ai/src/acp/bridge_init.rs" + verify: + - "cargo test -p athas-ai acp::bridge_init" + - "bun check:rust" + stop_if: + - "A deterministic local test cannot be added inside crates/ai/src/acp/bridge_init.rs without introducing a live ACP provider, external adapter, new dependency, or broad fake harness." + - "The tests require editing src-tauri, process lifecycle code, build config, Cargo.toml, or files outside allowed_files." + - "The implementation pressure turns into runtime behavior changes beyond small test seams needed inside bridge_init.rs." + - "The Worker discovers that the current bridge_init structure cannot be tested safely without a prior Scout/Judge task to design a narrower seam." + - "Verification fails twice after scoped fixes." + receipt: + result: done + changed_files: + - "crates/ai/src/acp/bridge_init.rs" + commands: + - cmd: "cargo test -p athas-ai acp::bridge_init" + status: pass + - cmd: "cargo check -p athas-ai" + status: pass + - cmd: "cargo fmt -p athas-ai --check" + status: pass + blocked_verification: + - cmd: "bun check:rust" + reason: "Repo-wide rustfmt check reports unrelated pre-existing diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs, outside this task's allowed_files." + summary: "Added deterministic in-file tests and small decision seams for ACP session bootstrap fallback parity: load success preserves requested session id, MethodNotFound load tries resume only when supported, MethodNotFound/ResourceNotFound load/resume fall back to new session, AuthRequired is classified for retry before fallbacks, and new-session bootstrap preserves the agent-created id." + + - id: T999 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Final audit for the first Athas ACP Zed parity tranche." + inputs: + - "All done task receipts." + - "Zed reference evidence." + - "Last verification." + - "Current dirty diff." + constraints: + - "Read-only." + - "Do not implement." + - "Reject completion if required Worker work is still queued or active." + - "Reject completion if the evidence does not map Athas behavior back to Zed-referenced parity." + - "Reject completion if the result is only a comparison artifact with no verified Athas improvement." + expected_output: + - "complete | not_complete." + - "full_outcome_complete: true | false." + - "Completed parity behaviors." + - "Missing evidence." + - "Next task if not complete." + receipt: + result: done + decision: complete + summary: "Approve T999. The first Athas ACP Zed parity tranche is complete enough to close: it produced Zed-referenced behavioral changes, not just comparison notes, and the implemented slices have focused regression coverage plus scoped frontend, TypeScript, Rust test/check, and Rust format verification." + full_outcome_complete: true + tranche_complete: true + completed_parity_behaviors: + - "Cancellation parity: Athas now finalizes active ACP tools with a Cancelled error before cleanup on protocol prompt_complete(cancelled) and local forceStop/user cancel, matching Zed's pending/in-progress tool cancellation behavior." + - "Cancellation proof: acp-cancellation.test.ts verifies active tool finalization, backend cancel_acp_prompt invocation, completion callback behavior, and late-event suppression after force-stop cancellation." + - "Session bootstrap parity: bridge_init.rs now has explicit decision/build seams covering requested session load success, MethodNotFound load-to-resume only when resume is supported, MethodNotFound/ResourceNotFound fallback to new, AuthRequired retry classification before fallbacks, and new-session id preservation." + - "Session bootstrap proof: Rust unit tests cover load id preservation, load/resume fallback gates, AuthRequired classification, and new-session id preservation." + missing_evidence: + - "No blocking missing evidence for this first tranche." + - "Deferred parity gap: Zed has UsageUpdate/token-cost handling and tests; Athas usage/token-cost ACP event mapping remains a valid later parity slice." + - "Verification caveat: bun check:rust remains blocked by unrelated repo-wide rustfmt diffs outside this tranche's allowed files." + - "No live ACP provider smoke was included; acceptable because this tranche used deterministic local behavioral tests and did not require provider expansion." + next_task_if_not_complete: null + +checks: + dirty_fingerprint: unknown + last_verification: + result: scoped_pass_with_unrelated_blocker + task: T005 + commands: + - "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" + - "bunx vp test run src/features/ai/tests" + - "bun typecheck" + - "cargo test -p athas-ai acp::bridge_init" + - "cargo check -p athas-ai" + - "cargo fmt -p athas-ai --check" + - "bun check:rust blocked by unrelated repo-wide rustfmt diffs outside T005 allowed_files" diff --git a/src/features/ai/services/acp-stream-handler.ts b/src/features/ai/services/acp-stream-handler.ts index 177e830c3..21b66d1e1 100644 --- a/src/features/ai/services/acp-stream-handler.ts +++ b/src/features/ai/services/acp-stream-handler.ts @@ -327,6 +327,7 @@ export class AcpStreamHandler { // The stop reason can be used to determine how to handle the completion if (event.stopReason === "cancelled") { // User cancelled the prompt + this.finalizeActiveToolsAsCancelled(); this.cleanup(); this.handlers.onComplete(); return; @@ -542,10 +543,19 @@ export class AcpStreamHandler { } } + private finalizeActiveToolsAsCancelled(): void { + if (!this.handlers.onToolComplete || this.activeTools.size === 0) return; + + for (const [toolId, toolName] of this.activeTools) { + this.handlers.onToolComplete(toolName, toolId, undefined, "Cancelled"); + } + } + private forceStop(): void { if (this.sessionComplete || this.cancelled) return; this.cancelled = true; this.pendingNewMessage = false; + this.finalizeActiveToolsAsCancelled(); this.cleanup(); this.handlers.onComplete(); } diff --git a/src/features/ai/tests/acp-cancellation.test.ts b/src/features/ai/tests/acp-cancellation.test.ts new file mode 100644 index 000000000..50d700174 --- /dev/null +++ b/src/features/ai/tests/acp-cancellation.test.ts @@ -0,0 +1,157 @@ +import { afterEach, describe, expect, it, vi } from "vite-plus/test"; +import { invoke } from "@tauri-apps/api/core"; +import { AcpStreamHandler } from "../services/acp-stream-handler"; +import type { AcpEvent } from "../types/acp"; + +vi.mock("@tauri-apps/api/core", () => ({ + invoke: vi.fn(), +})); + +vi.mock("@tauri-apps/api/event", () => ({ + listen: vi.fn(), +})); + +vi.mock("@/features/ai/store/store", () => ({ + useAIChatStore: { + getState: () => ({ + acpStatus: null, + getChatById: () => null, + getCurrentChat: () => null, + setAcpStatus: vi.fn(), + setChatAcpSessionId: vi.fn(), + setAvailableSlashCommands: vi.fn(), + setSessionConfigOptions: vi.fn(), + setSessionModeState: vi.fn(), + setCurrentModeId: vi.fn(), + }), + }, +})); + +vi.mock("@/features/editor/stores/buffer-store", () => ({ + useBufferStore: { + getState: () => ({ + actions: { + openWebViewerBuffer: vi.fn(), + openTerminalBuffer: vi.fn(), + }, + }), + }, +})); + +vi.mock("@/features/window/stores/project-store", () => ({ + useProjectStore: { + getState: () => ({ + rootFolderPath: "/repo", + }), + }, +})); + +vi.mock("@/features/ai/lib/acp-session-info", () => ({ + getChatTitleFromSessionInfo: (_currentTitle: string | undefined, nextTitle: string) => nextTitle, +})); + +vi.mock("../utils/ai-context-builder", () => ({ + buildContextPrompt: () => "", +})); + +const mockedInvoke = vi.mocked(invoke); + +type TestableAcpStreamHandler = { + handleAcpEvent: (event: unknown) => void; +}; + +type AcpStreamHandlerStatic = { + activeHandler: AcpStreamHandler | null; +}; + +const setActiveHandler = (handler: AcpStreamHandler | null) => { + (AcpStreamHandler as unknown as AcpStreamHandlerStatic).activeHandler = handler; +}; + +const handleAcpEvent = (handler: AcpStreamHandler, event: AcpEvent) => { + (handler as unknown as TestableAcpStreamHandler).handleAcpEvent(event); +}; + +describe("AcpStreamHandler cancellation", () => { + afterEach(() => { + mockedInvoke.mockReset(); + setActiveHandler(null); + }); + + it("finalizes active tools before sending backend cancellation", async () => { + const onComplete = vi.fn(); + const onToolComplete = vi.fn(); + const handler = new AcpStreamHandler( + "codex", + { + onChunk: vi.fn(), + onComplete, + onError: vi.fn(), + onToolComplete, + }, + "chat-1", + ); + + handleAcpEvent(handler, { + type: "tool_start", + sessionId: "session-1", + toolName: "read_text_file", + toolId: "tool-1", + input: { path: "src/main.ts" }, + kind: "read", + status: "in_progress", + locations: [], + }); + setActiveHandler(handler); + + await AcpStreamHandler.cancelPrompt(); + + expect(onToolComplete).toHaveBeenCalledWith("read_text_file", "tool-1", undefined, "Cancelled"); + expect(onComplete).toHaveBeenCalledOnce(); + expect(mockedInvoke).toHaveBeenCalledWith("cancel_acp_prompt"); + }); + + it("ignores late events after a cancelled turn is force-stopped", async () => { + const onComplete = vi.fn(); + const onToolComplete = vi.fn(); + const onToolUse = vi.fn(); + const handler = new AcpStreamHandler( + "codex", + { + onChunk: vi.fn(), + onComplete, + onError: vi.fn(), + onToolUse, + onToolComplete, + }, + "chat-1", + ); + + handleAcpEvent(handler, { + type: "tool_start", + sessionId: "session-1", + toolName: "read_text_file", + toolId: "tool-1", + input: { path: "src/main.ts" }, + kind: "read", + status: "in_progress", + locations: [], + }); + setActiveHandler(handler); + + await AcpStreamHandler.cancelPrompt(); + handleAcpEvent(handler, { + type: "tool_start", + sessionId: "session-1", + toolName: "write_text_file", + toolId: "tool-2", + input: { path: "src/main.ts" }, + kind: "edit", + status: "in_progress", + locations: [], + }); + + expect(onToolUse).toHaveBeenCalledOnce(); + expect(onToolComplete).toHaveBeenCalledOnce(); + }); +}); From 9e5b01a602021e8091d3521ae0ce0c3752552e95 Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Fri, 8 May 2026 21:58:56 +0700 Subject: [PATCH 2/5] Add ACP harness parity coverage --- crates/ai/src/acp/client.rs | 236 +++++++---- crates/ai/src/acp/types.rs | 7 + docs/goals/athas-acp-harness-parity/goal.md | 77 ++++ .../notes/T001-acp-harness-map.md | 219 ++++++++++ .../goals/athas-acp-harness-parity/state.yaml | 382 ++++++++++++++++++ .../ai/services/acp-stream-handler.ts | 3 + src/features/ai/tests/acp-permission.test.ts | 139 +++++++ src/features/ai/tests/acp-usage.test.ts | 104 +++++ src/features/ai/types/acp.ts | 6 + 9 files changed, 1095 insertions(+), 78 deletions(-) create mode 100644 docs/goals/athas-acp-harness-parity/goal.md create mode 100644 docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md create mode 100644 docs/goals/athas-acp-harness-parity/state.yaml create mode 100644 src/features/ai/tests/acp-permission.test.ts create mode 100644 src/features/ai/tests/acp-usage.test.ts diff --git a/crates/ai/src/acp/client.rs b/crates/ai/src/acp/client.rs index 4b3ac6739..6695ea712 100644 --- a/crates/ai/src/acp/client.rs +++ b/crates/ai/src/acp/client.rs @@ -61,6 +61,51 @@ impl AthasAcpClient { self.permission_tx.clone() } + fn map_permission_option_kind( + kind: acp::PermissionOptionKind, + ) -> super::types::AcpPermissionOptionKind { + match kind { + acp::PermissionOptionKind::AllowOnce => super::types::AcpPermissionOptionKind::AllowOnce, + acp::PermissionOptionKind::AllowAlways => { + super::types::AcpPermissionOptionKind::AllowAlways + } + acp::PermissionOptionKind::RejectOnce => super::types::AcpPermissionOptionKind::RejectOnce, + acp::PermissionOptionKind::RejectAlways => { + super::types::AcpPermissionOptionKind::RejectAlways + } + _ => super::types::AcpPermissionOptionKind::RejectOnce, + } + } + + fn permission_response_for_choice( + options: &[acp::PermissionOption], + approved: bool, + ) -> acp::RequestPermissionResponse { + let selected_option = options + .iter() + .find(|opt| { + if approved { + matches!( + opt.kind, + acp::PermissionOptionKind::AllowOnce | acp::PermissionOptionKind::AllowAlways + ) + } else { + matches!( + opt.kind, + acp::PermissionOptionKind::RejectOnce | acp::PermissionOptionKind::RejectAlways + ) + } + }) + .or_else(|| options.first()) + .map(|opt| acp::SelectedPermissionOutcome::new(opt.option_id.clone())); + + if let Some(selected) = selected_option { + acp::RequestPermissionResponse::new(acp::RequestPermissionOutcome::Selected(selected)) + } else { + acp::RequestPermissionResponse::new(acp::RequestPermissionOutcome::Cancelled) + } + } + pub async fn set_session_id(&self, session_id: String) { let mut current = self.current_session_id.lock().await; *current = Some(session_id); @@ -209,23 +254,7 @@ impl AthasAcpClient { fn fallback_permission_response( args: &acp::RequestPermissionRequest, ) -> acp::RequestPermissionResponse { - let selected_option = args - .options - .iter() - .find(|opt| { - matches!( - opt.kind, - acp::PermissionOptionKind::RejectOnce | acp::PermissionOptionKind::RejectAlways - ) - }) - .or_else(|| args.options.first()) - .map(|opt| acp::SelectedPermissionOutcome::new(opt.option_id.clone())); - - if let Some(selected) = selected_option { - acp::RequestPermissionResponse::new(acp::RequestPermissionOutcome::Selected(selected)) - } else { - acp::RequestPermissionResponse::new(acp::RequestPermissionOutcome::Cancelled) - } + Self::permission_response_for_choice(&args.options, false) } fn map_plan_priority(priority: acp::PlanEntryPriority) -> AcpPlanEntryPriority { @@ -348,6 +377,14 @@ impl AthasAcpClient { .collect() } + fn map_usage_update(session_id: String, update: acp::UsageUpdate) -> AcpEvent { + AcpEvent::UsageUpdate { + session_id, + used: update.used, + size: update.size, + } + } + pub(crate) fn map_session_config_option( option: acp::SessionConfigOption, ) -> Option { @@ -418,21 +455,7 @@ impl acp::Client for AthasAcpClient { .map(|option| super::types::AcpPermissionOption { id: option.option_id.to_string(), name: option.name.clone(), - kind: match option.kind { - acp::PermissionOptionKind::AllowOnce => { - super::types::AcpPermissionOptionKind::AllowOnce - } - acp::PermissionOptionKind::AllowAlways => { - super::types::AcpPermissionOptionKind::AllowAlways - } - acp::PermissionOptionKind::RejectOnce => { - super::types::AcpPermissionOptionKind::RejectOnce - } - acp::PermissionOptionKind::RejectAlways => { - super::types::AcpPermissionOptionKind::RejectAlways - } - _ => super::types::AcpPermissionOptionKind::RejectOnce, - }, + kind: Self::map_permission_option_kind(option.kind), }) .collect(), }); @@ -486,53 +509,9 @@ impl acp::Client for AthasAcpClient { return Ok(Self::fallback_permission_response(&args)); } - // Prefer allow-once/allow-always options if available - let selected_option = args - .options - .iter() - .find(|opt| { - matches!( - opt.kind, - acp::PermissionOptionKind::AllowOnce - | acp::PermissionOptionKind::AllowAlways - ) - }) - .or_else(|| args.options.first()) - .map(|opt| acp::SelectedPermissionOutcome::new(opt.option_id.clone())); - - if let Some(selected) = selected_option { - Ok(acp::RequestPermissionResponse::new( - acp::RequestPermissionOutcome::Selected(selected), - )) - } else { - Ok(acp::RequestPermissionResponse::new( - acp::RequestPermissionOutcome::Cancelled, - )) - } + Ok(Self::permission_response_for_choice(&args.options, true)) } else { - // Prefer reject-once/reject-always options if available - let selected_option = args - .options - .iter() - .find(|opt| { - matches!( - opt.kind, - acp::PermissionOptionKind::RejectOnce - | acp::PermissionOptionKind::RejectAlways - ) - }) - .or_else(|| args.options.first()) - .map(|opt| acp::SelectedPermissionOutcome::new(opt.option_id.clone())); - - if let Some(selected) = selected_option { - Ok(acp::RequestPermissionResponse::new( - acp::RequestPermissionOutcome::Selected(selected), - )) - } else { - Ok(acp::RequestPermissionResponse::new( - acp::RequestPermissionOutcome::Cancelled, - )) - } + Ok(Self::permission_response_for_choice(&args.options, false)) } } _ => Ok(acp::RequestPermissionResponse::new( @@ -695,6 +674,9 @@ impl acp::Client for AthasAcpClient { updated_at: update.updated_at.take(), }); } + acp::SessionUpdate::UsageUpdate(update) => { + self.emit_event(Self::map_usage_update(session_id, update)); + } acp::SessionUpdate::AvailableCommandsUpdate(commands_update) => { self.emit_event(AcpEvent::SlashCommandsUpdate { session_id, @@ -1161,3 +1143,101 @@ impl acp::Client for AthasAcpClient { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + + fn permission_option( + id: &'static str, + kind: acp::PermissionOptionKind, + ) -> acp::PermissionOption { + acp::PermissionOption::new(id, id, kind) + } + + fn selected_option_id(response: acp::RequestPermissionResponse) -> Option { + match response.outcome { + acp::RequestPermissionOutcome::Selected(selected) => Some(selected.option_id.to_string()), + acp::RequestPermissionOutcome::Cancelled => None, + _ => None, + } + } + + #[test] + fn usage_update_maps_to_frontend_event() { + let event = AthasAcpClient::map_usage_update( + "session-1".to_string(), + acp::UsageUpdate::new(1234, 200000), + ); + + match event { + AcpEvent::UsageUpdate { + session_id, + used, + size, + } => { + assert_eq!(session_id, "session-1"); + assert_eq!(used, 1234); + assert_eq!(size, 200000); + } + other => panic!("expected usage update event, got {other:?}"), + } + } + + #[test] + fn permission_option_kinds_map_to_frontend_kinds() { + assert!(matches!( + AthasAcpClient::map_permission_option_kind(acp::PermissionOptionKind::AllowOnce), + super::super::types::AcpPermissionOptionKind::AllowOnce + )); + assert!(matches!( + AthasAcpClient::map_permission_option_kind(acp::PermissionOptionKind::AllowAlways), + super::super::types::AcpPermissionOptionKind::AllowAlways + )); + assert!(matches!( + AthasAcpClient::map_permission_option_kind(acp::PermissionOptionKind::RejectOnce), + super::super::types::AcpPermissionOptionKind::RejectOnce + )); + assert!(matches!( + AthasAcpClient::map_permission_option_kind(acp::PermissionOptionKind::RejectAlways), + super::super::types::AcpPermissionOptionKind::RejectAlways + )); + } + + #[test] + fn approved_permission_prefers_allow_options() { + let options = vec![ + permission_option("reject-once", acp::PermissionOptionKind::RejectOnce), + permission_option("allow-always", acp::PermissionOptionKind::AllowAlways), + ]; + + let response = AthasAcpClient::permission_response_for_choice(&options, true); + + assert_eq!( + selected_option_id(response).as_deref(), + Some("allow-always") + ); + } + + #[test] + fn rejected_permission_prefers_reject_options() { + let options = vec![ + permission_option("allow-once", acp::PermissionOptionKind::AllowOnce), + permission_option("reject-always", acp::PermissionOptionKind::RejectAlways), + ]; + + let response = AthasAcpClient::permission_response_for_choice(&options, false); + + assert_eq!( + selected_option_id(response).as_deref(), + Some("reject-always") + ); + } + + #[test] + fn permission_choice_cancels_when_options_are_empty() { + let response = AthasAcpClient::permission_response_for_choice(&[], true); + + assert_eq!(selected_option_id(response), None); + } +} diff --git a/crates/ai/src/acp/types.rs b/crates/ai/src/acp/types.rs index c6e9af22a..8754c99fe 100644 --- a/crates/ai/src/acp/types.rs +++ b/crates/ai/src/acp/types.rs @@ -486,6 +486,13 @@ pub enum AcpEvent { title: Option, updated_at: Option, }, + /// Session token/context usage updated + #[serde(rename_all = "camelCase")] + UsageUpdate { + session_id: String, + used: u64, + size: u64, + }, /// Prompt turn completed with a stop reason #[serde(rename_all = "camelCase")] PromptComplete { diff --git a/docs/goals/athas-acp-harness-parity/goal.md b/docs/goals/athas-acp-harness-parity/goal.md new file mode 100644 index 000000000..f94918cb6 --- /dev/null +++ b/docs/goals/athas-acp-harness-parity/goal.md @@ -0,0 +1,77 @@ +# Athas ACP Harness Parity + +## Objective + +Bring Athas's ACP harness posture up toward Zed's by studying Zed's local ACP harness/test patterns, then extending Athas's existing ACP foundation with comparable behavioral and provider/adaptor harness coverage. + +## Original Request + +Match the amount of ACP harness that Zed has for Athas as well. Look at how Zed does it and do the same for Athas. Athas's foundation is already good; keep going from there. + +## Intake Summary + +- Input shape: `specific` +- Audience: Athas maintainers and contributors working on ACP reliability. +- Authority: `approved` +- Proof type: `test` +- Completion proof: Zed's ACP harness/test posture is mapped, Athas gains the first highest-value comparable harness slices, and local verification proves those Athas harnesses run. +- Likely misfire: Counting tests or cloning Zed structure mechanically without creating reusable, reliable Athas ACP coverage. +- Blind spots considered: Harness parity could mean raw test count, protocol behavior coverage, fake-agent infrastructure, provider/adaptor compatibility, real adapter smoke, or developer tooling; real provider smoke can become auth-heavy or network-flaky. +- Existing plan facts: Use `/Users/sw/Code/zed` as the reference source; do not treat Athas as sparse or broken; extend Athas's existing foundation; optimize for behavioral ACP harness coverage plus provider compatibility harness; include real adapter smoke where practical but avoid auth-heavy or flaky provider runs. + +## Goal Kind + +`specific` + +## Current Tranche + +Map how Zed structures ACP harnesses and tests, identify the closest Athas extension points, implement successive safe harness slices for core ACP behavior and provider/adaptor compatibility, verify them locally, and audit whether the first harness parity tranche is complete. + +## Non-Negotiable Constraints + +- Treat `/Users/sw/Code/zed` as the local reference implementation for ACP harness/test posture. +- Cross-reference Zed before choosing Athas harness work. +- Preserve and extend Athas's existing ACP foundation; do not rewrite working architecture just to mimic Zed names. +- Optimize for reliable harness infrastructure, not raw test count. +- Cover both behavioral ACP flows and provider/adaptor compatibility where practical. +- Prefer deterministic local harnesses. If real adapter smoke needs auth, secrets, network, or brittle local state, record the blocker and use a simulated/fake harness path instead. +- Do not add broad provider expansion unless Zed evidence shows it is required for harness parity. +- Follow Athas repo rules: use Bun for repo scripts, keep changes scoped, and run relevant checks. + +## Stop Rule + +Stop only when a final audit proves the full original outcome for this tranche is complete. + +Do not stop after mapping Zed, picking a harness design, or adding one partial test if safe harness work remains. Continue through verified implementation slices until the first harness parity tranche has concrete Athas tests/harnesses and an audit receipt. + +Do not stop because real provider smoke needs auth, credentials, production access, network state, or policy decisions. Mark that exact smoke task blocked with a receipt, then continue with deterministic local harness work that advances the same parity goal. + +## Canonical Board + +Machine truth lives at: + +`docs/goals/athas-acp-harness-parity/state.yaml` + +If this charter and `state.yaml` disagree, `state.yaml` wins for task status, active task, receipts, verification freshness, and completion truth. + +## Run Command + +```text +/goal Follow docs/goals/athas-acp-harness-parity/goal.md. +``` + +## PM Loop + +On every `/goal` continuation: + +1. Read this charter. +2. Read `state.yaml`. +3. Run the bundled GoalBuddy update checker when available and mention a newer version without blocking. +4. Re-check the intake: original request, input shape, authority, proof, blind spots, existing plan facts, and likely misfire. +5. Work only on the active board task. +6. Assign Scout, Judge, Worker, or PM according to the task. +7. Write a compact task receipt. +8. Update the board. +9. If Judge selected a safe Worker task with `allowed_files`, `verify`, and `stop_if`, activate it and continue unless blocked. +10. Treat a slice audit as a checkpoint, not completion, unless it explicitly proves the full original outcome is complete. +11. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`. diff --git a/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md b/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md new file mode 100644 index 000000000..8f5cde9c9 --- /dev/null +++ b/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md @@ -0,0 +1,219 @@ +# T001 Scout Receipt: ACP Harness Map + +## Result + +Read-only Scout mapped. + +## Summary + +Zed's ACP harness posture is not just more tests; it has reusable fake ACP transports plus GPUI behavioral tests that exercise production connection/thread paths. Athas already has a solid ACP foundation with frontend stream-handler regression tests, Rust bootstrap decision tests, terminal-state tests, registry/catalog seams, and Tauri command surfaces. The closest parity move is to add a deterministic Athas fake ACP connection/client harness around `crates/ai/src/acp/*`, then use it for behavior slices before any provider expansion. + +## Zed ACP Harness/Test Map + +Behavior harness: + +- `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`: GPUI behavioral ACP thread tests using `FakeAgentConnection::new()` and in-thread session updates. +- Representative tests: + - `test_terminal_output_buffered_before_created_renders` + - `test_terminal_output_and_exit_buffered_before_created` + - `test_terminal_kill_allows_wait_for_exit_to_complete` + - `test_push_user_content_block` + - `test_thinking_concatenation` + - `test_ignore_echoed_user_message_chunks_during_active_turn` + - `test_edits_concurrently_to_user` + - `test_reading_from_line` + - `test_reading_empty_file` + - `test_reading_non_existing_file` + - `test_succeeding_canceled_toolcall` + - `test_no_pending_edits_if_tool_calls_are_completed` + - `test_tool_result_refusal` + - `test_user_prompt_refusal_emits_event` + - `test_refusal` + - `test_tool_call_not_found_creates_failed_entry` + - `test_follow_up_message_during_generation_does_not_clear_turn` + - `test_send_returns_cancelled_response_and_marks_tools_as_cancelled` + - `test_running_turn_cleared_when_send_task_dropped` + - `test_session_info_update_replaces_provisional_title_and_emits_event` + - `test_usage_update_populates_token_usage_and_cost` + - `test_usage_update_without_cost_preserves_existing_cost` + - `test_response_usage_does_not_clobber_session_usage` + - `test_clearing_token_usage_also_clears_cost` + +Fake ACP transport/provider harness: + +- `/Users/sw/Code/zed/crates/agent_servers/src/acp.rs`: `FakeAcpAgentServer`, `FakeAcpConnectionHarness`, `FakeAcpAgentConnection`, `build_fake_acp_connection`, `connect_fake_acp_connection`. +- Harness uses `agent_client_protocol::Channel::duplex()`, wires production `connect_client_future` handlers, responds to initialize/auth/new/prompt/load/close/cancel, tracks load/close counts, supports simulated server exit and forced prompt failure. +- Tests: + - `test_loaded_sessions_keep_state_until_last_close` + - `test_load_session_replays_notifications_sent_before_response` + - `test_close_session_during_in_flight_load` + - `test_close_during_load_preserves_other_concurrent_loader` + +Permissions/tools/provider harness: + +- `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs`: fake model/provider and fake terminal environment tests for tools, streaming tools, cancellation, auth/permission behavior, MCP tool compatibility, retry/error handling. +- Key tests: + - `test_basic_tool_calls` + - `test_streaming_tool_calls` + - `test_tool_authorization` + - `test_tool_hallucination` + - `test_cancellation` + - `test_terminal_tool_cancellation_captures_output` + - `test_cancellation_aware_tool_responds_to_cancellation` + - `test_in_progress_send_canceled_by_next_send` + - `test_retry_cancelled_promptly_on_new_send` + - `test_tool_updates_to_completion` + - `test_send_retry_finishes_tool_calls_on_error` + - `test_streaming_tool_completes_when_llm_stream_ends_without_final_input` + - `test_streaming_tool_error_breaks_stream_loop_immediately` + - `test_streaming_tool_error_waits_for_prior_tools_to_complete` +- Permission option tests include: + - `test_permission_options_terminal_with_pattern` + - `test_permission_options_edit_file_with_path_pattern` + - `test_permission_options_fetch_with_domain_pattern` + - `test_permission_options_terminal_pipeline_produces_dropdown_with_patterns` +- `/Users/sw/Code/zed/crates/agent/src/tool_permissions.rs` has a focused permission-rule matrix for allow/deny/path/url/terminal patterns and rule resolution. + +Reference commands: + +- `cargo test -p acp_thread test_usage_update_populates_token_usage_and_cost` +- `cargo test -p acp_thread test_send_returns_cancelled_response_and_marks_tools_as_cancelled` +- `cargo test -p agent_servers test_load_session_replays_notifications_sent_before_response` +- `cargo test -p agent test_cancellation` +- `cargo test -p agent test_permission_options_terminal_with_pattern` + +## Athas Current ACP Harness/Test Map + +Frontend behavior tests: + +- `/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts` + - `finalizes active tools before sending backend cancellation` + - `ignores late events after a cancelled turn is force-stopped` +- `/Users/sw/Code/athas/src/features/ai/tests/acp-activity-groups.test.ts` + - `groups running, recent, and error activity without duplicate signatures` +- `/Users/sw/Code/athas/src/features/ai/tests/acp-session-info.test.ts` + - `returns trimmed title updates` + - `ignores empty or unchanged titles` +- `/Users/sw/Code/athas/src/features/ai/tests/session-config-option-classifier.test.ts` + - `prefers ACP semantic categories over label heuristics` + - `falls back to text classification when category is missing or custom` + +Rust behavior tests: + +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` + - `loaded_session_bootstrap_preserves_requested_session_id` + - `method_not_found_load_uses_resume_only_when_supported` + - `missing_or_unsupported_load_falls_back_to_new_session` + - `missing_or_unsupported_resume_falls_back_to_new_session` + - `auth_required_errors_are_retriable_before_session_fallbacks` + - `new_session_bootstrap_uses_agent_created_session_id` +- `/Users/sw/Code/athas/crates/ai/src/acp/terminal_state.rs` + - `append_output_truncates_from_beginning` + - `append_output_preserves_utf8_boundaries_when_truncating` + - `exit_status_preserves_none_exit_code_for_signal_termination` + +Registry/adapter tests: + +- `/Users/sw/Code/athas/crates/ai/src/acp/config.rs` + - `managed_wrapper_path_prefers_expected_wrapper_name` + - `check_dir_for_binary_returns_none_for_missing_binary` +- Implementation seams: `AgentRegistry::replace_agents`, `detect_installed`, `detect_codex_adapter`, `managed_wrapper_path`, `find_binary`. + +Extension points: + +- `/Users/sw/Code/athas/crates/ai/src/acp/client.rs`: `AthasAcpClient` maps ACP session notifications, permission requests, file callbacks, terminal callbacks, and extension methods into `AcpEvent`. +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs`: `AcpWorker` owns connection/session/process lifecycle and exposes send/cancel/list/stop/mode/config operations. +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs`: bootstrap path for initialize/auth/session load/resume/new and initial mode/config event emission. +- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_prompt.rs`: prompt send, auth retry, timeout, and prompt_complete emission. +- `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts`: frontend event-order/cancellation/session/status/tool/permission handler and current easiest deterministic test seam. +- `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs`: Tauri command shell plus marketplace catalog conversion and `refresh_registered_agents`. + +Current verification options: + +- `bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts` +- `cargo test -p athas-ai acp::bridge_init::tests` +- `cargo test -p athas-ai acp::terminal_state::tests` +- `cargo test -p athas-ai acp::config::tests` +- `bun typecheck` +- `bun check:rust` + +## Harness Parity Matrix + +- Sessions: Zed is strong with fake ACP transport covering load/close ref counts, in-flight load, notifications before response, concurrent loaders, and session info title updates. Athas is medium: bootstrap decision tests and frontend title/session-info tests exist, but no fake ACP transport proves production connection event ordering. +- Cancellation: Zed is strong with cancelled prompt responses, pending tool cancellation, running-turn cleanup, new send canceling previous send, and terminal/subagent/tool cancellation. Athas is medium with frontend cancellation tests but no Rust bridge fake transport proof. +- Streaming: Zed is strong with echoed user chunk suppression, thinking concatenation, streaming tool parsing/completion/error loops, and buffered text flushing. Athas is low-medium: stream handler routes chunks/tools, but deterministic ordering tests are thin. +- Tools: Zed is strong across tool start/update/complete/failure/not-found, file reads/writes, concurrent edits, terminal buffering/kill/output. Athas is medium: event mapping and terminal buffer tests exist, but no client-level fake app/terminal manager harness for ACP callbacks. +- Permissions: Zed is strong across permission options and allow/deny rule matrices. Athas is low-medium: permission requests are emitted and queue cancellation exists, but no focused protocol response matrix. +- Errors: Zed is strong on startup-exited load errors, auth/internal mapping, refusal, hallucinated tools, retry failure, and dropped send cleanup. Athas is medium with startup/auth/session fallback tests and stream handler formatting. +- Usage: Zed is strong with `UsageUpdate` token usage/cost tests. Athas appears absent for ACP usage event/type mapping. +- Provider/adaptor compatibility: Zed is strong with fake ACP AgentServer and fake model/provider/terminal/MCP harnesses. Athas is low-medium with registry/catalog seams and Codex adapter fallback, but no fake adapter executable/transport smoke. + +## Ranked Athas Harness Gaps + +1. No Rust fake ACP connection harness equivalent to Zed's `FakeAcpConnectionHarness`. + - Highest leverage because it can prove sessions, cancellation, streaming, tools, permissions, errors, and usage locally through production-ish bridge/client paths. + - Target files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/bridge.rs`, possible `crates/ai/src/acp/test_harness.rs`. + +2. No ACP usage/token-cost mapping or tests. + - Clear Zed parity gap with narrow blast radius; currently no Athas event/type exists for usage. + - Target files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`. + +3. Client-level permission mapping/cancel/timeout harness is missing. + - Athas has UI queue behavior but not the protocol response matrix for selected/cancelled/auto-reject/fallback options. + - Target files: `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/components/chat/ai-chat.tsx`. + +4. Streaming/event ordering tests are thin. + - Zed proves duplicate user echo suppression, thinking concatenation, and late turn isolation. + - Target files: `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests`, `crates/ai/src/acp/client.rs`. + +5. Provider/adaptor compatibility smoke is config-only. + - Athas detects Codex adapter and marketplace agents, but lacks local fake adapter/fixture validation of initialize/session/prompt/cancel without auth/network. + - Target files: `crates/ai/src/acp/config.rs`, `src-tauri/src/commands/ai/acp.rs`, possible test fixture under `crates/ai/src/acp` or tests. + +6. Terminal/file ACP callback harness is only partial. + - Terminal buffer unit tests exist, but full create/output/wait/kill/release and read/write file callbacks through `AthasAcpClient` are not covered. + - Target files: `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/terminal_state.rs`. + +## Candidate Worker Slices + +W1: + +- Objective: Add the minimal reusable Athas fake ACP harness for `athas-ai` without changing product behavior. +- Allowed files: `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/mod.rs`, optional new test-only file `crates/ai/src/acp/test_harness.rs`. +- Verification: `cargo test -p athas-ai acp`, `bun check:rust`. +- Stop if: requires live ACP provider/auth/network/secrets; broad provider expansion; bridge rewrite instead of narrow test seam. + +W2: + +- Objective: Use fake harness to prove session notification ordering and load/resume/new behavior through production-ish paths. +- Allowed files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/test_harness.rs`. +- Verification: `cargo test -p athas-ai acp::bridge_init`, `cargo test -p athas-ai acp::client`. +- Stop if: needs external adapter process or new dependencies. + +W3: + +- Objective: Add ACP `UsageUpdate` mapping and deterministic tests. +- Allowed files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/*usage*.test.ts`. +- Verification: `cargo test -p athas-ai acp`, `bunx vp test run src/features/ai/tests`, `bun typecheck`. +- Stop if: usage semantics require UI design beyond exposing routed state/event, or protocol crate lacks `UsageUpdate`. + +W4: + +- Objective: Add permission protocol harness for selected/cancelled/auto-reject/fallback responses. +- Allowed files: `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/acp-permissions.test.ts`. +- Verification: `cargo test -p athas-ai acp::client`, `bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-permissions.test.ts`. +- Stop if: requires changing product permission policy instead of harnessing current behavior, or long real-time timeout. + +W5: + +- Objective: Add local fake adapter/provider compatibility smoke without real provider auth. +- Allowed files: `crates/ai/src/acp/config.rs`, `src-tauri/src/commands/ai/acp.rs`, optional test fixture under `crates/ai/src/acp/tests` or src-tauri tests. +- Verification: `cargo test -p athas-ai acp::config`, `bun check:rust`. +- Stop if: requires real Codex/Claude/Gemini login or expands provider catalog instead of proving adapter compatibility. + +## Ambiguity Requiring Judge + +- Whether first tranche should prioritize reusable harness infrastructure (W1) or immediately add visible usage event slice (W3). Scout recommends W1 for leverage, W3 for fastest narrow parity. +- Whether Athas should add a Rust test-only fake app/terminal manager seam now, or keep first harness frontend-only. Zed evidence favors Rust/in-process fake transport. +- Whether provider/adaptor compatibility means fake local adapter smoke only, or real Codex/Claude/Gemini smoke. Evidence and charter favor fake/local deterministic smoke and avoiding auth-heavy real providers. +- Whether `UsageUpdate` is available in Athas's pinned `agent_client_protocol` version; current Athas code has no usage mapping hits, but Worker should confirm dependency API before editing. diff --git a/docs/goals/athas-acp-harness-parity/state.yaml b/docs/goals/athas-acp-harness-parity/state.yaml new file mode 100644 index 000000000..0e0c73d32 --- /dev/null +++ b/docs/goals/athas-acp-harness-parity/state.yaml @@ -0,0 +1,382 @@ +# GoalBuddy v2 state.yaml +# Board truth lives here. goal.md is the editable charter; notes/ holds long receipts only. + +version: 2 + +goal: + title: "Athas ACP Harness Parity" + slug: "athas-acp-harness-parity" + kind: specific + tranche: "Zed-referenced ACP harness parity: map Zed harnesses, extend Athas harness coverage, verify, audit, and continue through safe slices until the first harness tranche is complete." + status: done + intake: + original_request: "Match the amount of ACP harness that Zed has for Athas; look at how Zed does it and do the same for Athas while building on Athas's good foundation." + interpreted_outcome: "Athas gains Zed-referenced ACP harness coverage for core behavior and provider/adaptor compatibility, with local tests proving the first tranche." + input_shape: specific + audience: "Athas maintainers and contributors" + authority: approved + proof_type: test + completion_proof: "A Zed harness matrix exists, selected Athas harness slices are implemented, relevant local tests/checks pass, and final audit confirms the first harness parity tranche is complete." + likely_misfire: "Creating many tests or copying Zed structure mechanically without reliable Athas ACP harness value." + blind_spots_considered: + - "Harness parity may mean behavior coverage, fake-agent infrastructure, provider/adaptor compatibility, smoke tooling, or raw test count." + - "Real provider smoke can be flaky if it depends on auth, secrets, network, or local CLI state." + - "Athas foundation is already good; the task is extension, not replacement." + - "Provider compatibility belongs in scope, but broad provider expansion does not." + existing_plan_facts: + - "Use /Users/sw/Code/zed as the local reference source." + - "Study how Zed does ACP harnesses/tests before choosing Athas work." + - "Athas foundation is good; keep going from there." + - "Optimize for behavioral ACP harness coverage plus provider compatibility harness." + - "Include real adapter smoke where practical, but avoid auth-heavy or network-flaky runs." + +rules: + pm_owns_state: true + one_active_task: true + max_write_workers: 1 + no_implementation_without_worker_or_pm_task: true + no_completion_without_judge_or_pm_audit: true + planning_is_not_completion: true + queued_required_worker_blocks_completion: true + continuous_until_full_outcome: true + missing_input_or_credentials_do_not_stop_goal: true + preserve_and_validate_existing_plan: true + intake_misfire_must_be_audited: true + +agents: + scout: installed + worker: installed + judge: installed + +active_task: null + +tasks: + - id: T001 + type: scout + assignee: Scout + status: done + reasoning_hint: medium + objective: "Map Zed's ACP harness/test posture and identify the closest Athas harness extension points." + inputs: + - "/Users/sw/Code/zed" + - "/Users/sw/Code/athas" + - "Prior Athas ACP parity board: docs/goals/athas-acp-zed-parity/state.yaml" + - "Prior Athas ACP parity note: docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md" + constraints: + - "Read-only." + - "Do not edit implementation files." + - "Treat /Users/sw/Code/zed as the reference for ACP harness/test posture." + - "Use concrete file paths, test names, fixtures, fake providers/adapters, and commands." + - "Separate behavior harness gaps from provider/adaptor compatibility harness gaps." + - "Preserve Athas's existing foundation; recommend extension points, not rewrites." + expected_output: + - "Zed ACP harness/test map with file-path and test-name evidence." + - "Athas current ACP harness/test map with file-path and test-name evidence." + - "Harness parity matrix covering sessions, cancellation, streaming, tools, permissions, errors, usage, and provider/adaptor compatibility." + - "Ranked Athas harness gaps by leverage, determinism, and verification strength." + - "Candidate Worker slices with allowed_files, verification options, and stop conditions." + receipt: + result: done + note: "notes/T001-acp-harness-map.md" + summary: "Mapped Zed and Athas ACP harness posture. Zed's leverage comes from fake ACP transports plus behavioral tests. Athas has a solid foundation but lacks a reusable fake ACP connection harness and several coverage slices." + evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" + - "/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts" + candidate_worker_slices: + - "W1: add minimal reusable fake ACP harness for athas-ai." + - "W3: add ACP UsageUpdate mapping and deterministic tests." + - "W4: add permission protocol harness." + - "W5: add local fake adapter/provider compatibility smoke." + + - id: T002 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Choose the first safe Athas ACP harness implementation slice from Scout evidence." + inputs: + - "T001 receipt or note." + - "Current dirty diff." + - "Athas validation commands." + constraints: + - "Read-only." + - "Do not implement." + - "Require Zed harness evidence for the selected gap." + - "Prefer deterministic local harnesses over auth-heavy or network-flaky real provider runs." + - "Do not choose broad provider expansion." + expected_output: + - "Decision." + - "Why this harness slice is first." + - "Exact Worker objective." + - "allowed_files." + - "verify." + - "stop_if." + - "Deferred or blocked candidates." + receipt: + result: done + decision: "Choose a narrowed W3 slice first: ACP UsageUpdate mapping plus deterministic harness tests. Do not start with the broad reusable fake ACP connection harness yet." + why_this_slice_is_first: + - "Zed has direct behavior-level usage update handling and focused tests." + - "Athas's pinned ACP schema exposes UsageUpdate with used and size." + - "Athas currently drops UsageUpdate in client session notification handling." + - "This is deterministic and local, with no provider auth, network, secrets, real adapter process, or broad provider expansion." + evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" + - "/Users/sw/Code/athas/vendor/agent-client-protocol-schema/src/client.rs" + - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" + next_allowed_task: T003 + deferred_or_blocked_candidates: + - "Deferred: broad fake ACP connection harness." + - "Deferred: session notification ordering/load-resume-new behavior through fake harness." + - "Deferred: permission protocol harness." + - "Deferred: local fake adapter/provider compatibility smoke." + - "Blocked for this step: real Codex/Claude/Gemini provider runs." + + - id: T003 + type: worker + assignee: Worker + status: done + reasoning_hint: default + objective: "Add deterministic ACP UsageUpdate mapping and harness tests as the first Zed-referenced Athas ACP harness slice." + allowed_files: + - "crates/ai/src/acp/types.rs" + - "crates/ai/src/acp/client.rs" + - "src/features/ai/types/acp.ts" + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/tests/acp-usage.test.ts" + verify: + - "cargo test -p athas-ai acp::client" + - "cargo test -p athas-ai acp" + - "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + - "bun typecheck" + - "bun check:rust" + stop_if: + - "Need files outside allowed_files." + - "The Worker cannot test the Rust client mapping without introducing a broad app-handle or Tauri runtime rewrite." + - "The implementation starts storing/rendering usage in UI state instead of only adding pass-through/mapping coverage." + - "The Worker tries to add cost/currency fields not present in Athas's pinned UsageUpdate schema." + - "The task requires live ACP provider auth, real adapter processes, secrets, network, or a provider catalog expansion." + - "Verification fails twice for reasons not directly caused by this slice." + - "Existing untracked GoalBuddy files are the only dirty files now; stop if unrelated implementation dirty files appear before editing." + receipt: + result: done + summary: "Implemented deterministic ACP UsageUpdate pass-through coverage. The Rust ACP client now maps protocol UsageUpdate notifications into frontend acp-event payloads, and frontend stream handling accepts usage_update as a generic event without mutating chat output or tool state." + changed_files: + - "crates/ai/src/acp/types.rs" + - "crates/ai/src/acp/client.rs" + - "src/features/ai/types/acp.ts" + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/tests/acp-usage.test.ts" + harness_evidence: + - "crates/ai/src/acp/client.rs::usage_update_maps_to_frontend_event" + - "src/features/ai/tests/acp-usage.test.ts" + verification: + passed: + - "cargo fmt -p athas-ai --check" + - "cargo test -p athas-ai acp::client" + - "cargo test -p athas-ai acp" + - "cargo check -p athas-ai" + - "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + - "bun typecheck" + - "git diff --check" + blocked: + - "bun check:rust exits 1 on pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs; those files are outside the T003 allowed_files and were not changed." + commands: + - cmd: "cargo fmt -p athas-ai --check" + status: pass + - cmd: "cargo test -p athas-ai acp::client" + status: pass + - cmd: "cargo test -p athas-ai acp" + status: pass + - cmd: "cargo check -p athas-ai" + status: pass + - cmd: "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + status: pass + - cmd: "bun typecheck" + status: pass + - cmd: "git diff --check" + status: pass + blocked_verification: + - cmd: "bun check:rust" + reason: "Pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs are outside the T003 allowed_files and were not changed." + zed_reference: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs usage update tests cited by T001" + + - id: T004 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Audit the first implemented harness slice and choose the next safe harness slice if the tranche is not complete." + inputs: + - "T001 receipt or note." + - "T002 receipt." + - "T003 receipt." + - "Current dirty diff." + - "Last verification." + constraints: + - "Read-only." + - "Reject completion if the slice is only a map or plan." + - "Reject completion if no Athas harness/test was implemented and verified." + - "If not complete, define the next smallest safe Scout, Judge, or Worker task." + expected_output: + - "slice_complete: true | false." + - "full_outcome_complete: true | false." + - "Harness parity evidence." + - "Missing evidence." + - "Next task if not complete." + receipt: + result: done + slice_complete: true + full_outcome_complete: false + summary: "T003 is a valid first harness slice: it adds implemented Rust and frontend coverage for ACP UsageUpdate, references Zed's behavior-level usage harness posture, and keeps the behavior deterministic with no provider auth or UI storage expansion." + harness_parity_evidence: + - "Rust protocol UsageUpdate is mapped into an AcpEvent and covered by cargo test -p athas-ai acp::client." + - "Frontend AcpStreamHandler accepts usage_update through the generic ACP event stream and does not mutate chat output, covered by src/features/ai/tests/acp-usage.test.ts." + - "The broader athas-ai ACP suite still passes with the new event shape." + missing_evidence: + - "Permission request/response behavior is implemented but has no focused harness tests matching Zed's permission-heavy ACP posture." + - "Provider/adaptor fake smoke remains deferred." + next_task: T005 + decision: "Continue with a narrow permission protocol harness slice before broad fake adapter work." + why_next: + - "Zed has extensive permission behavior tests and request_permission handling evidence." + - "Athas already has permission request, option, callback, auto-reject, and selected-option behavior; the gap is deterministic harness coverage." + - "This is local and does not require real providers, secrets, network, or adapter processes." + + - id: T005 + type: worker + assignee: Worker + status: done + reasoning_hint: default + objective: "Add deterministic ACP permission protocol harness coverage for Rust permission selection/mapping and frontend permission request handling, using Zed's permission harness posture as the reference." + allowed_files: + - "crates/ai/src/acp/client.rs" + - "src/features/ai/services/acp-stream-handler.ts" + - "src/features/ai/tests/acp-permission.test.ts" + verify: + - "cargo test -p athas-ai acp::client" + - "cargo test -p athas-ai acp" + - "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + - "bun typecheck" + - "bun check:rust" + stop_if: + - "No Judge receipt has selected this slice." + - "Need files outside allowed_files." + - "Need to instantiate a full Tauri AppHandle or run a live ACP provider instead of pure helper/frontend handler harnesses." + - "The implementation changes the permission UI or product behavior rather than adding deterministic harness seams and tests." + - "The task requires auth-heavy, network-flaky, or secret-bearing provider runs." + - "The task broadens into provider expansion without harness-parity evidence." + - "Verification fails twice." + receipt: + result: done + summary: "Added deterministic ACP permission protocol harness coverage without changing user-facing permission behavior. Rust now exposes pure permission mapping/selection seams covered by unit tests, and frontend stream handling is covered for explicit permission callbacks plus safety auto-reject when no handler is registered." + changed_files: + - "crates/ai/src/acp/client.rs" + - "src/features/ai/tests/acp-permission.test.ts" + harness_evidence: + - "crates/ai/src/acp/client.rs::permission_option_kinds_map_to_frontend_kinds" + - "crates/ai/src/acp/client.rs::approved_permission_prefers_allow_options" + - "crates/ai/src/acp/client.rs::rejected_permission_prefers_reject_options" + - "crates/ai/src/acp/client.rs::permission_choice_cancels_when_options_are_empty" + - "src/features/ai/tests/acp-permission.test.ts" + commands: + - cmd: "cargo fmt -p athas-ai" + status: pass + - cmd: "cargo fmt -p athas-ai --check" + status: pass + - cmd: "cargo test -p athas-ai acp::client" + status: pass + - cmd: "cargo test -p athas-ai acp" + status: pass + - cmd: "cargo check -p athas-ai" + status: pass + - cmd: "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + status: pass + - cmd: "bun typecheck" + status: pass + - cmd: "git diff --check" + status: pass + blocked_verification: + - cmd: "bun check:rust" + reason: "Pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs are outside the T005 allowed_files and were not changed." + zed_reference: + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs permission option and parallel permission tests" + - "/Users/sw/Code/zed/crates/agent_servers/src/e2e_tests.rs::test_tool_call_with_permission" + - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs::handle_request_permission" + + - id: T999 + type: judge + assignee: Judge + status: done + reasoning_hint: high + objective: "Final audit for the first Athas ACP harness parity tranche." + inputs: + - "All done task receipts." + - "Zed harness reference evidence." + - "Last verification." + - "Current dirty diff." + constraints: + - "Read-only." + - "Do not implement." + - "Reject completion if required Worker work is still queued or active." + - "Reject completion if the evidence does not map Athas harness work back to Zed-referenced parity." + - "Reject completion if the result is only a comparison artifact with no verified Athas harness improvement." + expected_output: + - "complete | not_complete." + - "full_outcome_complete: true | false." + - "Completed harness parity behaviors." + - "Missing evidence." + - "Next task if not complete." + receipt: + result: done + decision: complete + summary: "Approve T999. The first Athas ACP harness parity tranche is complete: Zed's ACP harness posture was mapped, then Athas gained two deterministic, Zed-referenced harness slices with Rust and frontend tests covering usage updates and permission protocol behavior." + full_outcome_complete: true + tranche_complete: true + completed_harness_parity_behaviors: + - "UsageUpdate mapping: ACP UsageUpdate notifications now produce frontend usage_update events with used and size, matching the pinned schema and closing the previous silent-drop path." + - "UsageUpdate proof: Rust unit coverage verifies protocol-to-event mapping, and frontend coverage verifies usage_update passes through the generic ACP event stream without mutating chat output or tool state." + - "Permission protocol harness: permission option kind mapping and selected allow/reject/cancel outcomes now have pure Rust tests, reflecting Zed's permission-heavy ACP harness posture." + - "Permission frontend proof: AcpStreamHandler tests verify permission requests reach the permission callback and auto-reject safely when no handler is registered." + zed_reference_evidence: + - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" + - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" + - "/Users/sw/Code/zed/crates/agent_servers/src/e2e_tests.rs::test_tool_call_with_permission" + - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs usage and permission tests cited by T001" + verification_summary: + - "cargo test -p athas-ai acp::client passed with 5 client harness tests." + - "cargo test -p athas-ai acp passed with 16 ACP tests." + - "Frontend ACP focused harness run passed with 6 files and 10 tests." + - "bun typecheck passed." + - "cargo check -p athas-ai passed." + - "cargo fmt -p athas-ai --check passed." + - "git diff --check passed." + missing_evidence: + - "No blocking missing evidence for this first harness tranche." + - "Deferred: broad reusable fake ACP connection harness and local fake provider/adaptor smoke remain useful later slices." + - "No live ACP provider smoke was included; acceptable because real provider smoke would risk auth, network, and local adapter state." + - "Verification caveat: bun check:rust remains blocked by unrelated repo-wide rustfmt diffs outside this tranche's allowed files." + next_task_if_not_complete: null + +checks: + dirty_fingerprint: unknown + last_verification: + result: scoped_pass_with_unrelated_blocker + task: T999 + commands: + - "cargo fmt -p athas-ai" + - "cargo fmt -p athas-ai --check" + - "cargo test -p athas-ai acp::client" + - "cargo test -p athas-ai acp" + - "cargo check -p athas-ai" + - "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" + - "bun typecheck" + - "git diff --check" + - "bun check:rust blocked by unrelated repo-wide rustfmt diffs outside T003/T005 allowed_files" diff --git a/src/features/ai/services/acp-stream-handler.ts b/src/features/ai/services/acp-stream-handler.ts index 21b66d1e1..48e7eced1 100644 --- a/src/features/ai/services/acp-stream-handler.ts +++ b/src/features/ai/services/acp-stream-handler.ts @@ -311,6 +311,9 @@ export class AcpStreamHandler { case "session_info_update": break; + case "usage_update": + break; + case "prompt_complete": this.handlePromptComplete(event); break; diff --git a/src/features/ai/tests/acp-permission.test.ts b/src/features/ai/tests/acp-permission.test.ts new file mode 100644 index 000000000..84cdcf1af --- /dev/null +++ b/src/features/ai/tests/acp-permission.test.ts @@ -0,0 +1,139 @@ +import { afterEach, describe, expect, it, vi } from "vite-plus/test"; +import { invoke } from "@tauri-apps/api/core"; +import { AcpStreamHandler } from "../services/acp-stream-handler"; +import type { AcpEvent } from "../types/acp"; + +vi.mock("@tauri-apps/api/core", () => ({ + invoke: vi.fn(), +})); + +vi.mock("@tauri-apps/api/event", () => ({ + listen: vi.fn(), +})); + +vi.mock("@/features/ai/store/store", () => ({ + useAIChatStore: { + getState: () => ({ + acpStatus: null, + getChatById: () => null, + getCurrentChat: () => null, + setAcpStatus: vi.fn(), + setChatAcpSessionId: vi.fn(), + setAvailableSlashCommands: vi.fn(), + setSessionConfigOptions: vi.fn(), + setSessionModeState: vi.fn(), + setCurrentModeId: vi.fn(), + }), + }, +})); + +vi.mock("@/features/editor/stores/buffer-store", () => ({ + useBufferStore: { + getState: () => ({ + actions: { + openWebViewerBuffer: vi.fn(), + openTerminalBuffer: vi.fn(), + }, + }), + }, +})); + +vi.mock("@/features/window/stores/project-store", () => ({ + useProjectStore: { + getState: () => ({ + rootFolderPath: "/repo", + }), + }, +})); + +vi.mock("@/features/ai/lib/acp-session-info", () => ({ + getChatTitleFromSessionInfo: (_currentTitle: string | undefined, nextTitle: string) => nextTitle, +})); + +vi.mock("../utils/ai-context-builder", () => ({ + buildContextPrompt: () => "", +})); + +type TestableAcpStreamHandler = { + handleAcpEvent: (event: unknown) => void; +}; + +const mockedInvoke = vi.mocked(invoke); + +const handleAcpEvent = (handler: AcpStreamHandler, event: AcpEvent) => { + (handler as unknown as TestableAcpStreamHandler).handleAcpEvent(event); +}; + +const permissionEvent: AcpEvent = { + type: "permission_request", + requestId: "request-1", + permissionType: "tool_call", + resource: "tool-1", + description: "Run command (tool-1)", + options: [ + { + id: "allow-once", + name: "Allow once", + kind: "allow_once", + }, + { + id: "reject-once", + name: "Reject once", + kind: "reject_once", + }, + ], +}; + +describe("AcpStreamHandler permission requests", () => { + afterEach(() => { + mockedInvoke.mockReset(); + vi.clearAllMocks(); + }); + + it("routes permission requests to the permission handler", () => { + const onEvent = vi.fn(); + const onPermissionRequest = vi.fn(); + const handler = new AcpStreamHandler( + "codex", + { + onChunk: vi.fn(), + onComplete: vi.fn(), + onError: vi.fn(), + onEvent, + onPermissionRequest, + }, + "chat-1", + ); + + handleAcpEvent(handler, permissionEvent); + + expect(onEvent).toHaveBeenCalledWith(permissionEvent); + expect(onPermissionRequest).toHaveBeenCalledWith(permissionEvent); + expect(mockedInvoke).not.toHaveBeenCalled(); + }); + + it("auto-rejects permission requests when no permission handler is registered", async () => { + mockedInvoke.mockResolvedValue(undefined); + const handler = new AcpStreamHandler( + "codex", + { + onChunk: vi.fn(), + onComplete: vi.fn(), + onError: vi.fn(), + }, + "chat-1", + ); + + handleAcpEvent(handler, permissionEvent); + await vi.waitFor(() => { + expect(mockedInvoke).toHaveBeenCalledWith("respond_acp_permission", { + args: { + requestId: "request-1", + approved: false, + cancelled: false, + optionId: undefined, + }, + }); + }); + }); +}); diff --git a/src/features/ai/tests/acp-usage.test.ts b/src/features/ai/tests/acp-usage.test.ts new file mode 100644 index 000000000..9d8c6a5ab --- /dev/null +++ b/src/features/ai/tests/acp-usage.test.ts @@ -0,0 +1,104 @@ +import { afterEach, describe, expect, it, vi } from "vite-plus/test"; +import { AcpStreamHandler } from "../services/acp-stream-handler"; +import type { AcpEvent } from "../types/acp"; + +vi.mock("@tauri-apps/api/core", () => ({ + invoke: vi.fn(), +})); + +vi.mock("@tauri-apps/api/event", () => ({ + listen: vi.fn(), +})); + +vi.mock("@/features/ai/store/store", () => ({ + useAIChatStore: { + getState: () => ({ + acpStatus: null, + getChatById: () => null, + getCurrentChat: () => null, + setAcpStatus: vi.fn(), + setChatAcpSessionId: vi.fn(), + setAvailableSlashCommands: vi.fn(), + setSessionConfigOptions: vi.fn(), + setSessionModeState: vi.fn(), + setCurrentModeId: vi.fn(), + }), + }, +})); + +vi.mock("@/features/editor/stores/buffer-store", () => ({ + useBufferStore: { + getState: () => ({ + actions: { + openWebViewerBuffer: vi.fn(), + openTerminalBuffer: vi.fn(), + }, + }), + }, +})); + +vi.mock("@/features/window/stores/project-store", () => ({ + useProjectStore: { + getState: () => ({ + rootFolderPath: "/repo", + }), + }, +})); + +vi.mock("@/features/ai/lib/acp-session-info", () => ({ + getChatTitleFromSessionInfo: (_currentTitle: string | undefined, nextTitle: string) => nextTitle, +})); + +vi.mock("../utils/ai-context-builder", () => ({ + buildContextPrompt: () => "", +})); + +type TestableAcpStreamHandler = { + handleAcpEvent: (event: unknown) => void; +}; + +const handleAcpEvent = (handler: AcpStreamHandler, event: AcpEvent) => { + (handler as unknown as TestableAcpStreamHandler).handleAcpEvent(event); +}; + +describe("AcpStreamHandler usage updates", () => { + afterEach(() => { + vi.clearAllMocks(); + }); + + it("passes usage updates through the generic ACP event stream without mutating chat output", () => { + const onEvent = vi.fn(); + const onChunk = vi.fn(); + const onToolUse = vi.fn(); + const onToolComplete = vi.fn(); + const onComplete = vi.fn(); + const onError = vi.fn(); + const handler = new AcpStreamHandler( + "codex", + { + onChunk, + onComplete, + onError, + onEvent, + onToolUse, + onToolComplete, + }, + "chat-1", + ); + const event: AcpEvent = { + type: "usage_update", + sessionId: "session-1", + used: 1234, + size: 200000, + }; + + handleAcpEvent(handler, event); + + expect(onEvent).toHaveBeenCalledWith(event); + expect(onChunk).not.toHaveBeenCalled(); + expect(onToolUse).not.toHaveBeenCalled(); + expect(onToolComplete).not.toHaveBeenCalled(); + expect(onComplete).not.toHaveBeenCalled(); + expect(onError).not.toHaveBeenCalled(); + }); +}); diff --git a/src/features/ai/types/acp.ts b/src/features/ai/types/acp.ts index aea4140de..53774e765 100644 --- a/src/features/ai/types/acp.ts +++ b/src/features/ai/types/acp.ts @@ -268,6 +268,12 @@ export type AcpEvent = title: string | null; updatedAt: string | null; } + | { + type: "usage_update"; + sessionId: string; + used: number; + size: number; + } | { type: "prompt_complete"; sessionId: string; From 6dc39953a6f3ae24a86c98d4e540cf2224bbc2ec Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Fri, 8 May 2026 23:55:23 +0700 Subject: [PATCH 3/5] Load ACP agents from registry --- src-tauri/src/commands/ai/acp.rs | 398 ++++++++++++++++++++++++++++++- 1 file changed, 390 insertions(+), 8 deletions(-) diff --git a/src-tauri/src/commands/ai/acp.rs b/src-tauri/src/commands/ai/acp.rs index 7f0021d9b..09c0ee435 100644 --- a/src-tauri/src/commands/ai/acp.rs +++ b/src-tauri/src/commands/ai/acp.rs @@ -15,6 +15,8 @@ use tokio::sync::Mutex; pub type AcpBridgeState = Arc>; const EXTENSIONS_CDN_BASE_URL: &str = "https://athas.dev/extensions"; +const ACP_REGISTRY_URL: &str = + "https://cdn.agentclientprotocol.com/registry/v1/latest/registry.json"; const AGENT_CATALOG_CACHE_SECONDS: u64 = 300; #[derive(Deserialize)] @@ -163,12 +165,57 @@ struct MarketplaceExtensionManifest { agents: Vec, } +#[derive(Deserialize)] +struct AcpRegistryIndex { + #[serde(default)] + agents: Vec, +} + +#[derive(Deserialize)] +struct AcpRegistryAgent { + id: String, + name: String, + description: String, + icon: Option, + distribution: AcpRegistryDistribution, +} + +#[derive(Deserialize)] +struct AcpRegistryDistribution { + binary: Option>, + npx: Option, + uvx: Option, +} + +#[derive(Deserialize)] +struct AcpRegistryBinaryTarget { + archive: String, + cmd: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, +} + +#[derive(Deserialize)] +struct AcpRegistryPackageTarget { + package: String, + #[serde(default)] + args: Vec, + #[serde(default)] + env: HashMap, +} + fn extensions_manifest_url() -> String { let base_url = std::env::var("ATHAS_EXTENSIONS_CDN_URL") .unwrap_or_else(|_| EXTENSIONS_CDN_BASE_URL.to_string()); format!("{}/manifests.json", base_url.trim_end_matches('/')) } +fn acp_registry_url() -> String { + std::env::var("ATHAS_ACP_REGISTRY_URL").unwrap_or_else(|_| ACP_REGISTRY_URL.to_string()) +} + fn current_platform_arch() -> Option<&'static str> { match (std::env::consts::OS, std::env::consts::ARCH) { ("macos", "aarch64") => Some("darwin-arm64"), @@ -181,6 +228,33 @@ fn current_platform_arch() -> Option<&'static str> { } } +fn current_acp_registry_platform() -> Option<&'static str> { + match (std::env::consts::OS, std::env::consts::ARCH) { + ("macos", "aarch64") => Some("darwin-aarch64"), + ("macos", "x86_64") => Some("darwin-x86_64"), + ("linux", "aarch64") => Some("linux-aarch64"), + ("linux", "x86_64") => Some("linux-x86_64"), + ("windows", "aarch64") => Some("windows-aarch64"), + ("windows", "x86_64") => Some("windows-x86_64"), + _ => None, + } +} + +fn registry_command_name(cmd: &str, fallback: &str) -> String { + Path::new(cmd) + .file_name() + .and_then(|name| name.to_str()) + .map(|name| { + if cfg!(windows) { + name.strip_suffix(".exe").unwrap_or(name).to_string() + } else { + name.to_string() + } + }) + .filter(|name| !name.is_empty()) + .unwrap_or_else(|| fallback.to_string()) +} + fn to_agent_config(contribution: MarketplaceAgentContribution) -> AgentConfig { let mut agent = AgentConfig { id: contribution.id, @@ -217,6 +291,115 @@ fn to_agent_config(contribution: MarketplaceAgentContribution) -> AgentConfig { agent } +fn acp_registry_agent_to_config(agent: AcpRegistryAgent) -> Option { + let AcpRegistryAgent { + id, + name, + description, + icon, + distribution, + } = agent; + + if let Some(target) = current_acp_registry_platform() + .and_then(|platform| distribution.binary.as_ref()?.get(platform)) + { + let binary_name = registry_command_name(&target.cmd, &id); + return Some(AgentConfig { + id, + name, + binary_name, + binary_path: None, + args: target.args.clone(), + env_vars: target.env.clone(), + icon, + description: Some(description), + installed: false, + install_runtime: Some(AgentRuntime::Binary), + install_package: Some(target.cmd.clone()), + install_download_url: Some(target.archive.clone()), + install_command: Some(registry_command_name(&target.cmd, "")), + can_install: true, + }); + } + + if let Some(target) = distribution.npx { + let mut args = vec!["-y".to_string(), target.package.clone()]; + args.extend(target.args.clone()); + return Some(AgentConfig { + id, + name, + binary_name: "npx".to_string(), + binary_path: None, + args, + env_vars: target.env, + icon, + description: Some(description), + installed: false, + install_runtime: Some(AgentRuntime::Node), + install_package: Some(target.package), + install_download_url: None, + install_command: None, + can_install: true, + }); + } + + if let Some(target) = distribution.uvx { + let mut args = vec![target.package]; + args.extend(target.args); + return Some(AgentConfig { + id, + name, + binary_name: "uvx".to_string(), + binary_path: None, + args, + env_vars: target.env, + icon, + description: Some(description), + installed: false, + install_runtime: None, + install_package: None, + install_download_url: None, + install_command: None, + can_install: false, + }); + } + + None +} + +fn acp_registry_agents_from_index(index: AcpRegistryIndex) -> Vec { + let mut agents = index + .agents + .into_iter() + .filter_map(acp_registry_agent_to_config) + .collect::>(); + agents.sort_by_key(|agent| agent.name.clone()); + agents +} + +async fn load_acp_registry_agents() -> Result, String> { + let response = reqwest::Client::new() + .get(acp_registry_url()) + .timeout(Duration::from_secs(5)) + .send() + .await + .map_err(|error| format!("Failed to load ACP registry: {}", error))?; + + if !response.status().is_success() { + return Err(format!( + "Failed to load ACP registry: HTTP {}", + response.status() + )); + } + + let registry = response + .json::() + .await + .map_err(|error| format!("Invalid ACP registry: {}", error))?; + + Ok(acp_registry_agents_from_index(registry)) +} + async fn load_marketplace_agents() -> Result, String> { let cache = AGENT_CATALOG_CACHE.get_or_init(|| std::sync::Mutex::new(None)); { @@ -230,6 +413,26 @@ async fn load_marketplace_agents() -> Result, String> { } } + let agents = match load_acp_registry_agents().await { + Ok(agents) => agents, + Err(registry_error) => { + log::warn!("{}", registry_error); + load_legacy_marketplace_agents().await? + } + }; + + let mut cached = cache + .lock() + .map_err(|_| "Agent catalog cache poisoned".to_string())?; + *cached = Some(CachedAgentCatalog { + loaded_at: Instant::now(), + agents: agents.clone(), + }); + + Ok(agents) +} + +async fn load_legacy_marketplace_agents() -> Result, String> { let response = reqwest::Client::new() .get(extensions_manifest_url()) .timeout(Duration::from_secs(5)) @@ -256,14 +459,6 @@ async fn load_marketplace_agents() -> Result, String> { .collect::>(); agents.sort_by_key(|agent| agent.name.clone()); - let mut cached = cache - .lock() - .map_err(|_| "Agent catalog cache poisoned".to_string())?; - *cached = Some(CachedAgentCatalog { - loaded_at: Instant::now(), - agents: agents.clone(), - }); - Ok(agents) } @@ -534,3 +729,190 @@ fn make_wrapper_executable(path: &PathBuf) -> Result<(), String> { Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_registry(json: &str) -> Vec { + let index: AcpRegistryIndex = serde_json::from_str(json).expect("registry fixture"); + acp_registry_agents_from_index(index) + } + + #[test] + fn acp_registry_maps_binary_agent_for_current_platform() { + let agents = parse_registry( + r#"{ + "agents": [ + { + "id": "codex-acp", + "name": "Codex CLI", + "version": "0.14.0", + "description": "ACP adapter for OpenAI's coding assistant", + "distribution": { + "binary": { + "darwin-aarch64": { + "archive": "https://example.com/codex-aarch64.tar.gz", + "cmd": "./codex-acp", + "args": ["--acp"], + "env": { "CODEX_HOME": "/tmp/codex" } + }, + "darwin-x86_64": { + "archive": "https://example.com/codex-x64.tar.gz", + "cmd": "./codex-acp" + }, + "linux-x86_64": { + "archive": "https://example.com/codex-linux.tar.gz", + "cmd": "./codex-acp" + }, + "windows-x86_64": { + "archive": "https://example.com/codex.zip", + "cmd": "./codex-acp.exe" + } + } + }, + "icon": "https://example.com/codex.svg" + } + ] + }"#, + ); + + let agent = agents + .into_iter() + .find(|agent| agent.id == "codex-acp") + .expect("codex agent"); + + assert_eq!(agent.name, "Codex CLI"); + assert_eq!(agent.install_runtime, Some(AgentRuntime::Binary)); + assert_eq!(agent.binary_name, "codex-acp"); + assert!(agent.can_install); + assert!(agent.install_download_url.is_some()); + assert_eq!(agent.icon.as_deref(), Some("https://example.com/codex.svg")); + } + + #[test] + fn acp_registry_maps_npx_agent_to_npx_launch() { + let agents = parse_registry( + r#"{ + "agents": [ + { + "id": "claude-acp", + "name": "Claude Agent", + "version": "0.33.1", + "description": "ACP wrapper for Claude", + "distribution": { + "npx": { + "package": "@agentclientprotocol/claude-agent-acp@0.33.1", + "args": ["--verbose"], + "env": { "ANTHROPIC_HOME": "/tmp/claude" } + } + } + } + ] + }"#, + ); + + let agent = agents + .into_iter() + .find(|agent| agent.id == "claude-acp") + .expect("claude agent"); + + assert_eq!(agent.binary_name, "npx"); + assert_eq!(agent.install_runtime, Some(AgentRuntime::Node)); + assert_eq!( + agent.args, + vec![ + "-y".to_string(), + "@agentclientprotocol/claude-agent-acp@0.33.1".to_string(), + "--verbose".to_string() + ] + ); + assert_eq!( + agent.install_package.as_deref(), + Some("@agentclientprotocol/claude-agent-acp@0.33.1") + ); + assert!(agent.can_install); + } + + #[test] + fn acp_registry_includes_uvx_agents_without_managed_install_claim() { + let agents = parse_registry( + r#"{ + "agents": [ + { + "id": "fast-agent", + "name": "fast-agent", + "version": "0.7.0", + "description": "Code and build agents", + "distribution": { + "uvx": { + "package": "fast-agent-acp==0.7.0", + "args": ["-x"] + } + } + } + ] + }"#, + ); + + let agent = agents + .into_iter() + .find(|agent| agent.id == "fast-agent") + .expect("uvx agent"); + + assert_eq!(agent.binary_name, "uvx"); + assert_eq!( + agent.args, + vec!["fast-agent-acp==0.7.0".to_string(), "-x".to_string()] + ); + assert_eq!(agent.install_runtime, None); + assert!(!agent.can_install); + } + + #[test] + fn acp_registry_prefers_current_platform_binary_over_npx() { + let agents = parse_registry( + r#"{ + "agents": [ + { + "id": "kilo", + "name": "Kilo", + "version": "7.2.40", + "description": "Kilo ACP", + "distribution": { + "binary": { + "darwin-aarch64": { + "archive": "https://example.com/kilo-aarch64.tar.gz", + "cmd": "./kilo" + }, + "darwin-x86_64": { + "archive": "https://example.com/kilo-x64.tar.gz", + "cmd": "./kilo" + }, + "linux-x86_64": { + "archive": "https://example.com/kilo-linux.tar.gz", + "cmd": "./kilo" + }, + "windows-x86_64": { + "archive": "https://example.com/kilo.zip", + "cmd": "kilo.exe" + } + }, + "npx": { + "package": "kilo-code@7.2.40" + } + } + } + ] + }"#, + ); + + let agent = agents + .into_iter() + .find(|agent| agent.id == "kilo") + .expect("kilo agent"); + + assert_eq!(agent.install_runtime, Some(AgentRuntime::Binary)); + assert_eq!(agent.binary_name, "kilo"); + } +} From e5f5b77bb7a1c936b6fbe69e29a34572bfbc4597 Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Sat, 9 May 2026 19:47:31 +0700 Subject: [PATCH 4/5] Preserve legacy ACP agents --- src-tauri/src/commands/ai/acp.rs | 172 ++++++++++++++++++++++++++----- 1 file changed, 145 insertions(+), 27 deletions(-) diff --git a/src-tauri/src/commands/ai/acp.rs b/src-tauri/src/commands/ai/acp.rs index 09c0ee435..2c1d10332 100644 --- a/src-tauri/src/commands/ai/acp.rs +++ b/src-tauri/src/commands/ai/acp.rs @@ -32,15 +32,17 @@ pub struct PermissionResponseArgs { #[tauri::command] pub async fn get_available_agents( + app_handle: AppHandle, bridge: State<'_, AcpBridgeState>, ) -> Result, String> { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; Ok(bridge.detect_agents()) } #[tauri::command] pub async fn start_acp_agent( + app_handle: AppHandle, bridge: State<'_, AcpBridgeState>, agent_id: String, workspace_path: Option, @@ -48,7 +50,7 @@ pub async fn start_acp_agent( ) -> Result { let bridge = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; bridge.detect_agents(); bridge.clone() }; @@ -66,7 +68,7 @@ pub async fn install_acp_agent( ) -> Result { let agent = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; let agents = bridge.detect_agents(); agents .into_iter() @@ -99,7 +101,7 @@ pub async fn uninstall_acp_agent( ) -> Result { let agent = { let mut bridge = bridge.lock().await; - refresh_registered_agents(&mut bridge).await; + refresh_registered_agents(&app_handle, &mut bridge).await; bridge.invalidate_agent_detection_cache(); let agents = bridge.detect_agents(); agents @@ -335,11 +337,11 @@ fn acp_registry_agent_to_config(agent: AcpRegistryAgent) -> Option icon, description: Some(description), installed: false, - install_runtime: Some(AgentRuntime::Node), - install_package: Some(target.package), + install_runtime: None, + install_package: None, install_download_url: None, install_command: None, - can_install: true, + can_install: false, }); } @@ -377,7 +379,39 @@ fn acp_registry_agents_from_index(index: AcpRegistryIndex) -> Vec { agents } -async fn load_acp_registry_agents() -> Result, String> { +fn acp_registry_cache_path(app_handle: &AppHandle) -> Result { + app_handle + .path() + .app_data_dir() + .map(|dir| dir.join("acp-registry").join("registry.json")) + .map_err(|error| format!("Failed to resolve ACP registry cache path: {}", error)) +} + +fn acp_registry_agents_from_json(json: &str) -> Result, String> { + let registry = serde_json::from_str::(json) + .map_err(|error| format!("Invalid ACP registry: {}", error))?; + Ok(acp_registry_agents_from_index(registry)) +} + +fn load_cached_acp_registry_agents(app_handle: &AppHandle) -> Result, String> { + let cache_path = acp_registry_cache_path(app_handle)?; + let json = fs::read_to_string(&cache_path) + .map_err(|error| format!("Failed to read cached ACP registry: {}", error))?; + acp_registry_agents_from_json(&json) + .map_err(|error| format!("Invalid cached ACP registry: {}", error)) +} + +fn write_acp_registry_cache(app_handle: &AppHandle, json: &str) -> Result<(), String> { + let cache_path = acp_registry_cache_path(app_handle)?; + if let Some(parent) = cache_path.parent() { + fs::create_dir_all(parent) + .map_err(|error| format!("Failed to create ACP registry cache directory: {}", error))?; + } + fs::write(&cache_path, json) + .map_err(|error| format!("Failed to write ACP registry cache: {}", error)) +} + +async fn load_acp_registry_agents(app_handle: &AppHandle) -> Result, String> { let response = reqwest::Client::new() .get(acp_registry_url()) .timeout(Duration::from_secs(5)) @@ -392,15 +426,51 @@ async fn load_acp_registry_agents() -> Result, String> { )); } - let registry = response - .json::() + let json = response + .text() .await - .map_err(|error| format!("Invalid ACP registry: {}", error))?; + .map_err(|error| format!("Failed to read ACP registry response: {}", error))?; - Ok(acp_registry_agents_from_index(registry)) + let agents = acp_registry_agents_from_json(&json)?; + if let Err(error) = write_acp_registry_cache(app_handle, &json) { + log::warn!("{}", error); + } + + Ok(agents) +} + +async fn load_preferred_registry_agents( + app_handle: &AppHandle, +) -> Result, String> { + match load_acp_registry_agents(app_handle).await { + Ok(agents) => Ok(agents), + Err(registry_error) => { + log::warn!("{}", registry_error); + load_cached_acp_registry_agents(app_handle).map_err(|cache_error| { + log::warn!("{}", cache_error); + registry_error + }) + } + } } -async fn load_marketplace_agents() -> Result, String> { +fn merge_agent_catalogs( + mut preferred_agents: Vec, + fallback_agents: Vec, +) -> Vec { + for agent in fallback_agents { + if !preferred_agents + .iter() + .any(|preferred| preferred.id == agent.id) + { + preferred_agents.push(agent); + } + } + preferred_agents.sort_by_key(|agent| agent.name.clone()); + preferred_agents +} + +async fn load_marketplace_agents(app_handle: &AppHandle) -> Result, String> { let cache = AGENT_CATALOG_CACHE.get_or_init(|| std::sync::Mutex::new(None)); { let cached = cache @@ -413,11 +483,25 @@ async fn load_marketplace_agents() -> Result, String> { } } - let agents = match load_acp_registry_agents().await { - Ok(agents) => agents, - Err(registry_error) => { + let registry_agents = load_preferred_registry_agents(app_handle).await; + let legacy_agents = load_legacy_marketplace_agents().await; + let agents = match (registry_agents, legacy_agents) { + (Ok(registry_agents), Ok(legacy_agents)) => { + merge_agent_catalogs(registry_agents, legacy_agents) + } + (Ok(registry_agents), Err(legacy_error)) => { + log::warn!("{}", legacy_error); + registry_agents + } + (Err(registry_error), Ok(legacy_agents)) => { log::warn!("{}", registry_error); - load_legacy_marketplace_agents().await? + legacy_agents + } + (Err(registry_error), Err(legacy_error)) => { + return Err(format!( + "{}; legacy agent catalog also failed: {}", + registry_error, legacy_error + )); } }; @@ -462,8 +546,8 @@ async fn load_legacy_marketplace_agents() -> Result, String> { Ok(agents) } -async fn refresh_registered_agents(bridge: &mut AcpAgentBridge) { - match load_marketplace_agents().await { +async fn refresh_registered_agents(app_handle: &AppHandle, bridge: &mut AcpAgentBridge) { + match load_marketplace_agents(app_handle).await { Ok(agents) => bridge.replace_registered_agents(agents), Err(error) => { log::warn!("{}", error); @@ -735,8 +819,26 @@ mod tests { use super::*; fn parse_registry(json: &str) -> Vec { - let index: AcpRegistryIndex = serde_json::from_str(json).expect("registry fixture"); - acp_registry_agents_from_index(index) + acp_registry_agents_from_json(json).expect("registry fixture") + } + + fn test_agent(id: &str, name: &str) -> AgentConfig { + AgentConfig { + id: id.to_string(), + name: name.to_string(), + binary_name: id.to_string(), + binary_path: None, + args: vec![], + env_vars: HashMap::new(), + icon: None, + description: None, + installed: false, + install_runtime: None, + install_package: None, + install_download_url: None, + install_command: None, + can_install: false, + } } #[test] @@ -818,7 +920,7 @@ mod tests { .expect("claude agent"); assert_eq!(agent.binary_name, "npx"); - assert_eq!(agent.install_runtime, Some(AgentRuntime::Node)); + assert_eq!(agent.install_runtime, None); assert_eq!( agent.args, vec![ @@ -827,11 +929,8 @@ mod tests { "--verbose".to_string() ] ); - assert_eq!( - agent.install_package.as_deref(), - Some("@agentclientprotocol/claude-agent-acp@0.33.1") - ); - assert!(agent.can_install); + assert_eq!(agent.install_package.as_deref(), None); + assert!(!agent.can_install); } #[test] @@ -915,4 +1014,23 @@ mod tests { assert_eq!(agent.install_runtime, Some(AgentRuntime::Binary)); assert_eq!(agent.binary_name, "kilo"); } + + #[test] + fn merge_agent_catalogs_preserves_legacy_agents() { + let agents = merge_agent_catalogs( + vec![test_agent("codex-acp", "Codex Registry")], + vec![ + test_agent("codex-acp", "Codex Legacy"), + test_agent("athas-legacy", "Athas Legacy"), + ], + ); + + assert_eq!(agents.len(), 2); + assert!(agents.iter().any(|agent| agent.id == "athas-legacy")); + let codex = agents + .iter() + .find(|agent| agent.id == "codex-acp") + .expect("codex entry"); + assert_eq!(codex.name, "Codex Registry"); + } } From bc96313bf1c30e413280b4dda5d6f01f60da975f Mon Sep 17 00:00:00 2001 From: Finesssee <90105158+Finesssee@users.noreply.github.com> Date: Sun, 10 May 2026 04:00:35 +0700 Subject: [PATCH 5/5] Remove GoalBuddy state files --- docs/goals/athas-acp-harness-parity/goal.md | 77 ---- .../notes/T001-acp-harness-map.md | 219 ---------- .../goals/athas-acp-harness-parity/state.yaml | 382 ------------------ docs/goals/athas-acp-zed-parity/goal.md | 82 ---- .../notes/T001-acp-zed-map.md | 224 ---------- docs/goals/athas-acp-zed-parity/state.yaml | 304 -------------- 6 files changed, 1288 deletions(-) delete mode 100644 docs/goals/athas-acp-harness-parity/goal.md delete mode 100644 docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md delete mode 100644 docs/goals/athas-acp-harness-parity/state.yaml delete mode 100644 docs/goals/athas-acp-zed-parity/goal.md delete mode 100644 docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md delete mode 100644 docs/goals/athas-acp-zed-parity/state.yaml diff --git a/docs/goals/athas-acp-harness-parity/goal.md b/docs/goals/athas-acp-harness-parity/goal.md deleted file mode 100644 index f94918cb6..000000000 --- a/docs/goals/athas-acp-harness-parity/goal.md +++ /dev/null @@ -1,77 +0,0 @@ -# Athas ACP Harness Parity - -## Objective - -Bring Athas's ACP harness posture up toward Zed's by studying Zed's local ACP harness/test patterns, then extending Athas's existing ACP foundation with comparable behavioral and provider/adaptor harness coverage. - -## Original Request - -Match the amount of ACP harness that Zed has for Athas as well. Look at how Zed does it and do the same for Athas. Athas's foundation is already good; keep going from there. - -## Intake Summary - -- Input shape: `specific` -- Audience: Athas maintainers and contributors working on ACP reliability. -- Authority: `approved` -- Proof type: `test` -- Completion proof: Zed's ACP harness/test posture is mapped, Athas gains the first highest-value comparable harness slices, and local verification proves those Athas harnesses run. -- Likely misfire: Counting tests or cloning Zed structure mechanically without creating reusable, reliable Athas ACP coverage. -- Blind spots considered: Harness parity could mean raw test count, protocol behavior coverage, fake-agent infrastructure, provider/adaptor compatibility, real adapter smoke, or developer tooling; real provider smoke can become auth-heavy or network-flaky. -- Existing plan facts: Use `/Users/sw/Code/zed` as the reference source; do not treat Athas as sparse or broken; extend Athas's existing foundation; optimize for behavioral ACP harness coverage plus provider compatibility harness; include real adapter smoke where practical but avoid auth-heavy or flaky provider runs. - -## Goal Kind - -`specific` - -## Current Tranche - -Map how Zed structures ACP harnesses and tests, identify the closest Athas extension points, implement successive safe harness slices for core ACP behavior and provider/adaptor compatibility, verify them locally, and audit whether the first harness parity tranche is complete. - -## Non-Negotiable Constraints - -- Treat `/Users/sw/Code/zed` as the local reference implementation for ACP harness/test posture. -- Cross-reference Zed before choosing Athas harness work. -- Preserve and extend Athas's existing ACP foundation; do not rewrite working architecture just to mimic Zed names. -- Optimize for reliable harness infrastructure, not raw test count. -- Cover both behavioral ACP flows and provider/adaptor compatibility where practical. -- Prefer deterministic local harnesses. If real adapter smoke needs auth, secrets, network, or brittle local state, record the blocker and use a simulated/fake harness path instead. -- Do not add broad provider expansion unless Zed evidence shows it is required for harness parity. -- Follow Athas repo rules: use Bun for repo scripts, keep changes scoped, and run relevant checks. - -## Stop Rule - -Stop only when a final audit proves the full original outcome for this tranche is complete. - -Do not stop after mapping Zed, picking a harness design, or adding one partial test if safe harness work remains. Continue through verified implementation slices until the first harness parity tranche has concrete Athas tests/harnesses and an audit receipt. - -Do not stop because real provider smoke needs auth, credentials, production access, network state, or policy decisions. Mark that exact smoke task blocked with a receipt, then continue with deterministic local harness work that advances the same parity goal. - -## Canonical Board - -Machine truth lives at: - -`docs/goals/athas-acp-harness-parity/state.yaml` - -If this charter and `state.yaml` disagree, `state.yaml` wins for task status, active task, receipts, verification freshness, and completion truth. - -## Run Command - -```text -/goal Follow docs/goals/athas-acp-harness-parity/goal.md. -``` - -## PM Loop - -On every `/goal` continuation: - -1. Read this charter. -2. Read `state.yaml`. -3. Run the bundled GoalBuddy update checker when available and mention a newer version without blocking. -4. Re-check the intake: original request, input shape, authority, proof, blind spots, existing plan facts, and likely misfire. -5. Work only on the active board task. -6. Assign Scout, Judge, Worker, or PM according to the task. -7. Write a compact task receipt. -8. Update the board. -9. If Judge selected a safe Worker task with `allowed_files`, `verify`, and `stop_if`, activate it and continue unless blocked. -10. Treat a slice audit as a checkpoint, not completion, unless it explicitly proves the full original outcome is complete. -11. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`. diff --git a/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md b/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md deleted file mode 100644 index 8f5cde9c9..000000000 --- a/docs/goals/athas-acp-harness-parity/notes/T001-acp-harness-map.md +++ /dev/null @@ -1,219 +0,0 @@ -# T001 Scout Receipt: ACP Harness Map - -## Result - -Read-only Scout mapped. - -## Summary - -Zed's ACP harness posture is not just more tests; it has reusable fake ACP transports plus GPUI behavioral tests that exercise production connection/thread paths. Athas already has a solid ACP foundation with frontend stream-handler regression tests, Rust bootstrap decision tests, terminal-state tests, registry/catalog seams, and Tauri command surfaces. The closest parity move is to add a deterministic Athas fake ACP connection/client harness around `crates/ai/src/acp/*`, then use it for behavior slices before any provider expansion. - -## Zed ACP Harness/Test Map - -Behavior harness: - -- `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`: GPUI behavioral ACP thread tests using `FakeAgentConnection::new()` and in-thread session updates. -- Representative tests: - - `test_terminal_output_buffered_before_created_renders` - - `test_terminal_output_and_exit_buffered_before_created` - - `test_terminal_kill_allows_wait_for_exit_to_complete` - - `test_push_user_content_block` - - `test_thinking_concatenation` - - `test_ignore_echoed_user_message_chunks_during_active_turn` - - `test_edits_concurrently_to_user` - - `test_reading_from_line` - - `test_reading_empty_file` - - `test_reading_non_existing_file` - - `test_succeeding_canceled_toolcall` - - `test_no_pending_edits_if_tool_calls_are_completed` - - `test_tool_result_refusal` - - `test_user_prompt_refusal_emits_event` - - `test_refusal` - - `test_tool_call_not_found_creates_failed_entry` - - `test_follow_up_message_during_generation_does_not_clear_turn` - - `test_send_returns_cancelled_response_and_marks_tools_as_cancelled` - - `test_running_turn_cleared_when_send_task_dropped` - - `test_session_info_update_replaces_provisional_title_and_emits_event` - - `test_usage_update_populates_token_usage_and_cost` - - `test_usage_update_without_cost_preserves_existing_cost` - - `test_response_usage_does_not_clobber_session_usage` - - `test_clearing_token_usage_also_clears_cost` - -Fake ACP transport/provider harness: - -- `/Users/sw/Code/zed/crates/agent_servers/src/acp.rs`: `FakeAcpAgentServer`, `FakeAcpConnectionHarness`, `FakeAcpAgentConnection`, `build_fake_acp_connection`, `connect_fake_acp_connection`. -- Harness uses `agent_client_protocol::Channel::duplex()`, wires production `connect_client_future` handlers, responds to initialize/auth/new/prompt/load/close/cancel, tracks load/close counts, supports simulated server exit and forced prompt failure. -- Tests: - - `test_loaded_sessions_keep_state_until_last_close` - - `test_load_session_replays_notifications_sent_before_response` - - `test_close_session_during_in_flight_load` - - `test_close_during_load_preserves_other_concurrent_loader` - -Permissions/tools/provider harness: - -- `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs`: fake model/provider and fake terminal environment tests for tools, streaming tools, cancellation, auth/permission behavior, MCP tool compatibility, retry/error handling. -- Key tests: - - `test_basic_tool_calls` - - `test_streaming_tool_calls` - - `test_tool_authorization` - - `test_tool_hallucination` - - `test_cancellation` - - `test_terminal_tool_cancellation_captures_output` - - `test_cancellation_aware_tool_responds_to_cancellation` - - `test_in_progress_send_canceled_by_next_send` - - `test_retry_cancelled_promptly_on_new_send` - - `test_tool_updates_to_completion` - - `test_send_retry_finishes_tool_calls_on_error` - - `test_streaming_tool_completes_when_llm_stream_ends_without_final_input` - - `test_streaming_tool_error_breaks_stream_loop_immediately` - - `test_streaming_tool_error_waits_for_prior_tools_to_complete` -- Permission option tests include: - - `test_permission_options_terminal_with_pattern` - - `test_permission_options_edit_file_with_path_pattern` - - `test_permission_options_fetch_with_domain_pattern` - - `test_permission_options_terminal_pipeline_produces_dropdown_with_patterns` -- `/Users/sw/Code/zed/crates/agent/src/tool_permissions.rs` has a focused permission-rule matrix for allow/deny/path/url/terminal patterns and rule resolution. - -Reference commands: - -- `cargo test -p acp_thread test_usage_update_populates_token_usage_and_cost` -- `cargo test -p acp_thread test_send_returns_cancelled_response_and_marks_tools_as_cancelled` -- `cargo test -p agent_servers test_load_session_replays_notifications_sent_before_response` -- `cargo test -p agent test_cancellation` -- `cargo test -p agent test_permission_options_terminal_with_pattern` - -## Athas Current ACP Harness/Test Map - -Frontend behavior tests: - -- `/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts` - - `finalizes active tools before sending backend cancellation` - - `ignores late events after a cancelled turn is force-stopped` -- `/Users/sw/Code/athas/src/features/ai/tests/acp-activity-groups.test.ts` - - `groups running, recent, and error activity without duplicate signatures` -- `/Users/sw/Code/athas/src/features/ai/tests/acp-session-info.test.ts` - - `returns trimmed title updates` - - `ignores empty or unchanged titles` -- `/Users/sw/Code/athas/src/features/ai/tests/session-config-option-classifier.test.ts` - - `prefers ACP semantic categories over label heuristics` - - `falls back to text classification when category is missing or custom` - -Rust behavior tests: - -- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` - - `loaded_session_bootstrap_preserves_requested_session_id` - - `method_not_found_load_uses_resume_only_when_supported` - - `missing_or_unsupported_load_falls_back_to_new_session` - - `missing_or_unsupported_resume_falls_back_to_new_session` - - `auth_required_errors_are_retriable_before_session_fallbacks` - - `new_session_bootstrap_uses_agent_created_session_id` -- `/Users/sw/Code/athas/crates/ai/src/acp/terminal_state.rs` - - `append_output_truncates_from_beginning` - - `append_output_preserves_utf8_boundaries_when_truncating` - - `exit_status_preserves_none_exit_code_for_signal_termination` - -Registry/adapter tests: - -- `/Users/sw/Code/athas/crates/ai/src/acp/config.rs` - - `managed_wrapper_path_prefers_expected_wrapper_name` - - `check_dir_for_binary_returns_none_for_missing_binary` -- Implementation seams: `AgentRegistry::replace_agents`, `detect_installed`, `detect_codex_adapter`, `managed_wrapper_path`, `find_binary`. - -Extension points: - -- `/Users/sw/Code/athas/crates/ai/src/acp/client.rs`: `AthasAcpClient` maps ACP session notifications, permission requests, file callbacks, terminal callbacks, and extension methods into `AcpEvent`. -- `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs`: `AcpWorker` owns connection/session/process lifecycle and exposes send/cancel/list/stop/mode/config operations. -- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs`: bootstrap path for initialize/auth/session load/resume/new and initial mode/config event emission. -- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_prompt.rs`: prompt send, auth retry, timeout, and prompt_complete emission. -- `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts`: frontend event-order/cancellation/session/status/tool/permission handler and current easiest deterministic test seam. -- `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs`: Tauri command shell plus marketplace catalog conversion and `refresh_registered_agents`. - -Current verification options: - -- `bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts` -- `cargo test -p athas-ai acp::bridge_init::tests` -- `cargo test -p athas-ai acp::terminal_state::tests` -- `cargo test -p athas-ai acp::config::tests` -- `bun typecheck` -- `bun check:rust` - -## Harness Parity Matrix - -- Sessions: Zed is strong with fake ACP transport covering load/close ref counts, in-flight load, notifications before response, concurrent loaders, and session info title updates. Athas is medium: bootstrap decision tests and frontend title/session-info tests exist, but no fake ACP transport proves production connection event ordering. -- Cancellation: Zed is strong with cancelled prompt responses, pending tool cancellation, running-turn cleanup, new send canceling previous send, and terminal/subagent/tool cancellation. Athas is medium with frontend cancellation tests but no Rust bridge fake transport proof. -- Streaming: Zed is strong with echoed user chunk suppression, thinking concatenation, streaming tool parsing/completion/error loops, and buffered text flushing. Athas is low-medium: stream handler routes chunks/tools, but deterministic ordering tests are thin. -- Tools: Zed is strong across tool start/update/complete/failure/not-found, file reads/writes, concurrent edits, terminal buffering/kill/output. Athas is medium: event mapping and terminal buffer tests exist, but no client-level fake app/terminal manager harness for ACP callbacks. -- Permissions: Zed is strong across permission options and allow/deny rule matrices. Athas is low-medium: permission requests are emitted and queue cancellation exists, but no focused protocol response matrix. -- Errors: Zed is strong on startup-exited load errors, auth/internal mapping, refusal, hallucinated tools, retry failure, and dropped send cleanup. Athas is medium with startup/auth/session fallback tests and stream handler formatting. -- Usage: Zed is strong with `UsageUpdate` token usage/cost tests. Athas appears absent for ACP usage event/type mapping. -- Provider/adaptor compatibility: Zed is strong with fake ACP AgentServer and fake model/provider/terminal/MCP harnesses. Athas is low-medium with registry/catalog seams and Codex adapter fallback, but no fake adapter executable/transport smoke. - -## Ranked Athas Harness Gaps - -1. No Rust fake ACP connection harness equivalent to Zed's `FakeAcpConnectionHarness`. - - Highest leverage because it can prove sessions, cancellation, streaming, tools, permissions, errors, and usage locally through production-ish bridge/client paths. - - Target files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/bridge.rs`, possible `crates/ai/src/acp/test_harness.rs`. - -2. No ACP usage/token-cost mapping or tests. - - Clear Zed parity gap with narrow blast radius; currently no Athas event/type exists for usage. - - Target files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`. - -3. Client-level permission mapping/cancel/timeout harness is missing. - - Athas has UI queue behavior but not the protocol response matrix for selected/cancelled/auto-reject/fallback options. - - Target files: `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/components/chat/ai-chat.tsx`. - -4. Streaming/event ordering tests are thin. - - Zed proves duplicate user echo suppression, thinking concatenation, and late turn isolation. - - Target files: `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests`, `crates/ai/src/acp/client.rs`. - -5. Provider/adaptor compatibility smoke is config-only. - - Athas detects Codex adapter and marketplace agents, but lacks local fake adapter/fixture validation of initialize/session/prompt/cancel without auth/network. - - Target files: `crates/ai/src/acp/config.rs`, `src-tauri/src/commands/ai/acp.rs`, possible test fixture under `crates/ai/src/acp` or tests. - -6. Terminal/file ACP callback harness is only partial. - - Terminal buffer unit tests exist, but full create/output/wait/kill/release and read/write file callbacks through `AthasAcpClient` are not covered. - - Target files: `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/terminal_state.rs`. - -## Candidate Worker Slices - -W1: - -- Objective: Add the minimal reusable Athas fake ACP harness for `athas-ai` without changing product behavior. -- Allowed files: `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/mod.rs`, optional new test-only file `crates/ai/src/acp/test_harness.rs`. -- Verification: `cargo test -p athas-ai acp`, `bun check:rust`. -- Stop if: requires live ACP provider/auth/network/secrets; broad provider expansion; bridge rewrite instead of narrow test seam. - -W2: - -- Objective: Use fake harness to prove session notification ordering and load/resume/new behavior through production-ish paths. -- Allowed files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/client.rs`, `crates/ai/src/acp/test_harness.rs`. -- Verification: `cargo test -p athas-ai acp::bridge_init`, `cargo test -p athas-ai acp::client`. -- Stop if: needs external adapter process or new dependencies. - -W3: - -- Objective: Add ACP `UsageUpdate` mapping and deterministic tests. -- Allowed files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/*usage*.test.ts`. -- Verification: `cargo test -p athas-ai acp`, `bunx vp test run src/features/ai/tests`, `bun typecheck`. -- Stop if: usage semantics require UI design beyond exposing routed state/event, or protocol crate lacks `UsageUpdate`. - -W4: - -- Objective: Add permission protocol harness for selected/cancelled/auto-reject/fallback responses. -- Allowed files: `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/acp-permissions.test.ts`. -- Verification: `cargo test -p athas-ai acp::client`, `bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-permissions.test.ts`. -- Stop if: requires changing product permission policy instead of harnessing current behavior, or long real-time timeout. - -W5: - -- Objective: Add local fake adapter/provider compatibility smoke without real provider auth. -- Allowed files: `crates/ai/src/acp/config.rs`, `src-tauri/src/commands/ai/acp.rs`, optional test fixture under `crates/ai/src/acp/tests` or src-tauri tests. -- Verification: `cargo test -p athas-ai acp::config`, `bun check:rust`. -- Stop if: requires real Codex/Claude/Gemini login or expands provider catalog instead of proving adapter compatibility. - -## Ambiguity Requiring Judge - -- Whether first tranche should prioritize reusable harness infrastructure (W1) or immediately add visible usage event slice (W3). Scout recommends W1 for leverage, W3 for fastest narrow parity. -- Whether Athas should add a Rust test-only fake app/terminal manager seam now, or keep first harness frontend-only. Zed evidence favors Rust/in-process fake transport. -- Whether provider/adaptor compatibility means fake local adapter smoke only, or real Codex/Claude/Gemini smoke. Evidence and charter favor fake/local deterministic smoke and avoiding auth-heavy real providers. -- Whether `UsageUpdate` is available in Athas's pinned `agent_client_protocol` version; current Athas code has no usage mapping hits, but Worker should confirm dependency API before editing. diff --git a/docs/goals/athas-acp-harness-parity/state.yaml b/docs/goals/athas-acp-harness-parity/state.yaml deleted file mode 100644 index 0e0c73d32..000000000 --- a/docs/goals/athas-acp-harness-parity/state.yaml +++ /dev/null @@ -1,382 +0,0 @@ -# GoalBuddy v2 state.yaml -# Board truth lives here. goal.md is the editable charter; notes/ holds long receipts only. - -version: 2 - -goal: - title: "Athas ACP Harness Parity" - slug: "athas-acp-harness-parity" - kind: specific - tranche: "Zed-referenced ACP harness parity: map Zed harnesses, extend Athas harness coverage, verify, audit, and continue through safe slices until the first harness tranche is complete." - status: done - intake: - original_request: "Match the amount of ACP harness that Zed has for Athas; look at how Zed does it and do the same for Athas while building on Athas's good foundation." - interpreted_outcome: "Athas gains Zed-referenced ACP harness coverage for core behavior and provider/adaptor compatibility, with local tests proving the first tranche." - input_shape: specific - audience: "Athas maintainers and contributors" - authority: approved - proof_type: test - completion_proof: "A Zed harness matrix exists, selected Athas harness slices are implemented, relevant local tests/checks pass, and final audit confirms the first harness parity tranche is complete." - likely_misfire: "Creating many tests or copying Zed structure mechanically without reliable Athas ACP harness value." - blind_spots_considered: - - "Harness parity may mean behavior coverage, fake-agent infrastructure, provider/adaptor compatibility, smoke tooling, or raw test count." - - "Real provider smoke can be flaky if it depends on auth, secrets, network, or local CLI state." - - "Athas foundation is already good; the task is extension, not replacement." - - "Provider compatibility belongs in scope, but broad provider expansion does not." - existing_plan_facts: - - "Use /Users/sw/Code/zed as the local reference source." - - "Study how Zed does ACP harnesses/tests before choosing Athas work." - - "Athas foundation is good; keep going from there." - - "Optimize for behavioral ACP harness coverage plus provider compatibility harness." - - "Include real adapter smoke where practical, but avoid auth-heavy or network-flaky runs." - -rules: - pm_owns_state: true - one_active_task: true - max_write_workers: 1 - no_implementation_without_worker_or_pm_task: true - no_completion_without_judge_or_pm_audit: true - planning_is_not_completion: true - queued_required_worker_blocks_completion: true - continuous_until_full_outcome: true - missing_input_or_credentials_do_not_stop_goal: true - preserve_and_validate_existing_plan: true - intake_misfire_must_be_audited: true - -agents: - scout: installed - worker: installed - judge: installed - -active_task: null - -tasks: - - id: T001 - type: scout - assignee: Scout - status: done - reasoning_hint: medium - objective: "Map Zed's ACP harness/test posture and identify the closest Athas harness extension points." - inputs: - - "/Users/sw/Code/zed" - - "/Users/sw/Code/athas" - - "Prior Athas ACP parity board: docs/goals/athas-acp-zed-parity/state.yaml" - - "Prior Athas ACP parity note: docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md" - constraints: - - "Read-only." - - "Do not edit implementation files." - - "Treat /Users/sw/Code/zed as the reference for ACP harness/test posture." - - "Use concrete file paths, test names, fixtures, fake providers/adapters, and commands." - - "Separate behavior harness gaps from provider/adaptor compatibility harness gaps." - - "Preserve Athas's existing foundation; recommend extension points, not rewrites." - expected_output: - - "Zed ACP harness/test map with file-path and test-name evidence." - - "Athas current ACP harness/test map with file-path and test-name evidence." - - "Harness parity matrix covering sessions, cancellation, streaming, tools, permissions, errors, usage, and provider/adaptor compatibility." - - "Ranked Athas harness gaps by leverage, determinism, and verification strength." - - "Candidate Worker slices with allowed_files, verification options, and stop conditions." - receipt: - result: done - note: "notes/T001-acp-harness-map.md" - summary: "Mapped Zed and Athas ACP harness posture. Zed's leverage comes from fake ACP transports plus behavioral tests. Athas has a solid foundation but lacks a reusable fake ACP connection harness and several coverage slices." - evidence: - - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" - - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" - - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" - - "/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs" - - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" - - "/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts" - candidate_worker_slices: - - "W1: add minimal reusable fake ACP harness for athas-ai." - - "W3: add ACP UsageUpdate mapping and deterministic tests." - - "W4: add permission protocol harness." - - "W5: add local fake adapter/provider compatibility smoke." - - - id: T002 - type: judge - assignee: Judge - status: done - reasoning_hint: high - objective: "Choose the first safe Athas ACP harness implementation slice from Scout evidence." - inputs: - - "T001 receipt or note." - - "Current dirty diff." - - "Athas validation commands." - constraints: - - "Read-only." - - "Do not implement." - - "Require Zed harness evidence for the selected gap." - - "Prefer deterministic local harnesses over auth-heavy or network-flaky real provider runs." - - "Do not choose broad provider expansion." - expected_output: - - "Decision." - - "Why this harness slice is first." - - "Exact Worker objective." - - "allowed_files." - - "verify." - - "stop_if." - - "Deferred or blocked candidates." - receipt: - result: done - decision: "Choose a narrowed W3 slice first: ACP UsageUpdate mapping plus deterministic harness tests. Do not start with the broad reusable fake ACP connection harness yet." - why_this_slice_is_first: - - "Zed has direct behavior-level usage update handling and focused tests." - - "Athas's pinned ACP schema exposes UsageUpdate with used and size." - - "Athas currently drops UsageUpdate in client session notification handling." - - "This is deterministic and local, with no provider auth, network, secrets, real adapter process, or broad provider expansion." - evidence: - - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" - - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" - - "/Users/sw/Code/athas/vendor/agent-client-protocol-schema/src/client.rs" - - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" - next_allowed_task: T003 - deferred_or_blocked_candidates: - - "Deferred: broad fake ACP connection harness." - - "Deferred: session notification ordering/load-resume-new behavior through fake harness." - - "Deferred: permission protocol harness." - - "Deferred: local fake adapter/provider compatibility smoke." - - "Blocked for this step: real Codex/Claude/Gemini provider runs." - - - id: T003 - type: worker - assignee: Worker - status: done - reasoning_hint: default - objective: "Add deterministic ACP UsageUpdate mapping and harness tests as the first Zed-referenced Athas ACP harness slice." - allowed_files: - - "crates/ai/src/acp/types.rs" - - "crates/ai/src/acp/client.rs" - - "src/features/ai/types/acp.ts" - - "src/features/ai/services/acp-stream-handler.ts" - - "src/features/ai/tests/acp-usage.test.ts" - verify: - - "cargo test -p athas-ai acp::client" - - "cargo test -p athas-ai acp" - - "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" - - "bun typecheck" - - "bun check:rust" - stop_if: - - "Need files outside allowed_files." - - "The Worker cannot test the Rust client mapping without introducing a broad app-handle or Tauri runtime rewrite." - - "The implementation starts storing/rendering usage in UI state instead of only adding pass-through/mapping coverage." - - "The Worker tries to add cost/currency fields not present in Athas's pinned UsageUpdate schema." - - "The task requires live ACP provider auth, real adapter processes, secrets, network, or a provider catalog expansion." - - "Verification fails twice for reasons not directly caused by this slice." - - "Existing untracked GoalBuddy files are the only dirty files now; stop if unrelated implementation dirty files appear before editing." - receipt: - result: done - summary: "Implemented deterministic ACP UsageUpdate pass-through coverage. The Rust ACP client now maps protocol UsageUpdate notifications into frontend acp-event payloads, and frontend stream handling accepts usage_update as a generic event without mutating chat output or tool state." - changed_files: - - "crates/ai/src/acp/types.rs" - - "crates/ai/src/acp/client.rs" - - "src/features/ai/types/acp.ts" - - "src/features/ai/services/acp-stream-handler.ts" - - "src/features/ai/tests/acp-usage.test.ts" - harness_evidence: - - "crates/ai/src/acp/client.rs::usage_update_maps_to_frontend_event" - - "src/features/ai/tests/acp-usage.test.ts" - verification: - passed: - - "cargo fmt -p athas-ai --check" - - "cargo test -p athas-ai acp::client" - - "cargo test -p athas-ai acp" - - "cargo check -p athas-ai" - - "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" - - "bun typecheck" - - "git diff --check" - blocked: - - "bun check:rust exits 1 on pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs; those files are outside the T003 allowed_files and were not changed." - commands: - - cmd: "cargo fmt -p athas-ai --check" - status: pass - - cmd: "cargo test -p athas-ai acp::client" - status: pass - - cmd: "cargo test -p athas-ai acp" - status: pass - - cmd: "cargo check -p athas-ai" - status: pass - - cmd: "bunx vp test run src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" - status: pass - - cmd: "bun typecheck" - status: pass - - cmd: "git diff --check" - status: pass - blocked_verification: - - cmd: "bun check:rust" - reason: "Pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs are outside the T003 allowed_files and were not changed." - zed_reference: - - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" - - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs usage update tests cited by T001" - - - id: T004 - type: judge - assignee: Judge - status: done - reasoning_hint: high - objective: "Audit the first implemented harness slice and choose the next safe harness slice if the tranche is not complete." - inputs: - - "T001 receipt or note." - - "T002 receipt." - - "T003 receipt." - - "Current dirty diff." - - "Last verification." - constraints: - - "Read-only." - - "Reject completion if the slice is only a map or plan." - - "Reject completion if no Athas harness/test was implemented and verified." - - "If not complete, define the next smallest safe Scout, Judge, or Worker task." - expected_output: - - "slice_complete: true | false." - - "full_outcome_complete: true | false." - - "Harness parity evidence." - - "Missing evidence." - - "Next task if not complete." - receipt: - result: done - slice_complete: true - full_outcome_complete: false - summary: "T003 is a valid first harness slice: it adds implemented Rust and frontend coverage for ACP UsageUpdate, references Zed's behavior-level usage harness posture, and keeps the behavior deterministic with no provider auth or UI storage expansion." - harness_parity_evidence: - - "Rust protocol UsageUpdate is mapped into an AcpEvent and covered by cargo test -p athas-ai acp::client." - - "Frontend AcpStreamHandler accepts usage_update through the generic ACP event stream and does not mutate chat output, covered by src/features/ai/tests/acp-usage.test.ts." - - "The broader athas-ai ACP suite still passes with the new event shape." - missing_evidence: - - "Permission request/response behavior is implemented but has no focused harness tests matching Zed's permission-heavy ACP posture." - - "Provider/adaptor fake smoke remains deferred." - next_task: T005 - decision: "Continue with a narrow permission protocol harness slice before broad fake adapter work." - why_next: - - "Zed has extensive permission behavior tests and request_permission handling evidence." - - "Athas already has permission request, option, callback, auto-reject, and selected-option behavior; the gap is deterministic harness coverage." - - "This is local and does not require real providers, secrets, network, or adapter processes." - - - id: T005 - type: worker - assignee: Worker - status: done - reasoning_hint: default - objective: "Add deterministic ACP permission protocol harness coverage for Rust permission selection/mapping and frontend permission request handling, using Zed's permission harness posture as the reference." - allowed_files: - - "crates/ai/src/acp/client.rs" - - "src/features/ai/services/acp-stream-handler.ts" - - "src/features/ai/tests/acp-permission.test.ts" - verify: - - "cargo test -p athas-ai acp::client" - - "cargo test -p athas-ai acp" - - "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" - - "bun typecheck" - - "bun check:rust" - stop_if: - - "No Judge receipt has selected this slice." - - "Need files outside allowed_files." - - "Need to instantiate a full Tauri AppHandle or run a live ACP provider instead of pure helper/frontend handler harnesses." - - "The implementation changes the permission UI or product behavior rather than adding deterministic harness seams and tests." - - "The task requires auth-heavy, network-flaky, or secret-bearing provider runs." - - "The task broadens into provider expansion without harness-parity evidence." - - "Verification fails twice." - receipt: - result: done - summary: "Added deterministic ACP permission protocol harness coverage without changing user-facing permission behavior. Rust now exposes pure permission mapping/selection seams covered by unit tests, and frontend stream handling is covered for explicit permission callbacks plus safety auto-reject when no handler is registered." - changed_files: - - "crates/ai/src/acp/client.rs" - - "src/features/ai/tests/acp-permission.test.ts" - harness_evidence: - - "crates/ai/src/acp/client.rs::permission_option_kinds_map_to_frontend_kinds" - - "crates/ai/src/acp/client.rs::approved_permission_prefers_allow_options" - - "crates/ai/src/acp/client.rs::rejected_permission_prefers_reject_options" - - "crates/ai/src/acp/client.rs::permission_choice_cancels_when_options_are_empty" - - "src/features/ai/tests/acp-permission.test.ts" - commands: - - cmd: "cargo fmt -p athas-ai" - status: pass - - cmd: "cargo fmt -p athas-ai --check" - status: pass - - cmd: "cargo test -p athas-ai acp::client" - status: pass - - cmd: "cargo test -p athas-ai acp" - status: pass - - cmd: "cargo check -p athas-ai" - status: pass - - cmd: "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" - status: pass - - cmd: "bun typecheck" - status: pass - - cmd: "git diff --check" - status: pass - blocked_verification: - - cmd: "bun check:rust" - reason: "Pre-existing repo-wide rustfmt diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs are outside the T005 allowed_files and were not changed." - zed_reference: - - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs permission option and parallel permission tests" - - "/Users/sw/Code/zed/crates/agent_servers/src/e2e_tests.rs::test_tool_call_with_permission" - - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs::handle_request_permission" - - - id: T999 - type: judge - assignee: Judge - status: done - reasoning_hint: high - objective: "Final audit for the first Athas ACP harness parity tranche." - inputs: - - "All done task receipts." - - "Zed harness reference evidence." - - "Last verification." - - "Current dirty diff." - constraints: - - "Read-only." - - "Do not implement." - - "Reject completion if required Worker work is still queued or active." - - "Reject completion if the evidence does not map Athas harness work back to Zed-referenced parity." - - "Reject completion if the result is only a comparison artifact with no verified Athas harness improvement." - expected_output: - - "complete | not_complete." - - "full_outcome_complete: true | false." - - "Completed harness parity behaviors." - - "Missing evidence." - - "Next task if not complete." - receipt: - result: done - decision: complete - summary: "Approve T999. The first Athas ACP harness parity tranche is complete: Zed's ACP harness posture was mapped, then Athas gained two deterministic, Zed-referenced harness slices with Rust and frontend tests covering usage updates and permission protocol behavior." - full_outcome_complete: true - tranche_complete: true - completed_harness_parity_behaviors: - - "UsageUpdate mapping: ACP UsageUpdate notifications now produce frontend usage_update events with used and size, matching the pinned schema and closing the previous silent-drop path." - - "UsageUpdate proof: Rust unit coverage verifies protocol-to-event mapping, and frontend coverage verifies usage_update passes through the generic ACP event stream without mutating chat output or tool state." - - "Permission protocol harness: permission option kind mapping and selected allow/reject/cancel outcomes now have pure Rust tests, reflecting Zed's permission-heavy ACP harness posture." - - "Permission frontend proof: AcpStreamHandler tests verify permission requests reach the permission callback and auto-reject safely when no handler is registered." - zed_reference_evidence: - - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" - - "/Users/sw/Code/zed/crates/agent_servers/src/acp.rs" - - "/Users/sw/Code/zed/crates/agent_servers/src/e2e_tests.rs::test_tool_call_with_permission" - - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs usage and permission tests cited by T001" - verification_summary: - - "cargo test -p athas-ai acp::client passed with 5 client harness tests." - - "cargo test -p athas-ai acp passed with 16 ACP tests." - - "Frontend ACP focused harness run passed with 6 files and 10 tests." - - "bun typecheck passed." - - "cargo check -p athas-ai passed." - - "cargo fmt -p athas-ai --check passed." - - "git diff --check passed." - missing_evidence: - - "No blocking missing evidence for this first harness tranche." - - "Deferred: broad reusable fake ACP connection harness and local fake provider/adaptor smoke remain useful later slices." - - "No live ACP provider smoke was included; acceptable because real provider smoke would risk auth, network, and local adapter state." - - "Verification caveat: bun check:rust remains blocked by unrelated repo-wide rustfmt diffs outside this tranche's allowed files." - next_task_if_not_complete: null - -checks: - dirty_fingerprint: unknown - last_verification: - result: scoped_pass_with_unrelated_blocker - task: T999 - commands: - - "cargo fmt -p athas-ai" - - "cargo fmt -p athas-ai --check" - - "cargo test -p athas-ai acp::client" - - "cargo test -p athas-ai acp" - - "cargo check -p athas-ai" - - "bunx vp test run src/features/ai/tests/acp-permission.test.ts src/features/ai/tests/acp-usage.test.ts src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts src/features/ai/tests/acp-session-info.test.ts src/features/ai/tests/session-config-option-classifier.test.ts" - - "bun typecheck" - - "git diff --check" - - "bun check:rust blocked by unrelated repo-wide rustfmt diffs outside T003/T005 allowed_files" diff --git a/docs/goals/athas-acp-zed-parity/goal.md b/docs/goals/athas-acp-zed-parity/goal.md deleted file mode 100644 index 5867cb24d..000000000 --- a/docs/goals/athas-acp-zed-parity/goal.md +++ /dev/null @@ -1,82 +0,0 @@ -# Athas ACP Zed Parity - -## Objective - -Harden Athas Agent Client Protocol behavior against Zed as the local reference implementation, then complete successive safe verified patches until the first behavioral parity tranche is proven. - -## Original Request - -Make sure Athas ACP is on par with Zed by studying the local Zed repo at `/Users/sw/Code/zed`, hardening Athas ACP, and adding providers or harnesses only where they are needed to patch weak angles. - -## Intake Summary - -- Input shape: `specific` -- Audience: Athas users and maintainers who need reliable ACP behavior. -- Authority: `approved` -- Proof type: `test` -- Completion proof: Zed-referenced ACP behavior gaps are mapped, the highest-leverage Athas ACP gaps are patched, and verification proves the selected core flows now behave correctly in Athas. -- Likely misfire: Producing a comparison document or broad provider wishlist without making and verifying concrete Athas ACP improvements. -- Blind spots considered: Whether "on par with Zed" means protocol correctness, provider breadth, UX polish, or harness maturity; whether provider expansion should be delayed until core behavior is hardened; whether proof should be behavioral rather than just architectural. -- Existing plan facts: Use `/Users/sw/Code/zed` as the source to study and understand; Athas already has a good foundation; patch the bad angles; avoid broad provider expansion in the first tranche unless parity-critical evidence requires it. - -## Goal Kind - -`specific` - -## Current Tranche - -Discover Zed's ACP behavior and Athas's current ACP implementation, identify the highest-leverage behavioral gaps, choose small safe implementation slices, patch Athas, verify with behavioral parity evidence, audit each slice, and continue until the first ACP hardening tranche is complete. - -## Non-Negotiable Constraints - -- Treat `/Users/sw/Code/zed` as the local reference implementation for ACP behavior. -- Cross-reference Zed before changing Athas ACP behavior. -- Preserve Athas's existing foundation; do not rewrite or bulldoze working ACP architecture without evidence. -- Do not broaden into provider expansion by default. -- Add or modify provider support only when Scout/Judge evidence shows it is required for a parity-critical ACP flow. -- Prefer behavioral proof over architecture notes. -- Keep implementation slices small, reviewable, and locally verifiable. -- Follow Athas repo rules: use Bun for repo scripts, update relevant specs when code changes, and run relevant checks before handoff. - -## Stop Rule - -Stop only when a final audit proves the full original outcome is complete. - -Do not stop after planning, discovery, or Judge selection if a safe Worker task can be activated. - -Do not stop after a single verified Worker slice when the broader ACP parity tranche still has safe local follow-up slices. After each slice audit, advance the board to the next highest-leverage safe Worker task and continue. - -Do not stop because a slice needs owner input, credentials, production access, destructive operations, or policy decisions. Mark that exact slice blocked with a receipt, create the smallest safe follow-up or workaround task, and continue all local, non-destructive work that can still move the goal toward the full outcome. - -## Canonical Board - -Machine truth lives at: - -`docs/goals/athas-acp-zed-parity/state.yaml` - -If this charter and `state.yaml` disagree, `state.yaml` wins for task status, active task, receipts, verification freshness, and completion truth. - -## Run Command - -```text -/goal Follow docs/goals/athas-acp-zed-parity/goal.md. -``` - -## PM Loop - -On every `/goal` continuation: - -1. Read this charter. -2. Read `state.yaml`. -3. Run the bundled GoalBuddy update checker when available and mention a newer version without blocking. -4. Re-check the intake: original request, input shape, authority, proof, blind spots, existing plan facts, and likely misfire. -5. Work only on the active board task. -6. Assign Scout, Judge, Worker, or PM according to the task. -7. Write a compact task receipt. -8. Update the board. -9. If Judge selected a safe Worker task with `allowed_files`, `verify`, and `stop_if`, activate it and continue unless blocked. -10. If a problem, suggestion, or follow-up should become a repo artifact, create an approved issue/PR or ask the operator whether to create one. -11. Treat a slice audit as a checkpoint, not completion, unless it explicitly proves the full original outcome is complete. -12. Finish only with a Judge/PM audit receipt that maps receipts and verification back to the original user outcome and records `full_outcome_complete: true`. - -Issue and PR handoffs are supporting artifacts. `state.yaml` remains authoritative, and every external artifact decision must be recorded in a task receipt. diff --git a/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md b/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md deleted file mode 100644 index c0916930c..000000000 --- a/docs/goals/athas-acp-zed-parity/notes/T001-acp-zed-map.md +++ /dev/null @@ -1,224 +0,0 @@ -# T001 Scout Receipt: ACP Zed Map - -## Result - -Mapped. - -## Summary - -Athas has a functional ACP client bridge with process lifecycle, initialize/auth/session bootstrap, prompt/cancel, session list/load/resume, file and terminal client capabilities, permission UI, events, and frontend stream handling. Zed's reference is materially deeper in behavioral turn semantics and test coverage: it models ACP as an `AcpThread`, guards prompt turns, cancels/replaces in-flight turns, marks pending tools cancelled, buffers terminal output before terminal creation, tracks usage/cost, and has extensive authorization/cancellation tests. Highest-leverage first tranche should harden Athas core protocol behavior, not broaden providers. - -## Evidence Paths - -Board: - -- `/Users/sw/Code/athas/docs/goals/athas-acp-zed-parity/goal.md` -- `/Users/sw/Code/athas/docs/goals/athas-acp-zed-parity/state.yaml` - -Athas core: - -- `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs` -- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` -- `/Users/sw/Code/athas/crates/ai/src/acp/bridge_prompt.rs` -- `/Users/sw/Code/athas/crates/ai/src/acp/client.rs` -- `/Users/sw/Code/athas/crates/ai/src/acp/types.rs` -- `/Users/sw/Code/athas/crates/ai/src/acp/terminal_state.rs` -- `/Users/sw/Code/athas/crates/ai/src/acp/config.rs` -- `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs` - -Athas frontend: - -- `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts` -- `/Users/sw/Code/athas/src/features/ai/components/chat/ai-chat.tsx` -- `/Users/sw/Code/athas/src/features/ai/types/acp.ts` -- `/Users/sw/Code/athas/src/features/ai/tests/acp-activity-groups.test.ts` -- `/Users/sw/Code/athas/src/features/ai/tests/session-config-option-classifier.test.ts` -- `/Users/sw/Code/athas/src/features/ai/tests/acp-session-info.test.ts` - -Zed reference: - -- `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs` -- `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` -- `/Users/sw/Code/zed/crates/agent/src/thread.rs` -- `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs` -- `/Users/sw/Code/zed/crates/agent/src/tests/test_tools.rs` -- `/Users/sw/Code/zed/docs/src/ai/external-agents.md` -- `/Users/sw/Code/zed/crates/project/src/agent_registry_store.rs` -- `/Users/sw/Code/zed/crates/migrator/src/migrations/m_2026_02_25/settings.rs` - -## Athas ACP Implementation Map - -Lifecycle: - -- Tauri commands expose start/stop/send/status/respond/mode/config/list/cancel in `src-tauri/src/commands/ai/acp.rs`. -- `AcpAgentBridge` dispatches commands to a dedicated worker thread and emits status in `crates/ai/src/acp/bridge.rs`. -- Worker stops existing agent before initialize and tracks process/connection/session/client in `crates/ai/src/acp/bridge.rs`. -- Process death emits error/status and clears state in `crates/ai/src/acp/bridge.rs`. -- Stop optionally calls `session/close` when advertised, aborts IO, stops child tree, and clears state in `crates/ai/src/acp/bridge.rs`. - -Session bootstrap: - -- Initialize creates `ClientSideConnection`, sends `InitializeRequest` with fs read/write, terminal, and Athas ext-method metadata in `crates/ai/src/acp/bridge_init.rs`. -- `AuthRequired` retry exists for session load/resume/new in `crates/ai/src/acp/bridge_init.rs`. -- Requested session tries `session/load`, then `session/resume` only when resume capability exists, then `session/new` fallback on `MethodNotFound`/`ResourceNotFound` in `crates/ai/src/acp/bridge_init.rs`. -- Initial modes/config options are emitted as frontend events in `crates/ai/src/acp/bridge_init.rs`. - -Streaming and events: - -- Client maps `UserMessageChunk`, `AgentMessageChunk`, `AgentThoughtChunk`, `ToolCall`, `ToolCallUpdate`, modes, config, session info, commands, and plan to `AcpEvent` in `crates/ai/src/acp/client.rs`. -- Frontend `AcpStreamHandler` listens to `acp-event` and routes chunks/tools/permission/status/modes/config/plan/prompt_complete/ui_action in `src/features/ai/services/acp-stream-handler.ts`. -- Chat component keeps global ACP state in sync and appends visible plan/error/status events in `src/features/ai/components/chat/ai-chat.tsx`. - -Cancellation: - -- Backend sends ACP cancel notification in `crates/ai/src/acp/bridge.rs`. -- Frontend force-stops active handler before invoking cancel, then completes UI locally in `src/features/ai/services/acp-stream-handler.ts`. -- Chat stop cancels prompt and responds to queued permissions as cancelled in `src/features/ai/components/chat/ai-chat.tsx`. - -Tools and client capabilities: - -- Permission request emits options and waits up to 300s for response in `crates/ai/src/acp/client.rs`. -- `read_text_file`/`write_text_file` are direct filesystem operations with simple error mapping in `crates/ai/src/acp/client.rs`. -- Terminal create/output/release/wait/kill are implemented over Athas terminal manager in `crates/ai/src/acp/client.rs`. -- Terminal output buffer/truncation is isolated in `crates/ai/src/acp/terminal_state.rs`; tests exist only for buffer truncation, not full ACP ordering. - -Provider handoff: - -- Agent registry is dynamic/manifest-driven; installed detection and managed wrappers live in `crates/ai/src/acp/config.rs`. -- Codex has special direct `codex-acp` or `npx @zed-industries/codex-acp` detection in `crates/ai/src/acp/config.rs`. -- Athas has catalog install/uninstall wrappers in `src-tauri/src/commands/ai/acp.rs`. - -Test health: - -- Athas visible ACP-related tests are mostly frontend utility tests and `terminal_state` unit tests. -- Scout found no Rust ACP bridge/client behavioral tests for prompt/cancel/session/tool ordering. - -## Zed ACP Implementation Map - -Lifecycle and abstraction: - -- Zed centers ACP behind `AgentConnection`: new/load/resume/close/session_history/auth/prompt/retry/cancel/modes/config/list/model selector in `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs`. -- `AcpThread` handles session updates into thread events and UI state in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. - -Turn model: - -- `AcpThread::send` optimistically inserts user message, checkpoints git state, and calls `connection.prompt` with a `UserMessageId` in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. -- `run_turn` cancels any previous turn, tracks `turn_id`, only clears same turn, flushes streaming text, handles `MaxTokens`/`Cancelled`/`Refusal` in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. -- Cancelled responses mark pending tools cancelled and skip completed-plan snapshot in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. - -File and terminal tools: - -- Zed file reads use project buffers, resource-not-found/invalid-params handling, shared snapshots, and agent location in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. -- Zed writes diff against buffer snapshot, update action log, format on save, and save buffer in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. -- Zed terminal creation shells command with env/PAGER handling and terminal entities in `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`. - -Authorization: - -- Built-in tool permissions support allow/deny once/always, path/url/terminal patterns, and pipeline pattern dropdowns in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. -- Authorization loop watches settings so always allow/deny resolves sibling pending prompts in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. -- Third-party/MCP authorization is separated in `/Users/sw/Code/zed/crates/agent/src/thread.rs`. - -Provider breadth: - -- Docs list Gemini CLI, Claude Agent, Codex, GitHub Copilot, registry/custom agents, and MCP forwarding in `/Users/sw/Code/zed/docs/src/ai/external-agents.md`. -- Registry migration maps `gemini`, `claude-acp`, and `codex-acp` in `/Users/sw/Code/zed/crates/migrator/src/migrations/m_2026_02_25/settings.rs`. - -Test health: - -- Zed has direct ACP tests for terminal buffering, cancellation, pending tools, usage/cost, and authorization in `crates/acp_thread/src/acp_thread.rs` and `crates/agent/src/tests/mod.rs`. - -## Behavioral Parity Matrix - -- Lifecycle: Athas has process spawn/init/status/stop/close. Zed has stronger trait separation and turn/thread state. Gap is not an architecture rewrite; harden Athas state transitions around stop/crash/cancel. -- Sessions: Athas supports new/load/resume/list/close capability checks. Zed exposes load/resume/close/list/history through trait plus UI thread. Gap: Athas lacks behavioral tests for load-to-resume fallback and `ResourceNotFound`/`MethodNotFound`/`AuthRequired` branches. -- Streaming: Athas forwards chunks/tools/plans/modes/config/session info. Zed additionally flushes buffered streaming text at turn completion and suppresses duplicate echoed user chunks. Gap: Athas should test event ordering and duplicate user echo behavior. -- Cancellation: Athas sends cancel but frontend marks complete immediately. Zed cancels previous in-flight turn before new send, returns cancelled stop, marks pending tools cancelled, and tests multiple terminal/subagent cases. This is the top protocol behavior gap. -- Tools: Athas maps tool start/update/complete and terminal/file callbacks. Zed has richer buffer-aware file operations, terminal output ordering/buffering, and permission-state propagation. First tranche should target terminal/tool lifecycle semantics, not the full Zed buffer model. -- Errors: Athas maps many backend failures to string errors and synthetic "Tool call failed"; unknown stop reasons become `EndTurn`. Zed distinguishes `MaxTokens`, `Refusal`, `Cancelled`, `ResourceNotFound`, `InvalidParams`, and error events. Gap: preserve stop/error semantics rather than collapse. -- Provider handoff: Athas has dynamic catalog plus Codex adapter fallback. Zed has registry/provider breadth and MCP forwarding. This is provider breadth, not first-tranche protocol hardening, unless a core flow is blocked by a missing adapter. -- Tests: Athas lacks ACP bridge/client behavioral harness; Zed's tests are the main reference. First tranche should add focused Rust or TypeScript tests around one selected behavior. - -## Ranked Gap Candidates - -1. Cancellation semantics and pending tool finalization - - Type: protocol behavior - - Risk: medium - - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs`, `/Users/sw/Code/zed/crates/agent/src/tests/mod.rs` - - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs`, `/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts`, `/Users/sw/Code/athas/src/features/ai/components/chat/ai-chat.tsx` - - Likely files: `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/types/acp.ts` - - Verification options: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`, focused fake ACP adapter/unit harness if available or added. - -2. Missing ACP behavioral test harness for session load/resume/auth fallbacks - - Type: protocol behavior - - Risk: low-medium - - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/connection.rs` - - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs` - - Likely files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_commands.rs` only if needed for test seams. - - Verification options: `cargo test -p athas-ai acp::bridge_init`, `bun check:rust`. - -3. Tool update ordering and unknown/succeeded-after-cancel handling - - Type: protocol behavior - - Risk: medium - - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` - - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/client.rs`, `/Users/sw/Code/athas/src/features/ai/lib/tool-call-state.ts` - - Likely files: `crates/ai/src/acp/client.rs`, `src/features/ai/lib/tool-call-state.ts`, `src/features/ai/services/acp-stream-handler.ts` - - Verification options: frontend unit test for tool-call-state; Rust unit test for client event mapping if fake `AppHandle` is practical. - -4. UsageUpdate/token-cost events unsupported - - Type: protocol behavior - - Risk: low - - Zed evidence: `/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs` - - Athas evidence: no `UsageUpdate`/`TokenUsage` hits under `/Users/sw/Code/athas/crates/ai` or `src/features/ai` except unrelated provider usage. - - Likely files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts` - - Verification options: typecheck plus unit event mapping test. - -5. Provider breadth and registry/MCP forwarding - - Type: provider breadth - - Risk: high for first tranche - - Zed evidence: `/Users/sw/Code/zed/docs/src/ai/external-agents.md` - - Athas evidence: `/Users/sw/Code/athas/crates/ai/src/acp/config.rs`, `/Users/sw/Code/athas/src-tauri/src/commands/ai/acp.rs` - - Recommendation: defer unless Judge decides a missing provider blocks cancellation/session/tool protocol proof. - -## Candidate Worker Slices - -W1: - -- Objective: Add focused cancellation behavior proof and patch Athas so cancelled ACP turns do not leave stale running/tool state. -- Candidate files: `crates/ai/src/acp/bridge.rs`, `crates/ai/src/acp/bridge_prompt.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/types/acp.ts`, focused tests under `crates/ai/src/acp` or `src/features/ai/tests`. -- Candidate verification: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`. -- Stop if: requires provider expansion or no fake/stub path can prove behavior locally. - -W2: - -- Objective: Add `bridge_init` session bootstrap tests for load/resume/new `AuthRequired`/`MethodNotFound`/`ResourceNotFound` decisions. -- Candidate files: `crates/ai/src/acp/bridge_init.rs`, `crates/ai/src/acp/bridge_commands.rs` only if needed for test seams. -- Candidate verification: `cargo test -p athas-ai acp::bridge_init`, `bun check:rust`. -- Stop if: needs live external ACP adapters instead of local stubs. - -W3: - -- Objective: Map ACP `UsageUpdate` into Athas events/types and add event-routing test, without UI polish beyond state availability. -- Candidate files: `crates/ai/src/acp/types.rs`, `crates/ai/src/acp/client.rs`, `src/features/ai/types/acp.ts`, `src/features/ai/services/acp-stream-handler.ts`, `src/features/ai/tests/*`. -- Candidate verification: `bun typecheck`, `bunx vp test run src/features/ai/tests`, `cargo test -p athas-ai acp`. -- Stop if: requires broad analytics/cost UI design. - -## Commands Run By Scout - -- `rg ACP/acp` across Athas board/src/crates/src-tauri. -- `sed`/`nl` reads of Athas ACP bridge/init/prompt/client/types/config/frontend files. -- `rg ACP/acp` across `/Users/sw/Code/zed`. -- `sed`/`nl` reads of Zed `acp_thread`, `connection`, agent thread, docs, tests. -- `jq '.scripts' package.json`. -- `git status --short`. - -## Health Signals - -- Athas repo had untracked `docs/` in `git status`; Scout did not edit or stage anything. -- No dependency installs, builds, tests, or destructive commands were run. -- Athas validation scripts available: `bun typecheck`, `bun check`, `bun check:rust`, `bunx vp test run`. - -## Ambiguity Requiring Judge - -- Whether first Worker should patch behavior immediately or first add a fake ACP harness. Scout recommends W1 only if Judge permits adding the minimal harness/test surface needed to prove cancellation locally. -- Whether `UsageUpdate` belongs in the first tranche. It is a clear Zed parity gap but lower leverage than cancellation/session/tool correctness. -- Whether provider breadth matters for this tranche. Evidence says defer broad provider expansion; only Codex adapter parity appears locally relevant today. diff --git a/docs/goals/athas-acp-zed-parity/state.yaml b/docs/goals/athas-acp-zed-parity/state.yaml deleted file mode 100644 index d8acb3fcd..000000000 --- a/docs/goals/athas-acp-zed-parity/state.yaml +++ /dev/null @@ -1,304 +0,0 @@ -# GoalBuddy v2 state.yaml -# Board truth lives here. goal.md is the editable charter; notes/ holds long receipts only. - -version: 2 - -goal: - title: "Athas ACP Zed Parity" - slug: "athas-acp-zed-parity" - kind: specific - tranche: "Zed-referenced ACP behavioral hardening: discover, patch, verify, audit, and continue through safe slices until the first parity tranche is complete." - status: done - intake: - original_request: "Make Athas ACP on par with Zed by studying /Users/sw/Code/zed, hardening ACP, and adding providers or harnesses where needed." - interpreted_outcome: "Athas ACP behavior is hardened against Zed's local implementation and verified with behavioral parity evidence for the selected core flows." - input_shape: specific - audience: "Athas users and maintainers" - authority: approved - proof_type: test - completion_proof: "Zed-referenced ACP gaps are mapped, selected Athas patches are implemented, relevant tests/checks pass, and a final audit confirms behavioral parity for the first tranche." - likely_misfire: "Stopping at comparison notes, provider brainstorming, or broad architecture commentary without verified Athas ACP behavior changes." - blind_spots_considered: - - "Parity could mean protocol correctness, provider breadth, UX behavior, or harness maturity." - - "Broad provider expansion can distract from hardening core ACP behavior." - - "Athas already has a foundation; the goal is targeted patching, not a rewrite." - - "Proof must be behavioral and verifiable, not just source-backed notes." - existing_plan_facts: - - "Use /Users/sw/Code/zed as the reference source." - - "Cross-reference Zed before changing Athas ACP." - - "Athas already has a good ACP foundation." - - "Patch weak angles in Athas rather than replacing everything." - - "Do not pursue broad provider expansion in the first tranche unless parity-critical evidence requires it." - -rules: - pm_owns_state: true - one_active_task: true - max_write_workers: 1 - no_implementation_without_worker_or_pm_task: true - no_completion_without_judge_or_pm_audit: true - planning_is_not_completion: true - queued_required_worker_blocks_completion: true - continuous_until_full_outcome: true - missing_input_or_credentials_do_not_stop_goal: true - preserve_and_validate_existing_plan: true - intake_misfire_must_be_audited: true - -agents: - scout: installed - worker: installed - judge: installed - -active_task: null - -tasks: - - id: T001 - type: scout - assignee: Scout - status: done - reasoning_hint: medium - objective: "Map Athas ACP against Zed ACP and identify the highest-leverage behavioral gaps for the first hardening tranche." - inputs: - - "/Users/sw/Code/athas" - - "/Users/sw/Code/zed" - - "Athas ACP-related crates, commands, frontend integration, tests, and docs discovered by search." - - "Zed ACP-related implementation, provider integration, lifecycle behavior, tests, and docs discovered by search." - constraints: - - "Read-only." - - "Do not edit implementation files." - - "Treat /Users/sw/Code/zed as the reference source." - - "Use concrete file paths, symbols, tests, and command evidence." - - "Separate protocol behavior gaps from provider breadth gaps." - - "Prefer targeted hardening candidates over rewrite proposals." - expected_output: - - "Athas ACP implementation map with file-path evidence." - - "Zed ACP implementation map with file-path evidence." - - "Behavioral parity matrix covering lifecycle, sessions, streaming, cancellation, tools, errors, and provider handoff where applicable." - - "Ranked Athas gap candidates with risk, likely files, and verification options." - - "Candidate Worker slices that avoid broad provider expansion unless required." - receipt: - result: done - note: "notes/T001-acp-zed-map.md" - summary: "Mapped Athas ACP against Zed. Top gap is cancellation semantics and pending tool finalization; session bootstrap tests, tool ordering, and usage events follow. Provider breadth should stay deferred unless parity-critical." - evidence: - - "/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs" - - "/Users/sw/Code/athas/crates/ai/src/acp/bridge_init.rs" - - "/Users/sw/Code/athas/crates/ai/src/acp/client.rs" - - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts" - - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" - - "/Users/sw/Code/zed/crates/acp_thread/src/connection.rs" - - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" - candidate_worker_slices: - - "W1: cancellation behavior proof and stale running/tool state patch." - - "W2: bridge_init session bootstrap fallback tests." - - "W3: UsageUpdate event mapping and routing test." - - - id: T002 - type: judge - assignee: Judge - status: done - reasoning_hint: high - objective: "Choose the first safe Athas ACP hardening slice from Scout evidence." - inputs: - - "T001 receipt or note." - - "Current dirty diff." - - "Athas project validation commands." - constraints: - - "Read-only." - - "Do not implement." - - "Require Zed evidence for any claimed parity gap." - - "Prefer the smallest slice with strong verification." - - "Do not choose broad provider expansion unless it is required for a parity-critical behavior." - expected_output: - - "Decision." - - "Why this slice is first." - - "Exact Worker objective." - - "allowed_files." - - "verify." - - "stop_if." - - "Deferred or blocked candidates." - receipt: - result: done - decision: "Select a narrowed W1 cancellation-state slice first: harden Athas frontend ACP cancellation so an active cancelled prompt finalizes visible running tool state instead of leaving stale running tool calls behind. Do not attempt the full backend prompt-turn rewrite yet." - why_this_slice_is_first: - - "Smallest parity-critical subset of Scout's top-ranked gap." - - "Zed cancels running turns and marks pending or in-progress tools cancelled; Athas currently force-stops frontend handling without visible tool finalization." - - "Implementable without provider expansion, live ACP adapters, Rust process lifecycle changes, or broad architecture work." - - "Verifiable with focused frontend tests plus typecheck." - evidence: - - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs" - - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs" - - "/Users/sw/Code/athas/crates/ai/src/acp/bridge.rs" - - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts" - - "/Users/sw/Code/athas/src/features/ai/lib/tool-call-state.ts" - - "/Users/sw/Code/athas/src/features/ai/types/acp.ts" - next_allowed_task: T003 - deferred_or_blocked_candidates: - - "Deferred: full backend prompt-turn lifecycle hardening." - - "Deferred: bridge_init session fallback tests." - - "Deferred: UsageUpdate/token-cost event mapping." - - "Blocked for this tranche unless Judge reopens: broad provider expansion, registry/MCP forwarding, or adding providers solely for breadth." - full_outcome_complete: false - - - id: T003 - type: worker - assignee: Worker - status: done - reasoning_hint: default - objective: "Add a focused frontend cancellation regression test and patch Athas ACP stream handling so cancelling an active ACP prompt finalizes any active visible tool calls and activity rows as cancelled/failed, prevents late events from resurrecting the cancelled turn, and still sends cancel_acp_prompt to the backend." - allowed_files: - - "src/features/ai/services/acp-stream-handler.ts" - - "src/features/ai/lib/tool-call-state.ts" - - "src/features/ai/lib/acp-event-timeline.ts" - - "src/features/ai/components/chat/ai-chat.tsx" - - "src/features/ai/components/messages/tool-call-display.tsx" - - "src/features/ai/types/acp.ts" - - "src/features/ai/tests/acp-cancellation.test.ts" - verify: - - "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" - - "bun typecheck" - - "If the test runner cannot target both files reliably, run: bunx vp test run src/features/ai/tests" - stop_if: - - "The fix requires editing crates/ai/src/acp/* or src-tauri/*; escalate back to Judge for a backend cancellation slice." - - "The fix requires a live ACP provider, external adapter, or broad fake ACP harness instead of local frontend mocks." - - "The fix requires provider expansion, registry/MCP work, new dependencies, or test/build config changes." - - "The Worker cannot produce a deterministic regression test showing active tool state is finalized on cancel." - - "Late-event handling is ambiguous enough that implementing it would silently discard valid future-turn events." - - "Verification fails twice after scoped fixes." - receipt: - result: done - changed_files: - - "src/features/ai/services/acp-stream-handler.ts" - - "src/features/ai/tests/acp-cancellation.test.ts" - commands: - - cmd: "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" - status: pass - - cmd: "bun typecheck" - status: pass - summary: "ACP cancellation now finalizes tracked active tools with a Cancelled error before cleanup, both for frontend force-stop cancellation and protocol prompt_complete(cancelled). Regression tests prove active tool finalization, backend cancel invocation, and late-event suppression after cancellation." - - - id: T004 - type: judge - assignee: Judge - status: done - reasoning_hint: high - objective: "Audit the first implemented ACP slice against Zed-referenced behavioral parity and decide whether to continue with another slice." - inputs: - - "T001 receipt or note." - - "T002 receipt." - - "T003 receipt." - - "Current dirty diff." - - "Last verification." - constraints: - - "Read-only." - - "Reject completion if the slice is only documented but not verified." - - "Reject completion if required ACP hardening work remains safely actionable." - - "If not complete, define the next smallest safe Scout, Judge, or Worker task." - expected_output: - - "slice_complete: true | false." - - "full_outcome_complete: true | false." - - "Behavioral parity evidence." - - "Missing evidence." - - "Next task if not complete." - receipt: - result: done - decision: "Approve the first implemented ACP cancellation slice as complete, but do not complete the tranche. Continue with the next safe ACP hardening slice." - slice_complete: true - full_outcome_complete: false - behavioral_parity_evidence: - - "/Users/sw/Code/zed/crates/acp_thread/src/acp_thread.rs marks pending/in-progress tools canceled when cancelled." - - "/Users/sw/Code/zed/crates/agent/src/tests/mod.rs includes cancellation tests." - - "/Users/sw/Code/athas/src/features/ai/services/acp-stream-handler.ts now finalizes active tools before cancelled completion and force-stop cleanup." - - "/Users/sw/Code/athas/src/features/ai/tests/acp-cancellation.test.ts verifies tool finalization, backend cancel invocation, and late-event suppression." - missing_evidence: - - "No Athas Rust/backend behavioral test yet proves session load/resume/new fallbacks or AuthRequired retry branches against the Zed connection/session model." - - "No Athas usage/token-cost ACP event mapping exists yet." - next_allowed_task: T005 - tranche_complete: false - - - id: T005 - type: worker - assignee: Worker - status: done - reasoning_hint: default - objective: "Add focused Rust behavioral tests for ACP session bootstrap fallback parity: requested session load success, session/load MethodNotFound falling through to session/resume when resume is supported, ResourceNotFound/MethodNotFound falling through to session/new, and AuthRequired retry behavior where a deterministic local test seam is practical." - allowed_files: - - "crates/ai/src/acp/bridge_init.rs" - verify: - - "cargo test -p athas-ai acp::bridge_init" - - "bun check:rust" - stop_if: - - "A deterministic local test cannot be added inside crates/ai/src/acp/bridge_init.rs without introducing a live ACP provider, external adapter, new dependency, or broad fake harness." - - "The tests require editing src-tauri, process lifecycle code, build config, Cargo.toml, or files outside allowed_files." - - "The implementation pressure turns into runtime behavior changes beyond small test seams needed inside bridge_init.rs." - - "The Worker discovers that the current bridge_init structure cannot be tested safely without a prior Scout/Judge task to design a narrower seam." - - "Verification fails twice after scoped fixes." - receipt: - result: done - changed_files: - - "crates/ai/src/acp/bridge_init.rs" - commands: - - cmd: "cargo test -p athas-ai acp::bridge_init" - status: pass - - cmd: "cargo check -p athas-ai" - status: pass - - cmd: "cargo fmt -p athas-ai --check" - status: pass - blocked_verification: - - cmd: "bun check:rust" - reason: "Repo-wide rustfmt check reports unrelated pre-existing diffs in crates/runtime/src/lib.rs, crates/tooling/src/registry.rs, and crates/tooling/src/types.rs, outside this task's allowed_files." - summary: "Added deterministic in-file tests and small decision seams for ACP session bootstrap fallback parity: load success preserves requested session id, MethodNotFound load tries resume only when supported, MethodNotFound/ResourceNotFound load/resume fall back to new session, AuthRequired is classified for retry before fallbacks, and new-session bootstrap preserves the agent-created id." - - - id: T999 - type: judge - assignee: Judge - status: done - reasoning_hint: high - objective: "Final audit for the first Athas ACP Zed parity tranche." - inputs: - - "All done task receipts." - - "Zed reference evidence." - - "Last verification." - - "Current dirty diff." - constraints: - - "Read-only." - - "Do not implement." - - "Reject completion if required Worker work is still queued or active." - - "Reject completion if the evidence does not map Athas behavior back to Zed-referenced parity." - - "Reject completion if the result is only a comparison artifact with no verified Athas improvement." - expected_output: - - "complete | not_complete." - - "full_outcome_complete: true | false." - - "Completed parity behaviors." - - "Missing evidence." - - "Next task if not complete." - receipt: - result: done - decision: complete - summary: "Approve T999. The first Athas ACP Zed parity tranche is complete enough to close: it produced Zed-referenced behavioral changes, not just comparison notes, and the implemented slices have focused regression coverage plus scoped frontend, TypeScript, Rust test/check, and Rust format verification." - full_outcome_complete: true - tranche_complete: true - completed_parity_behaviors: - - "Cancellation parity: Athas now finalizes active ACP tools with a Cancelled error before cleanup on protocol prompt_complete(cancelled) and local forceStop/user cancel, matching Zed's pending/in-progress tool cancellation behavior." - - "Cancellation proof: acp-cancellation.test.ts verifies active tool finalization, backend cancel_acp_prompt invocation, completion callback behavior, and late-event suppression after force-stop cancellation." - - "Session bootstrap parity: bridge_init.rs now has explicit decision/build seams covering requested session load success, MethodNotFound load-to-resume only when resume is supported, MethodNotFound/ResourceNotFound fallback to new, AuthRequired retry classification before fallbacks, and new-session id preservation." - - "Session bootstrap proof: Rust unit tests cover load id preservation, load/resume fallback gates, AuthRequired classification, and new-session id preservation." - missing_evidence: - - "No blocking missing evidence for this first tranche." - - "Deferred parity gap: Zed has UsageUpdate/token-cost handling and tests; Athas usage/token-cost ACP event mapping remains a valid later parity slice." - - "Verification caveat: bun check:rust remains blocked by unrelated repo-wide rustfmt diffs outside this tranche's allowed files." - - "No live ACP provider smoke was included; acceptable because this tranche used deterministic local behavioral tests and did not require provider expansion." - next_task_if_not_complete: null - -checks: - dirty_fingerprint: unknown - last_verification: - result: scoped_pass_with_unrelated_blocker - task: T005 - commands: - - "bunx vp test run src/features/ai/tests/acp-cancellation.test.ts src/features/ai/tests/acp-activity-groups.test.ts" - - "bunx vp test run src/features/ai/tests" - - "bun typecheck" - - "cargo test -p athas-ai acp::bridge_init" - - "cargo check -p athas-ai" - - "cargo fmt -p athas-ai --check" - - "bun check:rust blocked by unrelated repo-wide rustfmt diffs outside T005 allowed_files"