diff --git a/src/apps/cli/src/agent/core_adapter.rs b/src/apps/cli/src/agent/core_adapter.rs index 6858dfa43..9d48547a3 100644 --- a/src/apps/cli/src/agent/core_adapter.rs +++ b/src/apps/cli/src/agent/core_adapter.rs @@ -110,6 +110,7 @@ impl Agent for CoreAgentAdapter { self.agent_type.clone(), None, DialogSubmissionPolicy::for_source(DialogTriggerSource::Cli), + None, ) .await?; diff --git a/src/apps/desktop/src/api/agentic_api.rs b/src/apps/desktop/src/api/agentic_api.rs index a9956285b..094ee99a1 100644 --- a/src/apps/desktop/src/api/agentic_api.rs +++ b/src/apps/desktop/src/api/agentic_api.rs @@ -13,6 +13,10 @@ use bitfun_core::agentic::coordination::{ SubagentTimeoutAction, }; use bitfun_core::agentic::core::*; +use bitfun_core::agentic::deep_review_policy::{ + apply_deep_review_queue_control, default_review_team_definition, DeepReviewQueueControlAction, + ReviewTeamDefinition, +}; use bitfun_core::agentic::image_analysis::ImageContextData; use bitfun_core::agentic::tools::image_context::get_image_context; #[derive(Debug, Deserialize)] @@ -84,6 +88,8 @@ pub struct StartDialogTurnRequest { pub turn_id: Option, #[serde(default)] pub image_contexts: Option>, + #[serde(default)] + pub user_message_metadata: Option, } #[derive(Debug, Serialize)] @@ -176,6 +182,37 @@ pub struct SteerDialogTurnResponse { pub steering_id: String, } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ControlDeepReviewQueueRequest { + pub session_id: String, + pub dialog_turn_id: String, + pub tool_id: String, + pub action: ControlDeepReviewQueueActionDTO, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ControlDeepReviewQueueActionDTO { + Pause, + Continue, + Cancel, + SkipOptional, +} + +impl From for DeepReviewQueueControlAction { + fn from(value: ControlDeepReviewQueueActionDTO) -> Self { + match value { + ControlDeepReviewQueueActionDTO::Pause => DeepReviewQueueControlAction::Pause, + 
ControlDeepReviewQueueActionDTO::Continue => DeepReviewQueueControlAction::Continue, + ControlDeepReviewQueueActionDTO::Cancel => DeepReviewQueueControlAction::Cancel, + ControlDeepReviewQueueActionDTO::SkipOptional => { + DeepReviewQueueControlAction::SkipOptional + } + } + } +} + #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub struct CancelSessionRequest { @@ -416,6 +453,7 @@ pub async fn start_dialog_turn( workspace_path, turn_id, image_contexts, + user_message_metadata, } = request; let policy = DialogSubmissionPolicy::for_source(DialogTriggerSource::DesktopUi); @@ -439,6 +477,7 @@ pub async fn start_dialog_turn( workspace_path, policy, None, + user_message_metadata, resolved_images, ) .await @@ -669,6 +708,28 @@ pub async fn steer_dialog_turn( }) } +#[tauri::command] +pub async fn control_deep_review_queue( + request: ControlDeepReviewQueueRequest, +) -> Result<(), String> { + if request.session_id.trim().is_empty() { + return Err("Missing session_id".to_string()); + } + if request.dialog_turn_id.trim().is_empty() { + return Err("Missing dialog_turn_id".to_string()); + } + if request.tool_id.trim().is_empty() { + return Err("Missing tool_id".to_string()); + } + + apply_deep_review_queue_control( + &request.dialog_turn_id, + &request.tool_id, + request.action.into(), + ); + Ok(()) +} + #[tauri::command] pub async fn cancel_session( coordinator: State<'_, Arc>, @@ -896,6 +957,11 @@ pub async fn get_available_modes(state: State<'_, AppState>) -> Result Result { + Ok(default_review_team_definition()) +} + #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] pub struct ModeInfoDTO { diff --git a/src/apps/desktop/src/api/git_api.rs b/src/apps/desktop/src/api/git_api.rs index 3db1b4b24..024f9351d 100644 --- a/src/apps/desktop/src/api/git_api.rs +++ b/src/apps/desktop/src/api/git_api.rs @@ -3,8 +3,8 @@ use crate::api::app_state::AppState; use bitfun_core::infrastructure::storage::StorageOptions; use bitfun_core::service::git::{ - 
GitAddParams, GitCommitParams, GitDiffParams, GitLogParams, GitPullParams, GitPushParams, - GitService, + GitAddParams, GitChangedFile, GitChangedFilesParams, GitCommitParams, GitDiffParams, + GitLogParams, GitPullParams, GitPushParams, GitService, }; use bitfun_core::service::git::{ GitBranch, GitCommit, GitOperationResult, GitRepository, GitStatus, @@ -91,6 +91,13 @@ pub struct GitDiffRequest { pub params: GitDiffParams, } +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct GitChangedFilesRequest { + pub repository_path: String, + pub params: GitChangedFilesParams, +} + #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase")] pub struct GitResetFilesRequest { @@ -371,6 +378,24 @@ pub async fn git_get_diff( }) } +#[tauri::command] +pub async fn git_get_changed_files( + _state: State<'_, AppState>, + request: GitChangedFilesRequest, +) -> Result, String> { + info!( + "Getting changed Git files for repository: {}", + request.repository_path + ); + + GitService::get_changed_files(&request.repository_path, &request.params) + .await + .map_err(|e| { + error!("Failed to get changed Git files: {}", e); + e.to_string() + }) +} + #[tauri::command] pub async fn git_reset_files( _state: State<'_, AppState>, diff --git a/src/apps/desktop/src/lib.rs b/src/apps/desktop/src/lib.rs index 2579bda61..232224699 100644 --- a/src/apps/desktop/src/lib.rs +++ b/src/apps/desktop/src/lib.rs @@ -561,6 +561,7 @@ pub async fn run() { api::agentic_api::ensure_assistant_bootstrap, api::agentic_api::cancel_dialog_turn, api::agentic_api::steer_dialog_turn, + api::agentic_api::control_deep_review_queue, api::agentic_api::cancel_session, api::agentic_api::set_subagent_timeout, api::agentic_api::delete_session, @@ -572,6 +573,7 @@ pub async fn run() { api::agentic_api::cancel_tool, api::agentic_api::generate_session_title, api::agentic_api::get_available_modes, + api::agentic_api::get_default_review_team_definition, api::btw_api::btw_ask_stream, 
api::btw_api::btw_cancel, api::editor_ai_api::editor_ai_stream, @@ -693,6 +695,7 @@ pub async fn run() { git_create_branch, git_delete_branch, git_get_diff, + git_get_changed_files, git_reset_files, git_reset_to_commit, git_get_file_content, @@ -1077,12 +1080,29 @@ async fn init_agentic_system() -> anyhow::Result<( tool_pipeline.clone(), )); + // Get execution config from global settings + let exec_config = match bitfun_core::service::config::get_global_config_service().await { + Ok(config_service) => { + match config_service + .get_config::(None) + .await + { + Ok(global_config) => execution::ExecutionEngineConfig { + max_rounds: global_config.ai.max_rounds, + ..Default::default() + }, + Err(_) => Default::default(), + } + } + Err(_) => Default::default(), + }; + let execution_engine = Arc::new(execution::ExecutionEngine::new( round_executor, event_queue.clone(), session_manager.clone(), context_compressor, - execution::ExecutionEngineConfig::default(), + exec_config, )); let coordinator = Arc::new(coordination::ConversationCoordinator::new( diff --git a/src/apps/server/src/rpc_dispatcher.rs b/src/apps/server/src/rpc_dispatcher.rs index 068f4f6e7..cc71a040c 100644 --- a/src/apps/server/src/rpc_dispatcher.rs +++ b/src/apps/server/src/rpc_dispatcher.rs @@ -9,6 +9,9 @@ use anyhow::{Result, anyhow}; use bitfun_core::agentic::agents::SubAgentSource; use bitfun_core::agentic::coordination::{DialogSubmissionPolicy, DialogTriggerSource}; use bitfun_core::agentic::core::SessionConfig; +use bitfun_core::agentic::deep_review_policy::{ + DeepReviewQueueControlAction, apply_deep_review_queue_control, +}; use bitfun_core::service::config::types::SubAgentConfig; use bitfun_core::service::i18n::{LocaleId, LocaleMetadata, sync_global_i18n_service_locale}; use std::collections::HashMap; @@ -380,6 +383,36 @@ pub async fn dispatch( .map_err(|e| anyhow!("{}", e))?; Ok(serde_json::json!({ "success": true })) } + "control_deep_review_queue" => { + let request = extract_request(¶ms)?; + 
let session_id = get_string(&request, "sessionId")?; + let dialog_turn_id = get_string(&request, "dialogTurnId")?; + let tool_id = get_string(&request, "toolId")?; + let action_raw = get_string(&request, "action")?; + let action = match action_raw.as_str() { + "pause" => DeepReviewQueueControlAction::Pause, + "continue" => DeepReviewQueueControlAction::Continue, + "cancel" => DeepReviewQueueControlAction::Cancel, + "skip_optional" => DeepReviewQueueControlAction::SkipOptional, + other => { + return Err(anyhow!( + "Invalid DeepReview queue control action: {}", + other + )); + } + }; + if session_id.trim().is_empty() { + return Err(anyhow!("Missing sessionId")); + } + if dialog_turn_id.trim().is_empty() { + return Err(anyhow!("Missing dialogTurnId")); + } + if tool_id.trim().is_empty() { + return Err(anyhow!("Missing toolId")); + } + apply_deep_review_queue_control(&dialog_turn_id, &tool_id, action); + Ok(serde_json::json!({ "success": true })) + } "cancel_session" => { let request = extract_request(¶ms)?; let session_id = get_string(&request, "sessionId")?; diff --git a/src/crates/acp/src/runtime/prompt.rs b/src/crates/acp/src/runtime/prompt.rs index cdea66fa9..9dba30b15 100644 --- a/src/crates/acp/src/runtime/prompt.rs +++ b/src/crates/acp/src/runtime/prompt.rs @@ -51,6 +51,7 @@ impl BitfunAcpRuntime { acp_session.mode_id.clone(), Some(acp_session.cwd.clone()), DialogSubmissionPolicy::for_source(DialogTriggerSource::Cli), + None, ) .await .map_err(Self::internal_error)?; @@ -66,6 +67,7 @@ impl BitfunAcpRuntime { acp_session.mode_id.clone(), Some(acp_session.cwd.clone()), DialogSubmissionPolicy::for_source(DialogTriggerSource::Cli), + None, ) .await .map_err(Self::internal_error)?; diff --git a/src/crates/agent-stream/src/lib.rs b/src/crates/agent-stream/src/lib.rs index cbfbc9bec..153d64330 100644 --- a/src/crates/agent-stream/src/lib.rs +++ b/src/crates/agent-stream/src/lib.rs @@ -223,6 +223,11 @@ impl StreamProcessError { } } +#[derive(Debug, Clone, Copy, 
Default)] +pub struct StreamProcessOptions { + pub recover_partial_on_cancel: bool, +} + /// Stream processing context, encapsulates state during stream processing struct StreamContext { session_id: String, @@ -782,6 +787,32 @@ impl StreamProcessor { /// * `cancellation_token` - Cancellation token #[allow(clippy::too_many_arguments)] pub async fn process_stream( + &self, + stream: futures::stream::BoxStream<'static, Result>, + watchdog_timeout: Option, + raw_sse_rx: Option>, + session_id: String, + dialog_turn_id: String, + round_id: String, + subagent_parent_info: Option, + cancellation_token: &tokio_util::sync::CancellationToken, + ) -> Result { + self.process_stream_with_options( + stream, + watchdog_timeout, + raw_sse_rx, + session_id, + dialog_turn_id, + round_id, + subagent_parent_info, + cancellation_token, + StreamProcessOptions::default(), + ) + .await + } + + #[allow(clippy::too_many_arguments)] + pub async fn process_stream_with_options( &self, mut stream: futures::stream::BoxStream<'static, Result>, watchdog_timeout: Option, @@ -791,6 +822,7 @@ impl StreamProcessor { round_id: String, subagent_parent_info: Option, cancellation_token: &tokio_util::sync::CancellationToken, + options: StreamProcessOptions, ) -> Result { let mut ctx = StreamContext::new(session_id, dialog_turn_id, round_id, subagent_parent_info); @@ -832,6 +864,14 @@ impl StreamProcessor { // Check cancellation token _ = cancellation_token.cancelled() => { debug!("Cancel token detected, stopping stream processing: session_id={}", ctx.session_id); + if options.recover_partial_on_cancel && ctx.can_recover_as_partial_result() { + self.send_thinking_end_if_needed(&mut ctx).await; + ctx.force_finish_pending_tool_calls(); + ctx.partial_recovery_reason = + Some("Stream processing cancelled after partial output".to_string()); + self.log_stream_result(&ctx); + break; + } self.graceful_shutdown_from_ctx(&mut ctx, "User cancelled stream processing".to_string()).await; return 
Err(StreamProcessError::new( StreamProcessorError::Cancelled("Stream processing cancelled".to_string()), @@ -984,7 +1024,7 @@ impl StreamProcessor { #[cfg(test)] mod tests { - use super::{StreamEventSink, StreamProcessor}; + use super::{StreamEventSink, StreamProcessOptions, StreamProcessor}; use bitfun_ai_adapters::{UnifiedResponse, UnifiedTokenUsage, UnifiedToolCall}; use bitfun_events::{AgenticEvent, AgenticEventPriority as EventPriority}; use futures::StreamExt; @@ -1024,6 +1064,47 @@ mod tests { } } + #[tokio::test] + async fn recovers_partial_text_when_cancellation_allows_partial_recovery() { + let processor = build_processor(); + let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); + tx.send(Ok(UnifiedResponse { + text: Some("Partial reviewer evidence.".to_string()), + ..Default::default() + })) + .expect("send partial chunk"); + let _keep_stream_open = tx; + let cancellation_token = CancellationToken::new(); + let cancel_clone = cancellation_token.clone(); + tokio::spawn(async move { + tokio::time::sleep(Duration::from_millis(10)).await; + cancel_clone.cancel(); + }); + + let result = processor + .process_stream_with_options( + tokio_stream::wrappers::UnboundedReceiverStream::new(rx).boxed(), + None, + None, + "session_1".to_string(), + "turn_1".to_string(), + "round_1".to_string(), + None, + &cancellation_token, + StreamProcessOptions { + recover_partial_on_cancel: true, + }, + ) + .await + .expect("partial stream result"); + + assert_eq!(result.full_text, "Partial reviewer evidence."); + assert!(result + .partial_recovery_reason + .as_deref() + .is_some_and(|reason| reason.contains("cancelled"))); + } + #[tokio::test] async fn keeps_collecting_tool_args_across_usage_chunks() { let processor = build_processor(); diff --git a/src/crates/core/src/agentic/agents/prompts/deep_review_agent.md b/src/crates/core/src/agentic/agents/prompts/deep_review_agent.md index ea2338336..1b2058591 100644 --- a/src/crates/core/src/agentic/agents/prompts/deep_review_agent.md 
+++ b/src/crates/core/src/agentic/agents/prompts/deep_review_agent.md @@ -27,6 +27,8 @@ The user request may also include a **configured team manifest** with additional The configured manifest may also include an **execution policy** with reviewer timeout, judge timeout, a team review strategy, per-reviewer strategy overrides, preferred reviewer `model_id` values, prompt directives, and file-split parameters. Treat that policy and roster as authoritative. +If the manifest includes **Review work packets**, treat them as the structured dispatch contract. Each packet defines the reviewer, assigned scope, allowed tools, timeout, required output fields, model, and prompt directive for one reviewer or judge task. Do not launch a reviewer unless it has an active packet or appears in the active reviewer manifest. + ### File splitting for large review targets When the review target contains many files, running a single reviewer instance per role may cause timeouts or shallow coverage. The execution policy provides two fields to control this: @@ -38,7 +40,7 @@ When the file count exceeds `reviewer_file_split_threshold` and `max_same_role_i 1. Divide the file list into roughly equal groups (one group per same-role instance, up to `max_same_role_instances`). 2. Launch multiple Task calls with the **same `subagent_type`** in the **same parallel message**, each assigned a distinct file group. -3. In each Task `description`, include a group identifier so the user can track them in the UI (e.g. "Security review [group 1/3]", "Security review [group 2/3]"). +3. In each Task `description`, include a group identifier and packet id so the user and judge can track them in the UI (e.g. "Security review [group 1/3] [packet reviewer:ReviewSecurity:group-1-of-3]", "Security review [group 2/3] [packet reviewer:ReviewSecurity:group-2-of-3]"). 4. 
In each reviewer Task `prompt`, clearly state which files this instance is responsible for and that it should **not** inspect files outside its assigned group unless a cross-file dependency is strongly suspected. All same-role instances from a single split must be launched in the **same assistant message** to maximize parallelism. @@ -83,6 +85,7 @@ You MUST NOT: Track one reviewer record for every reviewer that was scheduled. Use these status labels conservatively: - `completed` +- `partial_timeout` - `timed_out` - `cancelled_by_user` - `failed` @@ -92,6 +95,11 @@ If a reviewer or the judge fails, times out, or is cancelled: - keep going with the remaining evidence - record the status in `reviewers` +- if the Task result reports `partial_timeout`, copy the useful partial text into `reviewers[].partial_output` and summarize the confidence impact in `report_sections.coverage_notes` +- if the reviewer reports its packet id, copy it into `reviewers[].packet_id` and set `reviewers[].packet_status_source = "reported"` +- if the reviewer omits `packet_id` but the Task was launched from a work packet, infer `reviewers[].packet_id` from the Task description or the matching work packet and set `reviewers[].packet_status_source = "inferred"` +- if no packet id can be reported or inferred, set `reviewers[].packet_status_source = "missing"` and summarize the confidence impact in `report_sections.coverage_notes` +- retry a failed or timed-out reviewer only when useful evidence is missing, and only within the configured retry budget; retry the same `subagent_type` with `retry = true`, a reduced scope, a downgraded strategy when possible, and a shorter timeout - lower confidence as needed - never drop the final report just because one subagent stopped @@ -124,8 +132,11 @@ If a configured reviewer entry provides `model_id`, pass `model_id` with that va If the configured team manifest provides a preferred display label or nickname for a reviewer, reuse that nickname in the Task 
`description` so the user can easily track each reviewer in the session UI. +Every reviewer Task `description` should also include the work packet id in square brackets, for example `Security review [packet reviewer:ReviewSecurity]` or `Security review [group 1/3] [packet reviewer:ReviewSecurity:group-1-of-3]`. This gives the judge a deterministic fallback when the reviewer forgets to echo `packet_id`. + Each reviewer Task prompt must include: +- the matching work packet verbatim, including `packet_id`, `assigned_scope`, `allowed_tools`, `timeout_seconds`, and `required_output_fields` - the exact review target (for split instances: the assigned file group only) - any user-provided focus text - the reviewer-specific strategy from the configured manifest (`quick`, `normal`, or `deep`) and its exact `prompt_directive` @@ -133,7 +144,9 @@ Each reviewer Task prompt must include: - a request for concrete findings only - a strict output format that is easy to verify later - for split instances: an explicit list of the files this instance is responsible for, and an instruction not to review files outside the assigned group unless a cross-file dependency is critical -- if `reviewer_timeout_seconds > 0`, a time-awareness reminder: "You have a strict timeout. Prioritize: (1) Inspect the diff first, then read only files the diff directly references. (2) Confirm or dismiss each hypothesis before opening a new investigation path. (3) Write your findings early — a partial report with confirmed findings is more valuable than no report at all." +- an instruction to echo the work packet `packet_id` and set `status` in the response +- an instruction that missing `packet_id` will be inferred by the parent only as a lower-confidence fallback, not treated as a successful reported packet +- if `reviewer_timeout_seconds > 0`, a time-awareness reminder: "You have a strict timeout. Prioritize: (1) Inspect the diff first, then read only files the diff directly references. 
(2) Confirm or dismiss each hypothesis before opening a new investigation path. (3) Write your findings early; a partial report with confirmed findings is more valuable than no report at all." Strategy guidance (fallback only; the configured `prompt_directive` is the source of truth): @@ -161,6 +174,7 @@ Role-specific strategy amplification (append to the reviewer Task prompt when th After the reviewer batch finishes, launch `ReviewJudge` with: +- the matching judge work packet verbatim - the same review target - the full reviewer outputs from every reviewer that ran, including timeout/cancel/failure notes - if file splitting was used, include outputs from **all** same-role instances and label each by group (e.g. "Security Reviewer [group 1/3]") @@ -179,6 +193,8 @@ The judge must explicitly call out: - likely false positives - optimization advice that is too risky or directionally wrong - findings where the reviewer's evidence does not support their conclusion +- reviewer outputs that are missing `packet_id` or `status`; treat those as lower confidence rather than discarding the whole review +- reviewer outputs whose packet id was inferred from scheduling metadata rather than reported by the reviewer - which findings should survive into the final report ### Phase 4: Report and wait for user approval @@ -187,7 +203,14 @@ After the quality gate finishes: 1. Submit the final structured report via `submit_code_review`. 2. Include all validated findings, unresolved items, and concrete next steps in `remediation_plan`. -3. When enough information exists, also populate `report_sections` so the UI can present a compact, multi-dimensional report: +3. For each `reviewers[]` entry, include `packet_id` when reported or inferable and set `packet_status_source` to `reported`, `inferred`, or `missing`. +4. 
Populate `reliability_signals` with structured status signals when relevant: + - `context_pressure`: large target, constrained token budget, or reduced fan-out affected coverage. + - `compression_preserved`: compression or compaction preserved key facts used in the final decision. + - `partial_reviewer`: one or more reviewers timed out or were cancelled after producing useful partial evidence. + - `user_decision`: an item needs user/product judgment before remediation. + Use `severity = "info" | "warning" | "action"`, include `count` when useful, and set `source = "runtime" | "manifest" | "report" | "inferred"`. +5. When enough information exists, also populate `report_sections` so the UI can present a compact, multi-dimensional report: - `executive_summary`: 1-3 concise bullets with the final decision and most important risk. - `remediation_groups.must_fix`: required correctness/security/regression fixes. - `remediation_groups.should_improve`: non-blocking cleanup or quality improvements. @@ -200,8 +223,8 @@ After the quality gate finishes: - `remediation_groups.verification`: focused verification or follow-up review steps. - `strength_groups`: positive observations grouped under `architecture`, `maintainability`, `tests`, `security`, `performance`, `user_experience`, or `other`. - `coverage_notes`: confidence, timeout/cancel/failure, scope, or manual follow-up notes. -4. Do **not** modify any files during the review phase. -5. Wait for explicit user approval before starting any remediation work. +6. Do **not** modify any files during the review phase. +7. Wait for explicit user approval before starting any remediation work. 
### Phase 5: Remediation (only when explicitly instructed) @@ -224,7 +247,11 @@ Your structured result MUST include: - `review_mode = "deep"` - `review_scope` - `reviewers` with one entry for every reviewer that was scheduled, including optional extra reviewers and the judge when relevant +- `reviewers[].packet_id` when reported by the reviewer or inferable from the scheduled packet +- `reviewers[].packet_status_source` as `reported`, `inferred`, or `missing` +- for a timed-out reviewer with captured output, set `status = "partial_timeout"` and include the captured evidence in `partial_output` - `remediation_plan` with concrete next steps, including unresolved items or manual follow-up when needed +- `reliability_signals` with structured context pressure, compression preservation, partial reviewer, and user decision signals when any of those apply - `report_sections` when the final report has enough content to split remediation, strengths, and coverage into the dimensions above Issue writing rules: diff --git a/src/crates/core/src/agentic/agents/prompts/review_architecture_agent.md b/src/crates/core/src/agentic/agents/prompts/review_architecture_agent.md index adb873d7d..7ee303213 100644 --- a/src/crates/core/src/agentic/agents/prompts/review_architecture_agent.md +++ b/src/crates/core/src/agentic/agents/prompts/review_architecture_agent.md @@ -59,6 +59,10 @@ Never modify files or git state. Return markdown only, using this exact structure: +## Packet +packet_id: +status: completed + ## Reviewer Architecture Reviewer diff --git a/src/crates/core/src/agentic/agents/prompts/review_business_logic_agent.md b/src/crates/core/src/agentic/agents/prompts/review_business_logic_agent.md index c05c7e1b7..0669ed784 100644 --- a/src/crates/core/src/agentic/agents/prompts/review_business_logic_agent.md +++ b/src/crates/core/src/agentic/agents/prompts/review_business_logic_agent.md @@ -58,6 +58,10 @@ Never modify files or git state. 
Return markdown only, using this exact structure: +## Packet +packet_id: +status: completed + ## Reviewer Business Logic Reviewer diff --git a/src/crates/core/src/agentic/agents/prompts/review_frontend_agent.md b/src/crates/core/src/agentic/agents/prompts/review_frontend_agent.md index 4e868efa3..e3a180a1c 100644 --- a/src/crates/core/src/agentic/agents/prompts/review_frontend_agent.md +++ b/src/crates/core/src/agentic/agents/prompts/review_frontend_agent.md @@ -64,6 +64,10 @@ Never modify files or git state. Return markdown only, using this exact structure: +## Packet +packet_id: +status: completed + ## Reviewer Frontend Reviewer diff --git a/src/crates/core/src/agentic/agents/prompts/review_performance_agent.md b/src/crates/core/src/agentic/agents/prompts/review_performance_agent.md index 0cfb81f63..719e29fd7 100644 --- a/src/crates/core/src/agentic/agents/prompts/review_performance_agent.md +++ b/src/crates/core/src/agentic/agents/prompts/review_performance_agent.md @@ -59,6 +59,10 @@ Never modify files or git state. Return markdown only, using this exact structure: +## Packet +packet_id: +status: completed + ## Reviewer Performance Reviewer diff --git a/src/crates/core/src/agentic/agents/prompts/review_quality_gate_agent.md b/src/crates/core/src/agentic/agents/prompts/review_quality_gate_agent.md index 2668d243e..d95cd6893 100644 --- a/src/crates/core/src/agentic/agents/prompts/review_quality_gate_agent.md +++ b/src/crates/core/src/agentic/agents/prompts/review_quality_gate_agent.md @@ -70,6 +70,10 @@ Never modify files or git state. 
Return markdown only, using this exact structure: +## Packet +packet_id: +status: completed + ## Reviewer Review Quality Inspector diff --git a/src/crates/core/src/agentic/agents/prompts/review_security_agent.md b/src/crates/core/src/agentic/agents/prompts/review_security_agent.md index 02b111374..3cf7b2e5d 100644 --- a/src/crates/core/src/agentic/agents/prompts/review_security_agent.md +++ b/src/crates/core/src/agentic/agents/prompts/review_security_agent.md @@ -59,6 +59,10 @@ Never modify files or git state. Return markdown only, using this exact structure: +## Packet +packet_id: +status: completed + ## Reviewer Security Reviewer diff --git a/src/crates/core/src/agentic/agents/registry.rs b/src/crates/core/src/agentic/agents/registry.rs index 7e44df435..c80fce4ab 100644 --- a/src/crates/core/src/agentic/agents/registry.rs +++ b/src/crates/core/src/agentic/agents/registry.rs @@ -1362,6 +1362,19 @@ mod tests { } } + #[tokio::test] + async fn frontend_reviewer_is_registered_as_review_subagent() { + let registry = AgentRegistry::new(); + let subagents = registry.get_subagents_info(None).await; + let frontend = subagents + .iter() + .find(|agent| agent.id == "ReviewFrontend") + .expect("ReviewFrontend should be registered as a subagent"); + + assert!(frontend.is_review); + assert!(frontend.is_readonly); + } + #[test] fn built_in_deep_review_reviewers_are_marked_as_review_agents() { let registry = AgentRegistry::new(); diff --git a/src/crates/core/src/agentic/context_profile.rs b/src/crates/core/src/agentic/context_profile.rs new file mode 100644 index 000000000..4a98e2d09 --- /dev/null +++ b/src/crates/core/src/agentic/context_profile.rs @@ -0,0 +1,344 @@ +//! Adaptive context profile policy. +//! +//! Profiles keep context behavior aligned with the shape of the agent workload +//! without exposing more knobs to the UI. 
+ +use crate::agentic::session::compression::microcompact::MicrocompactConfig; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum ContextProfile { + LongTask, + Conversation, +} + +impl ContextProfile { + pub fn for_agent_type(agent_type: &str) -> Self { + Self::for_agent_context(agent_type, false) + } + + pub fn for_agent_context(agent_type: &str, is_review_subagent: bool) -> Self { + if is_review_subagent || is_long_task_agent(agent_type) { + Self::LongTask + } else { + Self::Conversation + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ModelCapabilityProfile { + Standard, + Weak, +} + +impl ModelCapabilityProfile { + pub fn from_model_id(model_id: Option<&str>) -> Self { + let Some(model_id) = model_id.map(str::trim).filter(|id| !id.is_empty()) else { + return Self::Standard; + }; + let normalized = model_id.to_ascii_lowercase(); + if matches!(normalized.as_str(), "auto" | "fast" | "primary") { + return Self::Standard; + } + + // Weak model detection: match suffix-based markers (e.g., "gpt-4o-mini", + // "gemini-1.5-flash") and exact markers (e.g., "haiku", "mini"). + // Avoid false positives from substring matches (e.g., "gemini-pro" should + // NOT match "mini" inside "gemini"). + let weak_suffixes = ["-haiku", "-mini", "-small", "-lite", "-flash", "-nano"]; + let weak_exact = ["haiku", "mini", "small", "lite", "flash", "nano"]; + // Also match known weak model name patterns where the marker appears + // mid-string but is a genuine weak model (e.g., "claude-3-haiku-20240307"). 
+ let weak_mid_patterns = [ + "-haiku-", "-mini-", "-small-", "-lite-", "-flash-", "-nano-", + ]; + if weak_suffixes.iter().any(|s| normalized.ends_with(s)) + || weak_exact.iter().any(|e| normalized == *e) + || weak_mid_patterns.iter().any(|p| normalized.contains(p)) + { + Self::Weak + } else { + Self::Standard + } + } + + pub fn from_resolved_model(resolved_model_id: &str, provider_model_name: &str) -> Self { + let resolved = Self::from_model_id(Some(resolved_model_id)); + if resolved == Self::Weak { + resolved + } else { + Self::from_model_id(Some(provider_model_name)) + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct ContextProfilePolicy { + pub profile: ContextProfile, + pub microcompact_keep_recent: usize, + pub microcompact_trigger_ratio: f32, + pub compression_contract_limit: usize, + pub subagent_concurrency_cap: usize, + pub repeated_tool_signature_threshold: usize, + pub consecutive_failed_command_threshold: usize, +} + +impl ContextProfilePolicy { + pub fn for_agent_context( + agent_type: &str, + is_review_subagent: bool, + model_capability: ModelCapabilityProfile, + ) -> Self { + let profile = ContextProfile::for_agent_context(agent_type, is_review_subagent); + let mut policy = match profile { + ContextProfile::LongTask => Self::long_task(), + ContextProfile::Conversation => Self::conversation(), + }; + + if model_capability == ModelCapabilityProfile::Weak { + policy.apply_weak_model_override(); + } + + policy + } + + pub fn for_agent_context_and_model( + agent_type: &str, + is_review_subagent: bool, + resolved_model_id: &str, + provider_model_name: &str, + ) -> Self { + Self::for_agent_context( + agent_type, + is_review_subagent, + ModelCapabilityProfile::from_resolved_model(resolved_model_id, provider_model_name), + ) + } + + pub fn for_subagent_context_and_models( + agent_type: &str, + is_review_subagent: bool, + subagent_model_id: Option<&str>, + parent_agent_type: Option<&str>, + parent_is_review_subagent: bool, + parent_model_id: 
Option<&str>, + ) -> Self { + let child_profile = ContextProfile::for_agent_context(agent_type, is_review_subagent); + let parent_profile = parent_agent_type + .map(|agent_type| { + ContextProfile::for_agent_context(agent_type, parent_is_review_subagent) + }) + .unwrap_or(ContextProfile::Conversation); + let profile = if child_profile == ContextProfile::LongTask + || parent_profile == ContextProfile::LongTask + { + ContextProfile::LongTask + } else { + ContextProfile::Conversation + }; + let model_capability = subagent_model_id + .map(str::trim) + .filter(|model_id| !model_id.is_empty()) + .map(|model_id| ModelCapabilityProfile::from_model_id(Some(model_id))) + .or_else(|| { + parent_model_id + .map(str::trim) + .filter(|model_id| !model_id.is_empty()) + .map(|model_id| ModelCapabilityProfile::from_model_id(Some(model_id))) + }) + .unwrap_or(ModelCapabilityProfile::Standard); + + let mut policy = match profile { + ContextProfile::LongTask => Self::long_task(), + ContextProfile::Conversation => Self::conversation(), + }; + if model_capability == ModelCapabilityProfile::Weak { + policy.apply_weak_model_override(); + } + policy + } + + pub fn microcompact_config(&self) -> MicrocompactConfig { + MicrocompactConfig { + keep_recent: self.microcompact_keep_recent, + trigger_ratio: self.microcompact_trigger_ratio, + } + } + + pub fn effective_subagent_max_concurrency(&self, configured: usize) -> usize { + configured.clamp(1, self.subagent_concurrency_cap) + } + + pub fn effective_loop_threshold(&self, configured: usize) -> usize { + configured + .max(1) + .min(self.repeated_tool_signature_threshold.max(1)) + } + + pub fn has_repeated_tool_loop(&self, repeated_tool_signature_count: usize) -> bool { + repeated_tool_signature_count >= self.repeated_tool_signature_threshold.max(1) + } + + pub fn has_consecutive_command_failure_loop(&self, consecutive_failed_commands: usize) -> bool { + consecutive_failed_commands >= self.consecutive_failed_command_threshold.max(1) + } + + fn 
long_task() -> Self { + let default_microcompact = MicrocompactConfig::default(); + Self { + profile: ContextProfile::LongTask, + microcompact_keep_recent: default_microcompact.keep_recent, + microcompact_trigger_ratio: default_microcompact.trigger_ratio, + compression_contract_limit: 8, + subagent_concurrency_cap: 5, + repeated_tool_signature_threshold: 3, + consecutive_failed_command_threshold: 2, + } + } + + fn conversation() -> Self { + Self { + profile: ContextProfile::Conversation, + microcompact_keep_recent: 12, + microcompact_trigger_ratio: 0.65, + compression_contract_limit: 4, + subagent_concurrency_cap: 2, + repeated_tool_signature_threshold: 4, + consecutive_failed_command_threshold: 3, + } + } + + fn apply_weak_model_override(&mut self) { + self.microcompact_keep_recent = self.microcompact_keep_recent.min(8); + self.compression_contract_limit = self.compression_contract_limit.min(4); + self.subagent_concurrency_cap = self.subagent_concurrency_cap.min(2); + self.repeated_tool_signature_threshold = self.repeated_tool_signature_threshold.min(2); + self.consecutive_failed_command_threshold = + self.consecutive_failed_command_threshold.min(2); + } +} + +fn is_long_task_agent(agent_type: &str) -> bool { + matches!( + agent_type, + "agentic" | "DeepReview" | "DeepResearch" | "ComputerUse" | "Team" + ) || agent_type.starts_with("Review") +} + +#[cfg(test)] +mod tests { + use super::ModelCapabilityProfile; + + #[test] + fn model_capability_standard_for_empty_or_none() { + assert_eq!( + ModelCapabilityProfile::from_model_id(None), + ModelCapabilityProfile::Standard + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("")), + ModelCapabilityProfile::Standard + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(Some(" ")), + ModelCapabilityProfile::Standard + ); + } + + #[test] + fn model_capability_standard_for_strong_models() { + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("gpt-4o")), + ModelCapabilityProfile::Standard + ); + 
assert_eq!( + ModelCapabilityProfile::from_model_id(Some("claude-sonnet-4")), + ModelCapabilityProfile::Standard + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("gemini-pro")), + ModelCapabilityProfile::Standard + ); + } + + #[test] + fn model_capability_weak_for_haiku() { + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("claude-3-haiku-20240307")), + ModelCapabilityProfile::Weak + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("anthropic/claude-3-haiku")), + ModelCapabilityProfile::Weak + ); + } + + #[test] + fn model_capability_weak_for_mini() { + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("gpt-4o-mini")), + ModelCapabilityProfile::Weak + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("openai/gpt-4o-mini")), + ModelCapabilityProfile::Weak + ); + } + + #[test] + fn model_capability_weak_for_flash() { + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("gemini-1.5-flash")), + ModelCapabilityProfile::Weak + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("google/gemini-flash")), + ModelCapabilityProfile::Weak + ); + } + + #[test] + fn model_capability_weak_for_lite() { + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("qwen-lite")), + ModelCapabilityProfile::Weak + ); + } + + #[test] + fn model_capability_weak_for_small() { + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("llama-small")), + ModelCapabilityProfile::Weak + ); + } + + #[test] + fn model_capability_weak_for_nano() { + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("gemini-nano")), + ModelCapabilityProfile::Weak + ); + } + + #[test] + fn model_capability_from_resolved_model_prefers_resolved() { + // resolved is weak → returns weak regardless of provider name + assert_eq!( + ModelCapabilityProfile::from_resolved_model("gpt-4o-mini", "gpt-4o"), + ModelCapabilityProfile::Weak + ); + // resolved is standard, provider is weak → returns weak + assert_eq!( + 
ModelCapabilityProfile::from_resolved_model("gpt-4o", "gpt-4o-mini"), + ModelCapabilityProfile::Weak + ); + // both standard → returns standard + assert_eq!( + ModelCapabilityProfile::from_resolved_model("gpt-4o", "claude-sonnet"), + ModelCapabilityProfile::Standard + ); + } +} diff --git a/src/crates/core/src/agentic/coordination/coordinator.rs b/src/crates/core/src/agentic/coordination/coordinator.rs index be0c94676..a108fccde 100644 --- a/src/crates/core/src/agentic/coordination/coordinator.rs +++ b/src/crates/core/src/agentic/coordination/coordinator.rs @@ -4,12 +4,13 @@ use super::{scheduler::DialogSubmissionPolicy, turn_outcome::TurnOutcome}; use crate::agentic::agents::get_agent_registry; +use crate::agentic::context_profile::ContextProfilePolicy; use crate::agentic::core::{ has_prompt_markup, Message, MessageContent, ProcessingPhase, PromptEnvelope, Session, SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, }; use crate::agentic::events::{ - AgenticEvent, EventPriority, EventQueue, EventRouter, EventSubscriber, + AgenticEvent, DeepReviewQueueState, EventPriority, EventQueue, EventRouter, EventSubscriber, }; use crate::agentic::execution::{ContextCompactionOutcome, ExecutionContext, ExecutionEngine}; use crate::agentic::fork_agent::{ @@ -51,6 +52,48 @@ const SUBAGENT_TIMEOUT_GRACE_PERIOD: Duration = Duration::from_secs(10); pub struct SubagentResult { /// AI text response pub text: String, + pub status: SubagentResultStatus, + pub reason: Option, + pub ledger_event_id: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SubagentResultStatus { + Completed, + PartialTimeout, +} + +impl SubagentResult { + fn completed(text: String) -> Self { + Self { + text, + status: SubagentResultStatus::Completed, + reason: None, + ledger_event_id: None, + } + } + + fn partial_timeout(text: String, reason: String) -> Self { + Self { + text, + status: SubagentResultStatus::PartialTimeout, + reason: Some(reason), + ledger_event_id: None, + } 
+ } + + fn with_ledger_event_id(mut self, event_id: String) -> Self { + self.ledger_event_id = Some(event_id); + self + } + + pub fn is_partial_timeout(&self) -> bool { + self.status == SubagentResultStatus::PartialTimeout + } + + pub fn ledger_event_id(&self) -> Option<&str> { + self.ledger_event_id.as_deref() + } } struct HiddenSubagentExecutionRequest { @@ -132,20 +175,17 @@ struct SubagentConcurrencyLimiter { } struct SubagentConcurrencyPermitGuard { - permit: Option, - limiter: SubagentConcurrencyLimiter, + permits: Vec<(OwnedSemaphorePermit, SubagentConcurrencyLimiter)>, agent_type: String, } impl SubagentConcurrencyPermitGuard { fn new( - permit: OwnedSemaphorePermit, - limiter: SubagentConcurrencyLimiter, + permits: Vec<(OwnedSemaphorePermit, SubagentConcurrencyLimiter)>, agent_type: String, ) -> Self { Self { - permit: Some(permit), - limiter, + permits, agent_type, } } @@ -153,20 +193,17 @@ impl SubagentConcurrencyPermitGuard { impl Drop for SubagentConcurrencyPermitGuard { fn drop(&mut self) { - let Some(permit) = self.permit.take() else { - return; - }; - - drop(permit); + for (permit, limiter) in std::mem::take(&mut self.permits) { + drop(permit); - let active_subagents = self - .limiter - .max_concurrency - .saturating_sub(self.limiter.semaphore.available_permits()); - debug!( - "Released subagent concurrency permit: agent_type={}, active_subagents={}, max_concurrency={}", - self.agent_type, active_subagents, self.limiter.max_concurrency - ); + let active_subagents = limiter + .max_concurrency + .saturating_sub(limiter.semaphore.available_permits()); + debug!( + "Released subagent concurrency permit: agent_type={}, active_subagents={}, max_concurrency={}", + self.agent_type, active_subagents, limiter.max_concurrency + ); + } } } @@ -256,6 +293,7 @@ pub struct ConversationCoordinator { event_queue: Arc, event_router: Arc, subagent_concurrency_limiter: Arc>>, + subagent_profile_concurrency_limiters: Arc>>, /// Registry for dynamically adjusting subagent 
timeouts. subagent_timeout_registry: Arc>>>, /// Notifies DialogScheduler of turn outcomes; injected after construction @@ -627,6 +665,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet event_queue, event_router, subagent_concurrency_limiter: Arc::new(RwLock::new(None)), + subagent_profile_concurrency_limiters: Arc::new(RwLock::new(HashMap::new())), subagent_timeout_registry: Arc::new(RwLock::new(HashMap::new())), scheduler_notify_tx: OnceLock::new(), round_preempt_source: OnceLock::new(), @@ -883,6 +922,8 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet tags: Vec::new(), custom_metadata: None, todos: None, + deep_review_run_manifest: None, + deep_review_cache: None, workspace_path: Some(workspace_path.to_string()), workspace_hostname: None, unread_completion: None, @@ -1147,6 +1188,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet agent_type: String, workspace_path: Option, submission_policy: DialogSubmissionPolicy, + user_message_metadata: Option, ) -> BitFunResult<()> { self.start_dialog_turn_internal( session_id, @@ -1157,7 +1199,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet agent_type, workspace_path, submission_policy, - None, + user_message_metadata, false, ) .await @@ -1174,6 +1216,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet agent_type: String, workspace_path: Option, submission_policy: DialogSubmissionPolicy, + user_message_metadata: Option, ) -> BitFunResult<()> { self.start_dialog_turn_internal( session_id, @@ -1184,7 +1227,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet agent_type, workspace_path, submission_policy, - None, + user_message_metadata, false, ) .await @@ -1756,6 +1799,16 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet // Pass turn_index (for operation history/rollback) 
context_vars.insert("turn_index".to_string(), turn_index.to_string()); + if let Some(run_manifest) = user_message_metadata.as_ref().and_then(|metadata| { + metadata + .get("deepReviewRunManifest") + .or_else(|| metadata.get("deep_review_run_manifest")) + }) { + context_vars.insert( + "deep_review_run_manifest".to_string(), + run_manifest.to_string(), + ); + } let session_workspace_path = session_workspace .as_ref() .map(|workspace| workspace.root_path_string()); @@ -1779,6 +1832,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet workspace_services, round_preempt: self.round_preempt_source.get().cloned(), round_steering: self.round_steering_source.get().cloned(), + recover_partial_on_cancel: false, }; // Auto-generate session title on first message @@ -2451,16 +2505,41 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet limiter } - async fn acquire_subagent_concurrency_permit( + async fn get_subagent_profile_concurrency_limiter( &self, + max_concurrency: usize, + ) -> SubagentConcurrencyLimiter { + let max_concurrency = normalize_subagent_max_concurrency(max_concurrency); + + { + let limiter_guard = self.subagent_profile_concurrency_limiters.read().await; + if let Some(limiter) = limiter_guard.get(&max_concurrency) { + return limiter.clone(); + } + } + + let mut limiter_guard = self.subagent_profile_concurrency_limiters.write().await; + if let Some(limiter) = limiter_guard.get(&max_concurrency) { + return limiter.clone(); + } + + let limiter = SubagentConcurrencyLimiter { + semaphore: Arc::new(Semaphore::new(max_concurrency)), + max_concurrency, + }; + limiter_guard.insert(max_concurrency, limiter.clone()); + limiter + } + + async fn acquire_permit_from_limiter( + &self, + limiter: &SubagentConcurrencyLimiter, agent_type: &str, cancel_token: Option<&CancellationToken>, deadline: Option, - ) -> BitFunResult<(OwnedSemaphorePermit, SubagentConcurrencyLimiter, u128)> { - let limiter = 
self.get_subagent_concurrency_limiter().await; - let started_waiting = Instant::now(); + label: &str, + ) -> BitFunResult { let semaphore = limiter.semaphore.clone(); - let permit = match (cancel_token, deadline) { (Some(token), Some(deadline)) => { tokio::select! { @@ -2473,8 +2552,8 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet } _ = tokio::time::sleep_until(deadline) => { return Err(BitFunError::Timeout(format!( - "Timed out while waiting for a concurrency slot for subagent '{}'", - agent_type + "Timed out while waiting for a {} concurrency slot for subagent '{}'", + label, agent_type ))); } } @@ -2496,8 +2575,8 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet .map_err(|error| BitFunError::Semaphore(error.to_string()))?, _ = tokio::time::sleep_until(deadline) => { return Err(BitFunError::Timeout(format!( - "Timed out while waiting for a concurrency slot for subagent '{}'", - agent_type + "Timed out while waiting for a {} concurrency slot for subagent '{}'", + label, agent_type ))); } } @@ -2508,16 +2587,104 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet .map_err(|error| BitFunError::Semaphore(error.to_string()))?, }; - let wait_ms = started_waiting.elapsed().as_millis(); let active_subagents = limiter .max_concurrency .saturating_sub(limiter.semaphore.available_permits()); debug!( - "Acquired subagent concurrency permit: agent_type={}, wait_ms={}, active_subagents={}, max_concurrency={}", - agent_type, wait_ms, active_subagents, limiter.max_concurrency + "Acquired subagent {} concurrency permit: agent_type={}, active_subagents={}, max_concurrency={}", + label, agent_type, active_subagents, limiter.max_concurrency ); - Ok((permit, limiter, wait_ms)) + Ok(permit) + } + + async fn acquire_subagent_concurrency_permit( + &self, + agent_type: &str, + profile_concurrency_cap: usize, + cancel_token: Option<&CancellationToken>, + deadline: Option, + ) -> 
BitFunResult<( + Vec<(OwnedSemaphorePermit, SubagentConcurrencyLimiter)>, + u128, + )> { + let started_waiting = Instant::now(); + + let profile_limiter = self + .get_subagent_profile_concurrency_limiter(profile_concurrency_cap) + .await; + let profile_permit = self + .acquire_permit_from_limiter( + &profile_limiter, + agent_type, + cancel_token, + deadline, + "profile", + ) + .await?; + + let global_limiter = self.get_subagent_concurrency_limiter().await; + let global_permit = self + .acquire_permit_from_limiter( + &global_limiter, + agent_type, + cancel_token, + deadline, + "global", + ) + .await?; + + let wait_ms = started_waiting.elapsed().as_millis(); + debug!( + "Acquired subagent concurrency permits: agent_type={}, wait_ms={}, profile_max_concurrency={}, global_max_concurrency={}", + agent_type, wait_ms, profile_limiter.max_concurrency, global_limiter.max_concurrency + ); + + Ok(( + vec![ + (profile_permit, profile_limiter), + (global_permit, global_limiter), + ], + wait_ms, + )) + } + + fn context_profile_policy_for_subagent( + &self, + agent_type: &str, + session_config: &SessionConfig, + subagent_parent_info: Option<&SubagentParentInfo>, + ) -> ContextProfilePolicy { + if let Some(parent_info) = subagent_parent_info { + if let Some(parent_session) = self.session_manager.get_session(&parent_info.session_id) + { + let parent_is_review_subagent = get_agent_registry() + .get_subagent_is_review(&parent_session.agent_type) + .unwrap_or(false); + let is_review_subagent = get_agent_registry() + .get_subagent_is_review(agent_type) + .unwrap_or(false); + return ContextProfilePolicy::for_subagent_context_and_models( + agent_type, + is_review_subagent, + session_config.model_id.as_deref(), + Some(&parent_session.agent_type), + parent_is_review_subagent, + parent_session.config.model_id.as_deref(), + ); + } + } + + let is_review_subagent = get_agent_registry() + .get_subagent_is_review(agent_type) + .unwrap_or(false); + let model_id = 
session_config.model_id.as_deref().unwrap_or_default(); + ContextProfilePolicy::for_agent_context_and_model( + agent_type, + is_review_subagent, + model_id, + model_id, + ) } async fn execute_hidden_subagent_internal( @@ -2551,6 +2718,18 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet timeout_seconds.map(|seconds| Instant::now() + Duration::from_secs(seconds)); let (deadline_tx, mut deadline_rx) = watch::channel(initial_deadline); + let context_profile_policy = self.context_profile_policy_for_subagent( + &agent_type, + &session_config, + subagent_parent_info.as_ref(), + ); + debug!( + "Subagent context profile policy selected: agent_type={}, profile={:?}, profile_concurrency_cap={}", + agent_type, + context_profile_policy.profile, + context_profile_policy.subagent_concurrency_cap + ); + // Check cancel token (before creating session) if let Some(token) = cancel_token { if token.is_cancelled() { @@ -2565,11 +2744,15 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet // Use create_subagent_session (not create_session) so that no SessionCreated // event is emitted to the transport layer — subagent sessions are internal // implementation details and must not appear in the UI session list. - let (permit, limiter, wait_ms) = self - .acquire_subagent_concurrency_permit(&agent_type, cancel_token, initial_deadline) + let (permits, wait_ms) = self + .acquire_subagent_concurrency_permit( + &agent_type, + context_profile_policy.subagent_concurrency_cap, + cancel_token, + initial_deadline, + ) .await?; - let _permit_guard = - SubagentConcurrencyPermitGuard::new(permit, limiter, agent_type.clone()); + let _permit_guard = SubagentConcurrencyPermitGuard::new(permits, agent_type.clone()); if let Some(token) = cancel_token { if token.is_cancelled() { @@ -2716,6 +2899,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet // dialog turns only. 
Leave None so we don't intercept buffer entries // that belong to a different (parent) session/turn. round_steering: None, + recover_partial_on_cancel: true, }; let execution_engine = self.execution_engine.clone(); @@ -2898,15 +3082,41 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet ); } - match tokio::time::timeout(SUBAGENT_TIMEOUT_GRACE_PERIOD, &mut execution_task).await + let partial_timeout_result = match tokio::time::timeout( + SUBAGENT_TIMEOUT_GRACE_PERIOD, + &mut execution_task, + ) + .await { - Ok(Ok(Ok(_))) | Ok(Ok(Err(_))) => {} + Ok(Ok(Ok(exec_result))) => { + let response_text = match exec_result.final_message.content { + MessageContent::Mixed { text, .. } => text, + MessageContent::Text(text) => text, + _ => String::new(), + }; + if response_text.trim().is_empty() { + None + } else { + Some(SubagentResult::partial_timeout( + response_text, + timeout_error_message.clone(), + )) + } + } + Ok(Ok(Err(error))) => { + debug!( + "Subagent returned error during timeout grace period: agent_type={}, session={}, error={}", + agent_type, session_id, error + ); + None + } Ok(Err(error)) => { warn!( "Subagent join failed during timeout grace period: agent_type={}, session={}, error={}", agent_type, session_id, error ); execution_task.abort(); + None } Err(_) => { warn!( @@ -2914,7 +3124,37 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet agent_type, session_id ); execution_task.abort(); + None + } + }; + + if let Some(mut partial_result) = partial_timeout_result { + warn!( + "Subagent timed out with partial output: agent_type={}, session={}, text_len={}", + agent_type, + session_id, + partial_result.text.len() + ); + if let Some(parent_info) = subagent_parent_info.as_ref() { + let event = self.session_manager.record_subagent_partial_timeout( + &parent_info.session_id, + &parent_info.dialog_turn_id, + &agent_type, + &partial_result.text, + Some("timeout"), + ); + partial_result = 
partial_result.with_ledger_event_id(event.event_id); } + if let Err(cleanup_err) = self.cleanup_subagent_resources(&session_id).await { + warn!( + "Failed to cleanup subagent resources after partial timeout: session={}, error={}", + session_id, cleanup_err + ); + } + let mut registry = self.subagent_timeout_registry.write().await; + registry.remove(&session_id); + + return Ok(partial_result); } if let Err(cleanup_err) = self.cleanup_subagent_resources(&session_id).await { @@ -2974,9 +3214,7 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet let mut registry = self.subagent_timeout_registry.write().await; registry.remove(&session_id); - Ok(SubagentResult { - text: response_text, - }) + Ok(SubagentResult::completed(response_text)) } pub async fn capture_fork_agent_context_snapshot( @@ -3363,6 +3601,24 @@ Update the persona files and delete BOOTSTRAP.md as soon as bootstrap is complet .await; } + pub async fn emit_deep_review_queue_state_changed( + &self, + session_id: &str, + turn_id: &str, + queue_state: DeepReviewQueueState, + ) { + let event = AgenticEvent::DeepReviewQueueStateChanged { + session_id: session_id.to_string(), + turn_id: turn_id.to_string(), + queue_state, + subagent_parent_info: None, + }; + let _ = self + .event_queue + .enqueue(event, Some(EventPriority::High)) + .await; + } + /// Get SessionManager reference (for advanced features like mode management) pub fn get_session_manager(&self) -> &Arc { &self.session_manager @@ -3499,6 +3755,11 @@ impl bitfun_runtime_ports::AgentSubmissionPort for ConversationCoordinator { bitfun_runtime_ports::AgentSubmissionSource::Bot => DialogTriggerSource::Bot, bitfun_runtime_ports::AgentSubmissionSource::Cli => DialogTriggerSource::Cli, }; + let user_message_metadata = if request.metadata.is_empty() { + None + } else { + Some(serde_json::Value::Object(request.metadata.clone())) + }; self.start_dialog_turn( request.session_id, @@ -3508,6 +3769,7 @@ impl 
bitfun_runtime_ports::AgentSubmissionPort for ConversationCoordinator { session.agent_type.clone(), session.config.workspace_path.clone(), DialogSubmissionPolicy::for_source(trigger_source), + user_message_metadata, ) .await .map_err(|error| { diff --git a/src/crates/core/src/agentic/coordination/scheduler.rs b/src/crates/core/src/agentic/coordination/scheduler.rs index 8f4258825..9f3cde55b 100644 --- a/src/crates/core/src/agentic/coordination/scheduler.rs +++ b/src/crates/core/src/agentic/coordination/scheduler.rs @@ -138,6 +138,7 @@ pub struct QueuedTurn { pub workspace_path: Option, pub policy: DialogSubmissionPolicy, pub reply_route: Option, + pub user_message_metadata: Option, pub image_contexts: Option>, #[allow(dead_code)] pub enqueued_at: SystemTime, @@ -320,6 +321,7 @@ impl DialogScheduler { workspace_path: Option, policy: DialogSubmissionPolicy, reply_route: Option, + user_message_metadata: Option, image_contexts: Option>, ) -> Result { let resolved_turn_id = turn_id.unwrap_or_else(|| Uuid::new_v4().to_string()); @@ -331,6 +333,7 @@ impl DialogScheduler { workspace_path, policy, reply_route, + user_message_metadata, image_contexts, enqueued_at: SystemTime::now(), }; @@ -575,6 +578,7 @@ impl DialogScheduler { queued_turn.agent_type.clone(), queued_turn.workspace_path.clone(), queued_turn.policy, + queued_turn.user_message_metadata.clone(), ) .await } @@ -588,6 +592,7 @@ impl DialogScheduler { queued_turn.agent_type.clone(), queued_turn.workspace_path.clone(), queued_turn.policy, + queued_turn.user_message_metadata.clone(), ) .await } @@ -652,6 +657,7 @@ impl DialogScheduler { DialogSubmissionPolicy::for_source(DialogTriggerSource::AgentSession), None, None, + None, ) .await { diff --git a/src/crates/core/src/agentic/core/message.rs b/src/crates/core/src/agentic/core/message.rs index 017cc98bc..a76c9664d 100644 --- a/src/crates/core/src/agentic/core/message.rs +++ b/src/crates/core/src/agentic/core/message.rs @@ -88,6 +88,9 @@ pub struct CompressionPayload 
{ #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum CompressionEntry { + Contract { + contract: CompressionContract, + }, ModelSummary { text: String, }, @@ -101,6 +104,75 @@ pub enum CompressionEntry { }, } +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct CompressionContract { + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub touched_files: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub verification_commands: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub blocking_failures: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub subagent_statuses: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CompressionContractItem { + pub target: String, + pub status: String, + pub summary: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_kind: Option, +} + +impl CompressionContract { + pub fn is_empty(&self) -> bool { + self.touched_files.is_empty() + && self.verification_commands.is_empty() + && self.blocking_failures.is_empty() + && self.subagent_statuses.is_empty() + } + + pub fn render_for_model(&self) -> String { + let mut lines = vec![ + "Compaction contract: preserve these factual fields when continuing the task." 
+ .to_string(), + ]; + + if !self.touched_files.is_empty() { + lines.push("Touched files:".to_string()); + for file in &self.touched_files { + lines.push(format!("- {}", file)); + } + } + + render_contract_items( + &mut lines, + "Verification commands:", + &self.verification_commands, + ); + render_contract_items(&mut lines, "Blocking failures:", &self.blocking_failures); + render_contract_items(&mut lines, "Subagent statuses:", &self.subagent_statuses); + + lines.join("\n") + } +} + +fn render_contract_items(lines: &mut Vec, title: &str, items: &[CompressionContractItem]) { + if items.is_empty() { + return; + } + + lines.push(title.to_string()); + for item in items { + let mut rendered = format!("- {} [{}]: {}", item.target, item.status, item.summary); + if let Some(error_kind) = item.error_kind.as_ref() { + rendered.push_str(&format!(" ({})", error_kind)); + } + lines.push(rendered); + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CompressedMessage { pub role: CompressedMessageRole, diff --git a/src/crates/core/src/agentic/core/mod.rs b/src/crates/core/src/agentic/core/mod.rs index e515fdd9e..a12a5a9c3 100644 --- a/src/crates/core/src/agentic/core/mod.rs +++ b/src/crates/core/src/agentic/core/mod.rs @@ -11,8 +11,9 @@ pub mod state; pub use dialog_turn::{new_turn_id, TurnStats}; pub use message::{ CompressedMessage, CompressedMessageRole, CompressedTodoItem, CompressedTodoSnapshot, - CompressedToolCall, CompressionEntry, CompressionPayload, Message, MessageContent, MessageRole, - MessageSemanticKind, ToolCall, ToolResult, + CompressedToolCall, CompressionContract, CompressionContractItem, CompressionEntry, + CompressionPayload, Message, MessageContent, MessageRole, MessageSemanticKind, ToolCall, + ToolResult, }; pub use messages_helper::{MessageHelper, RequestReasoningTokenPolicy}; pub use prompt_markup::{ diff --git a/src/crates/core/src/agentic/deep_review_policy.rs b/src/crates/core/src/agentic/deep_review_policy.rs index 
eff7468b0..452fab01d 100644 --- a/src/crates/core/src/agentic/deep_review_policy.rs +++ b/src/crates/core/src/agentic/deep_review_policy.rs @@ -1,8 +1,12 @@ use crate::service::config::global::GlobalConfigManager; use crate::util::errors::{BitFunError, BitFunResult}; +use dashmap::DashMap; use log::warn; +use serde::Serialize; use serde_json::{json, Value}; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::sync::LazyLock; +use std::time::{Duration, Instant}; pub const DEEP_REVIEW_AGENT_TYPE: &str = "DeepReview"; pub const REVIEW_JUDGE_AGENT_TYPE: &str = "ReviewJudge"; @@ -12,19 +16,385 @@ pub const REVIEWER_PERFORMANCE_AGENT_TYPE: &str = "ReviewPerformance"; pub const REVIEWER_SECURITY_AGENT_TYPE: &str = "ReviewSecurity"; pub const REVIEWER_ARCHITECTURE_AGENT_TYPE: &str = "ReviewArchitecture"; pub const REVIEWER_FRONTEND_AGENT_TYPE: &str = "ReviewFrontend"; -pub const CORE_REVIEWER_AGENT_TYPES: [&str; 5] = [ +pub const CORE_REVIEWER_AGENT_TYPES: [&str; 4] = [ REVIEWER_BUSINESS_LOGIC_AGENT_TYPE, REVIEWER_PERFORMANCE_AGENT_TYPE, REVIEWER_SECURITY_AGENT_TYPE, REVIEWER_ARCHITECTURE_AGENT_TYPE, - REVIEWER_FRONTEND_AGENT_TYPE, ]; +pub const CONDITIONAL_REVIEWER_AGENT_TYPES: [&str; 1] = [REVIEWER_FRONTEND_AGENT_TYPE]; const DEFAULT_REVIEW_TEAM_CONFIG_PATH: &str = "ai.review_teams.default"; -const DEFAULT_REVIEWER_TIMEOUT_SECONDS: u64 = 0; -const DEFAULT_JUDGE_TIMEOUT_SECONDS: u64 = 0; +const DEFAULT_REVIEWER_TIMEOUT_SECONDS: u64 = 600; +const DEFAULT_JUDGE_TIMEOUT_SECONDS: u64 = 600; +const MAX_TIMEOUT_SECONDS: u64 = 3600; +const BASE_TIMEOUT_QUICK_SECONDS: u64 = 180; +const BASE_TIMEOUT_NORMAL_SECONDS: u64 = 300; +const BASE_TIMEOUT_DEEP_SECONDS: u64 = 600; +const TIMEOUT_PER_FILE_SECONDS: u64 = 15; +const TIMEOUT_PER_100_LINES_SECONDS: u64 = 30; const DEFAULT_REVIEWER_FILE_SPLIT_THRESHOLD: usize = 20; const DEFAULT_MAX_SAME_ROLE_INSTANCES: usize = 3; +const MAX_SAME_ROLE_INSTANCES: usize = 8; +const 
DEFAULT_MAX_RETRIES_PER_ROLE: usize = 1; +const MAX_RETRIES_PER_ROLE: usize = 3; +const DEFAULT_MAX_PARALLEL_INSTANCES: usize = 4; +const DEFAULT_MAX_QUEUE_WAIT_SECONDS: u64 = 60; +const MAX_QUEUE_WAIT_SECONDS: u64 = 600; +const EFFECTIVE_CONCURRENCY_RECOVERY_SUCCESS_WINDOW: usize = 3; +const BUDGET_TTL: Duration = Duration::from_secs(60 * 60); +const PRUNE_INTERVAL: Duration = Duration::from_secs(300); + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ReviewTeamRoleDefinition { + pub key: String, + pub subagent_id: String, + pub fun_name: String, + pub role_name: String, + pub description: String, + pub responsibilities: Vec, + pub accent_color: String, + pub conditional: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ReviewStrategyManifestProfile { + pub level: String, + pub label: String, + pub summary: String, + pub token_impact: String, + pub runtime_impact: String, + pub default_model_slot: String, + pub prompt_directive: String, + pub role_directives: BTreeMap, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ReviewTeamExecutionPolicyDefinition { + pub reviewer_timeout_seconds: u64, + pub judge_timeout_seconds: u64, + pub reviewer_file_split_threshold: usize, + pub max_same_role_instances: usize, + pub max_retries_per_role: usize, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ReviewTeamDefinition { + pub id: String, + pub name: String, + pub description: String, + pub warning: String, + pub default_model: String, + pub default_strategy_level: String, + pub default_execution_policy: ReviewTeamExecutionPolicyDefinition, + pub core_roles: Vec, + pub strategy_profiles: BTreeMap, + pub disallowed_extra_subagent_ids: Vec, + pub hidden_agent_ids: Vec, +} + +fn review_role( + key: &str, + subagent_id: &str, + fun_name: &str, + role_name: &str, 
+ description: &str, + responsibilities: &[&str], + accent_color: &str, + conditional: bool, +) -> ReviewTeamRoleDefinition { + ReviewTeamRoleDefinition { + key: key.to_string(), + subagent_id: subagent_id.to_string(), + fun_name: fun_name.to_string(), + role_name: role_name.to_string(), + description: description.to_string(), + responsibilities: responsibilities + .iter() + .map(|item| item.to_string()) + .collect(), + accent_color: accent_color.to_string(), + conditional, + } +} + +fn role_directives(entries: &[(&str, &str)]) -> BTreeMap { + entries + .iter() + .map(|(role, directive)| (role.to_string(), directive.to_string())) + .collect() +} + +fn strategy_profile( + level: &str, + label: &str, + summary: &str, + token_impact: &str, + runtime_impact: &str, + default_model_slot: &str, + prompt_directive: &str, + directives: &[(&str, &str)], +) -> ReviewStrategyManifestProfile { + ReviewStrategyManifestProfile { + level: level.to_string(), + label: label.to_string(), + summary: summary.to_string(), + token_impact: token_impact.to_string(), + runtime_impact: runtime_impact.to_string(), + default_model_slot: default_model_slot.to_string(), + prompt_directive: prompt_directive.to_string(), + role_directives: role_directives(directives), + } +} + +pub fn default_review_team_definition() -> ReviewTeamDefinition { + let core_roles = vec![ + review_role( + "businessLogic", + REVIEWER_BUSINESS_LOGIC_AGENT_TYPE, + "Logic Reviewer", + "Business Logic Reviewer", + "A workflow sleuth that inspects business rules, state transitions, recovery paths, and real-user correctness.", + &[ + "Verify workflows, state transitions, and domain rules still behave correctly.", + "Check boundary cases, rollback paths, and data integrity assumptions.", + "Focus on issues that can break user outcomes or product intent.", + ], + "#2563eb", + false, + ), + review_role( + "performance", + REVIEWER_PERFORMANCE_AGENT_TYPE, + "Performance Reviewer", + "Performance Reviewer", + "A speed-focused 
profiler that hunts hot paths, unnecessary work, blocking calls, and scale-sensitive regressions.", + &[ + "Inspect hot paths, large loops, and unnecessary allocations or recomputation.", + "Flag blocking work, N+1 patterns, and wasteful data movement.", + "Keep performance advice practical and aligned with the existing architecture.", + ], + "#d97706", + false, + ), + review_role( + "security", + REVIEWER_SECURITY_AGENT_TYPE, + "Security Reviewer", + "Security Reviewer", + "A boundary guardian that scans for injection risks, trust leaks, privilege mistakes, and unsafe file or command handling.", + &[ + "Review trust boundaries, auth assumptions, and sensitive data handling.", + "Look for injection, unsafe command execution, and exposure risks.", + "Highlight concrete fixes that reduce risk without broad rewrites.", + ], + "#dc2626", + false, + ), + review_role( + "architecture", + REVIEWER_ARCHITECTURE_AGENT_TYPE, + "Architecture Reviewer", + "Architecture Reviewer", + "A structural watchdog that checks module boundaries, dependency direction, API contract design, and abstraction integrity.", + &[ + "Detect layer boundary violations and wrong-direction imports.", + "Verify API contracts, tool schemas, and transport messages stay consistent.", + "Ensure platform-agnostic code does not leak platform-specific details.", + ], + "#0891b2", + false, + ), + review_role( + "frontend", + REVIEWER_FRONTEND_AGENT_TYPE, + "Frontend Reviewer", + "Frontend Reviewer", + "A UI specialist that checks i18n synchronization, React performance patterns, accessibility, and frontend-backend contract alignment.", + &[ + "Verify i18n key completeness across all locales.", + "Check React performance patterns (memoization, virtualization, effect dependencies).", + "Flag accessibility violations and frontend-backend API contract drift.", + ], + "#059669", + true, + ), + review_role( + "judge", + REVIEW_JUDGE_AGENT_TYPE, + "Review Arbiter", + "Review Quality Inspector", + "An independent 
third-party arbiter that validates reviewer reports for logical consistency and evidence quality. It spot-checks specific code locations only when a claim needs verification, rather than re-reviewing the codebase from scratch.", + &[ + "Validate, merge, downgrade, or reject reviewer findings based on logical consistency and evidence quality.", + "Filter out false positives and directionally-wrong optimization advice by examining reviewer reasoning.", + "Spot-check specific code locations only when a reviewer claim needs verification.", + "Ensure every surviving issue has an actionable fix or follow-up plan.", + ], + "#7c3aed", + false, + ), + ]; + + let strategy_profiles = BTreeMap::from([ + ( + "quick".to_string(), + strategy_profile( + "quick", + "Quick", + "Fast screening for high-confidence issues in the requested diff or scope.", + "0.4-0.6x", + "0.5-0.7x", + "fast", + "Prefer a concise diff-focused pass. Report only high-confidence correctness, security, or regression risks and avoid speculative design rewrites.", + &[ + ( + REVIEWER_BUSINESS_LOGIC_AGENT_TYPE, + "Only trace logic paths directly changed by the diff. Do not follow call chains beyond one hop. Report only issues where the diff introduces a provably wrong behavior.", + ), + ( + REVIEWER_PERFORMANCE_AGENT_TYPE, + "Scan the diff for known anti-patterns only: nested loops, repeated fetches, blocking calls on hot paths, unnecessary re-renders. Do not trace call chains or estimate impact beyond what the diff shows.", + ), + ( + REVIEWER_SECURITY_AGENT_TYPE, + "Scan the diff for direct security risks only: injection, secret exposure, unsafe commands, missing auth. Do not trace data flows beyond one hop.", + ), + ( + REVIEWER_ARCHITECTURE_AGENT_TYPE, + "Only check imports directly changed by the diff. 
Flag violations of documented layer boundaries.", + ), + ( + REVIEWER_FRONTEND_AGENT_TYPE, + "Only check i18n key completeness and direct platform boundary violations in changed frontend files.", + ), + ( + REVIEW_JUDGE_AGENT_TYPE, + "This was a quick review. Focus on confirming or rejecting each finding efficiently. If a finding's evidence is thin, reject it rather than spending time verifying.", + ), + ], + ), + ), + ( + "normal".to_string(), + strategy_profile( + "normal", + "Normal", + "Balanced review depth for day-to-day code review with practical evidence.", + "1x", + "1x", + "fast", + "Perform the standard role-specific review. Balance coverage with precision and include concrete evidence for each issue.", + &[ + ( + REVIEWER_BUSINESS_LOGIC_AGENT_TYPE, + "Trace each changed function's direct callers and callees to verify business rules and state transitions. Stop investigating a path once you have enough evidence to confirm or dismiss it.", + ), + ( + REVIEWER_PERFORMANCE_AGENT_TYPE, + "Inspect the diff for anti-patterns, then read surrounding code to confirm impact on hot paths. Report only issues likely to matter at realistic scale.", + ), + ( + REVIEWER_SECURITY_AGENT_TYPE, + "Trace each changed input path from entry point to usage. Check trust boundaries, auth assumptions, and data sanitization. Report only issues with a realistic threat narrative.", + ), + ( + REVIEWER_ARCHITECTURE_AGENT_TYPE, + "Check the diff's imports plus one level of dependency direction. Verify API contract consistency.", + ), + ( + REVIEWER_FRONTEND_AGENT_TYPE, + "Check i18n, React performance patterns, and accessibility in changed components. Verify frontend-backend API contract alignment.", + ), + ( + REVIEW_JUDGE_AGENT_TYPE, + "Validate each finding's logical consistency and evidence quality. 
Spot-check code only when a claim needs verification.", + ), + ], + ), + ), + ( + "deep".to_string(), + strategy_profile( + "deep", + "Deep", + "Thorough multi-pass review for risky, broad, or release-sensitive changes.", + "1.8-2.5x", + "1.5-2.5x", + "primary", + "Run a thorough role-specific pass. Inspect edge cases, cross-file interactions, failure modes, and remediation tradeoffs before finalizing findings.", + &[ + ( + REVIEWER_BUSINESS_LOGIC_AGENT_TYPE, + "Map full call chains for changed functions. Verify state transitions end-to-end, check rollback and error-recovery paths, and test edge cases in data shape and lifecycle assumptions. Prioritize findings by user-facing impact.", + ), + ( + REVIEWER_PERFORMANCE_AGENT_TYPE, + "In addition to the normal pass, check for latent scaling risks - data structures that degrade at volume, or algorithms that are correct but unnecessarily expensive. Only report if you can estimate the impact. Do not speculate about edge cases or failure modes unrelated to performance.", + ), + ( + REVIEWER_SECURITY_AGENT_TYPE, + "In addition to the normal pass, trace data flows across trust boundaries end-to-end. Check for privilege escalation chains, indirect injection vectors, and failure modes that expose sensitive data. Report only issues with a complete threat narrative.", + ), + ( + REVIEWER_ARCHITECTURE_AGENT_TYPE, + "Map the full dependency graph for changed modules. Check for structural anti-patterns, circular dependencies, and cross-cutting concerns.", + ), + ( + REVIEWER_FRONTEND_AGENT_TYPE, + "Thorough React analysis: effect dependencies, memoization, virtualization. Full accessibility audit. State management pattern review. Cross-layer contract verification.", + ), + ( + REVIEW_JUDGE_AGENT_TYPE, + "This was a deep review with potentially complex findings. Cross-validate findings across reviewers for consistency. For each finding, verify the evidence supports the conclusion and the suggested fix is safe. 
Pay extra attention to overlapping findings across reviewers or same-role instances.", + ), + ], + ), + ), + ]); + + let mut hidden_agent_ids = vec![ + DEEP_REVIEW_AGENT_TYPE.to_string(), + REVIEW_JUDGE_AGENT_TYPE.to_string(), + ]; + hidden_agent_ids.extend(CORE_REVIEWER_AGENT_TYPES.iter().map(|id| id.to_string())); + hidden_agent_ids.extend( + CONDITIONAL_REVIEWER_AGENT_TYPES + .iter() + .map(|id| id.to_string()), + ); + hidden_agent_ids.sort(); + hidden_agent_ids.dedup(); + + let mut disallowed_extra_subagent_ids = hidden_agent_ids.clone(); + disallowed_extra_subagent_ids.push(REVIEW_FIXER_AGENT_TYPE.to_string()); + disallowed_extra_subagent_ids.sort(); + disallowed_extra_subagent_ids.dedup(); + + ReviewTeamDefinition { + id: "default-review-team".to_string(), + name: "Code Review Team".to_string(), + description: "A multi-reviewer team for deep code review with mandatory logic, performance, security, architecture, conditional frontend, and quality-gate roles.".to_string(), + warning: "Deep review may take longer and usually consumes more tokens than a standard review.".to_string(), + default_model: "fast".to_string(), + default_strategy_level: "normal".to_string(), + default_execution_policy: ReviewTeamExecutionPolicyDefinition { + reviewer_timeout_seconds: 300, + judge_timeout_seconds: 240, + reviewer_file_split_threshold: DEFAULT_REVIEWER_FILE_SPLIT_THRESHOLD, + max_same_role_instances: DEFAULT_MAX_SAME_ROLE_INSTANCES, + max_retries_per_role: DEFAULT_MAX_RETRIES_PER_ROLE, + }, + core_roles, + strategy_profiles, + disallowed_extra_subagent_ids, + hidden_agent_ids, + } +} #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DeepReviewSubagentRole { @@ -56,6 +426,28 @@ impl DeepReviewStrategyLevel { } } +/// Risk factors used for automatic strategy selection. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ChangeRiskFactors { + pub file_count: usize, + pub total_lines_changed: usize, + pub files_in_security_paths: usize, + pub max_cyclomatic_complexity_delta: usize, + pub cross_crate_changes: usize, +} + +impl Default for ChangeRiskFactors { + fn default() -> Self { + Self { + file_count: 0, + total_lines_changed: 0, + files_in_security_paths: 0, + max_cyclomatic_complexity_delta: 0, + cross_crate_changes: 0, + } + } +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct DeepReviewExecutionPolicy { pub extra_subagent_ids: Vec, @@ -71,6 +463,9 @@ pub struct DeepReviewExecutionPolicy { /// Maximum number of same-role reviewer instances allowed per review turn. /// Clamped to [1, MAX_SAME_ROLE_INSTANCES]. pub max_same_role_instances: usize, + /// Maximum retry launches allowed per reviewer role in one DeepReview turn. + /// Set to 0 to disable automatic reviewer retries. + pub max_retries_per_role: usize, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -80,7 +475,7 @@ pub struct DeepReviewPolicyViolation { } impl DeepReviewPolicyViolation { - fn new(code: &'static str, message: impl Into) -> Self { + pub(crate) fn new(code: &'static str, message: impl Into) -> Self { Self { code, message: message.into(), @@ -96,6 +491,86 @@ impl DeepReviewPolicyViolation { } } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DeepReviewRunManifestGate { + active_subagent_ids: HashSet, + skipped_subagent_reasons: HashMap, +} + +impl DeepReviewRunManifestGate { + pub fn from_value(raw: &Value) -> Option { + let manifest = raw.as_object()?; + if manifest.get("reviewMode").and_then(Value::as_str) != Some("deep") { + return None; + } + + let mut active_subagent_ids = HashSet::new(); + collect_manifest_members(manifest.get("workPackets"), &mut active_subagent_ids); + collect_manifest_members(manifest.get("coreReviewers"), &mut active_subagent_ids); + collect_manifest_members( + manifest.get("enabledExtraReviewers"), + &mut active_subagent_ids, 
+ ); + if let Some(id) = manifest + .get("qualityGateReviewer") + .and_then(manifest_member_subagent_id) + { + active_subagent_ids.insert(id); + } + + if active_subagent_ids.is_empty() { + return None; + } + + let mut skipped_subagent_reasons = HashMap::new(); + if let Some(skipped) = manifest.get("skippedReviewers").and_then(Value::as_array) { + for member in skipped { + let Some(id) = manifest_member_subagent_id(member) else { + continue; + }; + let reason = member + .get("reason") + .and_then(Value::as_str) + .unwrap_or("skipped") + .trim(); + skipped_subagent_reasons.insert( + id, + if reason.is_empty() { + "skipped".to_string() + } else { + reason.to_string() + }, + ); + } + } + + Some(Self { + active_subagent_ids, + skipped_subagent_reasons, + }) + } + + pub fn ensure_active(&self, subagent_type: &str) -> Result<(), DeepReviewPolicyViolation> { + if self.active_subagent_ids.contains(subagent_type) { + return Ok(()); + } + + let reason = self + .skipped_subagent_reasons + .get(subagent_type) + .map(String::as_str) + .unwrap_or("missing_from_manifest"); + + Err(DeepReviewPolicyViolation::new( + "deep_review_subagent_not_active_for_target", + format!( + "DeepReview subagent '{}' is not active for this review target (reason: {})", + subagent_type, reason + ), + )) + } +} + impl Default for DeepReviewExecutionPolicy { fn default() -> Self { Self { @@ -106,6 +581,7 @@ impl Default for DeepReviewExecutionPolicy { judge_timeout_seconds: DEFAULT_JUDGE_TIMEOUT_SECONDS, reviewer_file_split_threshold: DEFAULT_REVIEWER_FILE_SPLIT_THRESHOLD, max_same_role_instances: DEFAULT_MAX_SAME_ROLE_INSTANCES, + max_retries_per_role: DEFAULT_MAX_RETRIES_PER_ROLE, } } } @@ -126,13 +602,13 @@ impl DeepReviewExecutionPolicy { reviewer_timeout_seconds: clamp_u64( config.get("reviewer_timeout_seconds"), 0, - u64::MAX, + MAX_TIMEOUT_SECONDS, DEFAULT_REVIEWER_TIMEOUT_SECONDS, ), judge_timeout_seconds: clamp_u64( config.get("judge_timeout_seconds"), 0, - u64::MAX, + MAX_TIMEOUT_SECONDS, 
DEFAULT_JUDGE_TIMEOUT_SECONDS, ), reviewer_file_split_threshold: clamp_usize( @@ -147,6 +623,12 @@ impl DeepReviewExecutionPolicy { usize::MAX, DEFAULT_MAX_SAME_ROLE_INSTANCES, ), + max_retries_per_role: clamp_usize( + config.get("max_retries_per_role"), + 0, + MAX_RETRIES_PER_ROLE, + DEFAULT_MAX_RETRIES_PER_ROLE, + ), } } @@ -155,6 +637,7 @@ impl DeepReviewExecutionPolicy { subagent_type: &str, ) -> Result { if CORE_REVIEWER_AGENT_TYPES.contains(&subagent_type) + || CONDITIONAL_REVIEWER_AGENT_TYPES.contains(&subagent_type) || self .extra_subagent_ids .iter() @@ -204,6 +687,104 @@ impl DeepReviewExecutionPolicy { ) } + pub fn predictive_timeout( + &self, + role: DeepReviewSubagentRole, + strategy: DeepReviewStrategyLevel, + file_count: usize, + line_count: usize, + reviewer_count: usize, + ) -> u64 { + let base = match strategy { + DeepReviewStrategyLevel::Quick => BASE_TIMEOUT_QUICK_SECONDS, + DeepReviewStrategyLevel::Normal => BASE_TIMEOUT_NORMAL_SECONDS, + DeepReviewStrategyLevel::Deep => BASE_TIMEOUT_DEEP_SECONDS, + }; + let file_overhead = u64::try_from(file_count) + .unwrap_or(u64::MAX) + .saturating_mul(TIMEOUT_PER_FILE_SECONDS); + let line_overhead = u64::try_from(line_count / 100) + .unwrap_or(u64::MAX) + .saturating_mul(TIMEOUT_PER_100_LINES_SECONDS); + let raw = base + .saturating_add(file_overhead) + .saturating_add(line_overhead); + let multiplier = match role { + DeepReviewSubagentRole::Reviewer => 1, + DeepReviewSubagentRole::Judge => { + let reviewer_count = u64::try_from(reviewer_count.max(1)).unwrap_or(u64::MAX); + 1 + reviewer_count.saturating_sub(1) / 3 + } + }; + + raw.saturating_mul(multiplier).min(MAX_TIMEOUT_SECONDS) + } + + pub fn with_run_manifest_execution_policy(&self, raw_manifest: &Value) -> Self { + let Some(manifest) = raw_manifest.as_object() else { + return self.clone(); + }; + if manifest.get("reviewMode").and_then(Value::as_str) != Some("deep") { + return self.clone(); + } + + let mut policy = self.clone(); + if let 
Some(strategy_level) = + DeepReviewStrategyLevel::from_value(manifest.get("strategyLevel")) + { + policy.strategy_level = strategy_level; + } + + let Some(execution_policy) = manifest.get("executionPolicy").and_then(Value::as_object) + else { + return policy; + }; + + policy.reviewer_timeout_seconds = clamp_u64( + execution_policy.get("reviewerTimeoutSeconds"), + 0, + MAX_TIMEOUT_SECONDS, + policy.reviewer_timeout_seconds, + ); + policy.judge_timeout_seconds = clamp_u64( + execution_policy.get("judgeTimeoutSeconds"), + 0, + MAX_TIMEOUT_SECONDS, + policy.judge_timeout_seconds, + ); + policy.reviewer_file_split_threshold = clamp_usize( + execution_policy.get("reviewerFileSplitThreshold"), + 0, + usize::MAX, + policy.reviewer_file_split_threshold, + ); + policy.max_same_role_instances = clamp_usize( + execution_policy.get("maxSameRoleInstances"), + 1, + MAX_SAME_ROLE_INSTANCES, + policy.max_same_role_instances, + ); + policy.max_retries_per_role = clamp_usize( + execution_policy.get("maxRetriesPerRole"), + 0, + MAX_RETRIES_PER_ROLE, + policy.max_retries_per_role, + ); + + policy + } + + /// Extract the concurrency policy from a run manifest, if present. + pub fn concurrency_policy_from_manifest( + &self, + raw_manifest: &Value, + ) -> DeepReviewConcurrencyPolicy { + raw_manifest + .get("concurrencyPolicy") + .map(DeepReviewConcurrencyPolicy::from_manifest) + .unwrap_or_default() + } + /// Returns true when the file count exceeds the split threshold and /// `max_same_role_instances > 1`, meaning the orchestrator should /// partition the file list across multiple same-role reviewer instances. 
@@ -225,160 +806,1843 @@ impl DeepReviewExecutionPolicy { / self.reviewer_file_split_threshold; needed.clamp(1, self.max_same_role_instances) } -} -pub async fn load_default_deep_review_policy() -> BitFunResult { - let config_service = GlobalConfigManager::get_service().await.map_err(|error| { - BitFunError::config(format!( - "Failed to load DeepReview execution policy because config service is unavailable: {}", - error - )) - })?; + /// Auto-select strategy level based on change risk factors. + /// Returns the recommended level and a human-readable rationale. + pub fn auto_select_strategy( + &self, + risk: &ChangeRiskFactors, + ) -> (DeepReviewStrategyLevel, String) { + let score = risk.file_count + + risk.total_lines_changed / 100 + + risk.files_in_security_paths * 3 + + risk.cross_crate_changes * 2; - let raw_config = match config_service - .get_config::(Some(DEFAULT_REVIEW_TEAM_CONFIG_PATH)) - .await - { - Ok(config) => Some(config), - Err(error) if is_missing_default_review_team_config_error(&error) => { - warn!( - "DeepReview policy config missing at {}, using defaults", - DEFAULT_REVIEW_TEAM_CONFIG_PATH - ); - None - } - Err(error) => { - return Err(BitFunError::config(format!( - "Failed to load DeepReview execution policy from {}: {}", - DEFAULT_REVIEW_TEAM_CONFIG_PATH, error - ))); + match score { + 0..=5 => ( + DeepReviewStrategyLevel::Quick, + format!( + "Small change ({} files, {} lines). Quick scan sufficient.", + risk.file_count, risk.total_lines_changed + ), + ), + 6..=20 => ( + DeepReviewStrategyLevel::Normal, + format!( + "Medium change ({} files, {} lines). Standard review recommended.", + risk.file_count, risk.total_lines_changed + ), + ), + _ => ( + DeepReviewStrategyLevel::Deep, + format!( + "Large/high-risk change ({} files, {} lines, {} security files). 
Deep review recommended.", + risk.file_count, risk.total_lines_changed, risk.files_in_security_paths + ), + ), } - }; + } +} - Ok(DeepReviewExecutionPolicy::from_config_value( - raw_config.as_ref(), - )) +/// Dynamic concurrency control for deep review reviewer launches. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DeepReviewConcurrencyPolicy { + /// Maximum parallel reviewer instances at once. + pub max_parallel_instances: usize, + /// Whether to stagger launches (wait N seconds between batches). + pub stagger_seconds: u64, + /// Maximum time an over-cap reviewer launch can wait before being skipped. + pub max_queue_wait_seconds: u64, + /// Whether to batch extras separately from core reviewers. + pub batch_extras_separately: bool, } -pub fn is_missing_default_review_team_config_error(error: &BitFunError) -> bool { - matches!(error, BitFunError::NotFound(message) - if message == &format!("Config path '{}' not found", DEFAULT_REVIEW_TEAM_CONFIG_PATH)) +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DeepReviewEffectiveConcurrencySnapshot { + pub configured_max_parallel_instances: usize, + pub learned_parallel_instances: usize, + pub effective_parallel_instances: usize, + pub user_override_parallel_instances: Option, + pub retry_after_remaining_ms: Option, } -fn normalize_extra_subagent_ids(raw: Option<&Value>) -> Vec { - let Some(values) = raw.and_then(Value::as_array) else { - return Vec::new(); - }; +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DeepReviewRuntimeDiagnostics { + pub queue_wait_count: usize, + pub queue_wait_total_ms: u64, + pub queue_wait_max_ms: u64, + pub provider_capacity_queue_count: usize, + pub provider_capacity_retry_count: usize, + pub provider_capacity_retry_success_count: usize, + pub capacity_skip_count: usize, + pub effective_parallel_min: Option, + pub effective_parallel_final: Option, + pub manual_queue_action_count: usize, + pub manual_retry_count: usize, + pub 
auto_retry_count: usize, + pub auto_retry_suppressed_reason_counts: BTreeMap, + pub shared_context_total_calls: usize, + pub shared_context_duplicate_calls: usize, + pub shared_context_duplicate_context_count: usize, +} - let disallowed = disallowed_extra_subagent_ids(); - let mut seen = HashSet::new(); - let mut normalized = Vec::new(); +impl DeepReviewRuntimeDiagnostics { + fn is_empty(&self) -> bool { + self.queue_wait_count == 0 + && self.queue_wait_total_ms == 0 + && self.queue_wait_max_ms == 0 + && self.provider_capacity_queue_count == 0 + && self.provider_capacity_retry_count == 0 + && self.provider_capacity_retry_success_count == 0 + && self.capacity_skip_count == 0 + && self.effective_parallel_min.is_none() + && self.effective_parallel_final.is_none() + && self.manual_queue_action_count == 0 + && self.manual_retry_count == 0 + && self.auto_retry_count == 0 + && self.auto_retry_suppressed_reason_counts.is_empty() + && self.shared_context_total_calls == 0 + && self.shared_context_duplicate_calls == 0 + && self.shared_context_duplicate_context_count == 0 + } - for value in values { - let Some(id) = value_to_id(value) else { - continue; - }; - if id.is_empty() || disallowed.contains(id.as_str()) || !seen.insert(id.clone()) { - continue; - } - normalized.push(id); + fn observe_effective_parallel(&mut self, effective_parallel_instances: usize) { + self.effective_parallel_min = Some( + self.effective_parallel_min + .map_or(effective_parallel_instances, |current| { + current.min(effective_parallel_instances) + }), + ); + self.effective_parallel_final = Some(effective_parallel_instances); } - normalized + fn merge_shared_context(&mut self, snapshot: DeepReviewSharedContextMeasurementSnapshot) { + self.shared_context_total_calls = snapshot.total_calls; + self.shared_context_duplicate_calls = snapshot.duplicate_calls; + self.shared_context_duplicate_context_count = snapshot.duplicate_context_count; + } } -fn normalize_member_strategy_overrides( - raw: Option<&Value>, 
-) -> HashMap { - let Some(values) = raw.and_then(Value::as_object) else { - return HashMap::new(); - }; +#[derive(Debug, Clone)] +struct DeepReviewEffectiveConcurrencyState { + configured_max_parallel_instances: usize, + learned_parallel_instances: usize, + user_override_parallel_instances: Option, + successful_observation_count: usize, + retry_after_until: Option, +} - let mut normalized = HashMap::new(); - for (subagent_id, value) in values { - let id = subagent_id.trim(); - let Some(strategy_level) = DeepReviewStrategyLevel::from_value(Some(value)) else { - continue; - }; - if !id.is_empty() { - normalized.insert(id.to_string(), strategy_level); +impl DeepReviewEffectiveConcurrencyState { + fn new(configured_max_parallel_instances: usize) -> Self { + let configured_max_parallel_instances = + Self::normalize_configured_max(configured_max_parallel_instances); + Self { + configured_max_parallel_instances, + learned_parallel_instances: configured_max_parallel_instances, + user_override_parallel_instances: None, + successful_observation_count: 0, + retry_after_until: None, } } - normalized -} + fn normalize_configured_max(configured_max_parallel_instances: usize) -> usize { + configured_max_parallel_instances.max(1) + } -fn disallowed_extra_subagent_ids() -> HashSet<&'static str> { - CORE_REVIEWER_AGENT_TYPES - .into_iter() - .chain([ - REVIEW_JUDGE_AGENT_TYPE, - DEEP_REVIEW_AGENT_TYPE, - REVIEW_FIXER_AGENT_TYPE, - ]) - .collect() -} + fn rebase_configured_max(&mut self, configured_max_parallel_instances: usize) { + let configured_max_parallel_instances = + Self::normalize_configured_max(configured_max_parallel_instances); + if self.configured_max_parallel_instances == configured_max_parallel_instances { + return; + } -fn value_to_id(value: &Value) -> Option { - match value { - Value::String(s) => Some(s.trim().to_string()), - _ => None, + self.configured_max_parallel_instances = configured_max_parallel_instances; + self.learned_parallel_instances = self + 
.learned_parallel_instances + .clamp(1, configured_max_parallel_instances); + self.user_override_parallel_instances = self + .user_override_parallel_instances + .map(|value| value.clamp(1, configured_max_parallel_instances)); } -} -fn clamp_u64(raw: Option<&Value>, min: u64, max: u64, fallback: u64) -> u64 { - let Some(value) = raw.and_then(number_as_i64) else { - return fallback; - }; + fn effective_parallel_instances(&self, now: Instant) -> usize { + if let Some(user_override) = self.user_override_parallel_instances { + return user_override.clamp(1, self.configured_max_parallel_instances); + } - let min_i64 = i64::try_from(min).unwrap_or(i64::MAX); - let max_i64 = i64::try_from(max).unwrap_or(i64::MAX); - value.clamp(min_i64, max_i64) as u64 -} + if self + .retry_after_until + .is_some_and(|retry_after_until| retry_after_until > now) + { + return 1; + } -fn clamp_usize(raw: Option<&Value>, min: usize, max: usize, fallback: usize) -> usize { - let Some(value) = raw.and_then(number_as_i64) else { - return fallback; - }; + self.learned_parallel_instances + .clamp(1, self.configured_max_parallel_instances) + } - let min_i64 = i64::try_from(min).unwrap_or(i64::MAX); - let max_i64 = i64::try_from(max).unwrap_or(i64::MAX); - value.clamp(min_i64, max_i64) as usize -} + fn record_capacity_error( + &mut self, + reason: DeepReviewCapacityQueueReason, + retry_after: Option, + now: Instant, + ) { + self.successful_observation_count = 0; + self.learned_parallel_instances = self.learned_parallel_instances.saturating_sub(1).max(1); -fn number_as_i64(value: &Value) -> Option { - value.as_i64().or_else(|| { - value - .as_u64() - .map(|value| i64::try_from(value).unwrap_or(i64::MAX)) - }) -} + if matches!(reason, DeepReviewCapacityQueueReason::RetryAfter) || retry_after.is_some() { + self.retry_after_until = retry_after.map(|duration| now + duration); + } + } -#[cfg(test)] -mod tests { - use super::{ - is_missing_default_review_team_config_error, DeepReviewExecutionPolicy, - 
DeepReviewStrategyLevel, DeepReviewSubagentRole, REVIEW_FIXER_AGENT_TYPE, - }; - use crate::util::errors::BitFunError; - use serde_json::json; + fn record_success(&mut self, now: Instant) { + if self + .retry_after_until + .is_some_and(|retry_after_until| retry_after_until > now) + { + return; + } + if self + .retry_after_until + .is_some_and(|retry_after_until| retry_after_until <= now) + { + self.retry_after_until = None; + } - #[test] - fn only_missing_default_review_team_path_can_fallback_to_defaults() { - assert!(is_missing_default_review_team_config_error( - &BitFunError::NotFound("Config path 'ai.review_teams.default' not found".to_string()) - )); - assert!(!is_missing_default_review_team_config_error( - &BitFunError::config("Config service unavailable") - )); - assert!(!is_missing_default_review_team_config_error( - &BitFunError::config("Config path 'ai.review_teams.default.extra' not found") - )); + if self.learned_parallel_instances >= self.configured_max_parallel_instances { + self.successful_observation_count = 0; + return; + } + + self.successful_observation_count = self.successful_observation_count.saturating_add(1); + if self.successful_observation_count >= EFFECTIVE_CONCURRENCY_RECOVERY_SUCCESS_WINDOW { + self.learned_parallel_instances = + (self.learned_parallel_instances + 1).min(self.configured_max_parallel_instances); + self.successful_observation_count = 0; + } } - #[test] - fn default_policy_is_read_only_with_normal_strategy() { - let policy = DeepReviewExecutionPolicy::default(); + fn set_user_override(&mut self, user_override_parallel_instances: Option) { + self.user_override_parallel_instances = user_override_parallel_instances + .map(|value| value.clamp(1, self.configured_max_parallel_instances)); + } + + fn snapshot(&self, now: Instant) -> DeepReviewEffectiveConcurrencySnapshot { + let retry_after_remaining_ms = + self.retry_after_until + .and_then(|retry_after_until| match retry_after_until > now { + true => Some( + 
u64::try_from(retry_after_until.duration_since(now).as_millis()) + .unwrap_or(u64::MAX), + ), + false => None, + }); + + DeepReviewEffectiveConcurrencySnapshot { + configured_max_parallel_instances: self.configured_max_parallel_instances, + learned_parallel_instances: self + .learned_parallel_instances + .clamp(1, self.configured_max_parallel_instances), + effective_parallel_instances: self.effective_parallel_instances(now), + user_override_parallel_instances: self.user_override_parallel_instances, + retry_after_remaining_ms, + } + } +} + +impl Default for DeepReviewConcurrencyPolicy { + fn default() -> Self { + Self { + max_parallel_instances: DEFAULT_MAX_PARALLEL_INSTANCES, + stagger_seconds: 0, + max_queue_wait_seconds: DEFAULT_MAX_QUEUE_WAIT_SECONDS, + batch_extras_separately: true, + } + } +} + +impl DeepReviewConcurrencyPolicy { + pub fn from_manifest(raw: &Value) -> Self { + let Some(obj) = raw.as_object() else { + return Self::default(); + }; + + Self { + max_parallel_instances: clamp_usize( + obj.get("maxParallelInstances"), + 1, + 16, + DEFAULT_MAX_PARALLEL_INSTANCES, + ), + stagger_seconds: clamp_u64(obj.get("staggerSeconds"), 0, 60, 0), + max_queue_wait_seconds: clamp_u64( + obj.get("maxQueueWaitSeconds"), + 0, + MAX_QUEUE_WAIT_SECONDS, + DEFAULT_MAX_QUEUE_WAIT_SECONDS, + ), + batch_extras_separately: obj + .get("batchExtrasSeparately") + .and_then(Value::as_bool) + .unwrap_or(true), + } + } + + /// Compute the effective max same-role instances, capped by both + /// the execution policy's `max_same_role_instances` and the + /// concurrency policy's `max_parallel_instances / role_count`. 
+ pub fn effective_max_same_role_instances(&self, policy: &DeepReviewExecutionPolicy) -> usize { + let role_count = reviewer_agent_type_count() + policy.extra_subagent_ids.len(); + let max_per_role = self.max_parallel_instances / role_count.max(1); + max_per_role.max(1).min(policy.max_same_role_instances) + } + + /// Check whether the current number of active launches exceeds the cap. + /// Returns `Ok(())` if the launch is allowed, or an error describing why not. + pub fn check_launch_allowed( + &self, + active_count: usize, + role: DeepReviewSubagentRole, + is_judge_pending: bool, + ) -> Result<(), DeepReviewPolicyViolation> { + match role { + DeepReviewSubagentRole::Reviewer => { + if active_count >= self.max_parallel_instances { + return Err(DeepReviewPolicyViolation::new( + "deep_review_concurrency_cap_reached", + format!( + "Maximum parallel reviewer instances reached ({}/{}). Wait for running reviewers to complete before launching more.", + active_count, self.max_parallel_instances + ), + )); + } + } + DeepReviewSubagentRole::Judge => { + if active_count > 0 { + return Err(DeepReviewPolicyViolation::new( + "deep_review_judge_launch_blocked_by_reviewers", + format!( + "ReviewJudge cannot launch while {} reviewer(s) are still active. 
Wait for reviewers to complete first.", + active_count + ), + )); + } + if is_judge_pending { + return Err(DeepReviewPolicyViolation::new( + "deep_review_judge_already_pending", + "ReviewJudge is already pending or running in this turn.", + )); + } + } + } + Ok(()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum DeepReviewCapacityQueueReason { + ProviderRateLimit, + ProviderConcurrencyLimit, + RetryAfter, + LocalConcurrencyCap, + TemporaryOverload, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DeepReviewCapacityQueueDecision { + pub queueable: bool, + pub reason: Option, + pub retry_after_seconds: Option, +} + +impl DeepReviewCapacityQueueDecision { + fn queueable(reason: DeepReviewCapacityQueueReason, retry_after_seconds: Option) -> Self { + Self { + queueable: true, + reason: Some(reason), + retry_after_seconds, + } + } + + fn fail_fast() -> Self { + Self { + queueable: false, + reason: None, + retry_after_seconds: None, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum DeepReviewReviewerQueueStatus { + QueuedForCapacity, + PausedByUser, + Running, + CapacitySkipped, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DeepReviewReviewerQueueState { + pub status: DeepReviewReviewerQueueStatus, + pub reason: Option, + pub queue_elapsed_ms: u64, + pub run_elapsed_ms: u64, +} + +impl DeepReviewReviewerQueueState { + pub fn queued_for_capacity( + reason: DeepReviewCapacityQueueReason, + queue_elapsed_ms: u64, + ) -> Self { + Self { + status: DeepReviewReviewerQueueStatus::QueuedForCapacity, + reason: Some(reason), + queue_elapsed_ms, + run_elapsed_ms: 0, + } + } + + pub fn paused_by_user(queue_elapsed_ms: u64) -> Self { + Self { + status: DeepReviewReviewerQueueStatus::PausedByUser, + reason: None, + queue_elapsed_ms, + 
run_elapsed_ms: 0, + } + } + + pub fn running(queue_elapsed_ms: u64, run_elapsed_ms: u64) -> Self { + Self { + status: DeepReviewReviewerQueueStatus::Running, + reason: None, + queue_elapsed_ms, + run_elapsed_ms, + } + } + + pub fn capacity_skipped(reason: DeepReviewCapacityQueueReason, queue_elapsed_ms: u64) -> Self { + Self { + status: DeepReviewReviewerQueueStatus::CapacitySkipped, + reason: Some(reason), + queue_elapsed_ms, + run_elapsed_ms: 0, + } + } + + pub fn timeout_elapsed_ms(&self) -> u64 { + self.run_elapsed_ms + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum DeepReviewQueueControlAction { + Pause, + Continue, + Cancel, + SkipOptional, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DeepReviewQueueControlSnapshot { + pub paused: bool, + pub cancelled: bool, + pub skip_optional: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct DeepReviewQueueControlKey { + parent_dialog_turn_id: String, + tool_id: String, +} + +impl DeepReviewQueueControlKey { + fn new(parent_dialog_turn_id: &str, tool_id: &str) -> Option { + let parent_dialog_turn_id = parent_dialog_turn_id.trim(); + let tool_id = tool_id.trim(); + if parent_dialog_turn_id.is_empty() || tool_id.is_empty() { + return None; + } + + Some(Self { + parent_dialog_turn_id: parent_dialog_turn_id.to_string(), + tool_id: tool_id.to_string(), + }) + } +} + +#[derive(Default)] +struct DeepReviewQueueControlTracker { + paused_tools: DashMap, + cancelled_tools: DashMap, + skip_optional_turns: DashMap, +} + +impl DeepReviewQueueControlTracker { + fn apply( + &self, + parent_dialog_turn_id: &str, + tool_id: &str, + action: DeepReviewQueueControlAction, + ) -> DeepReviewQueueControlSnapshot { + let now = Instant::now(); + let Some(key) = DeepReviewQueueControlKey::new(parent_dialog_turn_id, tool_id) else { + return DeepReviewQueueControlSnapshot { + paused: false, + cancelled: false, + 
skip_optional: false, + }; + }; + + match action { + DeepReviewQueueControlAction::Pause => { + self.paused_tools.insert(key.clone(), now); + } + DeepReviewQueueControlAction::Continue => { + self.paused_tools.remove(&key); + } + DeepReviewQueueControlAction::Cancel => { + self.cancelled_tools.insert(key.clone(), now); + self.paused_tools.remove(&key); + } + DeepReviewQueueControlAction::SkipOptional => { + self.skip_optional_turns + .insert(key.parent_dialog_turn_id.clone(), now); + } + } + + self.snapshot(parent_dialog_turn_id, tool_id) + } + + fn snapshot( + &self, + parent_dialog_turn_id: &str, + tool_id: &str, + ) -> DeepReviewQueueControlSnapshot { + let Some(key) = DeepReviewQueueControlKey::new(parent_dialog_turn_id, tool_id) else { + return DeepReviewQueueControlSnapshot { + paused: false, + cancelled: false, + skip_optional: false, + }; + }; + let skip_optional = self + .skip_optional_turns + .contains_key(&key.parent_dialog_turn_id); + + DeepReviewQueueControlSnapshot { + paused: self.paused_tools.contains_key(&key), + cancelled: self.cancelled_tools.contains_key(&key), + skip_optional, + } + } + + fn clear_tool(&self, parent_dialog_turn_id: &str, tool_id: &str) { + if let Some(key) = DeepReviewQueueControlKey::new(parent_dialog_turn_id, tool_id) { + self.paused_tools.remove(&key); + self.cancelled_tools.remove(&key); + } + } +} + +pub fn classify_deep_review_capacity_error( + code: &str, + message: &str, + retry_after_seconds: Option, +) -> DeepReviewCapacityQueueDecision { + let code = code.trim().to_ascii_lowercase(); + let message = message.trim().to_ascii_lowercase(); + let combined = format!("{code} {message}"); + + if contains_any( + &combined, + &[ + "auth", + "api key", + "unauthorized", + "permission", + "quota", + "billing", + "exhausted", + "invalid_model", + "invalid model", + "model does not exist", + "user_cancel", + "cancelled", + "canceled", + "invalid_tooling", + "subagent_not_allowed", + "not allowed", + "policy", + "validation", + ], 
+ ) { + return DeepReviewCapacityQueueDecision::fail_fast(); + } + + if code == "deep_review_concurrency_cap_reached" { + return DeepReviewCapacityQueueDecision::queueable( + DeepReviewCapacityQueueReason::LocalConcurrencyCap, + retry_after_seconds, + ); + } + + if retry_after_seconds.is_some() { + return DeepReviewCapacityQueueDecision::queueable( + DeepReviewCapacityQueueReason::RetryAfter, + retry_after_seconds, + ); + } + + if contains_any(&combined, &["rate limit", "rate_limit", "429"]) { + return DeepReviewCapacityQueueDecision::queueable( + DeepReviewCapacityQueueReason::ProviderRateLimit, + retry_after_seconds, + ); + } + + if contains_any( + &combined, + &[ + "too many concurrent", + "concurrency limit", + "parallel request", + "concurrent requests", + "max concurrent", + ], + ) { + return DeepReviewCapacityQueueDecision::queueable( + DeepReviewCapacityQueueReason::ProviderConcurrencyLimit, + retry_after_seconds, + ); + } + + if contains_any( + &combined, + &[ + "temporarily overloaded", + "temporary overload", + "overloaded", + "capacity", + "try again later", + "retry later", + ], + ) { + return DeepReviewCapacityQueueDecision::queueable( + DeepReviewCapacityQueueReason::TemporaryOverload, + retry_after_seconds, + ); + } + + DeepReviewCapacityQueueDecision::fail_fast() +} + +fn contains_any(value: &str, needles: &[&str]) -> bool { + needles.iter().any(|needle| value.contains(needle)) +} + +#[derive(Debug)] +struct DeepReviewTurnBudget { + judge_calls: usize, + /// Tracks total reviewer calls (across all roles) per turn. + /// Capped by `max_same_role_instances * reviewer_agent_type_count() + + /// extra_subagent_ids.len()` so the orchestrator cannot spawn an unbounded + /// number of same-role instances. 
+ reviewer_calls: usize, + reviewer_calls_by_subagent: HashMap, + retries_used_by_subagent: HashMap, + active_reviewers: usize, + concurrency_cap_rejections: usize, + capacity_skips: usize, + shared_context_uses: HashMap, + effective_concurrency: Option, + runtime_diagnostics: DeepReviewRuntimeDiagnostics, + updated_at: Instant, +} + +impl DeepReviewTurnBudget { + fn new(now: Instant) -> Self { + Self { + judge_calls: 0, + reviewer_calls: 0, + reviewer_calls_by_subagent: HashMap::new(), + retries_used_by_subagent: HashMap::new(), + active_reviewers: 0, + concurrency_cap_rejections: 0, + capacity_skips: 0, + shared_context_uses: HashMap::new(), + effective_concurrency: None, + runtime_diagnostics: DeepReviewRuntimeDiagnostics::default(), + updated_at: now, + } + } + + fn effective_concurrency_mut( + &mut self, + configured_max_parallel_instances: usize, + ) -> &mut DeepReviewEffectiveConcurrencyState { + let state = self.effective_concurrency.get_or_insert_with(|| { + DeepReviewEffectiveConcurrencyState::new(configured_max_parallel_instances) + }); + state.rebase_configured_max(configured_max_parallel_instances); + state + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DeepReviewSharedContextDuplicate { + pub tool_name: String, + pub file_path: String, + pub call_count: usize, + pub reviewer_count: usize, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DeepReviewSharedContextMeasurementSnapshot { + pub total_calls: usize, + pub duplicate_calls: usize, + pub duplicate_context_count: usize, + pub repeated_contexts: Vec, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct DeepReviewSharedContextKey { + tool_name: String, + file_path: String, +} + +#[derive(Debug, Clone, Default)] +struct DeepReviewSharedContextUseRecord { + call_count: usize, + reviewer_types: HashSet, +} + +pub struct DeepReviewActiveReviewerGuard<'a> { + tracker: &'a 
DeepReviewBudgetTracker, + parent_dialog_turn_id: String, + released: bool, +} + +impl Drop for DeepReviewActiveReviewerGuard<'_> { + fn drop(&mut self) { + if !self.released { + self.tracker + .finish_active_reviewer(&self.parent_dialog_turn_id); + self.released = true; + } + } +} + +pub struct DeepReviewBudgetTracker { + turns: DashMap, + last_pruned_at: std::sync::Mutex, +} + +impl Default for DeepReviewBudgetTracker { + fn default() -> Self { + Self { + turns: DashMap::new(), + last_pruned_at: std::sync::Mutex::new(Instant::now()), + } + } +} + +impl DeepReviewBudgetTracker { + fn update_runtime_diagnostics( + &self, + parent_dialog_turn_id: &str, + update: impl FnOnce(&mut DeepReviewRuntimeDiagnostics), + ) { + if parent_dialog_turn_id.trim().is_empty() { + return; + } + + let now = Instant::now(); + if let Ok(last_pruned) = self.last_pruned_at.lock() { + if now.saturating_duration_since(*last_pruned) >= PRUNE_INTERVAL { + drop(last_pruned); + self.prune_stale(now); + } + } + + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + update(&mut budget.runtime_diagnostics); + budget.updated_at = now; + } + + pub fn record_runtime_queue_wait(&self, parent_dialog_turn_id: &str, queue_elapsed_ms: u64) { + if queue_elapsed_ms == 0 { + return; + } + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + diagnostics.queue_wait_count = diagnostics.queue_wait_count.saturating_add(1); + diagnostics.queue_wait_total_ms = diagnostics + .queue_wait_total_ms + .saturating_add(queue_elapsed_ms); + diagnostics.queue_wait_max_ms = diagnostics.queue_wait_max_ms.max(queue_elapsed_ms); + }); + } + + pub fn record_runtime_provider_capacity_queue(&self, parent_dialog_turn_id: &str) { + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + diagnostics.provider_capacity_queue_count = + diagnostics.provider_capacity_queue_count.saturating_add(1); + }); + } + + pub fn 
record_runtime_provider_capacity_retry(&self, parent_dialog_turn_id: &str) { + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + diagnostics.provider_capacity_retry_count = + diagnostics.provider_capacity_retry_count.saturating_add(1); + }); + } + + pub fn record_runtime_provider_capacity_retry_success(&self, parent_dialog_turn_id: &str) { + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + diagnostics.provider_capacity_retry_success_count = diagnostics + .provider_capacity_retry_success_count + .saturating_add(1); + }); + } + + pub fn record_runtime_capacity_skip( + &self, + parent_dialog_turn_id: &str, + _reason: DeepReviewCapacityQueueReason, + ) { + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + diagnostics.capacity_skip_count = diagnostics.capacity_skip_count.saturating_add(1); + }); + } + + pub fn record_runtime_manual_queue_action(&self, parent_dialog_turn_id: &str) { + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + diagnostics.manual_queue_action_count = + diagnostics.manual_queue_action_count.saturating_add(1); + }); + } + + pub fn record_runtime_manual_retry(&self, parent_dialog_turn_id: &str) { + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + diagnostics.manual_retry_count = diagnostics.manual_retry_count.saturating_add(1); + }); + } + + pub fn record_runtime_auto_retry(&self, parent_dialog_turn_id: &str) { + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + diagnostics.auto_retry_count = diagnostics.auto_retry_count.saturating_add(1); + }); + } + + pub fn record_runtime_auto_retry_suppressed(&self, parent_dialog_turn_id: &str, reason: &str) { + let reason = reason.trim(); + if reason.is_empty() { + return; + } + self.update_runtime_diagnostics(parent_dialog_turn_id, |diagnostics| { + *diagnostics + .auto_retry_suppressed_reason_counts + .entry(reason.to_string()) + .or_insert(0) += 1; + }); + } + + pub fn 
runtime_diagnostics_snapshot( + &self, + parent_dialog_turn_id: &str, + ) -> Option { + let budget = self.turns.get(parent_dialog_turn_id)?; + let mut diagnostics = budget.runtime_diagnostics.clone(); + diagnostics.merge_shared_context(shared_context_measurement_snapshot_from_uses( + &budget.shared_context_uses, + )); + (!diagnostics.is_empty()).then_some(diagnostics) + } + + pub fn record_shared_context_tool_use( + &self, + parent_dialog_turn_id: &str, + subagent_type: &str, + tool_name: &str, + file_path: &str, + ) -> DeepReviewSharedContextMeasurementSnapshot { + if parent_dialog_turn_id.trim().is_empty() { + return DeepReviewSharedContextMeasurementSnapshot::default(); + } + let Some(tool_name) = normalize_shared_context_tool_name(tool_name) else { + return self.shared_context_measurement_snapshot(parent_dialog_turn_id); + }; + let Some(file_path) = normalize_shared_context_file_path(file_path) else { + return self.shared_context_measurement_snapshot(parent_dialog_turn_id); + }; + + let now = Instant::now(); + if let Ok(last_pruned) = self.last_pruned_at.lock() { + if now.saturating_duration_since(*last_pruned) >= PRUNE_INTERVAL { + drop(last_pruned); + self.prune_stale(now); + } + } + + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + let record = budget + .shared_context_uses + .entry(DeepReviewSharedContextKey { + tool_name: tool_name.to_string(), + file_path, + }) + .or_default(); + record.call_count = record.call_count.saturating_add(1); + if !subagent_type.trim().is_empty() { + record + .reviewer_types + .insert(subagent_type.trim().to_string()); + } + budget.updated_at = now; + + shared_context_measurement_snapshot_from_uses(&budget.shared_context_uses) + } + + pub fn shared_context_measurement_snapshot( + &self, + parent_dialog_turn_id: &str, + ) -> DeepReviewSharedContextMeasurementSnapshot { + self.turns + .get(parent_dialog_turn_id) + .map(|budget| { + 
shared_context_measurement_snapshot_from_uses(&budget.shared_context_uses) + }) + .unwrap_or_default() + } + + pub fn record_task( + &self, + parent_dialog_turn_id: &str, + policy: &DeepReviewExecutionPolicy, + role: DeepReviewSubagentRole, + subagent_type: &str, + is_retry: bool, + ) -> Result<(), DeepReviewPolicyViolation> { + let now = Instant::now(); + if let Ok(last_pruned) = self.last_pruned_at.lock() { + if now.saturating_duration_since(*last_pruned) >= PRUNE_INTERVAL { + drop(last_pruned); + self.prune_stale(now); + } + } + + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + + match role { + DeepReviewSubagentRole::Reviewer => { + let subagent_type = normalize_budget_subagent_type(subagent_type)?; + if is_retry { + if policy.max_retries_per_role == 0 { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_budget_exhausted", + format!( + "Retry budget is disabled for DeepReview reviewer '{}'", + subagent_type + ), + )); + } + if !budget + .reviewer_calls_by_subagent + .contains_key(subagent_type.as_str()) + { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_without_initial_attempt", + format!( + "Cannot retry DeepReview reviewer '{}' before an initial attempt in this turn", + subagent_type + ), + )); + } + let retry_count = budget + .retries_used_by_subagent + .entry(subagent_type.clone()) + .or_insert(0); + if *retry_count >= policy.max_retries_per_role { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_budget_exhausted", + format!( + "Retry budget exhausted for DeepReview reviewer '{}' (max retries: {})", + subagent_type, policy.max_retries_per_role + ), + )); + } + *retry_count += 1; + budget.updated_at = now; + return Ok(()); + } + + let max_reviewer_calls = policy.max_same_role_instances + * (reviewer_agent_type_count() + policy.extra_subagent_ids.len()); + if budget.reviewer_calls >= max_reviewer_calls { + return 
Err(DeepReviewPolicyViolation::new( + "deep_review_reviewer_budget_exhausted", + format!( + "Reviewer launch budget exhausted for this DeepReview turn (max calls: {})", + max_reviewer_calls + ), + )); + } + budget.reviewer_calls += 1; + *budget + .reviewer_calls_by_subagent + .entry(subagent_type) + .or_insert(0) += 1; + } + DeepReviewSubagentRole::Judge => { + if is_retry { + return Err(DeepReviewPolicyViolation::new( + "deep_review_judge_retry_disallowed", + "ReviewJudge retry is not covered by the reviewer retry budget", + )); + } + let max_judge_calls = 1; + if budget.judge_calls >= max_judge_calls { + return Err(DeepReviewPolicyViolation::new( + "deep_review_judge_budget_exhausted", + format!( + "ReviewJudge launch budget exhausted for this DeepReview turn (max calls: {})", + max_judge_calls + ), + )); + } + + budget.judge_calls += 1; + } + } + + budget.updated_at = now; + Ok(()) + } + + pub fn record_concurrency_cap_rejection(&self, parent_dialog_turn_id: &str) { + if parent_dialog_turn_id.trim().is_empty() { + return; + } + + let now = Instant::now(); + if let Ok(last_pruned) = self.last_pruned_at.lock() { + if now.saturating_duration_since(*last_pruned) >= PRUNE_INTERVAL { + drop(last_pruned); + self.prune_stale(now); + } + } + + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + budget.concurrency_cap_rejections += 1; + budget.updated_at = now; + } + + pub fn record_capacity_skip(&self, parent_dialog_turn_id: &str) { + if parent_dialog_turn_id.trim().is_empty() { + return; + } + + let now = Instant::now(); + if let Ok(last_pruned) = self.last_pruned_at.lock() { + if now.saturating_duration_since(*last_pruned) >= PRUNE_INTERVAL { + drop(last_pruned); + self.prune_stale(now); + } + } + + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + budget.capacity_skips += 1; + 
budget.runtime_diagnostics.capacity_skip_count = budget + .runtime_diagnostics + .capacity_skip_count + .saturating_add(1); + budget.updated_at = now; + } + + pub fn begin_active_reviewer<'a>( + &'a self, + parent_dialog_turn_id: &str, + ) -> DeepReviewActiveReviewerGuard<'a> { + let now = Instant::now(); + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + budget.active_reviewers = budget.active_reviewers.saturating_add(1); + budget.updated_at = now; + + DeepReviewActiveReviewerGuard { + tracker: self, + parent_dialog_turn_id: parent_dialog_turn_id.to_string(), + released: false, + } + } + + pub fn try_begin_active_reviewer<'a>( + &'a self, + parent_dialog_turn_id: &str, + max_active_reviewers: usize, + ) -> Option> { + let now = Instant::now(); + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + if budget.active_reviewers >= max_active_reviewers { + return None; + } + + budget.active_reviewers = budget.active_reviewers.saturating_add(1); + budget.updated_at = now; + Some(DeepReviewActiveReviewerGuard { + tracker: self, + parent_dialog_turn_id: parent_dialog_turn_id.to_string(), + released: false, + }) + } + + fn finish_active_reviewer(&self, parent_dialog_turn_id: &str) { + if let Some(mut budget) = self.turns.get_mut(parent_dialog_turn_id) { + budget.active_reviewers = budget.active_reviewers.saturating_sub(1); + budget.updated_at = Instant::now(); + } + } + + fn prune_stale(&self, now: Instant) { + self.turns + .retain(|_, budget| now.saturating_duration_since(budget.updated_at) <= BUDGET_TTL); + if let Ok(mut last_pruned) = self.last_pruned_at.lock() { + *last_pruned = now; + } + } + + /// Explicitly clean up all budget tracking data. + /// Call this when the application is shutting down or when the review session ends. 
+ pub fn cleanup(&self) { + self.turns.clear(); + if let Ok(mut last_pruned) = self.last_pruned_at.lock() { + *last_pruned = Instant::now(); + } + } + + /// Returns the number of reviewer calls recorded for a given turn. + /// Used by the concurrency enforcement to check if a new launch is allowed. + pub fn active_reviewer_count(&self, parent_dialog_turn_id: &str) -> usize { + self.turns + .get(parent_dialog_turn_id) + .map(|budget| budget.active_reviewers) + .unwrap_or(0) + } + + /// Returns true if a judge call has been recorded for a given turn. + pub fn has_judge_been_launched(&self, parent_dialog_turn_id: &str) -> bool { + self.turns + .get(parent_dialog_turn_id) + .map(|budget| budget.judge_calls > 0) + .unwrap_or(false) + } + + pub fn concurrency_cap_rejection_count(&self, parent_dialog_turn_id: &str) -> usize { + self.turns + .get(parent_dialog_turn_id) + .map(|budget| budget.concurrency_cap_rejections) + .unwrap_or(0) + } + + pub fn capacity_skip_count(&self, parent_dialog_turn_id: &str) -> usize { + self.turns + .get(parent_dialog_turn_id) + .map(|budget| budget.capacity_skips) + .unwrap_or(0) + } + + pub fn effective_concurrency_snapshot( + &self, + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, + ) -> DeepReviewEffectiveConcurrencySnapshot { + if parent_dialog_turn_id.trim().is_empty() { + return DeepReviewEffectiveConcurrencyState::new(configured_max_parallel_instances) + .snapshot(Instant::now()); + } + + let now = Instant::now(); + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + budget.updated_at = now; + budget + .effective_concurrency_mut(configured_max_parallel_instances) + .snapshot(now) + } + + pub fn effective_parallel_instances( + &self, + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, + ) -> usize { + self.effective_concurrency_snapshot( + parent_dialog_turn_id, + configured_max_parallel_instances, + ) + 
.effective_parallel_instances + } + + pub fn record_effective_concurrency_capacity_error( + &self, + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, + reason: DeepReviewCapacityQueueReason, + retry_after: Option, + ) -> DeepReviewEffectiveConcurrencySnapshot { + if parent_dialog_turn_id.trim().is_empty() { + return DeepReviewEffectiveConcurrencyState::new(configured_max_parallel_instances) + .snapshot(Instant::now()); + } + + let now = Instant::now(); + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + budget.updated_at = now; + let snapshot = { + let state = budget.effective_concurrency_mut(configured_max_parallel_instances); + state.record_capacity_error(reason, retry_after, now); + state.snapshot(now) + }; + budget + .runtime_diagnostics + .observe_effective_parallel(snapshot.effective_parallel_instances); + snapshot + } + + pub fn record_effective_concurrency_success( + &self, + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, + ) -> DeepReviewEffectiveConcurrencySnapshot { + if parent_dialog_turn_id.trim().is_empty() { + return DeepReviewEffectiveConcurrencyState::new(configured_max_parallel_instances) + .snapshot(Instant::now()); + } + + let now = Instant::now(); + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + budget.updated_at = now; + let snapshot = { + let state = budget.effective_concurrency_mut(configured_max_parallel_instances); + state.record_success(now); + state.snapshot(now) + }; + budget + .runtime_diagnostics + .observe_effective_parallel(snapshot.effective_parallel_instances); + snapshot + } + + pub fn set_effective_concurrency_user_override( + &self, + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, + user_override_parallel_instances: Option, + ) -> DeepReviewEffectiveConcurrencySnapshot { + if 
parent_dialog_turn_id.trim().is_empty() { + return DeepReviewEffectiveConcurrencyState::new(configured_max_parallel_instances) + .snapshot(Instant::now()); + } + + let now = Instant::now(); + let mut budget = self + .turns + .entry(parent_dialog_turn_id.to_string()) + .or_insert_with(|| DeepReviewTurnBudget::new(now)); + budget.updated_at = now; + let snapshot = { + let state = budget.effective_concurrency_mut(configured_max_parallel_instances); + state.set_user_override(user_override_parallel_instances); + state.snapshot(now) + }; + budget + .runtime_diagnostics + .observe_effective_parallel(snapshot.effective_parallel_instances); + snapshot + } +} + +fn normalize_shared_context_tool_name(tool_name: &str) -> Option<&'static str> { + let tool_name = tool_name.trim(); + if tool_name.eq_ignore_ascii_case("Read") { + Some("Read") + } else if tool_name.eq_ignore_ascii_case("GetFileDiff") { + Some("GetFileDiff") + } else { + None + } +} + +fn normalize_shared_context_file_path(file_path: &str) -> Option { + let mut file_path = file_path.trim().replace('\\', "/"); + while file_path.starts_with("./") { + file_path = file_path[2..].to_string(); + } + (!file_path.is_empty()).then_some(file_path) +} + +fn shared_context_measurement_snapshot_from_uses( + uses: &HashMap, +) -> DeepReviewSharedContextMeasurementSnapshot { + let total_calls = uses.values().map(|record| record.call_count).sum(); + let duplicate_calls = uses + .values() + .map(|record| record.call_count.saturating_sub(1)) + .sum(); + let mut repeated_contexts: Vec = uses + .iter() + .filter_map(|(key, record)| { + (record.call_count > 1).then(|| DeepReviewSharedContextDuplicate { + tool_name: key.tool_name.clone(), + file_path: key.file_path.clone(), + call_count: record.call_count, + reviewer_count: record.reviewer_types.len(), + }) + }) + .collect(); + repeated_contexts.sort_by(|left, right| { + right + .call_count + .cmp(&left.call_count) + .then_with(|| right.reviewer_count.cmp(&left.reviewer_count)) + 
.then_with(|| left.tool_name.cmp(&right.tool_name)) + .then_with(|| left.file_path.cmp(&right.file_path)) + }); + let duplicate_context_count = repeated_contexts.len(); + + DeepReviewSharedContextMeasurementSnapshot { + total_calls, + duplicate_calls, + duplicate_context_count, + repeated_contexts, + } +} + +static GLOBAL_DEEP_REVIEW_BUDGET_TRACKER: LazyLock = + LazyLock::new(DeepReviewBudgetTracker::default); +static GLOBAL_DEEP_REVIEW_QUEUE_CONTROL_TRACKER: LazyLock = + LazyLock::new(DeepReviewQueueControlTracker::default); + +pub async fn load_default_deep_review_policy() -> BitFunResult { + let config_service = GlobalConfigManager::get_service().await.map_err(|error| { + BitFunError::config(format!( + "Failed to load DeepReview execution policy because config service is unavailable: {}", + error + )) + })?; + + let raw_config = match config_service + .get_config::(Some(DEFAULT_REVIEW_TEAM_CONFIG_PATH)) + .await + { + Ok(config) => Some(config), + Err(error) if is_missing_default_review_team_config_error(&error) => { + warn!( + "DeepReview policy config missing at {}, using defaults", + DEFAULT_REVIEW_TEAM_CONFIG_PATH + ); + None + } + Err(error) => { + return Err(BitFunError::config(format!( + "Failed to load DeepReview execution policy from {}: {}", + DEFAULT_REVIEW_TEAM_CONFIG_PATH, error + ))); + } + }; + + Ok(DeepReviewExecutionPolicy::from_config_value( + raw_config.as_ref(), + )) +} + +pub fn is_missing_default_review_team_config_error(error: &BitFunError) -> bool { + matches!(error, BitFunError::NotFound(message) + if message == &format!("Config path '{}' not found", DEFAULT_REVIEW_TEAM_CONFIG_PATH)) +} + +pub fn record_deep_review_task_budget( + parent_dialog_turn_id: &str, + policy: &DeepReviewExecutionPolicy, + role: DeepReviewSubagentRole, + subagent_type: &str, + is_retry: bool, +) -> Result<(), DeepReviewPolicyViolation> { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_task( + parent_dialog_turn_id, + policy, + role, + subagent_type, + is_retry, + ) +} 
+ +pub fn record_deep_review_concurrency_cap_rejection(parent_dialog_turn_id: &str) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_concurrency_cap_rejection(parent_dialog_turn_id) +} + +pub fn record_deep_review_capacity_skip(parent_dialog_turn_id: &str) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_capacity_skip(parent_dialog_turn_id) +} + +pub fn record_deep_review_runtime_queue_wait(parent_dialog_turn_id: &str, queue_elapsed_ms: u64) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER + .record_runtime_queue_wait(parent_dialog_turn_id, queue_elapsed_ms) +} + +pub fn record_deep_review_runtime_provider_capacity_queue(parent_dialog_turn_id: &str) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_runtime_provider_capacity_queue(parent_dialog_turn_id) +} + +pub fn record_deep_review_runtime_provider_capacity_retry(parent_dialog_turn_id: &str) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_runtime_provider_capacity_retry(parent_dialog_turn_id) +} + +pub fn record_deep_review_runtime_provider_capacity_retry_success(parent_dialog_turn_id: &str) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER + .record_runtime_provider_capacity_retry_success(parent_dialog_turn_id) +} + +pub fn record_deep_review_runtime_capacity_skip( + parent_dialog_turn_id: &str, + reason: DeepReviewCapacityQueueReason, +) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_runtime_capacity_skip(parent_dialog_turn_id, reason) +} + +pub fn record_deep_review_runtime_manual_queue_action(parent_dialog_turn_id: &str) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_runtime_manual_queue_action(parent_dialog_turn_id) +} + +pub fn record_deep_review_runtime_manual_retry(parent_dialog_turn_id: &str) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_runtime_manual_retry(parent_dialog_turn_id) +} + +pub fn record_deep_review_runtime_auto_retry(parent_dialog_turn_id: &str) { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_runtime_auto_retry(parent_dialog_turn_id) +} + +pub fn record_deep_review_runtime_auto_retry_suppressed(parent_dialog_turn_id: &str, reason: &str) 
{ + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER + .record_runtime_auto_retry_suppressed(parent_dialog_turn_id, reason) +} + +pub fn record_deep_review_shared_context_tool_use( + parent_dialog_turn_id: &str, + subagent_type: &str, + tool_name: &str, + file_path: &str, +) -> DeepReviewSharedContextMeasurementSnapshot { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_shared_context_tool_use( + parent_dialog_turn_id, + subagent_type, + tool_name, + file_path, + ) +} + +pub fn deep_review_shared_context_measurement_snapshot( + parent_dialog_turn_id: &str, +) -> DeepReviewSharedContextMeasurementSnapshot { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.shared_context_measurement_snapshot(parent_dialog_turn_id) +} + +pub fn deep_review_runtime_diagnostics_snapshot( + parent_dialog_turn_id: &str, +) -> Option { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.runtime_diagnostics_snapshot(parent_dialog_turn_id) +} + +pub fn try_begin_deep_review_active_reviewer( + parent_dialog_turn_id: &str, + max_active_reviewers: usize, +) -> Option> { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER + .try_begin_active_reviewer(parent_dialog_turn_id, max_active_reviewers) +} + +pub fn deep_review_effective_concurrency_snapshot( + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, +) -> DeepReviewEffectiveConcurrencySnapshot { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER + .effective_concurrency_snapshot(parent_dialog_turn_id, configured_max_parallel_instances) +} + +pub fn deep_review_effective_parallel_instances( + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, +) -> usize { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER + .effective_parallel_instances(parent_dialog_turn_id, configured_max_parallel_instances) +} + +pub fn record_deep_review_effective_concurrency_capacity_error( + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, + reason: DeepReviewCapacityQueueReason, + retry_after: Option, +) -> DeepReviewEffectiveConcurrencySnapshot { + 
GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_effective_concurrency_capacity_error( + parent_dialog_turn_id, + configured_max_parallel_instances, + reason, + retry_after, + ) +} + +pub fn record_deep_review_effective_concurrency_success( + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, +) -> DeepReviewEffectiveConcurrencySnapshot { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.record_effective_concurrency_success( + parent_dialog_turn_id, + configured_max_parallel_instances, + ) +} + +pub fn set_deep_review_effective_concurrency_user_override( + parent_dialog_turn_id: &str, + configured_max_parallel_instances: usize, + user_override_parallel_instances: Option, +) -> DeepReviewEffectiveConcurrencySnapshot { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.set_effective_concurrency_user_override( + parent_dialog_turn_id, + configured_max_parallel_instances, + user_override_parallel_instances, + ) +} + +/// Returns the number of active reviewer calls for a given turn. +pub fn deep_review_active_reviewer_count(parent_dialog_turn_id: &str) -> usize { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.active_reviewer_count(parent_dialog_turn_id) +} + +/// Returns true if a judge has been launched for a given turn. 
+pub fn deep_review_has_judge_been_launched(parent_dialog_turn_id: &str) -> bool { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.has_judge_been_launched(parent_dialog_turn_id) +} + +pub fn deep_review_concurrency_cap_rejection_count(parent_dialog_turn_id: &str) -> usize { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.concurrency_cap_rejection_count(parent_dialog_turn_id) +} + +pub fn deep_review_capacity_skip_count(parent_dialog_turn_id: &str) -> usize { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER.capacity_skip_count(parent_dialog_turn_id) +} + +pub fn apply_deep_review_queue_control( + parent_dialog_turn_id: &str, + tool_id: &str, + action: DeepReviewQueueControlAction, +) -> DeepReviewQueueControlSnapshot { + GLOBAL_DEEP_REVIEW_QUEUE_CONTROL_TRACKER.apply(parent_dialog_turn_id, tool_id, action) +} + +pub fn deep_review_queue_control_snapshot( + parent_dialog_turn_id: &str, + tool_id: &str, +) -> DeepReviewQueueControlSnapshot { + GLOBAL_DEEP_REVIEW_QUEUE_CONTROL_TRACKER.snapshot(parent_dialog_turn_id, tool_id) +} + +pub fn clear_deep_review_queue_control_for_tool(parent_dialog_turn_id: &str, tool_id: &str) { + GLOBAL_DEEP_REVIEW_QUEUE_CONTROL_TRACKER.clear_tool(parent_dialog_turn_id, tool_id) +} + +/// Returns the number of retries used for a specific subagent type in a given turn. +pub fn deep_review_retries_used(parent_dialog_turn_id: &str, subagent_type: &str) -> usize { + GLOBAL_DEEP_REVIEW_BUDGET_TRACKER + .turns + .get(parent_dialog_turn_id) + .map(|budget| { + budget + .retries_used_by_subagent + .get(subagent_type) + .copied() + .unwrap_or(0) + }) + .unwrap_or(0) +} + +/// Returns the fallback max retries per role when an effective run policy is unavailable. 
+pub fn deep_review_max_retries_per_role(_parent_dialog_turn_id: &str) -> usize { + DEFAULT_MAX_RETRIES_PER_ROLE +} + +fn collect_manifest_members(raw: Option<&Value>, output: &mut HashSet) { + let Some(values) = raw.and_then(Value::as_array) else { + return; + }; + + for member in values { + if let Some(id) = manifest_member_subagent_id(member) { + output.insert(id); + } + } +} + +fn manifest_member_subagent_id(value: &Value) -> Option { + let id = value + .get("subagentId") + .or_else(|| value.get("subagent_id")) + .and_then(Value::as_str)? + .trim(); + (!id.is_empty()).then(|| id.to_string()) +} + +fn normalize_extra_subagent_ids(raw: Option<&Value>) -> Vec { + let Some(values) = raw.and_then(Value::as_array) else { + return Vec::new(); + }; + + let disallowed = disallowed_extra_subagent_ids(); + let mut seen = HashSet::new(); + let mut normalized = Vec::new(); + + for value in values { + let Some(id) = value_to_id(value) else { + continue; + }; + if id.is_empty() || disallowed.contains(id.as_str()) || !seen.insert(id.clone()) { + continue; + } + normalized.push(id); + } + + normalized +} + +fn normalize_member_strategy_overrides( + raw: Option<&Value>, +) -> HashMap { + let Some(values) = raw.and_then(Value::as_object) else { + return HashMap::new(); + }; + + let mut normalized = HashMap::new(); + for (subagent_id, value) in values { + let id = subagent_id.trim(); + let Some(strategy_level) = DeepReviewStrategyLevel::from_value(Some(value)) else { + continue; + }; + if !id.is_empty() { + normalized.insert(id.to_string(), strategy_level); + } + } + + normalized +} + +fn disallowed_extra_subagent_ids() -> HashSet<&'static str> { + CORE_REVIEWER_AGENT_TYPES + .into_iter() + .chain(CONDITIONAL_REVIEWER_AGENT_TYPES) + .chain([ + REVIEW_JUDGE_AGENT_TYPE, + DEEP_REVIEW_AGENT_TYPE, + REVIEW_FIXER_AGENT_TYPE, + ]) + .collect() +} + +fn reviewer_agent_type_count() -> usize { + CORE_REVIEWER_AGENT_TYPES.len() + CONDITIONAL_REVIEWER_AGENT_TYPES.len() +} + +fn 
normalize_budget_subagent_type( + subagent_type: &str, +) -> Result { + let normalized = subagent_type.trim(); + if normalized.is_empty() { + return Err(DeepReviewPolicyViolation::new( + "deep_review_subagent_type_missing", + "DeepReview task budget requires a non-empty subagent type", + )); + } + + Ok(normalized.to_string()) +} + +fn value_to_id(value: &Value) -> Option { + match value { + Value::String(s) => Some(s.trim().to_string()), + _ => None, + } +} + +fn clamp_u64(raw: Option<&Value>, min: u64, max: u64, fallback: u64) -> u64 { + let Some(value) = raw.and_then(number_as_i64) else { + return fallback; + }; + + let min_i64 = i64::try_from(min).unwrap_or(i64::MAX); + let max_i64 = i64::try_from(max).unwrap_or(i64::MAX); + value.clamp(min_i64, max_i64) as u64 +} + +fn clamp_usize(raw: Option<&Value>, min: usize, max: usize, fallback: usize) -> usize { + let Some(value) = raw.and_then(number_as_i64) else { + return fallback; + }; + + let min_i64 = i64::try_from(min).unwrap_or(i64::MAX); + let max_i64 = i64::try_from(max).unwrap_or(i64::MAX); + value.clamp(min_i64, max_i64) as usize +} + +fn number_as_i64(value: &Value) -> Option { + value.as_i64().or_else(|| { + value + .as_u64() + .map(|value| i64::try_from(value).unwrap_or(i64::MAX)) + }) +} + +/// Incremental review cache stores completed reviewer outputs keyed by packet_id. +/// When a deep review is re-run with the same target fingerprint, cached outputs +/// are reused instead of re-dispatching reviewers. 
+#[derive(Clone)] +pub struct DeepReviewIncrementalCache { + fingerprint: String, + packets: HashMap, +} + +impl DeepReviewIncrementalCache { + pub fn new(fingerprint: &str) -> Self { + Self { + fingerprint: fingerprint.to_string(), + packets: HashMap::new(), + } + } + + pub fn from_value(value: &Value) -> Self { + let obj = value.as_object(); + let fingerprint = obj + .and_then(|o| o.get("fingerprint")) + .and_then(Value::as_str) + .unwrap_or("") + .to_string(); + let packets = obj + .and_then(|o| o.get("packets")) + .and_then(Value::as_object) + .map(|map| { + map.iter() + .filter_map(|(k, v)| v.as_str().map(|s| (k.clone(), s.to_string()))) + .collect() + }) + .unwrap_or_default(); + Self { + fingerprint, + packets, + } + } + + pub fn to_value(&self) -> Value { + json!({ + "fingerprint": self.fingerprint, + "packets": self.packets, + }) + } + + pub fn fingerprint(&self) -> &str { + &self.fingerprint + } + + pub fn store_packet(&mut self, packet_id: &str, output: &str) { + self.packets + .insert(packet_id.to_string(), output.to_string()); + } + + pub fn get_packet(&self, packet_id: &str) -> Option<&str> { + self.packets.get(packet_id).map(|s| s.as_str()) + } + + pub fn is_empty(&self) -> bool { + self.packets.is_empty() + } + + pub fn len(&self) -> usize { + self.packets.len() + } + + /// Check if the cached fingerprint matches the fingerprint in the run manifest. + /// Returns false if the manifest has no incrementalReviewCache section. 
+ pub fn matches_manifest(&self, manifest: &Value) -> bool { + manifest + .get("incrementalReviewCache") + .and_then(|ic| ic.get("fingerprint")) + .and_then(Value::as_str) + .map(|fp| fp == self.fingerprint) + .unwrap_or(false) + } +} + +#[cfg(test)] +mod tests { + use super::{ + is_missing_default_review_team_config_error, DeepReviewBudgetTracker, + DeepReviewExecutionPolicy, DeepReviewIncrementalCache, DeepReviewRunManifestGate, + DeepReviewStrategyLevel, DeepReviewSubagentRole, REVIEWER_ARCHITECTURE_AGENT_TYPE, + REVIEWER_PERFORMANCE_AGENT_TYPE, REVIEWER_SECURITY_AGENT_TYPE, REVIEW_FIXER_AGENT_TYPE, + REVIEW_JUDGE_AGENT_TYPE, + }; + use crate::util::errors::BitFunError; + use serde_json::json; + use serde_json::Value; + use std::time::Duration; + + #[test] + fn only_missing_default_review_team_path_can_fallback_to_defaults() { + assert!(is_missing_default_review_team_config_error( + &BitFunError::NotFound("Config path 'ai.review_teams.default' not found".to_string()) + )); + assert!(!is_missing_default_review_team_config_error( + &BitFunError::config("Config service unavailable") + )); + assert!(!is_missing_default_review_team_config_error( + &BitFunError::config("Config path 'ai.review_teams.default.extra' not found") + )); + } + + #[test] + fn default_policy_is_read_only_with_normal_strategy() { + let policy = DeepReviewExecutionPolicy::default(); assert_eq!(policy.strategy_level, DeepReviewStrategyLevel::Normal); assert!(policy.member_strategy_overrides.is_empty()); @@ -391,6 +2655,53 @@ mod tests { ); } + #[test] + fn frontend_reviewer_is_conditional_not_core() { + let policy = DeepReviewExecutionPolicy::default(); + + assert!(!super::CORE_REVIEWER_AGENT_TYPES.contains(&super::REVIEWER_FRONTEND_AGENT_TYPE)); + assert!( + super::CONDITIONAL_REVIEWER_AGENT_TYPES.contains(&super::REVIEWER_FRONTEND_AGENT_TYPE) + ); + assert_eq!( + policy + .classify_subagent(super::REVIEWER_FRONTEND_AGENT_TYPE) + .unwrap(), + DeepReviewSubagentRole::Reviewer + ); + } + + #[test] 
+ fn default_review_team_definition_exposes_role_manifest() { + let definition = super::default_review_team_definition(); + let role_ids: Vec<&str> = definition + .core_roles + .iter() + .map(|role| role.subagent_id.as_str()) + .collect(); + + assert_eq!(definition.default_strategy_level, "normal"); + assert!(role_ids.contains(&super::REVIEWER_BUSINESS_LOGIC_AGENT_TYPE)); + assert!(role_ids.contains(&super::REVIEWER_ARCHITECTURE_AGENT_TYPE)); + assert!(role_ids.contains(&super::REVIEWER_FRONTEND_AGENT_TYPE)); + assert!(role_ids.contains(&super::REVIEW_JUDGE_AGENT_TYPE)); + assert!(definition.core_roles.iter().any(|role| { + role.subagent_id == super::REVIEWER_FRONTEND_AGENT_TYPE && role.conditional + })); + assert!(definition + .hidden_agent_ids + .contains(&super::REVIEWER_FRONTEND_AGENT_TYPE.to_string())); + assert!(definition + .disallowed_extra_subagent_ids + .contains(&super::REVIEWER_FRONTEND_AGENT_TYPE.to_string())); + assert!(definition + .strategy_profiles + .get("quick") + .expect("quick strategy") + .role_directives + .contains_key(super::REVIEWER_FRONTEND_AGENT_TYPE)); + } + #[test] fn parses_review_strategy_and_member_overrides_from_config() { let raw = json!({ @@ -443,6 +2754,77 @@ mod tests { assert_eq!(result.unwrap_err().code, "deep_review_subagent_not_allowed"); } + #[test] + fn run_manifest_gate_allows_only_active_reviewers() { + let manifest = json!({ + "reviewMode": "deep", + "coreReviewers": [ + { "subagentId": "ReviewBusinessLogic" } + ], + "enabledExtraReviewers": [ + { "subagentId": "ExtraReviewer" } + ], + "qualityGateReviewer": { "subagentId": "ReviewJudge" }, + "skippedReviewers": [ + { "subagentId": "ReviewFrontend", "reason": "not_applicable" } + ] + }); + + let gate = DeepReviewRunManifestGate::from_value(&manifest) + .expect("valid run manifest should produce a gate"); + + gate.ensure_active("ReviewBusinessLogic").unwrap(); + gate.ensure_active("ExtraReviewer").unwrap(); + gate.ensure_active("ReviewJudge").unwrap(); + + let violation 
= gate.ensure_active("ReviewFrontend").unwrap_err(); + assert_eq!(violation.code, "deep_review_subagent_not_active_for_target"); + assert!(violation.message.contains("ReviewFrontend")); + assert!(violation.message.contains("not_applicable")); + } + + #[test] + fn run_manifest_gate_is_absent_without_review_team_shape() { + let manifest = json!({ + "reviewMode": "deep", + "skippedReviewers": [ + { "subagentId": "ReviewFrontend", "reason": "not_applicable" } + ] + }); + + assert!(DeepReviewRunManifestGate::from_value(&manifest).is_none()); + } + + #[test] + fn run_manifest_gate_accepts_work_packet_roster() { + let manifest = json!({ + "reviewMode": "deep", + "workPackets": [ + { + "packetId": "reviewer:ReviewBusinessLogic", + "subagentId": "ReviewBusinessLogic" + }, + { + "packet_id": "judge:ReviewJudge", + "subagent_id": "ReviewJudge" + } + ], + "skippedReviewers": [ + { "subagentId": "ReviewFrontend", "reason": "not_applicable" } + ] + }); + + let gate = DeepReviewRunManifestGate::from_value(&manifest) + .expect("work packet manifest should produce a gate"); + + gate.ensure_active("ReviewBusinessLogic").unwrap(); + gate.ensure_active("ReviewJudge").unwrap(); + + let violation = gate.ensure_active("ReviewFrontend").unwrap_err(); + assert_eq!(violation.code, "deep_review_subagent_not_active_for_target"); + assert!(violation.message.contains("not_applicable")); + } + #[test] fn classify_always_rejects_review_fixer() { let policy = DeepReviewExecutionPolicy::default(); @@ -485,6 +2867,43 @@ mod tests { .contains(&"DeepReview".to_string())); } + #[test] + fn budget_tracker_caps_judge_calls_per_turn() { + let policy = DeepReviewExecutionPolicy::default(); + let tracker = DeepReviewBudgetTracker::default(); + + // turn-1: one judge call allowed + tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Judge, + REVIEW_JUDGE_AGENT_TYPE, + false, + ) + .unwrap(); + assert!(tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Judge, + 
REVIEW_JUDGE_AGENT_TYPE, + false, + ) + .is_err()); + + // turn-2: fresh budget, should succeed + tracker + .record_task( + "turn-2", + &policy, + DeepReviewSubagentRole::Judge, + REVIEW_JUDGE_AGENT_TYPE, + false, + ) + .unwrap(); + } + #[test] fn effective_timeout_zero_cap_allows_any_requested() { let policy = DeepReviewExecutionPolicy::from_config_value(Some(&json!({ @@ -503,6 +2922,63 @@ mod tests { ); } + #[test] + fn predictive_timeout_scales_with_target_size_and_reviewer_count() { + let policy = DeepReviewExecutionPolicy::default(); + + assert_eq!( + policy.predictive_timeout( + DeepReviewSubagentRole::Reviewer, + DeepReviewStrategyLevel::Normal, + 25, + 0, + 5, + ), + 675 + ); + assert_eq!( + policy.predictive_timeout( + DeepReviewSubagentRole::Judge, + DeepReviewStrategyLevel::Normal, + 25, + 0, + 5, + ), + 1350 + ); + } + + #[test] + fn run_manifest_execution_policy_overrides_static_timeouts() { + let policy = DeepReviewExecutionPolicy::from_config_value(Some(&json!({ + "reviewer_timeout_seconds": 300, + "judge_timeout_seconds": 240, + "reviewer_file_split_threshold": 20, + "max_same_role_instances": 3 + }))); + let manifest = json!({ + "reviewMode": "deep", + "strategyLevel": "normal", + "executionPolicy": { + "reviewerTimeoutSeconds": 675, + "judgeTimeoutSeconds": 1350, + "reviewerFileSplitThreshold": 10, + "maxSameRoleInstances": 4 + }, + "coreReviewers": [ + { "subagentId": "ReviewBusinessLogic" } + ], + "qualityGateReviewer": { "subagentId": "ReviewJudge" } + }); + + let effective = policy.with_run_manifest_execution_policy(&manifest); + + assert_eq!(effective.reviewer_timeout_seconds, 675); + assert_eq!(effective.judge_timeout_seconds, 1350); + assert_eq!(effective.reviewer_file_split_threshold, 10); + assert_eq!(effective.max_same_role_instances, 4); + } + #[test] fn default_file_split_threshold_and_max_instances() { let policy = DeepReviewExecutionPolicy::default(); @@ -563,7 +3039,113 @@ mod tests { } #[test] - fn 
max_same_role_instances_only_enforces_positive_minimum() { + fn budget_tracker_caps_reviewer_calls_by_max_same_role_instances() { + let policy = DeepReviewExecutionPolicy::from_config_value(Some(&json!({ + "max_same_role_instances": 2 + }))); + let tracker = DeepReviewBudgetTracker::default(); + + // Default policy: 5 core reviewers * 2 max instances = 10 reviewer calls allowed + for _ in 0..10 { + tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Reviewer, + "ReviewBusinessLogic", + false, + ) + .unwrap(); + } + // 11th reviewer call should be rejected + assert!(tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Reviewer, + "ReviewSecurity", + false, + ) + .is_err()); + } + + #[test] + fn budget_tracker_allows_one_retry_after_initial_reviewer_budget() { + let policy = DeepReviewExecutionPolicy::from_config_value(Some(&json!({ + "max_same_role_instances": 1, + "max_retries_per_role": 1 + }))); + let tracker = DeepReviewBudgetTracker::default(); + + for reviewer in [ + "ReviewBusinessLogic", + "ReviewPerformance", + "ReviewSecurity", + "ReviewArchitecture", + "ReviewFrontend", + ] { + tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Reviewer, + reviewer, + false, + ) + .unwrap(); + } + + assert!(tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Reviewer, + "ReviewSecurity", + false, + ) + .is_err()); + tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Reviewer, + "ReviewSecurity", + true, + ) + .unwrap(); + + let violation = tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Reviewer, + "ReviewSecurity", + true, + ) + .unwrap_err(); + assert_eq!(violation.code, "deep_review_retry_budget_exhausted"); + } + + #[test] + fn budget_tracker_rejects_retry_without_initial_reviewer_call() { + let policy = DeepReviewExecutionPolicy::default(); + let tracker = DeepReviewBudgetTracker::default(); + + let violation = tracker + .record_task( + 
"turn-1", + &policy, + DeepReviewSubagentRole::Reviewer, + "ReviewSecurity", + true, + ) + .unwrap_err(); + + assert_eq!(violation.code, "deep_review_retry_without_initial_attempt"); + } + + #[test] + fn max_same_role_instances_clamped_to_range() { // Value 0 should be clamped to 1 let policy = DeepReviewExecutionPolicy::from_config_value(Some(&json!({ "max_same_role_instances": 0 @@ -576,4 +3158,644 @@ mod tests { }))); assert_eq!(policy.max_same_role_instances, 100); } + + #[test] + fn auto_select_strategy_quick_for_small_changes() { + let policy = DeepReviewExecutionPolicy::default(); + let risk = super::ChangeRiskFactors { + file_count: 2, + total_lines_changed: 80, + files_in_security_paths: 0, + max_cyclomatic_complexity_delta: 0, + cross_crate_changes: 0, + }; + let (level, rationale) = policy.auto_select_strategy(&risk); + assert_eq!(level, DeepReviewStrategyLevel::Quick); + assert!(rationale.contains("2 files")); + assert!(rationale.contains("80 lines")); + } + + #[test] + fn auto_select_strategy_normal_for_medium_changes() { + let policy = DeepReviewExecutionPolicy::default(); + let risk = super::ChangeRiskFactors { + file_count: 8, + total_lines_changed: 400, + files_in_security_paths: 0, + max_cyclomatic_complexity_delta: 0, + cross_crate_changes: 0, + }; + let (level, rationale) = policy.auto_select_strategy(&risk); + assert_eq!(level, DeepReviewStrategyLevel::Normal); + assert!(rationale.contains("8 files")); + } + + #[test] + fn auto_select_strategy_deep_for_large_or_risky_changes() { + let policy = DeepReviewExecutionPolicy::default(); + let risk = super::ChangeRiskFactors { + file_count: 30, + total_lines_changed: 2000, + files_in_security_paths: 3, + max_cyclomatic_complexity_delta: 0, + cross_crate_changes: 2, + }; + let (level, rationale) = policy.auto_select_strategy(&risk); + assert_eq!(level, DeepReviewStrategyLevel::Deep); + assert!(rationale.contains("30 files")); + assert!(rationale.contains("3 security files")); + } + + #[test] + fn 
auto_select_strategy_security_paths_boost_score() { + let policy = super::DeepReviewExecutionPolicy::default(); + // 4 files + 0 lines/100 + 2 security * 3 = 10 → Normal + let risk = super::ChangeRiskFactors { + file_count: 4, + total_lines_changed: 0, + files_in_security_paths: 2, + max_cyclomatic_complexity_delta: 0, + cross_crate_changes: 0, + }; + let (level, _) = policy.auto_select_strategy(&risk); + assert_eq!(level, DeepReviewStrategyLevel::Normal); + } + + #[test] + fn concurrency_policy_default_values() { + let policy = super::DeepReviewConcurrencyPolicy::default(); + assert_eq!(policy.max_parallel_instances, 4); + assert_eq!(policy.stagger_seconds, 0); + assert!(policy.batch_extras_separately); + } + + #[test] + fn concurrency_policy_from_manifest() { + let raw = json!({ + "maxParallelInstances": 6, + "staggerSeconds": 5, + "batchExtrasSeparately": false + }); + let policy = super::DeepReviewConcurrencyPolicy::from_manifest(&raw); + assert_eq!(policy.max_parallel_instances, 6); + assert_eq!(policy.stagger_seconds, 5); + assert!(!policy.batch_extras_separately); + } + + #[test] + fn concurrency_effective_max_same_role_instances() { + let exec_policy = DeepReviewExecutionPolicy::default(); + let conc_policy = super::DeepReviewConcurrencyPolicy { + max_parallel_instances: 4, + stagger_seconds: 0, + max_queue_wait_seconds: 60, + batch_extras_separately: true, + }; + // 5 reviewer types (4 core + 1 conditional), 4 / 5 = 0 → clamped to 1 + assert_eq!( + conc_policy.effective_max_same_role_instances(&exec_policy), + 1 + ); + + let conc_policy_12 = super::DeepReviewConcurrencyPolicy { + max_parallel_instances: 12, + stagger_seconds: 0, + max_queue_wait_seconds: 60, + batch_extras_separately: true, + }; + // 12 / 5 = 2, capped by default max_same_role_instances (3) → 2 + assert_eq!( + conc_policy_12.effective_max_same_role_instances(&exec_policy), + 2 + ); + } + + #[test] + fn concurrency_check_launch_allowed() { + let policy = 
super::DeepReviewConcurrencyPolicy::default(); + // 0 active reviewers → reviewer allowed + assert!(policy + .check_launch_allowed(0, DeepReviewSubagentRole::Reviewer, false) + .is_ok()); + // 4 active reviewers (at cap) → reviewer blocked + let err = policy + .check_launch_allowed(4, DeepReviewSubagentRole::Reviewer, false) + .unwrap_err(); + assert_eq!(err.code, "deep_review_concurrency_cap_reached"); + // 1 active reviewer → judge blocked + let err = policy + .check_launch_allowed(1, DeepReviewSubagentRole::Judge, false) + .unwrap_err(); + assert_eq!(err.code, "deep_review_judge_launch_blocked_by_reviewers"); + // 0 active reviewers, judge not pending → judge allowed + assert!(policy + .check_launch_allowed(0, DeepReviewSubagentRole::Judge, false) + .is_ok()); + // 0 active reviewers, judge pending → blocked + let err = policy + .check_launch_allowed(0, DeepReviewSubagentRole::Judge, true) + .unwrap_err(); + assert_eq!(err.code, "deep_review_judge_already_pending"); + } + + #[test] + fn concurrency_policy_from_run_manifest() { + let policy = DeepReviewExecutionPolicy::default(); + let manifest = json!({ + "reviewMode": "deep", + "concurrencyPolicy": { + "maxParallelInstances": 3, + "staggerSeconds": 10, + "maxQueueWaitSeconds": 45 + } + }); + let conc = policy.concurrency_policy_from_manifest(&manifest); + assert_eq!(conc.max_parallel_instances, 3); + assert_eq!(conc.stagger_seconds, 10); + assert_eq!(conc.max_queue_wait_seconds, 45); + assert!(conc.batch_extras_separately); + } + + #[test] + fn active_reviewer_guard_tracks_running_reviewers_only() { + let tracker = DeepReviewBudgetTracker::default(); + let policy = DeepReviewExecutionPolicy::default(); + + tracker + .record_task( + "turn-active", + &policy, + DeepReviewSubagentRole::Reviewer, + REVIEWER_SECURITY_AGENT_TYPE, + false, + ) + .unwrap(); + assert_eq!(tracker.active_reviewer_count("turn-active"), 0); + + { + let _guard = tracker.begin_active_reviewer("turn-active"); + 
assert_eq!(tracker.active_reviewer_count("turn-active"), 1); + } + + assert_eq!(tracker.active_reviewer_count("turn-active"), 0); + } + + #[test] + fn active_reviewer_try_begin_respects_capacity_atomically() { + let tracker = DeepReviewBudgetTracker::default(); + let first = tracker + .try_begin_active_reviewer("turn-atomic", 1) + .expect("first reviewer should acquire capacity"); + + assert!(tracker + .try_begin_active_reviewer("turn-atomic", 1) + .is_none()); + assert_eq!(tracker.active_reviewer_count("turn-atomic"), 1); + + drop(first); + + assert!(tracker + .try_begin_active_reviewer("turn-atomic", 1) + .is_some()); + } + + #[test] + fn capacity_skip_count_is_tracked_separately_from_hard_rejections() { + let tracker = DeepReviewBudgetTracker::default(); + + tracker.record_capacity_skip("turn-skip"); + tracker.record_capacity_skip("turn-skip"); + tracker.record_concurrency_cap_rejection("turn-skip"); + + assert_eq!(tracker.capacity_skip_count("turn-skip"), 2); + assert_eq!(tracker.concurrency_cap_rejection_count("turn-skip"), 1); + } + + #[test] + fn shared_context_measurement_tracks_duplicate_readonly_file_context_without_content() { + let tracker = DeepReviewBudgetTracker::default(); + + tracker.record_shared_context_tool_use( + "turn-shared-context", + REVIEWER_SECURITY_AGENT_TYPE, + "Read", + ".\\src\\lib.rs", + ); + tracker.record_shared_context_tool_use( + "turn-shared-context", + REVIEWER_PERFORMANCE_AGENT_TYPE, + "Read", + "src/lib.rs", + ); + tracker.record_shared_context_tool_use( + "turn-shared-context", + REVIEWER_SECURITY_AGENT_TYPE, + "GetFileDiff", + "src/lib.rs", + ); + tracker.record_shared_context_tool_use( + "turn-shared-context", + REVIEWER_ARCHITECTURE_AGENT_TYPE, + "Read", + "src/other.rs", + ); + + let snapshot = tracker.shared_context_measurement_snapshot("turn-shared-context"); + + assert_eq!(snapshot.total_calls, 4); + assert_eq!(snapshot.duplicate_calls, 1); + assert_eq!(snapshot.duplicate_context_count, 1); + 
assert_eq!(snapshot.repeated_contexts.len(), 1); + assert_eq!(snapshot.repeated_contexts[0].tool_name, "Read"); + assert_eq!(snapshot.repeated_contexts[0].file_path, "src/lib.rs"); + assert_eq!(snapshot.repeated_contexts[0].call_count, 2); + assert_eq!(snapshot.repeated_contexts[0].reviewer_count, 2); + } + + #[test] + fn runtime_diagnostics_records_queue_and_capacity_transitions_as_counts() { + let tracker = DeepReviewBudgetTracker::default(); + + tracker.record_runtime_queue_wait("turn-runtime", 1_250); + tracker.record_runtime_queue_wait("turn-runtime", 2_500); + tracker.record_runtime_capacity_skip( + "turn-runtime", + super::DeepReviewCapacityQueueReason::ProviderConcurrencyLimit, + ); + + let diagnostics = tracker + .runtime_diagnostics_snapshot("turn-runtime") + .expect("runtime diagnostics should exist"); + + assert_eq!(diagnostics.queue_wait_count, 2); + assert_eq!(diagnostics.queue_wait_total_ms, 3_750); + assert_eq!(diagnostics.queue_wait_max_ms, 2_500); + assert_eq!(diagnostics.capacity_skip_count, 1); + assert_eq!(diagnostics.provider_capacity_queue_count, 0); + } + + #[test] + fn runtime_diagnostics_merges_shared_context_without_content() { + let tracker = DeepReviewBudgetTracker::default(); + + tracker.record_shared_context_tool_use( + "turn-runtime-shared", + REVIEWER_SECURITY_AGENT_TYPE, + "Read", + "src/lib.rs", + ); + tracker.record_shared_context_tool_use( + "turn-runtime-shared", + REVIEWER_ARCHITECTURE_AGENT_TYPE, + "Read", + "src/lib.rs", + ); + + let diagnostics = tracker + .runtime_diagnostics_snapshot("turn-runtime-shared") + .expect("runtime diagnostics should exist"); + + assert_eq!(diagnostics.shared_context_total_calls, 2); + assert_eq!(diagnostics.shared_context_duplicate_context_count, 1); + assert!(!format!("{diagnostics:?}").contains("fn ")); + } + + #[test] + fn effective_concurrency_lowers_after_capacity_errors_without_exceeding_hard_cap() { + let tracker = DeepReviewBudgetTracker::default(); + + 
assert_eq!(tracker.effective_parallel_instances("turn-effective", 4), 4); + + tracker.record_effective_concurrency_capacity_error( + "turn-effective", + 4, + super::DeepReviewCapacityQueueReason::LocalConcurrencyCap, + None, + ); + assert_eq!(tracker.effective_parallel_instances("turn-effective", 4), 3); + + for _ in 0..8 { + tracker.record_effective_concurrency_capacity_error( + "turn-effective", + 4, + super::DeepReviewCapacityQueueReason::LocalConcurrencyCap, + None, + ); + } + assert_eq!(tracker.effective_parallel_instances("turn-effective", 4), 1); + } + + #[test] + fn effective_concurrency_recovers_after_success_observation_window() { + let tracker = DeepReviewBudgetTracker::default(); + + tracker.record_effective_concurrency_capacity_error( + "turn-recover", + 4, + super::DeepReviewCapacityQueueReason::LocalConcurrencyCap, + None, + ); + assert_eq!(tracker.effective_parallel_instances("turn-recover", 4), 3); + + tracker.record_effective_concurrency_success("turn-recover", 4); + tracker.record_effective_concurrency_success("turn-recover", 4); + assert_eq!(tracker.effective_parallel_instances("turn-recover", 4), 3); + + tracker.record_effective_concurrency_success("turn-recover", 4); + assert_eq!(tracker.effective_parallel_instances("turn-recover", 4), 4); + } + + #[test] + fn effective_concurrency_respects_retry_after_before_recovery() { + let tracker = DeepReviewBudgetTracker::default(); + + let snapshot = tracker.record_effective_concurrency_capacity_error( + "turn-retry-after", + 4, + super::DeepReviewCapacityQueueReason::RetryAfter, + Some(Duration::from_secs(60)), + ); + assert_eq!(snapshot.learned_parallel_instances, 3); + assert_eq!(snapshot.effective_parallel_instances, 1); + assert!(snapshot.retry_after_remaining_ms.unwrap_or_default() > 0); + + for _ in 0..3 { + tracker.record_effective_concurrency_success("turn-retry-after", 4); + } + assert_eq!( + tracker.effective_parallel_instances("turn-retry-after", 4), + 1 + ); + } + + #[test] + fn 
effective_concurrency_user_override_is_bounded_and_visible() { + let tracker = DeepReviewBudgetTracker::default(); + + tracker.record_effective_concurrency_capacity_error( + "turn-override", + 4, + super::DeepReviewCapacityQueueReason::ProviderConcurrencyLimit, + None, + ); + tracker.set_effective_concurrency_user_override("turn-override", 4, Some(9)); + + let snapshot = tracker.effective_concurrency_snapshot("turn-override", 4); + assert_eq!(snapshot.configured_max_parallel_instances, 4); + assert_eq!(snapshot.learned_parallel_instances, 3); + assert_eq!(snapshot.user_override_parallel_instances, Some(4)); + assert_eq!(snapshot.effective_parallel_instances, 4); + + tracker.set_effective_concurrency_user_override("turn-override", 4, Some(0)); + let snapshot = tracker.effective_concurrency_snapshot("turn-override", 4); + assert_eq!(snapshot.user_override_parallel_instances, Some(1)); + assert_eq!(snapshot.effective_parallel_instances, 1); + } + + #[test] + fn capacity_error_classifier_queues_only_transient_capacity_failures() { + let queueable_cases = [ + ( + "provider_rate_limit", + "Provider rate limit exceeded", + None, + super::DeepReviewCapacityQueueReason::ProviderRateLimit, + ), + ( + "provider_error", + "Too many concurrent requests for this account", + None, + super::DeepReviewCapacityQueueReason::ProviderConcurrencyLimit, + ), + ( + "provider_unavailable", + "Model is temporarily overloaded", + None, + super::DeepReviewCapacityQueueReason::TemporaryOverload, + ), + ( + "provider_error", + "Retry later", + Some(30), + super::DeepReviewCapacityQueueReason::RetryAfter, + ), + ( + "deep_review_concurrency_cap_reached", + "Maximum parallel reviewer instances reached", + None, + super::DeepReviewCapacityQueueReason::LocalConcurrencyCap, + ), + ]; + + for (code, message, retry_after_seconds, expected_reason) in queueable_cases { + let decision = + super::classify_deep_review_capacity_error(code, message, retry_after_seconds); + assert!(decision.queueable, "{code} 
should be queueable"); + assert_eq!(decision.reason, Some(expected_reason)); + } + } + + #[test] + fn capacity_error_classifier_fails_fast_for_non_capacity_failures() { + let non_queueable_cases = [ + ("authentication_failed", "API key is invalid"), + ( + "provider_quota_exhausted", + "Quota exhausted for this billing period", + ), + ("billing_required", "Billing is not configured"), + ("invalid_model", "The requested model does not exist"), + ("user_cancelled", "User cancelled the operation"), + ( + "deep_review_subagent_not_allowed", + "Subagent is not allowed", + ), + ("invalid_tooling", "Review agent is missing GetFileDiff"), + ]; + + for (code, message) in non_queueable_cases { + let decision = super::classify_deep_review_capacity_error(code, message, None); + assert!(!decision.queueable, "{code} should fail fast"); + assert_eq!(decision.reason, None); + } + } + + #[test] + fn queue_state_keeps_queue_wait_out_of_reviewer_timeout() { + let queued = super::DeepReviewReviewerQueueState::queued_for_capacity( + super::DeepReviewCapacityQueueReason::ProviderConcurrencyLimit, + 45_000, + ); + assert_eq!( + queued.status, + super::DeepReviewReviewerQueueStatus::QueuedForCapacity + ); + assert_eq!(queued.queue_elapsed_ms, 45_000); + assert_eq!(queued.run_elapsed_ms, 0); + assert_eq!(queued.timeout_elapsed_ms(), 0); + + let running = super::DeepReviewReviewerQueueState::running(45_000, 8_000); + assert_eq!( + running.status, + super::DeepReviewReviewerQueueStatus::Running + ); + assert_eq!(running.queue_elapsed_ms, 45_000); + assert_eq!(running.run_elapsed_ms, 8_000); + assert_eq!(running.timeout_elapsed_ms(), 8_000); + } + + #[test] + fn paused_queue_state_does_not_consume_reviewer_timeout() { + let paused = super::DeepReviewReviewerQueueState::paused_by_user(120_000); + + assert_eq!( + paused.status, + super::DeepReviewReviewerQueueStatus::PausedByUser + ); + assert_eq!(paused.queue_elapsed_ms, 120_000); + assert_eq!(paused.run_elapsed_ms, 0); + 
assert_eq!(paused.timeout_elapsed_ms(), 0); + assert_eq!(paused.reason, None); + } + + #[test] + fn queue_control_pause_continue_cancel_are_tool_scoped() { + let turn_id = "turn-queue-control-tool"; + let primary_tool_id = "tool-queue-control-a"; + let other_tool_id = "tool-queue-control-b"; + + let paused = super::apply_deep_review_queue_control( + turn_id, + primary_tool_id, + super::DeepReviewQueueControlAction::Pause, + ); + assert!(paused.paused); + assert!(!paused.cancelled); + + let other = super::deep_review_queue_control_snapshot(turn_id, other_tool_id); + assert!(!other.paused); + assert!(!other.cancelled); + + let continued = super::apply_deep_review_queue_control( + turn_id, + primary_tool_id, + super::DeepReviewQueueControlAction::Continue, + ); + assert!(!continued.paused); + assert!(!continued.cancelled); + + let cancelled = super::apply_deep_review_queue_control( + turn_id, + primary_tool_id, + super::DeepReviewQueueControlAction::Cancel, + ); + assert!(!cancelled.paused); + assert!(cancelled.cancelled); + + super::clear_deep_review_queue_control_for_tool(turn_id, primary_tool_id); + let cleared = super::deep_review_queue_control_snapshot(turn_id, primary_tool_id); + assert!(!cleared.paused); + assert!(!cleared.cancelled); + } + + #[test] + fn queue_control_skip_optional_is_turn_scoped() { + let turn_id = "turn-queue-control-optional"; + let primary_tool_id = "tool-queue-control-primary"; + let other_tool_id = "tool-queue-control-other"; + + let snapshot = super::apply_deep_review_queue_control( + turn_id, + primary_tool_id, + super::DeepReviewQueueControlAction::SkipOptional, + ); + assert!(snapshot.skip_optional); + + let other = super::deep_review_queue_control_snapshot(turn_id, other_tool_id); + assert!(other.skip_optional); + + super::clear_deep_review_queue_control_for_tool(turn_id, primary_tool_id); + let after_tool_clear = super::deep_review_queue_control_snapshot(turn_id, other_tool_id); + assert!(after_tool_clear.skip_optional); + } + + // 
--- Incremental review cache tests --- + + #[test] + fn incremental_cache_builds_and_reads() { + let mut cache = DeepReviewIncrementalCache::new("fp-abc123"); + assert_eq!(cache.fingerprint(), "fp-abc123"); + assert!(cache.is_empty()); + + cache.store_packet("reviewer:ReviewSecurity", "Found 2 security issues"); + cache.store_packet("reviewer:ReviewBusinessLogic", "All good"); + assert_eq!(cache.len(), 2); + assert!(!cache.is_empty()); + + assert_eq!( + cache.get_packet("reviewer:ReviewSecurity"), + Some("Found 2 security issues") + ); + assert_eq!(cache.get_packet("reviewer:ReviewArchitecture"), None); + } + + #[test] + fn incremental_cache_matches_fingerprint() { + let cache = DeepReviewIncrementalCache::new("fp-abc123"); + let manifest = json!({ + "incrementalReviewCache": { + "fingerprint": "fp-abc123" + } + }); + assert!(cache.matches_manifest(&manifest)); + + let wrong_manifest = json!({ + "incrementalReviewCache": { + "fingerprint": "fp-other" + } + }); + assert!(!cache.matches_manifest(&wrong_manifest)); + } + + #[test] + fn incremental_cache_to_and_from_value() { + let mut cache = DeepReviewIncrementalCache::new("fp-test"); + cache.store_packet("reviewer:ReviewSecurity", "sec result"); + cache.store_packet("reviewer:ReviewBusinessLogic", "logic result"); + + let value = cache.to_value(); + let restored = DeepReviewIncrementalCache::from_value(&value); + assert_eq!(restored.fingerprint(), "fp-test"); + assert_eq!(restored.len(), 2); + assert_eq!( + restored.get_packet("reviewer:ReviewSecurity"), + Some("sec result") + ); + } + + #[test] + fn incremental_cache_preserves_split_packet_keys() { + let mut cache = DeepReviewIncrementalCache::new("fp-split"); + cache.store_packet("reviewer:ReviewSecurity:group-1-of-2", "sec group 1"); + cache.store_packet("reviewer:ReviewSecurity:group-2-of-2", "sec group 2"); + + let restored = DeepReviewIncrementalCache::from_value(&cache.to_value()); + + assert_eq!( + restored.get_packet("reviewer:ReviewSecurity:group-1-of-2"), 
+ Some("sec group 1") + ); + assert_eq!( + restored.get_packet("reviewer:ReviewSecurity:group-2-of-2"), + Some("sec group 2") + ); + assert_eq!(restored.get_packet("ReviewSecurity"), None); + } + + #[test] + fn incremental_cache_from_null_value() { + let cache = DeepReviewIncrementalCache::from_value(&Value::Null); + assert!(cache.is_empty()); + assert_eq!(cache.fingerprint(), ""); + } } diff --git a/src/crates/core/src/agentic/events/types.rs b/src/crates/core/src/agentic/events/types.rs index f326e1762..c40b4ce34 100644 --- a/src/crates/core/src/agentic/events/types.rs +++ b/src/crates/core/src/agentic/events/types.rs @@ -8,7 +8,8 @@ use crate::agentic::core::SessionState; pub use bitfun_events::agentic::ErrorCategory; pub use bitfun_events::{ AgenticEvent as BaseAgenticEvent, AgenticEventEnvelope as EventEnvelope, - AgenticEventPriority as EventPriority, SubagentParentInfo, ToolEventData, + AgenticEventPriority as EventPriority, DeepReviewQueueReason, DeepReviewQueueState, + DeepReviewQueueStatus, SubagentParentInfo, ToolEventData, }; // ============ Core layer AgenticEvent extension ============ diff --git a/src/crates/core/src/agentic/execution/execution_engine.rs b/src/crates/core/src/agentic/execution/execution_engine.rs index fcddb59b5..26d0878bf 100644 --- a/src/crates/core/src/agentic/execution/execution_engine.rs +++ b/src/crates/core/src/agentic/execution/execution_engine.rs @@ -7,6 +7,7 @@ use super::types::{ExecutionContext, ExecutionResult, RoundContext, RoundResult} use crate::agentic::agents::{ get_agent_registry, PromptBuilder, PromptBuilderContext, RemoteExecutionHints, }; +use crate::agentic::context_profile::{ContextProfilePolicy, ModelCapabilityProfile}; use crate::agentic::core::{ render_system_reminder, Message, MessageContent, MessageHelper, MessageRole, MessageSemanticKind, RequestReasoningTokenPolicy, Session, @@ -27,20 +28,34 @@ use crate::infrastructure::ai::get_global_ai_client_factory; use 
crate::service::config::get_global_config_service; use crate::service::config::types::{ModelCapability, ModelCategory}; use crate::service::remote_ssh::workspace_state::get_remote_workspace_manager; -use crate::util::elapsed_ms_u64; use crate::util::errors::{BitFunError, BitFunResult}; use crate::util::token_counter::TokenCounter; use crate::util::types::Message as AIMessage; use crate::util::types::ToolDefinition; +use crate::util::{elapsed_ms_u64, truncate_at_char_boundary}; use log::{debug, error, info, trace, warn}; +use sha2::{Digest, Sha256}; use std::collections::{HashMap, HashSet}; use std::path::Path; use std::sync::Arc; use tokio_util::sync::CancellationToken; /// Execution engine configuration -#[derive(Debug, Clone, Default)] -pub struct ExecutionEngineConfig; +#[derive(Debug, Clone)] +pub struct ExecutionEngineConfig { + pub max_rounds: usize, + /// Max consecutive rounds with identical tool-call signatures before loop detection triggers. + pub max_consecutive_same_tool: usize, +} + +impl Default for ExecutionEngineConfig { + fn default() -> Self { + Self { + max_rounds: crate::service::config::types::DEFAULT_MAX_ROUNDS, + max_consecutive_same_tool: 3, + } + } +} #[derive(Debug, Clone)] pub struct ContextCompactionOutcome { @@ -55,12 +70,168 @@ pub struct ContextCompactionOutcome { pub applied: bool, } +#[derive(Debug, Clone)] +struct ContextHealthSnapshot { + token_usage_ratio: f32, + microcompact_count: usize, + full_compression_count: usize, + compression_failure_count: u32, + repeated_tool_signature_count: usize, + consecutive_failed_commands: usize, +} + +impl ContextHealthSnapshot { + fn from_runtime_observations( + token_usage_ratio: f32, + microcompact_count: usize, + full_compression_count: usize, + compression_failure_count: u32, + recent_tool_signatures: &[String], + messages: &[Message], + ) -> Self { + Self { + token_usage_ratio, + microcompact_count, + full_compression_count, + compression_failure_count, + repeated_tool_signature_count: 
Self::repeated_tool_signature_count( + recent_tool_signatures, + ), + consecutive_failed_commands: Self::consecutive_failed_commands(messages), + } + } + + fn token_usage_ratio(current_tokens: usize, context_window: usize) -> f32 { + if context_window == 0 { + return 0.0; + } + current_tokens as f32 / context_window as f32 + } + + fn log(&self, session_id: &str, turn_id: &str, round_index: usize, stage: &str) { + debug!( + "Context health snapshot: session_id={}, turn_id={}, round_index={}, stage={}, token_usage={:.3}, microcompact_count={}, full_compression_count={}, compression_failure_count={}, repeated_tool_signature_count={}, consecutive_failed_commands={}", + session_id, + turn_id, + round_index, + stage, + self.token_usage_ratio, + self.microcompact_count, + self.full_compression_count, + self.compression_failure_count, + self.repeated_tool_signature_count, + self.consecutive_failed_commands + ); + } + + fn log_policy_thresholds( + &self, + session_id: &str, + turn_id: &str, + round_index: usize, + policy: &ContextProfilePolicy, + ) { + if policy.has_repeated_tool_loop(self.repeated_tool_signature_count) { + debug!( + "Context profile repeated-tool threshold reached: session_id={}, turn_id={}, round_index={}, profile={:?}, repeated_tool_signature_count={}, threshold={}", + session_id, + turn_id, + round_index, + policy.profile, + self.repeated_tool_signature_count, + policy.repeated_tool_signature_threshold + ); + } + + if policy.has_consecutive_command_failure_loop(self.consecutive_failed_commands) { + warn!( + "Context profile command-failure threshold reached: session_id={}, turn_id={}, round_index={}, profile={:?}, consecutive_failed_commands={}, threshold={}", + session_id, + turn_id, + round_index, + policy.profile, + self.consecutive_failed_commands, + policy.consecutive_failed_command_threshold + ); + } + } + + fn repeated_tool_signature_count(recent_tool_signatures: &[String]) -> usize { + let Some(last_signature) = recent_tool_signatures.last() 
else { + return 0; + }; + + let repeated_count = recent_tool_signatures + .iter() + .rev() + .take_while(|signature| *signature == last_signature) + .count(); + + if repeated_count >= 2 { + repeated_count + } else { + 0 + } + } + + fn consecutive_failed_commands(messages: &[Message]) -> usize { + let mut failures = 0; + for message in messages.iter().rev() { + let Some(failed) = Self::command_result_failed(message) else { + continue; + }; + + if failed { + failures += 1; + } else { + break; + } + } + failures + } + + fn command_result_failed(message: &Message) -> Option { + let MessageContent::ToolResult { + tool_name, + result, + is_error, + .. + } = &message.content + else { + return None; + }; + + if !matches!(tool_name.as_str(), "Bash" | "Git") { + return None; + } + + Some(Self::tool_result_failed(result, *is_error)) + } + + fn tool_result_failed(result: &serde_json::Value, is_error: bool) -> bool { + is_error + || Self::bool_field(result, "timed_out") == Some(true) + || Self::bool_field(result, "interrupted") == Some(true) + || Self::bool_field(result, "success") == Some(false) + || Self::numeric_field(result, "exit_code").is_some_and(|code| code != 0) + } + + fn bool_field(value: &serde_json::Value, key: &str) -> Option { + value.get(key).and_then(|field| field.as_bool()) + } + + fn numeric_field(value: &serde_json::Value, key: &str) -> Option { + value.get(key).and_then(|field| field.as_i64()) + } +} + /// Execution engine pub struct ExecutionEngine { round_executor: Arc, event_queue: Arc, session_manager: Arc, context_compressor: Arc, + config: ExecutionEngineConfig, } impl ExecutionEngine { @@ -72,13 +243,14 @@ impl ExecutionEngine { event_queue: Arc, session_manager: Arc, context_compressor: Arc, - _config: ExecutionEngineConfig, + config: ExecutionEngineConfig, ) -> Self { Self { round_executor, event_queue, session_manager, context_compressor, + config, } } @@ -93,6 +265,20 @@ impl ExecutionEngine { ) } + fn tool_signature_args_summary(args_str: &str) 
-> String { + if args_str.len() <= 128 { + return args_str.to_string(); + } + + let args_hash = hex::encode(Sha256::digest(args_str.as_bytes())); + format!( + "{}..#{}:sha256={}", + truncate_at_char_boundary(args_str, 64), + args_str.len(), + args_hash + ) + } + fn assistant_has_tool_calls(message: &Message) -> bool { matches!( &message.content, @@ -512,6 +698,7 @@ impl ExecutionEngine { steering_interrupt: None, cancellation_token: CancellationToken::new(), workspace_services: context.workspace_services.clone(), + recover_partial_on_cancel: context.recover_partial_on_cancel, }; // Tools are disabled here (None) — model must respond in plain text. @@ -742,6 +929,7 @@ impl ExecutionEngine { context_window: usize, tool_definitions: &Option>, system_prompt_message: Message, + compression_contract_limit: usize, tail_policy: CompressionTailPolicy, ) -> BitFunResult)>> { let event_subagent_parent_info = subagent_parent_info.map(|info| info.clone().into()); @@ -783,14 +971,18 @@ impl ExecutionEngine { .await; // Execute compression + let compression_contract = self + .session_manager + .compression_contract_for_session(session_id, compression_contract_limit); match self .context_compressor - .compress_turns( + .compress_turns_with_contract( session_id, context_window, turn_index_to_keep, turns, tail_policy, + compression_contract, ) .await { @@ -955,9 +1147,30 @@ impl ExecutionEngine { }); } + let is_review_subagent = get_agent_registry() + .get_subagent_is_review(&session.agent_type) + .unwrap_or(false); + let model_id = session.config.model_id.as_deref().unwrap_or_default(); + let context_profile_policy = ContextProfilePolicy::for_agent_context_and_model( + &session.agent_type, + is_review_subagent, + model_id, + model_id, + ); + let compression_contract = self.session_manager.compression_contract_for_session( + session_id, + context_profile_policy.compression_contract_limit, + ); match self .context_compressor - .compress_turns(session_id, context_window, turns.len(), 
turns, tail_policy) + .compress_turns_with_contract( + session_id, + context_window, + turns.len(), + turns, + tail_policy, + compression_contract, + ) .await { Ok(compression_result) => { @@ -1215,6 +1428,32 @@ impl ExecutionEngine { ); } + let model_capability_profile = ModelCapabilityProfile::from_resolved_model( + &resolved_primary_model_id, + &ai_client.config.model, + ); + let is_review_subagent = agent_registry + .get_subagent_is_review(&agent_type) + .unwrap_or(false); + let context_profile_policy = ContextProfilePolicy::for_agent_context( + &agent_type, + is_review_subagent, + model_capability_profile, + ); + debug!( + "Context profile policy selected: session_id={}, agent_type={}, profile={:?}, model_capability={:?}, microcompact_keep_recent={}, microcompact_trigger_ratio={:.2}, compression_contract_limit={}, subagent_concurrency_cap={}, repeated_tool_signature_threshold={}, consecutive_failed_command_threshold={}", + context.session_id, + agent_type, + context_profile_policy.profile, + model_capability_profile, + context_profile_policy.microcompact_keep_recent, + context_profile_policy.microcompact_trigger_ratio, + context_profile_policy.compression_contract_limit, + context_profile_policy.subagent_concurrency_cap, + context_profile_policy.repeated_tool_signature_threshold, + context_profile_policy.consecutive_failed_command_threshold + ); + // 3. 
Get System Prompt from current Agent debug!( "Building system prompt from agent: {}, model={}", @@ -1259,6 +1498,13 @@ impl ExecutionEngine { let mut consecutive_compression_failures: u32 = 0; const MAX_CONSECUTIVE_COMPRESSION_FAILURES: u32 = 3; + // P0: Loop detection: track recent tool call signatures + let mut recent_tool_signatures: Vec = Vec::new(); + let mut loop_detected = false; + let mut microcompact_count = 0usize; + let mut full_compression_count = 0usize; + let mut compression_failure_count = 0u32; + // Save the last token usage statistics let mut last_usage: Option = None; @@ -1320,8 +1566,7 @@ impl ExecutionEngine { let enable_context_compression = session.config.enable_context_compression; let compression_threshold = session.config.compression_threshold; - let microcompact_config = - crate::agentic::session::compression::microcompact::MicrocompactConfig::default(); + let microcompact_config = context_profile_policy.microcompact_config(); let mut execution_context_vars = context.context.clone(); execution_context_vars.insert( @@ -1363,6 +1608,15 @@ impl ExecutionEngine { // Loop to execute model rounds loop { + if completed_rounds >= self.config.max_rounds { + warn!( + "Reached max rounds limit: {}, stopping execution", + self.config.max_rounds + ); + finalization_reason = Some("max_rounds"); + break; + } + // Check and compress before sending AI request let mut current_tokens = Self::estimate_request_tokens_internal(&messages, tool_definitions.as_deref()); @@ -1381,20 +1635,29 @@ impl ExecutionEngine { if enable_context_compression && token_usage_ratio >= microcompact_config.trigger_ratio { if let Some(mc_result) = - crate::agentic::session::compression::microcompact::microcompact_messages( + crate::agentic::session::compression::microcompact::microcompact_messages_with_evidence( &mut messages, µcompact_config, + crate::agentic::session::compression::microcompact::MicrocompactEvidenceScope { + session_id: &context.session_id, + turn_id: 
&context.dialog_turn_id, + }, ) { + microcompact_count += 1; + for event in mc_result.evidence_events.iter().cloned() { + self.session_manager.append_evidence_event(event); + } current_tokens = Self::estimate_request_tokens_internal( &mut messages, tool_definitions.as_deref(), ); debug!( - "Round {} after microcompact: cleared={}, kept={}, tokens now {} ({:.1}%)", + "Round {} after microcompact: cleared={}, kept={}, evidence_events={}, tokens now {} ({:.1}%)", round_index, mc_result.tools_cleared, mc_result.tools_kept, + mc_result.evidence_events_preserved, current_tokens, (current_tokens as f32 / context_window as f32) * 100.0 ); @@ -1440,6 +1703,7 @@ impl ExecutionEngine { context_window, &tool_definitions, system_prompt_message.clone(), + context_profile_policy.compression_contract_limit, CompressionTailPolicy::PreserveLiveFrontier, ) .await @@ -1455,6 +1719,7 @@ impl ExecutionEngine { ); messages = compressed_messages; + full_compression_count += 1; consecutive_compression_failures = 0; } Ok(None) => { @@ -1463,6 +1728,7 @@ impl ExecutionEngine { } Err(e) => { consecutive_compression_failures += 1; + compression_failure_count += 1; error!( "Round {} compression failed ({}/{}): {}, continuing with uncompressed context", round_index, @@ -1496,6 +1762,23 @@ impl ExecutionEngine { ); } + let before_send_tokens = + Self::estimate_request_tokens_internal(&messages, tool_definitions.as_deref()); + ContextHealthSnapshot::from_runtime_observations( + ContextHealthSnapshot::token_usage_ratio(before_send_tokens, context_window), + microcompact_count, + full_compression_count, + compression_failure_count, + &recent_tool_signatures, + &messages, + ) + .log( + &context.session_id, + &context.dialog_turn_id, + round_index, + "before_send", + ); + // Create round context let mut round_context_vars = execution_context_vars.clone(); if context.skip_tool_confirmation { @@ -1523,6 +1806,7 @@ impl ExecutionEngine { }), cancellation_token: CancellationToken::new(), 
workspace_services: context.workspace_services.clone(), + recover_partial_on_cancel: context.recover_partial_on_cancel, }; // Execute single model round @@ -1608,6 +1892,62 @@ impl ExecutionEngine { last_partial_recovery_reason = round_result.partial_recovery_reason.clone(); } + // P0: Consecutive same-tool-call loop detection + if !round_result.tool_calls.is_empty() { + let mut sigs: Vec = round_result + .tool_calls + .iter() + .map(|tc| { + let args_str = tc.arguments.to_string(); + let args_summary = Self::tool_signature_args_summary(&args_str); + format!("{}:{}", tc.tool_name, args_summary) + }) + .collect(); + sigs.sort(); + let round_sig = sigs.join("|"); + recent_tool_signatures.push(round_sig); + } else { + recent_tool_signatures.clear(); + } + + let after_round_tokens = + Self::estimate_request_tokens_internal(&messages, tool_definitions.as_deref()); + let after_round_health = ContextHealthSnapshot::from_runtime_observations( + ContextHealthSnapshot::token_usage_ratio(after_round_tokens, context_window), + microcompact_count, + full_compression_count, + compression_failure_count, + &recent_tool_signatures, + &messages, + ); + after_round_health.log( + &context.session_id, + &context.dialog_turn_id, + round_index, + "after_round", + ); + after_round_health.log_policy_thresholds( + &context.session_id, + &context.dialog_turn_id, + round_index, + &context_profile_policy, + ); + + let max_consec = context_profile_policy + .effective_loop_threshold(self.config.max_consecutive_same_tool); + if recent_tool_signatures.len() >= max_consec { + let tail = &recent_tool_signatures[recent_tool_signatures.len() - max_consec..]; + if tail.windows(2).all(|w| w[0] == w[1]) { + warn!( + "Loop detected: {} consecutive rounds with identical tool signatures, stopping", + max_consec + ); + loop_detected = true; + finalization_reason = Some("loop_detected"); + break; + } + } + // User-steering messages submitted while this turn is running: drain and inject // them as user 
messages into the working history before starting the next round // (Codex-style mid-turn injection). This does NOT end the current turn, in @@ -1884,7 +2224,11 @@ impl ExecutionEngine { let finish_reason = FinishReason::Complete; // success reflects whether we ended with a usable final answer. - let success = !matches!(effective_finish_reason, "finalize_failed" | "empty_round"); + let success = !loop_detected + && !matches!( + effective_finish_reason, + "finalize_failed" | "empty_round" | "max_rounds" + ); // Emit dialog turn completed event debug!("Preparing to send DialogTurnCompleted event"); @@ -2085,11 +2429,12 @@ impl ExecutionEngine { #[cfg(test)] mod tests { - use super::ExecutionEngine; + use super::{ContextHealthSnapshot, ExecutionEngine}; use crate::agentic::core::{Message, ToolCall, ToolResult}; use crate::service::config::types::AIConfig; use crate::service::config::types::AIModelConfig; use serde_json::json; + use sha2::{Digest, Sha256}; fn build_model(id: &str, name: &str, model_name: &str) -> AIModelConfig { AIModelConfig { @@ -2134,6 +2479,76 @@ mod tests { ); } + #[test] + fn tool_signature_args_summary_truncates_on_utf8_boundary() { + let args = format!("{}{}", "a".repeat(62), "案".repeat(30)); + let args_hash = hex::encode(Sha256::digest(args.as_bytes())); + + let summary = ExecutionEngine::tool_signature_args_summary(&args); + + assert_eq!( + summary, + format!("{}..#{}:sha256={}", "a".repeat(62), args.len(), args_hash) + ); + } + + #[test] + fn tool_signature_args_summary_keeps_short_arguments() { + let args = r#"{"content":"short"}"#; + + let summary = ExecutionEngine::tool_signature_args_summary(args); + + assert_eq!(summary, args); + } + + #[test] + fn tool_signature_args_summary_distinguishes_same_prefix_and_length() { + let first = format!("{}{}", "x".repeat(64), "a".repeat(80)); + let second = format!("{}{}", "x".repeat(64), "b".repeat(80)); + + let first_summary = ExecutionEngine::tool_signature_args_summary(&first); + let 
second_summary = ExecutionEngine::tool_signature_args_summary(&second); + + assert_eq!(first.len(), second.len()); + assert_ne!(first, second); + assert_ne!(first_summary, second_summary); + } + + #[test] + fn context_health_snapshot_scores_repeated_tool_signatures() { + let signatures = vec![ + r#"Bash:{"command":"cargo test"}"#.to_string(), + r#"Bash:{"command":"cargo test"}"#.to_string(), + r#"Bash:{"command":"cargo test"}"#.to_string(), + ]; + + let snapshot = + ContextHealthSnapshot::from_runtime_observations(0.82, 2, 1, 0, &signatures, &[]); + + assert!((snapshot.token_usage_ratio - 0.82).abs() < f32::EPSILON); + assert_eq!(snapshot.microcompact_count, 2); + assert_eq!(snapshot.full_compression_count, 1); + assert_eq!(snapshot.compression_failure_count, 0); + assert_eq!(snapshot.repeated_tool_signature_count, 3); + assert_eq!(snapshot.consecutive_failed_commands, 0); + } + + #[test] + fn context_health_snapshot_counts_consecutive_failed_commands() { + let messages = vec![ + command_result("Bash", true, Some(0)), + command_result("Bash", false, Some(1)), + command_result("Git", false, Some(128)), + ]; + + let snapshot = + ContextHealthSnapshot::from_runtime_observations(0.44, 0, 0, 2, &[], &messages); + + assert_eq!(snapshot.repeated_tool_signature_count, 0); + assert_eq!(snapshot.consecutive_failed_commands, 2); + assert_eq!(snapshot.compression_failure_count, 2); + } + #[test] fn assistant_has_tool_calls_detects_mixed_tool_message() { let message = Message::assistant_with_tools( @@ -2187,4 +2602,20 @@ mod tests { assistant, ])); } + + fn command_result(tool_name: &str, success: bool, exit_code: Option) -> Message { + Message::tool_result(ToolResult { + tool_id: format!("{}-tool", tool_name), + tool_name: tool_name.to_string(), + result: json!({ + "success": success, + "exit_code": exit_code, + "command": format!("{} command", tool_name), + }), + result_for_assistant: None, + is_error: !success, + duration_ms: Some(1), + image_attachments: None, + }) + } } 
diff --git a/src/crates/core/src/agentic/execution/round_executor.rs b/src/crates/core/src/agentic/execution/round_executor.rs index a95766fd4..5946699d0 100644 --- a/src/crates/core/src/agentic/execution/round_executor.rs +++ b/src/crates/core/src/agentic/execution/round_executor.rs @@ -2,7 +2,7 @@ //! //! Executes a single model round: calls AI, processes streaming responses, executes tools -use super::stream_processor::{StreamProcessor, StreamResult}; +use super::stream_processor::{StreamProcessOptions, StreamProcessor, StreamResult}; use super::types::{FinishReason, RoundContext, RoundResult}; use crate::agentic::core::{Message, ToolCall}; use crate::agentic::events::{AgenticEvent, EventPriority, EventQueue, ToolEventData}; @@ -190,7 +190,7 @@ impl RoundExecutor { let stream_started_at = Instant::now(); match self .stream_processor - .process_stream( + .process_stream_with_options( ai_stream, StreamProcessor::derive_watchdog_timeout(ai_client.stream_idle_timeout()), raw_sse_rx, // Pass raw SSE data receiver (for error diagnosis) @@ -199,6 +199,9 @@ impl RoundExecutor { round_id.clone(), subagent_parent_info.clone(), &cancel_token, + StreamProcessOptions { + recover_partial_on_cancel: context.recover_partial_on_cancel, + }, ) .await { diff --git a/src/crates/core/src/agentic/execution/stream_processor.rs b/src/crates/core/src/agentic/execution/stream_processor.rs index 1b69922d2..3221e167a 100644 --- a/src/crates/core/src/agentic/execution/stream_processor.rs +++ b/src/crates/core/src/agentic/execution/stream_processor.rs @@ -12,7 +12,9 @@ use std::time::Duration; use tokio::sync::mpsc; use tokio_util::sync::CancellationToken; -pub use bitfun_agent_stream::{StreamProcessorError, ToolCall as StreamToolCall}; +pub use bitfun_agent_stream::{ + StreamProcessOptions, StreamProcessorError, ToolCall as StreamToolCall, +}; /// Stream processing result exposed through bitfun-core compatibility types. 
#[derive(Debug, Clone)] @@ -91,9 +93,36 @@ impl StreamProcessor { round_id: String, subagent_parent_info: Option, cancellation_token: &CancellationToken, + ) -> Result { + self.process_stream_with_options( + stream, + watchdog_timeout, + raw_sse_rx, + session_id, + dialog_turn_id, + round_id, + subagent_parent_info, + cancellation_token, + StreamProcessOptions::default(), + ) + .await + } + + #[allow(clippy::too_many_arguments)] + pub async fn process_stream_with_options( + &self, + stream: BoxStream<'static, Result>, + watchdog_timeout: Option, + raw_sse_rx: Option>, + session_id: String, + dialog_turn_id: String, + round_id: String, + subagent_parent_info: Option, + cancellation_token: &CancellationToken, + options: StreamProcessOptions, ) -> Result { self.inner - .process_stream( + .process_stream_with_options( stream, watchdog_timeout, raw_sse_rx, @@ -102,6 +131,7 @@ impl StreamProcessor { round_id, subagent_parent_info.map(Into::into), cancellation_token, + options, ) .await .map(Into::into) diff --git a/src/crates/core/src/agentic/execution/types.rs b/src/crates/core/src/agentic/execution/types.rs index 5afd01c82..51ce884d6 100644 --- a/src/crates/core/src/agentic/execution/types.rs +++ b/src/crates/core/src/agentic/execution/types.rs @@ -32,6 +32,9 @@ pub struct ExecutionContext { /// When set, engine drains pending user steering messages at each round boundary /// and injects them into the dialog history without ending the turn. pub round_steering: Option>, + /// When true, stream cancellation may be converted into a partial assistant + /// result if text/tool output has already been produced. 
+ pub recover_partial_on_cancel: bool, } /// Round context @@ -54,6 +57,7 @@ pub struct RoundContext { pub steering_interrupt: Option, pub cancellation_token: CancellationToken, pub workspace_services: Option, + pub recover_partial_on_cancel: bool, } /// Round result diff --git a/src/crates/core/src/agentic/mod.rs b/src/crates/core/src/agentic/mod.rs index 6303e305d..12ca1c86d 100644 --- a/src/crates/core/src/agentic/mod.rs +++ b/src/crates/core/src/agentic/mod.rs @@ -17,6 +17,7 @@ pub mod execution; pub mod tools; // Coordination module +pub mod context_profile; pub mod coordination; pub mod deep_review_policy; @@ -43,6 +44,7 @@ mod util; pub mod insights; pub use agents::*; +pub use context_profile::*; pub use coordination::*; pub use core::*; pub use events::{queue, router, types as event_types}; diff --git a/src/crates/core/src/agentic/persistence/manager.rs b/src/crates/core/src/agentic/persistence/manager.rs index 3447e1191..5adb0de6f 100644 --- a/src/crates/core/src/agentic/persistence/manager.rs +++ b/src/crates/core/src/agentic/persistence/manager.rs @@ -659,6 +659,9 @@ impl PersistenceManager { tags: existing.map(|value| value.tags.clone()).unwrap_or_default(), custom_metadata: existing.and_then(|value| value.custom_metadata.clone()), todos: existing.and_then(|value| value.todos.clone()), + deep_review_run_manifest: existing + .and_then(|value| value.deep_review_run_manifest.clone()), + deep_review_cache: existing.and_then(|value| value.deep_review_cache.clone()), workspace_path: Some(workspace_root), workspace_hostname, unread_completion: existing.and_then(|value| value.unread_completion.clone()), diff --git a/src/crates/core/src/agentic/session/compression/compressor.rs b/src/crates/core/src/agentic/session/compression/compressor.rs index 3426fab3c..1c706261f 100644 --- a/src/crates/core/src/agentic/session/compression/compressor.rs +++ b/src/crates/core/src/agentic/session/compression/compressor.rs @@ -3,11 +3,12 @@ //! 
Responsible only for transforming a session context into a compressed one. use super::fallback::{ - build_structured_compression_summary, CompressionFallbackOptions, CompressionSummaryArtifact, + build_structured_compression_summary_with_contract, CompressionFallbackOptions, + CompressionSummaryArtifact, }; use crate::agentic::core::{ - render_system_reminder, CompressedTodoSnapshot, CompressionEntry, CompressionPayload, Message, - MessageHelper, MessageRole, MessageSemanticKind, + render_system_reminder, CompressedTodoSnapshot, CompressionContract, CompressionEntry, + CompressionPayload, Message, MessageHelper, MessageRole, MessageSemanticKind, }; use crate::infrastructure::ai::{get_global_ai_client_factory, AIClient}; use crate::util::errors::{BitFunError, BitFunResult}; @@ -192,12 +193,32 @@ impl ContextCompressor { } pub async fn compress_turns( + &self, + session_id: &str, + context_window: usize, + turn_index_to_keep: usize, + turns: Vec, + tail_policy: CompressionTailPolicy, + ) -> BitFunResult { + self.compress_turns_with_contract( + session_id, + context_window, + turn_index_to_keep, + turns, + tail_policy, + None, + ) + .await + } + + pub async fn compress_turns_with_contract( &self, session_id: &str, context_window: usize, turn_index_to_keep: usize, mut turns: Vec, tail_policy: CompressionTailPolicy, + contract: Option, ) -> BitFunResult { if turns.is_empty() { debug!("No turns need compression: session_id={}", session_id); @@ -230,7 +251,7 @@ impl ContextCompressor { let mut has_model_summary = false; if !turns.is_empty() { let mut summary_artifact = self - .execute_compression_with_fallback(turns, context_window) + .execute_compression_with_fallback(turns, context_window, contract) .await?; if turns_to_keep.is_empty() { self.append_todo_snapshot(&mut summary_artifact, last_todo.clone()); @@ -340,6 +361,7 @@ impl ContextCompressor { &self, turns_to_compress: Vec, context_window: usize, + contract: Option, ) -> BitFunResult { let summary_result = match 
get_global_ai_client_factory().await { Ok(ai_client_factory) => match ai_client_factory @@ -347,8 +369,13 @@ impl ContextCompressor { .await { Ok(ai_client) => { - self.execute_compression(ai_client, turns_to_compress.clone(), context_window) - .await + self.execute_compression( + ai_client, + turns_to_compress.clone(), + context_window, + contract.as_ref(), + ) + .await } Err(err) => Err(BitFunError::AIClient(format!( "Failed to get AI client: {}", @@ -364,12 +391,26 @@ impl ContextCompressor { match summary_result { Ok(summary) => { trace!("Compression summary: {}", summary); + let mut payload = CompressionPayload::from_summary(summary.clone()); + let summary_text = + if let Some(contract) = contract.filter(|contract| !contract.is_empty()) { + payload.entries.insert( + 0, + CompressionEntry::Contract { + contract: contract.clone(), + }, + ); + format!( + "{}\n\nPrevious conversation is summarized below:\n{}", + contract.render_for_model(), + summary + ) + } else { + format!("Previous conversation is summarized below:\n{}", summary) + }; Ok(CompressionSummaryArtifact { - summary_text: format!( - "Previous conversation is summarized below:\n{}", - summary - ), - payload: CompressionPayload::from_summary(summary), + summary_text, + payload, used_model_summary: true, }) } @@ -378,12 +419,13 @@ impl ContextCompressor { "Model-based compression failed, falling back to structured local compression: {}", err ); - let summary_artifact = build_structured_compression_summary( + let summary_artifact = build_structured_compression_summary_with_contract( turns_to_compress .into_iter() .map(|turn| turn.messages) .collect(), &self.build_fallback_options(context_window), + contract, ); Ok(summary_artifact) } @@ -426,6 +468,7 @@ impl ContextCompressor { ai_client: Arc, turns_to_compress: Vec, context_window: usize, + contract: Option<&CompressionContract>, ) -> BitFunResult { debug!("Compressing {} turn(s)", turns_to_compress.len()); @@ -483,6 +526,7 @@ Be thorough and precise. 
Do not lose important technical details from either the ai_client.clone(), gen_system_message_for_summary(&summary), cur_messages, + contract, ) .await?; cur_messages = Vec::new(); @@ -506,6 +550,7 @@ Be thorough and precise. Do not lose important technical details from either the ai_client.clone(), gen_system_message_for_summary(&summary), messages_part1, + contract, ) .await?; request_cnt += 1; @@ -518,6 +563,7 @@ Be thorough and precise. Do not lose important technical details from either the ai_client.clone(), gen_system_message_for_summary(&summary), messages_part2, + contract, ) .await?; request_cnt += 1; @@ -540,6 +586,7 @@ Be thorough and precise. Do not lose important technical details from either the ai_client.clone(), gen_system_message_for_summary(&summary), cur_messages, + contract, ) .await?; request_cnt += 1; @@ -553,9 +600,16 @@ Be thorough and precise. Do not lose important technical details from either the ai_client: Arc, system_message_for_summary: Message, messages: Vec, + contract: Option<&CompressionContract>, ) -> BitFunResult { let raw_summary = self - .generate_summary_with_retry(ai_client, system_message_for_summary, messages, 2) + .generate_summary_with_retry( + ai_client, + system_message_for_summary, + messages, + contract, + 2, + ) .await?; Self::normalize_model_summary_output(&raw_summary).ok_or_else(|| { BitFunError::AIClient( @@ -570,6 +624,7 @@ Be thorough and precise. Do not lose important technical details from either the ai_client: Arc, system_message_for_summary: Message, messages: Vec, + contract: Option<&CompressionContract>, max_tries: usize, ) -> BitFunResult { let mut summary_messages = vec![AIMessage::from(system_message_for_summary)]; @@ -578,7 +633,7 @@ Be thorough and precise. 
Do not lose important technical details from either the ai_msg.reasoning_content = None; ai_msg })); - summary_messages.push(AIMessage::user(self.get_compact_prompt())); + summary_messages.push(AIMessage::user(self.get_compact_prompt(contract))); let mut last_error = None; let base_wait_time_ms = 500; @@ -624,9 +679,21 @@ Be thorough and precise. Do not lose important technical details from either the Err(BitFunError::AIClient(error_msg)) } - fn get_compact_prompt(&self) -> String { - r#"Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions. + fn get_compact_prompt(&self, contract: Option<&CompressionContract>) -> String { + let contract_instruction = contract + .filter(|contract| !contract.is_empty()) + .map(|contract| { + format!( + "\n\nThe following compaction contract is authoritative factual context from tool observations. Preserve every field from it in the final <summary>:\n{}\n", + contract.render_for_model() + ) + }) + .unwrap_or_default(); + + format!( + r#"Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions. This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context. +{contract_instruction} Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. Then output the final retained summary in <summary> tags. Important: only the content inside <summary> will be kept as compressed history. The <analysis> section is transient and will be discarded, so do not put any required final information only in <analysis>. 
@@ -712,7 +779,7 @@ Here's an example of how your output should be structured: Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response. "# - .to_string() + ) } } @@ -729,7 +796,8 @@ fn extract_tag_content<'a>(text: &'a str, tag: &str) -> Option<&'a str> { mod tests { use super::{CompressionTailPolicy, ContextCompressor, TurnWithTokens}; use crate::agentic::core::{ - render_system_reminder, CompressionEntry, CompressionPayload, Message, MessageSemanticKind, + render_system_reminder, CompressionContract, CompressionContractItem, CompressionEntry, + CompressionPayload, Message, MessageSemanticKind, }; fn make_turn(messages: Vec) -> TurnWithTokens { @@ -858,6 +926,28 @@ mod tests { assert!(marker.contains("historical context")); } + #[test] + fn model_summary_prompt_includes_compaction_contract() { + let compressor = ContextCompressor::new(Default::default()); + let contract = CompressionContract { + touched_files: vec!["src/lib.rs".to_string()], + verification_commands: vec![CompressionContractItem { + target: "cargo test".to_string(), + status: "succeeded".to_string(), + summary: "Tests passed.".to_string(), + error_kind: None, + }], + blocking_failures: Vec::new(), + subagent_statuses: Vec::new(), + }; + + let prompt = compressor.get_compact_prompt(Some(&contract)); + + assert!(prompt.contains("authoritative factual context")); + assert!(prompt.contains("src/lib.rs")); + assert!(prompt.contains("cargo test")); + } + #[test] fn model_summary_output_uses_summary_tag_body_only() { let normalized = ContextCompressor::normalize_model_summary_output( diff --git a/src/crates/core/src/agentic/session/compression/fallback/mod.rs b/src/crates/core/src/agentic/session/compression/fallback/mod.rs index 2e46b9596..dc0aa0b62 100644 --- a/src/crates/core/src/agentic/session/compression/fallback/mod.rs +++ b/src/crates/core/src/agentic/session/compression/fallback/mod.rs @@ -4,6 +4,7 @@ mod 
render; mod sanitize; mod types; +use crate::agentic::core::{CompressionContract, CompressionEntry}; use builder::build_entries_from_turns; use payload::trim_payload_to_budget; use render::render_payload_for_model; @@ -14,7 +15,18 @@ pub fn build_structured_compression_summary( turns: Vec<Vec<Message>>, options: &CompressionFallbackOptions, ) -> CompressionSummaryArtifact { - let entries = build_entries_from_turns(turns, options); + build_structured_compression_summary_with_contract(turns, options, None) +} + +pub fn build_structured_compression_summary_with_contract( + turns: Vec<Vec<Message>>, + options: &CompressionFallbackOptions, + contract: Option<CompressionContract>, +) -> CompressionSummaryArtifact { + let mut entries = build_entries_from_turns(turns, options); + if let Some(contract) = contract.filter(|contract| !contract.is_empty()) { + entries.insert(0, CompressionEntry::Contract { contract }); + } let trimmed_payload = trim_payload_to_budget(entries, options); let summary_text = render_payload_for_model(&trimmed_payload); diff --git a/src/crates/core/src/agentic/session/compression/fallback/payload.rs b/src/crates/core/src/agentic/session/compression/fallback/payload.rs index 29497e80a..4721cae6e 100644 --- a/src/crates/core/src/agentic/session/compression/fallback/payload.rs +++ b/src/crates/core/src/agentic/session/compression/fallback/payload.rs @@ -14,15 +14,29 @@ pub(super) fn trim_payload_to_budget( } let units = flatten_entries_to_units(entries); - let mut selected_units = Vec::new(); - - for unit in units.into_iter().rev() { + let mut selected_units: Vec<CompressionUnit> = units + .iter() + .filter_map(|unit| match unit { + CompressionUnit::Contract { .. } => Some(unit.clone()), + _ => None, + }) + .collect(); + let history_units: Vec<CompressionUnit> = units + .into_iter() + .filter(|unit| !matches!(unit, CompressionUnit::Contract { ..
})) + .collect(); + + for unit in history_units.into_iter().rev() { let mut candidate_units = vec![unit.clone()]; candidate_units.extend(selected_units.clone()); let candidate_payload = rebuild_payload_from_units(candidate_units); if estimate_payload_tokens(&candidate_payload) <= options.max_tokens { - selected_units.insert(0, unit); + let history_insert_index = selected_units + .iter() + .take_while(|selected| matches!(selected, CompressionUnit::Contract { .. })) + .count(); + selected_units.insert(history_insert_index, unit); } } @@ -34,6 +48,9 @@ fn flatten_entries_to_units(entries: Vec) -> Vec { + units.push(CompressionUnit::Contract { contract }); + } CompressionEntry::ModelSummary { text } => { units.push(CompressionUnit::ModelSummary { text }); } @@ -72,6 +89,16 @@ fn rebuild_payload_from_units(units: Vec) -> CompressionPayload for unit in units { match unit { + CompressionUnit::Contract { contract } => { + flush_rebuilt_turn( + &mut entries, + &mut current_turn_entry_id, + &mut current_turn_id, + &mut current_messages, + &mut current_todo, + ); + entries.push(CompressionEntry::Contract { contract }); + } CompressionUnit::ModelSummary { text } => { flush_rebuilt_turn( &mut entries, diff --git a/src/crates/core/src/agentic/session/compression/fallback/render.rs b/src/crates/core/src/agentic/session/compression/fallback/render.rs index 4e635d128..fd0335226 100644 --- a/src/crates/core/src/agentic/session/compression/fallback/render.rs +++ b/src/crates/core/src/agentic/session/compression/fallback/render.rs @@ -1,5 +1,6 @@ use crate::agentic::core::{ - CompressedMessage, CompressedMessageRole, CompressionEntry, CompressionPayload, + CompressedMessage, CompressedMessageRole, CompressionContract, CompressionEntry, + CompressionPayload, }; use serde_json::{json, Value}; @@ -9,12 +10,16 @@ pub(super) fn render_payload_for_model(payload: &CompressionPayload) -> String { .to_string(); } - let mut sections = Vec::new(); + let mut contract_sections = Vec::new(); + let 
mut history_sections = Vec::new(); for (index, entry) in payload.entries.iter().enumerate() { match entry { + CompressionEntry::Contract { contract } => { + contract_sections.push(render_contract(contract)); + } CompressionEntry::ModelSummary { text } => { - sections.push(format!( + history_sections.push(format!( "Earlier summarized history {}:\n{}", index + 1, text @@ -41,14 +46,20 @@ pub(super) fn render_payload_for_model(payload: &CompressionPayload) -> String { } } } - sections.push(lines.join("\n")); + history_sections.push(lines.join("\n")); } } } + let mut sections = contract_sections; + sections.extend(history_sections); sections.join("\n\n") } +fn render_contract(contract: &CompressionContract) -> String { + contract.render_for_model() +} + fn render_compressed_message( lines: &mut Vec, message: &CompressedMessage, diff --git a/src/crates/core/src/agentic/session/compression/fallback/tests.rs b/src/crates/core/src/agentic/session/compression/fallback/tests.rs index 29b7fa840..57a956ed9 100644 --- a/src/crates/core/src/agentic/session/compression/fallback/tests.rs +++ b/src/crates/core/src/agentic/session/compression/fallback/tests.rs @@ -1,7 +1,11 @@ -use super::{build_structured_compression_summary, CompressionFallbackOptions}; +use super::{ + build_structured_compression_summary, build_structured_compression_summary_with_contract, + CompressionFallbackOptions, +}; use crate::agentic::core::{ - render_system_reminder, render_user_query, CompressedMessageRole, CompressionEntry, - CompressionPayload, Message, MessageSemanticKind, ToolCall, ToolResult, + render_system_reminder, render_user_query, CompressedMessageRole, CompressionContract, + CompressionContractItem, CompressionEntry, CompressionPayload, Message, MessageSemanticKind, + ToolCall, ToolResult, }; use serde_json::json; @@ -175,3 +179,58 @@ fn groups_consecutive_assistant_messages_under_single_role_header() { .summary_text .contains("Updated the styling changes.")); } + +#[test] +fn 
renders_contract_facts_even_when_tool_results_are_cleared() { + let contract = CompressionContract { + touched_files: vec!["src/main.rs".to_string()], + verification_commands: vec![CompressionContractItem { + target: "cargo test".to_string(), + status: "succeeded".to_string(), + summary: "Verification command completed.".to_string(), + error_kind: None, + }], + blocking_failures: vec![CompressionContractItem { + target: "pnpm run type-check:web".to_string(), + status: "failed".to_string(), + summary: "Type check failed before compression.".to_string(), + error_kind: Some("exit_code:2".to_string()), + }], + subagent_statuses: vec![CompressionContractItem { + target: "ReviewSecurity".to_string(), + status: "partial_timeout".to_string(), + summary: "Security reviewer timed out after partial output.".to_string(), + error_kind: Some("timeout".to_string()), + }], + }; + + let summary_artifact = build_structured_compression_summary_with_contract( + vec![vec![Message::tool_result(ToolResult { + tool_id: "tool_1".to_string(), + tool_name: "Read".to_string(), + result: json!({"content": "large output omitted"}), + result_for_assistant: Some("large output omitted".to_string()), + is_error: false, + duration_ms: None, + image_attachments: None, + })]], + &default_options(), + Some(contract), + ); + + assert!(summary_artifact + .summary_text + .contains("Compaction contract:")); + assert!(summary_artifact.summary_text.contains("src/main.rs")); + assert!(summary_artifact.summary_text.contains("cargo test")); + assert!(summary_artifact + .summary_text + .contains("pnpm run type-check:web")); + assert!(summary_artifact.summary_text.contains("exit_code:2")); + assert!(summary_artifact.summary_text.contains("ReviewSecurity")); + assert!(summary_artifact.summary_text.contains("partial_timeout")); + assert!(matches!( + &summary_artifact.payload.entries[0], + CompressionEntry::Contract { .. 
} + )); +} diff --git a/src/crates/core/src/agentic/session/compression/fallback/types.rs b/src/crates/core/src/agentic/session/compression/fallback/types.rs index dd27488d5..e90edaf42 100644 --- a/src/crates/core/src/agentic/session/compression/fallback/types.rs +++ b/src/crates/core/src/agentic/session/compression/fallback/types.rs @@ -1,4 +1,6 @@ -use crate::agentic::core::{CompressedMessage, CompressedTodoSnapshot, CompressionPayload}; +use crate::agentic::core::{ + CompressedMessage, CompressedTodoSnapshot, CompressionContract, CompressionPayload, +}; #[derive(Debug, Clone)] pub struct CompressionFallbackOptions { @@ -18,6 +20,9 @@ pub struct CompressionSummaryArtifact { #[derive(Debug, Clone)] pub(super) enum CompressionUnit { + Contract { + contract: CompressionContract, + }, ModelSummary { text: String, }, diff --git a/src/crates/core/src/agentic/session/compression/microcompact.rs b/src/crates/core/src/agentic/session/compression/microcompact.rs index c751b51e3..92b7ce9c7 100644 --- a/src/crates/core/src/agentic/session/compression/microcompact.rs +++ b/src/crates/core/src/agentic/session/compression/microcompact.rs @@ -9,6 +9,9 @@ //! Design reference: Claude Code `microCompact.ts` (time-based clearing path). use crate::agentic::core::{Message, MessageContent}; +use crate::agentic::session::{ + EvidenceLedgerEvent, EvidenceLedgerEventStatus, EvidenceLedgerTargetKind, +}; use log::{debug, info}; use std::collections::HashSet; @@ -57,6 +60,15 @@ impl Default for MicrocompactConfig { pub struct MicrocompactResult { pub tools_cleared: usize, pub tools_kept: usize, + pub evidence_events: Vec, + pub evidence_events_preserved: usize, +} + +/// Session/turn scope used when preserving facts for cleared tool results. +#[derive(Debug, Clone, Copy)] +pub struct MicrocompactEvidenceScope<'a> { + pub session_id: &'a str, + pub turn_id: &'a str, } /// Run microcompact on the message list **in place**. 
@@ -66,6 +78,23 @@ pub struct MicrocompactResult { pub fn microcompact_messages( messages: &mut [Message], config: &MicrocompactConfig, +) -> Option { + microcompact_messages_internal(messages, config, None) +} + +/// Run microcompact and preserve a ledger event for each cleared tool result. +pub fn microcompact_messages_with_evidence( + messages: &mut [Message], + config: &MicrocompactConfig, + evidence_scope: MicrocompactEvidenceScope<'_>, +) -> Option { + microcompact_messages_internal(messages, config, Some(evidence_scope)) +} + +fn microcompact_messages_internal( + messages: &mut [Message], + config: &MicrocompactConfig, + evidence_scope: Option>, ) -> Option { let compactable = default_compactable_tools(); @@ -96,7 +125,25 @@ pub fn microcompact_messages( } let mut cleared = 0usize; + let mut evidence_events = Vec::new(); for &idx in to_clear { + let already_cleared = matches!( + &messages[idx].content, + MessageContent::ToolResult { + result_for_assistant, + .. + } if result_for_assistant.as_deref() == Some(CLEARED_PLACEHOLDER) + ); + if already_cleared { + continue; + } + + if let Some(scope) = evidence_scope { + if let Some(event) = build_evidence_event_for_tool_result(&messages[idx], scope) { + evidence_events.push(event); + } + } + let msg = &mut messages[idx]; if let MessageContent::ToolResult { ref mut result, @@ -105,10 +152,6 @@ pub fn microcompact_messages( .. 
} = msg.content { - // Skip if already cleared - if result_for_assistant.as_deref() == Some(CLEARED_PLACEHOLDER) { - continue; - } *result = serde_json::json!(CLEARED_PLACEHOLDER); *result_for_assistant = Some(CLEARED_PLACEHOLDER.to_string()); *image_attachments = None; @@ -123,27 +166,176 @@ pub fn microcompact_messages( } let kept = compactable_indices.len() - cleared; + let evidence_events_preserved = evidence_events.len(); info!( - "Microcompact: cleared {} tool result(s), kept {} recent", - cleared, kept + "Microcompact: cleared {} tool result(s), kept {} recent, preserved {} evidence event(s)", + cleared, kept, evidence_events_preserved ); debug!( - "Microcompact details: total_compactable={}, keep_recent={}, cleared={}", + "Microcompact details: total_compactable={}, keep_recent={}, cleared={}, evidence_events={}", compactable_indices.len(), config.keep_recent, - cleared + cleared, + evidence_events_preserved ); Some(MicrocompactResult { tools_cleared: cleared, tools_kept: kept, + evidence_events, + evidence_events_preserved, }) } +fn build_evidence_event_for_tool_result( + message: &Message, + scope: MicrocompactEvidenceScope<'_>, +) -> Option { + let MessageContent::ToolResult { + tool_name, + result, + is_error, + .. 
+ } = &message.content + else { + return None; + }; + + let turn_id = message.metadata.turn_id.as_deref().unwrap_or(scope.turn_id); + let target_kind = infer_target_kind(tool_name); + let target = infer_target(tool_name, result); + let status = infer_event_status(result, *is_error); + let mut event = EvidenceLedgerEvent::new( + scope.session_id, + turn_id, + tool_name, + target_kind, + target, + status, + format!( + "Preserved {} tool result before microcompact clearing.", + tool_name + ), + ); + + if let Some(error_kind) = infer_error_kind(result, *is_error) { + event = event.with_error_kind(error_kind); + } + + let touched_files = infer_touched_files(tool_name, result); + if !touched_files.is_empty() { + event = event.with_touched_files(touched_files); + } + + if let Some(artifact_path) = infer_artifact_path(result) { + event = event.with_artifact_path(artifact_path); + } + + Some(event) +} + +fn infer_target_kind(tool_name: &str) -> EvidenceLedgerTargetKind { + match tool_name { + "Bash" | "Git" => EvidenceLedgerTargetKind::Command, + "Read" | "Grep" | "Glob" | "LS" | "Edit" | "Write" | "Delete" | "GetFileDiff" => { + EvidenceLedgerTargetKind::File + } + _ => EvidenceLedgerTargetKind::Unknown, + } +} + +fn infer_target(tool_name: &str, result: &serde_json::Value) -> String { + match tool_name { + "Bash" | "Git" => string_field(result, "command") + .or_else(|| { + let operation = string_field(result, "operation")?; + Some(format!("git {}", operation)) + }) + .unwrap_or_else(|| tool_name.to_string()), + "Read" | "Edit" | "Write" | "Delete" | "GetFileDiff" => string_field(result, "file_path") + .or_else(|| string_field(result, "path")) + .unwrap_or_else(|| tool_name.to_string()), + "Grep" => string_field(result, "pattern") + .or_else(|| string_field(result, "path")) + .unwrap_or_else(|| tool_name.to_string()), + "Glob" => string_field(result, "pattern") + .or_else(|| string_field(result, "path")) + .unwrap_or_else(|| tool_name.to_string()), + "LS" => 
string_field(result, "path") + .or_else(|| string_field(result, "directory")) + .unwrap_or_else(|| tool_name.to_string()), + _ => string_field(result, "target").unwrap_or_else(|| tool_name.to_string()), + } +} + +fn infer_event_status(result: &serde_json::Value, is_error: bool) -> EvidenceLedgerEventStatus { + if is_error + || bool_field(result, "timed_out") == Some(true) + || bool_field(result, "interrupted") == Some(true) + || bool_field(result, "success") == Some(false) + || numeric_field(result, "exit_code").is_some_and(|code| code != 0) + { + EvidenceLedgerEventStatus::Failed + } else { + EvidenceLedgerEventStatus::Succeeded + } +} + +fn infer_error_kind(result: &serde_json::Value, is_error: bool) -> Option { + if bool_field(result, "timed_out") == Some(true) { + return Some("timeout".to_string()); + } + if bool_field(result, "interrupted") == Some(true) { + return Some("interrupted".to_string()); + } + if let Some(exit_code) = numeric_field(result, "exit_code") { + if exit_code != 0 { + return Some(format!("exit_code:{}", exit_code)); + } + } + if is_error || result.get("error").is_some() || bool_field(result, "success") == Some(false) { + return Some("tool_error".to_string()); + } + None +} + +fn infer_touched_files(tool_name: &str, result: &serde_json::Value) -> Vec { + match tool_name { + "Edit" | "Write" | "Delete" => string_field(result, "file_path") + .or_else(|| string_field(result, "path")) + .into_iter() + .collect(), + _ => Vec::new(), + } +} + +fn infer_artifact_path(result: &serde_json::Value) -> Option { + string_field(result, "artifact_path") + .or_else(|| string_field(result, "output_file")) + .or_else(|| string_field(result, "transcript_path")) +} + +fn string_field(result: &serde_json::Value, key: &str) -> Option { + result + .get(key) + .and_then(|value| value.as_str()) + .filter(|value| !value.trim().is_empty()) + .map(ToString::to_string) +} + +fn bool_field(result: &serde_json::Value, key: &str) -> Option { + 
result.get(key).and_then(|value| value.as_bool()) +} + +fn numeric_field(result: &serde_json::Value, key: &str) -> Option { + result.get(key).and_then(|value| value.as_i64()) +} + #[cfg(test)] mod tests { use super::*; use crate::agentic::core::{Message, ToolResult}; + use serde_json::json; fn make_tool_result(tool_name: &str, content: &str) -> Message { Message::tool_result(ToolResult { @@ -157,6 +349,22 @@ mod tests { }) } + fn make_tool_result_with_data( + tool_name: &str, + data: serde_json::Value, + assistant_text: &str, + ) -> Message { + Message::tool_result(ToolResult { + tool_id: format!("id_{}", tool_name), + tool_name: tool_name.to_string(), + result: data, + result_for_assistant: Some(assistant_text.to_string()), + is_error: false, + duration_ms: None, + image_attachments: None, + }) + } + #[test] fn clears_old_compactable_results() { let mut messages = vec![ @@ -251,4 +459,121 @@ mod tests { let r2 = microcompact_messages(&mut messages, &config); assert!(r2.is_none()); } + + #[test] + fn preserves_read_target_before_clearing_tool_result() { + let mut messages = vec![ + make_tool_result_with_data( + "Read", + json!({ + "file_path": "src/main.rs", + "content": "fn main() {}", + "success": true + }), + "Read lines 1-1 from src/main.rs", + ) + .with_turn_id("turn-old".to_string()), + make_tool_result("Read", "recent"), + ]; + + let config = MicrocompactConfig { + keep_recent: 1, + trigger_ratio: 0.0, + }; + let result = microcompact_messages_with_evidence( + &mut messages, + &config, + MicrocompactEvidenceScope { + session_id: "session-a", + turn_id: "turn-current", + }, + ) + .expect("microcompact result"); + + assert_eq!(result.tools_cleared, 1); + assert_eq!(result.evidence_events_preserved, 1); + assert_eq!(result.evidence_events[0].session_id, "session-a"); + assert_eq!(result.evidence_events[0].turn_id, "turn-old"); + assert_eq!(result.evidence_events[0].tool_name, "Read"); + assert_eq!( + result.evidence_events[0].target_kind, + 
EvidenceLedgerTargetKind::File + ); + assert_eq!(result.evidence_events[0].target, "src/main.rs"); + assert_eq!( + result.evidence_events[0].status, + EvidenceLedgerEventStatus::Succeeded + ); + } + + #[test] + fn preserves_failed_command_error_kind_before_clearing() { + let mut messages = vec![ + make_tool_result_with_data( + "Bash", + json!({ + "command": "cargo test", + "success": false, + "exit_code": 1, + "output": "test failed" + }), + "Command failed", + ), + make_tool_result("Read", "recent"), + ]; + + let config = MicrocompactConfig { + keep_recent: 1, + trigger_ratio: 0.0, + }; + let result = microcompact_messages_with_evidence( + &mut messages, + &config, + MicrocompactEvidenceScope { + session_id: "session-a", + turn_id: "turn-a", + }, + ) + .expect("microcompact result"); + + let event = &result.evidence_events[0]; + assert_eq!(event.target_kind, EvidenceLedgerTargetKind::Command); + assert_eq!(event.target, "cargo test"); + assert_eq!(event.status, EvidenceLedgerEventStatus::Failed); + assert_eq!( + event.exit_code_or_error_kind.as_deref(), + Some("exit_code:1") + ); + } + + #[test] + fn preserves_mutated_file_in_touched_files_before_clearing() { + let mut messages = vec![ + make_tool_result_with_data( + "Edit", + json!({ + "file_path": "src/lib.rs", + "success": true + }), + "Successfully edited src/lib.rs", + ), + make_tool_result("Read", "recent"), + ]; + + let config = MicrocompactConfig { + keep_recent: 1, + trigger_ratio: 0.0, + }; + let result = microcompact_messages_with_evidence( + &mut messages, + &config, + MicrocompactEvidenceScope { + session_id: "session-a", + turn_id: "turn-a", + }, + ) + .expect("microcompact result"); + + assert_eq!(result.evidence_events[0].touched_files, vec!["src/lib.rs"]); + } } diff --git a/src/crates/core/src/agentic/session/evidence_ledger.rs b/src/crates/core/src/agentic/session/evidence_ledger.rs new file mode 100644 index 000000000..c3ec77f6f --- /dev/null +++ 
b/src/crates/core/src/agentic/session/evidence_ledger.rs @@ -0,0 +1,540 @@ +use crate::agentic::core::{CompressionContract, CompressionContractItem}; +use dashmap::DashMap; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; + +const MAX_PARTIAL_OUTPUT_BYTES: usize = 8_000; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum EvidenceLedgerTargetKind { + #[serde(rename = "file")] + File, + #[serde(rename = "command")] + Command, + #[serde(rename = "subagent")] + Subagent, + #[serde(rename = "artifact")] + Artifact, + #[serde(rename = "checkpoint")] + Checkpoint, + #[serde(rename = "unknown")] + Unknown, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum EvidenceLedgerEventStatus { + #[serde(rename = "created")] + Created, + #[serde(rename = "succeeded")] + Succeeded, + #[serde(rename = "failed")] + Failed, + #[serde(rename = "partial_timeout")] + PartialTimeout, + #[serde(rename = "cancelled")] + Cancelled, + #[serde(rename = "unknown")] + Unknown, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct EvidenceLedgerCheckpoint { + #[serde(skip_serializing_if = "Option::is_none")] + pub current_branch: Option, + pub dirty_state_summary: String, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub touched_files: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub diff_hash: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct EvidenceLedgerEvent { + pub event_id: String, + pub session_id: String, + pub turn_id: String, + pub tool_name: String, + pub target_kind: EvidenceLedgerTargetKind, + pub target: String, + pub status: EvidenceLedgerEventStatus, + pub exit_code_or_error_kind: Option, + pub touched_files: Vec, + pub artifact_path: Option, + pub summary: String, + pub partial_output: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub checkpoint: Option, + pub 
created_at_ms: u64, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct EvidenceLedgerSummaryItem { + pub event_id: String, + pub turn_id: String, + pub tool_name: String, + pub target_kind: EvidenceLedgerTargetKind, + pub target: String, + pub status: EvidenceLedgerEventStatus, + pub summary: String, + pub error_kind: Option, + pub partial_output: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub checkpoint: Option, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct EvidenceLedgerSummary { + pub touched_files: Vec, + pub latest_failed_commands: Vec, + pub latest_verification_commands: Vec, + pub partial_subagent_results: Vec, + pub latest_checkpoints: Vec, +} + +#[derive(Debug, Default)] +pub struct SessionEvidenceLedger { + events_by_session: Arc>>, +} + +impl EvidenceLedgerEvent { + pub fn new( + session_id: impl Into, + turn_id: impl Into, + tool_name: impl Into, + target_kind: EvidenceLedgerTargetKind, + target: impl Into, + status: EvidenceLedgerEventStatus, + summary: impl Into, + ) -> Self { + Self { + event_id: uuid::Uuid::new_v4().to_string(), + session_id: session_id.into(), + turn_id: turn_id.into(), + tool_name: tool_name.into(), + target_kind, + target: target.into(), + status, + exit_code_or_error_kind: None, + touched_files: Vec::new(), + artifact_path: None, + summary: summary.into(), + partial_output: None, + checkpoint: None, + created_at_ms: current_time_millis(), + } + } + + pub fn checkpoint_created( + session_id: impl Into, + turn_id: impl Into, + tool_name: impl Into, + target: impl Into, + checkpoint: EvidenceLedgerCheckpoint, + ) -> Self { + let target = target.into(); + Self::new( + session_id, + turn_id, + tool_name, + EvidenceLedgerTargetKind::Checkpoint, + target.clone(), + EvidenceLedgerEventStatus::Created, + format!("Checkpoint created before modifying {}.", target), + ) + .with_touched_files(checkpoint.touched_files.clone()) + 
.with_checkpoint(checkpoint) + } + + pub fn with_error_kind(mut self, error_kind: impl Into) -> Self { + self.exit_code_or_error_kind = Some(error_kind.into()); + self + } + + pub fn with_partial_output(mut self, partial_output: impl Into) -> Self { + let partial_output = partial_output.into(); + self.partial_output = Some(truncate_string_at_char_boundary( + &partial_output, + MAX_PARTIAL_OUTPUT_BYTES, + )); + self + } + + pub fn with_touched_files(mut self, touched_files: Vec) -> Self { + self.touched_files = touched_files; + self + } + + pub fn with_artifact_path(mut self, artifact_path: impl Into) -> Self { + self.artifact_path = Some(artifact_path.into()); + self + } + + pub fn with_checkpoint(mut self, checkpoint: EvidenceLedgerCheckpoint) -> Self { + self.checkpoint = Some(checkpoint); + self + } +} + +impl SessionEvidenceLedger { + pub fn new() -> Self { + Self::default() + } + + pub fn append(&self, event: EvidenceLedgerEvent) -> EvidenceLedgerEvent { + self.events_by_session + .entry(event.session_id.clone()) + .or_default() + .push(event.clone()); + event + } + + pub fn events_for_turn(&self, session_id: &str, turn_id: &str) -> Vec { + self.events_by_session + .get(session_id) + .map(|events| { + events + .iter() + .filter(|event| event.turn_id == turn_id) + .cloned() + .collect() + }) + .unwrap_or_default() + } + + pub fn summary_for_session(&self, session_id: &str, limit: usize) -> EvidenceLedgerSummary { + let Some(events) = self.events_by_session.get(session_id) else { + return EvidenceLedgerSummary::default(); + }; + + let mut touched_files = Vec::new(); + let mut latest_failed_commands = Vec::new(); + let mut latest_verification_commands = Vec::new(); + let mut partial_subagent_results = Vec::new(); + let mut latest_checkpoints = Vec::new(); + + for event in events.iter().rev() { + for file in &event.touched_files { + if !touched_files.contains(file) { + touched_files.push(file.clone()); + } + } + + if event.target_kind == 
EvidenceLedgerTargetKind::Command + && event.status == EvidenceLedgerEventStatus::Failed + && latest_failed_commands.len() < limit + { + latest_failed_commands.push(event.into()); + } + + if event.target_kind == EvidenceLedgerTargetKind::Command + && is_verification_command(&event.target) + && latest_verification_commands.len() < limit + { + latest_verification_commands.push(event.into()); + } + + if event.target_kind == EvidenceLedgerTargetKind::Subagent + && event.status == EvidenceLedgerEventStatus::PartialTimeout + && partial_subagent_results.len() < limit + { + partial_subagent_results.push(event.into()); + } + + if event.target_kind == EvidenceLedgerTargetKind::Checkpoint + && event.status == EvidenceLedgerEventStatus::Created + && latest_checkpoints.len() < limit + { + latest_checkpoints.push(event.into()); + } + } + + touched_files.truncate(limit); + + EvidenceLedgerSummary { + touched_files, + latest_failed_commands, + latest_verification_commands, + partial_subagent_results, + latest_checkpoints, + } + } +} + +impl From<&EvidenceLedgerEvent> for EvidenceLedgerSummaryItem { + fn from(event: &EvidenceLedgerEvent) -> Self { + Self { + event_id: event.event_id.clone(), + turn_id: event.turn_id.clone(), + tool_name: event.tool_name.clone(), + target_kind: event.target_kind.clone(), + target: event.target.clone(), + status: event.status.clone(), + summary: event.summary.clone(), + error_kind: event.exit_code_or_error_kind.clone(), + partial_output: event.partial_output.clone(), + checkpoint: event.checkpoint.clone(), + } + } +} + +impl From for CompressionContract { + fn from(summary: EvidenceLedgerSummary) -> Self { + Self { + touched_files: summary.touched_files, + verification_commands: summary + .latest_verification_commands + .into_iter() + .map(compression_contract_item_from_summary_item) + .collect(), + blocking_failures: summary + .latest_failed_commands + .into_iter() + .map(compression_contract_item_from_summary_item) + .collect(), + 
subagent_statuses: summary
+                .partial_subagent_results
+                .into_iter()
+                .map(compression_contract_item_from_summary_item)
+                .collect(),
+        }
+    }
+}
+
+fn compression_contract_item_from_summary_item(
+    item: EvidenceLedgerSummaryItem,
+) -> CompressionContractItem {
+    CompressionContractItem {
+        target: item.target,
+        status: event_status_label(&item.status).to_string(),
+        summary: item.summary,
+        error_kind: item.error_kind,
+    }
+}
+
+fn event_status_label(status: &EvidenceLedgerEventStatus) -> &'static str {
+    match status {
+        EvidenceLedgerEventStatus::Created => "created",
+        EvidenceLedgerEventStatus::Succeeded => "succeeded",
+        EvidenceLedgerEventStatus::Failed => "failed",
+        EvidenceLedgerEventStatus::PartialTimeout => "partial_timeout",
+        EvidenceLedgerEventStatus::Cancelled => "cancelled",
+        EvidenceLedgerEventStatus::Unknown => "unknown",
+    }
+}
+
+fn current_time_millis() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map(|duration| duration.as_millis().min(u128::from(u64::MAX)) as u64)
+        .unwrap_or(0)
+}
+
+fn is_verification_command(command: &str) -> bool {
+    let command = command.to_ascii_lowercase();
+    command.contains(" test")
+        || command.starts_with("test")
+        || command.contains("cargo test")
+        || command.contains("pnpm test")
+        || command.contains("npm test")
+        || command.contains("yarn test")
+        || command.contains("vitest")
+        || command.contains("type-check")
+        || command.contains("lint")
+}
+
+fn truncate_string_at_char_boundary(value: &str, max_bytes: usize) -> String {
+    crate::util::truncate_at_char_boundary(value, max_bytes).to_string()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{
+        EvidenceLedgerCheckpoint, EvidenceLedgerEvent, EvidenceLedgerEventStatus,
+        EvidenceLedgerTargetKind, SessionEvidenceLedger,
+    };
+
+    #[test]
+    fn ledger_reads_events_scoped_by_session_and_turn() {
+        let ledger = SessionEvidenceLedger::new();
+        let event = EvidenceLedgerEvent::new(
+            "session-a",
+            "turn-a",
+            "Task",
+            
EvidenceLedgerTargetKind::Subagent, + "ReviewSecurity", + EvidenceLedgerEventStatus::PartialTimeout, + "Security reviewer timed out after partial output.", + ) + .with_error_kind("timeout") + .with_partial_output("Found token logging before timeout."); + + let appended = ledger.append(event); + + assert!(!appended.event_id.is_empty()); + assert_eq!( + ledger.events_for_turn("session-a", "turn-a"), + vec![appended.clone()] + ); + assert!(ledger.events_for_turn("session-a", "other-turn").is_empty()); + assert!(ledger.events_for_turn("other-session", "turn-a").is_empty()); + } + + #[test] + fn checkpoint_created_event_preserves_recovery_boundary_metadata() { + let checkpoint = EvidenceLedgerCheckpoint { + current_branch: Some("feature/context".to_string()), + dirty_state_summary: "staged=1, unstaged=2, untracked=3".to_string(), + touched_files: vec!["src/lib.rs".to_string()], + diff_hash: Some("abc123".to_string()), + }; + + let event = EvidenceLedgerEvent::checkpoint_created( + "session-a", + "turn-a", + "Edit", + "src/lib.rs", + checkpoint.clone(), + ); + + assert_eq!(event.target_kind, EvidenceLedgerTargetKind::Checkpoint); + assert_eq!(event.status, EvidenceLedgerEventStatus::Created); + assert_eq!(event.touched_files, vec!["src/lib.rs"]); + assert_eq!(event.checkpoint.as_ref(), Some(&checkpoint)); + } + + #[test] + fn summary_projects_latest_checkpoints() { + let ledger = SessionEvidenceLedger::new(); + ledger.append(EvidenceLedgerEvent::checkpoint_created( + "session-a", + "turn-a", + "Delete", + "src/old.rs", + EvidenceLedgerCheckpoint { + current_branch: Some("feature/context".to_string()), + dirty_state_summary: "staged=0, unstaged=1, untracked=0".to_string(), + touched_files: vec!["src/old.rs".to_string()], + diff_hash: Some("def456".to_string()), + }, + )); + + let summary = ledger.summary_for_session("session-a", 10); + + assert_eq!(summary.latest_checkpoints.len(), 1); + assert_eq!(summary.latest_checkpoints[0].target, "src/old.rs"); + assert_eq!( + 
summary.latest_checkpoints[0] + .checkpoint + .as_ref() + .and_then(|checkpoint| checkpoint.current_branch.as_deref()), + Some("feature/context") + ); + } + + #[test] + fn summary_projects_partial_subagent_results() { + let ledger = SessionEvidenceLedger::new(); + ledger.append( + EvidenceLedgerEvent::new( + "session-a", + "turn-a", + "Task", + EvidenceLedgerTargetKind::Subagent, + "ReviewSecurity", + EvidenceLedgerEventStatus::PartialTimeout, + "Security reviewer timed out after partial output.", + ) + .with_error_kind("timeout") + .with_partial_output("Found token logging before timeout."), + ); + + let summary = ledger.summary_for_session("session-a", 10); + + assert_eq!(summary.partial_subagent_results.len(), 1); + assert_eq!(summary.partial_subagent_results[0].target, "ReviewSecurity"); + assert_eq!( + summary.partial_subagent_results[0] + .partial_output + .as_deref(), + Some("Found token logging before timeout.") + ); + } + + #[test] + fn partial_output_is_truncated_on_utf8_boundary() { + let ledger = SessionEvidenceLedger::new(); + let output = format!("{}{}", "a".repeat(7_999), "测"); + ledger.append( + EvidenceLedgerEvent::new( + "session-a", + "turn-a", + "Task", + EvidenceLedgerTargetKind::Subagent, + "ReviewSecurity", + EvidenceLedgerEventStatus::PartialTimeout, + "Security reviewer timed out after partial output.", + ) + .with_partial_output(output), + ); + + let summary = ledger.summary_for_session("session-a", 10); + let partial_output = summary.partial_subagent_results[0] + .partial_output + .as_deref() + .expect("partial output"); + + assert_eq!(partial_output.len(), 7_999); + assert!(partial_output.is_char_boundary(partial_output.len())); + } + + #[test] + fn summary_projects_into_compression_contract() { + let ledger = SessionEvidenceLedger::new(); + ledger.append( + EvidenceLedgerEvent::new( + "session-a", + "turn-a", + "Edit", + EvidenceLedgerTargetKind::File, + "src/main.rs", + EvidenceLedgerEventStatus::Succeeded, + "Edited main file.", + ) + 
.with_touched_files(vec!["src/main.rs".to_string()]), + ); + ledger.append( + EvidenceLedgerEvent::new( + "session-a", + "turn-a", + "Bash", + EvidenceLedgerTargetKind::Command, + "cargo test", + EvidenceLedgerEventStatus::Failed, + "Tests failed before compression.", + ) + .with_error_kind("exit_code:1"), + ); + ledger.append(EvidenceLedgerEvent::new( + "session-a", + "turn-a", + "Task", + EvidenceLedgerTargetKind::Subagent, + "ReviewSecurity", + EvidenceLedgerEventStatus::PartialTimeout, + "Security reviewer timed out after partial output.", + )); + + let contract: crate::agentic::core::CompressionContract = + ledger.summary_for_session("session-a", 10).into(); + + assert_eq!(contract.touched_files, vec!["src/main.rs"]); + assert_eq!(contract.verification_commands[0].target, "cargo test"); + assert_eq!( + contract.blocking_failures[0].error_kind.as_deref(), + Some("exit_code:1") + ); + assert_eq!(contract.subagent_statuses[0].target, "ReviewSecurity"); + assert_eq!(contract.subagent_statuses[0].status, "partial_timeout"); + } +} diff --git a/src/crates/core/src/agentic/session/mod.rs b/src/crates/core/src/agentic/session/mod.rs index 1b0b22a94..54578fb87 100644 --- a/src/crates/core/src/agentic/session/mod.rs +++ b/src/crates/core/src/agentic/session/mod.rs @@ -4,8 +4,10 @@ pub mod compression; pub mod context_store; +pub mod evidence_ledger; pub mod session_manager; pub use compression::*; pub use context_store::*; +pub use evidence_ledger::*; pub use session_manager::*; diff --git a/src/crates/core/src/agentic/session/session_manager.rs b/src/crates/core/src/agentic/session/session_manager.rs index e5610cdca..d7eee507b 100644 --- a/src/crates/core/src/agentic/session/session_manager.rs +++ b/src/crates/core/src/agentic/session/session_manager.rs @@ -3,19 +3,23 @@ //! 
Responsible for session CRUD, lifecycle management, and resource association use crate::agentic::core::{ - new_turn_id, CompressionState, Message, MessageSemanticKind, ProcessingPhase, Session, - SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, + new_turn_id, CompressionContract, CompressionState, Message, MessageSemanticKind, + ProcessingPhase, Session, SessionConfig, SessionKind, SessionState, SessionSummary, TurnStats, }; use crate::agentic::image_analysis::ImageContextData; use crate::agentic::persistence::PersistenceManager; -use crate::agentic::session::SessionContextStore; +use crate::agentic::session::{ + EvidenceLedgerCheckpoint, EvidenceLedgerEvent, EvidenceLedgerEventStatus, + EvidenceLedgerSummary, EvidenceLedgerTargetKind, SessionContextStore, SessionEvidenceLedger, +}; use crate::infrastructure::ai::get_global_ai_client_factory; use crate::service::config::{ get_app_language_code, get_global_config_service, short_model_user_language_instruction, subscribe_config_updates, ConfigUpdateEvent, }; use crate::service::session::{ - DialogTurnData, DialogTurnKind, ModelRoundData, TextItemData, TurnStatus, UserMessageData, + DialogTurnData, DialogTurnKind, ModelRoundData, SessionMetadata, TextItemData, TurnStatus, + UserMessageData, }; use crate::service::snapshot::ensure_snapshot_manager_for_workspace; use crate::util::errors::{BitFunError, BitFunResult}; @@ -374,6 +378,30 @@ mod tests { assert_eq!(title, "New Session"); } + + #[tokio::test] + async fn records_subagent_partial_timeout_in_evidence_ledger() { + let persistence_manager = Arc::new( + PersistenceManager::new(Arc::new(PathManager::new().expect("path manager"))) + .expect("persistence manager"), + ); + let manager = test_manager(persistence_manager); + + let event = manager.record_subagent_partial_timeout( + "session-a", + "turn-a", + "ReviewSecurity", + "Found token logging before timeout.", + Some("timeout"), + ); + + assert!(!event.event_id.is_empty()); + let events = 
manager.evidence_events_for_turn("session-a", "turn-a"); + assert_eq!(events, vec![event.clone()]); + let summary = manager.evidence_summary_for_session("session-a", 10); + assert_eq!(summary.partial_subagent_results.len(), 1); + assert_eq!(summary.partial_subagent_results[0].event_id, event.event_id); + } } /// Session manager @@ -389,6 +417,7 @@ pub struct SessionManager { /// Sub-components context_store: Arc, + evidence_ledger: Arc, persistence_manager: Arc, /// Configuration @@ -725,6 +754,7 @@ impl SessionManager { sessions: Arc::new(DashMap::new()), session_workspace_index: Arc::new(DashMap::new()), context_store, + evidence_ledger: Arc::new(SessionEvidenceLedger::new()), persistence_manager, config, }; @@ -739,6 +769,76 @@ impl SessionManager { manager } + pub fn append_evidence_event(&self, event: EvidenceLedgerEvent) -> EvidenceLedgerEvent { + self.evidence_ledger.append(event) + } + + pub fn record_checkpoint_created( + &self, + session_id: &str, + turn_id: &str, + tool_name: &str, + target: &str, + checkpoint: EvidenceLedgerCheckpoint, + ) -> EvidenceLedgerEvent { + self.append_evidence_event(EvidenceLedgerEvent::checkpoint_created( + session_id, turn_id, tool_name, target, checkpoint, + )) + } + + pub fn evidence_events_for_turn( + &self, + session_id: &str, + turn_id: &str, + ) -> Vec { + self.evidence_ledger.events_for_turn(session_id, turn_id) + } + + pub fn evidence_summary_for_session( + &self, + session_id: &str, + limit: usize, + ) -> EvidenceLedgerSummary { + self.evidence_ledger.summary_for_session(session_id, limit) + } + + pub fn compression_contract_for_session( + &self, + session_id: &str, + limit: usize, + ) -> Option { + let contract: CompressionContract = + self.evidence_summary_for_session(session_id, limit).into(); + (!contract.is_empty()).then_some(contract) + } + + pub fn record_subagent_partial_timeout( + &self, + session_id: &str, + turn_id: &str, + subagent_type: &str, + partial_output: &str, + error_kind: Option<&str>, + ) -> 
EvidenceLedgerEvent { + let summary = format!( + "Subagent {} timed out after producing partial output.", + subagent_type + ); + let event = EvidenceLedgerEvent::new( + session_id, + turn_id, + "Task", + EvidenceLedgerTargetKind::Subagent, + subagent_type, + EvidenceLedgerEventStatus::PartialTimeout, + summary, + ) + .with_error_kind(error_kind.unwrap_or("timeout")) + .with_partial_output(partial_output); + + self.append_evidence_event(event) + } + /// Decide whether the given session model id is still usable. /// /// `model_id` is treated as "usable" when: @@ -839,6 +939,7 @@ impl SessionManager { let sessions = self.sessions.clone(); let session_workspace_index = self.session_workspace_index.clone(); let context_store = self.context_store.clone(); + let evidence_ledger = self.evidence_ledger.clone(); let persistence_manager = self.persistence_manager.clone(); let manager_config = self.config.clone(); @@ -857,6 +958,7 @@ impl SessionManager { sessions, session_workspace_index, context_store, + evidence_ledger, persistence_manager, config: manager_config, }; @@ -1691,6 +1793,26 @@ impl SessionManager { } } + pub async fn load_session_metadata( + &self, + workspace_path: &Path, + session_id: &str, + ) -> BitFunResult> { + self.persistence_manager + .load_session_metadata(workspace_path, session_id) + .await + } + + pub async fn save_session_metadata( + &self, + workspace_path: &Path, + metadata: &SessionMetadata, + ) -> BitFunResult<()> { + self.persistence_manager + .save_session_metadata(workspace_path, metadata) + .await + } + // ============ Dialog Turn Management ============ #[allow(clippy::too_many_arguments)] diff --git a/src/crates/core/src/agentic/tools/framework.rs b/src/crates/core/src/agentic/tools/framework.rs index f67b55ac1..4e54f9070 100644 --- a/src/crates/core/src/agentic/tools/framework.rs +++ b/src/crates/core/src/agentic/tools/framework.rs @@ -1,4 +1,7 @@ //! 
Tool framework - Tool interface definition and execution context +use crate::agentic::coordination::get_global_coordinator; +use crate::agentic::deep_review_policy::record_deep_review_shared_context_tool_use; +use crate::agentic::session::EvidenceLedgerCheckpoint; use crate::agentic::tools::restrictions::{ is_local_path_within_root, is_remote_posix_path_within_root, ToolPathOperation, ToolRuntimeRestrictions, @@ -10,13 +13,16 @@ use crate::agentic::tools::workspace_paths::{ use crate::agentic::workspace::WorkspaceServices; use crate::agentic::WorkspaceBinding; use crate::infrastructure::get_path_manager_arc; +use crate::service::git::{GitDiffParams, GitService}; use crate::service::remote_ssh::workspace_state::remote_workspace_runtime_root; use crate::service::{get_workspace_runtime_service_arc, WorkspaceRuntimeContext}; use crate::util::errors::BitFunResult; use crate::util::types::ToolImageAttachment; use async_trait::async_trait; +use log::warn; use serde::{Deserialize, Serialize}; use serde_json::Value; +use sha2::{Digest, Sha256}; use std::collections::HashMap; use std::path::{Path, PathBuf}; use tokio_util::sync::CancellationToken; @@ -95,6 +101,107 @@ impl ToolUseContext { self.workspace_services.as_ref().map(|s| s.shell.as_ref()) } + pub async fn record_light_checkpoint( + &self, + tool_name: &str, + target: &str, + touched_files: Vec, + ) { + let Some(session_id) = self.session_id.as_deref() else { + return; + }; + let Some(turn_id) = self.dialog_turn_id.as_deref() else { + return; + }; + let Some(coordinator) = get_global_coordinator() else { + return; + }; + + let checkpoint = self.build_light_checkpoint(touched_files).await; + coordinator + .get_session_manager() + .record_checkpoint_created(session_id, turn_id, tool_name, target, checkpoint); + } + + async fn build_light_checkpoint(&self, touched_files: Vec) -> EvidenceLedgerCheckpoint { + let mut checkpoint = EvidenceLedgerCheckpoint { + current_branch: None, + dirty_state_summary: 
"workspace_unavailable".to_string(), + touched_files, + diff_hash: None, + }; + + if self.is_remote() { + checkpoint.dirty_state_summary = + "remote_workspace_git_metadata_unavailable".to_string(); + return checkpoint; + } + + let Some(workspace_root) = self.workspace_root() else { + return checkpoint; + }; + + match GitService::get_status(workspace_root).await { + Ok(status) => { + checkpoint.current_branch = Some(status.current_branch); + checkpoint.dirty_state_summary = format!( + "staged={}, unstaged={}, untracked={}", + status.staged.len(), + status.unstaged.len(), + status.untracked.len() + ); + } + Err(error) => { + checkpoint.dirty_state_summary = format!("git_status_unavailable: {}", error); + } + } + + checkpoint.diff_hash = self + .checkpoint_diff_hash(workspace_root, &checkpoint.touched_files) + .await; + checkpoint + } + + async fn checkpoint_diff_hash( + &self, + workspace_root: &Path, + touched_files: &[String], + ) -> Option { + let files = touched_files + .iter() + .filter_map(|file| git_relative_path(workspace_root, file)) + .collect::>(); + + if files.is_empty() { + return None; + } + + let mut diff = String::new(); + for staged in [false, true] { + let params = GitDiffParams { + files: Some(files.clone()), + staged: Some(staged), + ..Default::default() + }; + match GitService::get_diff(workspace_root, ¶ms).await { + Ok(part) => diff.push_str(&part), + Err(error) => { + warn!( + "Failed to collect checkpoint diff hash: staged={}, error={}", + staged, error + ); + return None; + } + } + } + + if diff.is_empty() { + return None; + } + + Some(hex::encode(Sha256::digest(diff.as_bytes()))) + } + pub fn enforce_tool_runtime_restrictions(&self, tool_name: &str) -> BitFunResult<()> { self.runtime_tool_restrictions .ensure_tool_allowed(tool_name) @@ -358,7 +465,7 @@ impl ToolUseContext { } #[cfg(test)] -mod tests { +mod path_resolution_tests { use super::ToolUseContext; use crate::agentic::tools::ToolRuntimeRestrictions; use 
crate::agentic::WorkspaceBinding; @@ -511,6 +618,74 @@ impl ToolResult { } } +fn git_relative_path(workspace_root: &Path, path: &str) -> Option { + if is_bitfun_runtime_uri(path) { + return None; + } + + let path = Path::new(path); + let relative = if path.is_absolute() { + path.strip_prefix(workspace_root).ok()? + } else { + path + }; + + Some(relative.to_string_lossy().replace('\\', "/")) +} + +fn custom_data_str<'a>(context: &'a ToolUseContext, key: &str) -> Option<&'a str> { + context + .custom_data + .get(key) + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) +} + +fn maybe_record_deep_review_shared_context_tool_use( + tool_name: &str, + input: &Value, + context: &ToolUseContext, +) { + if !tool_name.eq_ignore_ascii_case("Read") && !tool_name.eq_ignore_ascii_case("GetFileDiff") { + return; + } + if !custom_data_str(context, "deep_review_subagent_role") + .is_some_and(|role| role.eq_ignore_ascii_case("reviewer")) + { + return; + } + let Some(parent_turn_id) = custom_data_str(context, "deep_review_parent_dialog_turn_id") else { + return; + }; + let Some(file_path) = input + .get("file_path") + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + else { + return; + }; + let measured_path = if context.is_remote() { + None + } else { + context + .workspace_root() + .and_then(|workspace_root| git_relative_path(workspace_root, file_path)) + } + .unwrap_or_else(|| file_path.to_string()); + let subagent_type = custom_data_str(context, "deep_review_subagent_type") + .or(context.agent_type.as_deref()) + .unwrap_or("unknown"); + + record_deep_review_shared_context_tool_use( + parent_turn_id, + subagent_type, + tool_name, + &measured_path, + ); +} + /// Tool trait #[async_trait] pub trait Tool: Send + Sync { @@ -638,7 +813,7 @@ pub trait Tool: Send + Sync { /// execution to [`call_impl`], so most tools should override `call_impl` /// instead of overriding this method directly. 
async fn call(&self, input: &Value, context: &ToolUseContext) -> BitFunResult> { - if let Some(cancellation_token) = context.cancellation_token.as_ref() { + let result = if let Some(cancellation_token) = context.cancellation_token.as_ref() { tokio::select! { result = self.call_impl(input, context) => { result @@ -650,7 +825,11 @@ pub trait Tool: Send + Sync { } } else { self.call_impl(input, context).await + }; + if result.is_ok() { + maybe_record_deep_review_shared_context_tool_use(self.name(), input, context); } + result } } @@ -659,3 +838,90 @@ pub trait Tool: Send + Sync { pub struct ToolRenderOptions { pub verbose: bool, } + +#[cfg(test)] +mod shared_context_tests { + use super::{Tool, ToolResult, ToolUseContext}; + use crate::agentic::deep_review_policy::deep_review_shared_context_measurement_snapshot; + use crate::agentic::tools::ToolRuntimeRestrictions; + use crate::util::errors::BitFunResult; + use async_trait::async_trait; + use serde_json::{json, Value}; + use std::collections::HashMap; + + struct MeasurementReadTool; + + #[async_trait] + impl Tool for MeasurementReadTool { + fn name(&self) -> &str { + "Read" + } + + async fn description(&self) -> BitFunResult { + Ok("Read file".to_string()) + } + + fn input_schema(&self) -> Value { + json!({ + "type": "object", + "properties": { + "file_path": { "type": "string" } + } + }) + } + + async fn call_impl( + &self, + _input: &Value, + _context: &ToolUseContext, + ) -> BitFunResult> { + Ok(vec![ToolResult::ok( + json!({ "ok": true }), + Some("ok".to_string()), + )]) + } + } + + #[tokio::test] + async fn call_records_deep_review_read_file_measurement_without_touching_result() { + let parent_turn_id = format!("turn-framework-measure-{}", uuid::Uuid::new_v4()); + let mut custom_data = HashMap::new(); + custom_data.insert( + "deep_review_parent_dialog_turn_id".to_string(), + json!(parent_turn_id.clone()), + ); + custom_data.insert("deep_review_subagent_role".to_string(), json!("reviewer")); + custom_data.insert( + 
"deep_review_subagent_type".to_string(), + json!("ReviewSecurity"), + ); + let context = ToolUseContext { + tool_call_id: Some("tool-read".to_string()), + agent_type: Some("ReviewSecurity".to_string()), + session_id: Some("subagent-session".to_string()), + dialog_turn_id: Some("subagent-turn".to_string()), + workspace: None, + custom_data, + computer_use_host: None, + cancellation_token: None, + runtime_tool_restrictions: ToolRuntimeRestrictions::default(), + workspace_services: None, + }; + let tool = MeasurementReadTool; + + let result = tool + .call(&json!({ "file_path": ".\\src\\lib.rs" }), &context) + .await + .expect("read tool call should succeed"); + tool.call(&json!({ "file_path": "src/lib.rs" }), &context) + .await + .expect("read tool call should succeed"); + + assert_eq!(result.len(), 1); + let snapshot = deep_review_shared_context_measurement_snapshot(&parent_turn_id); + assert_eq!(snapshot.total_calls, 2); + assert_eq!(snapshot.duplicate_calls, 1); + assert_eq!(snapshot.repeated_contexts[0].tool_name, "Read"); + assert_eq!(snapshot.repeated_contexts[0].file_path, "src/lib.rs"); + } +} diff --git a/src/crates/core/src/agentic/tools/implementations/bash_tool.rs b/src/crates/core/src/agentic/tools/implementations/bash_tool.rs index b9dc5a19f..cad5f1dd5 100644 --- a/src/crates/core/src/agentic/tools/implementations/bash_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/bash_tool.rs @@ -696,6 +696,12 @@ Usage notes: .ok_or_else(|| BitFunError::tool("command is required".to_string()))?; let requested_working_directory = Self::resolve_working_directory(input, context)?; + if command_needs_light_checkpoint(command_str) { + context + .record_light_checkpoint("Bash", command_str, Vec::new()) + .await; + } + // Remote workspace: execute via injected workspace shell if context.is_remote() { let Some(ws_shell) = context.ws_shell() else { @@ -1086,6 +1092,39 @@ Usage notes: } } +fn command_needs_light_checkpoint(command: &str) -> bool { + let command 
= command.trim().to_ascii_lowercase(); + let mutating_prefixes = [ + "rm ", + "rmdir ", + "del ", + "erase ", + "move ", + "mv ", + "cp ", + "git reset", + "git clean", + "git checkout", + "git switch", + "git merge", + "git rebase", + "git pull", + "git stash", + "git commit", + "cargo fmt", + "cargo fix", + "rustfmt", + "prettier --write", + ]; + + mutating_prefixes + .iter() + .any(|prefix| command.starts_with(prefix)) + || command.contains(" --fix") + || command.contains(" > ") + || command.contains(" >> ") +} + impl BashTool { fn background_output_file_path( context: &ToolUseContext, @@ -1290,6 +1329,15 @@ impl BashTool { mod tests { use super::*; + #[test] + fn checkpoint_detection_flags_mutating_bash_commands() { + assert!(command_needs_light_checkpoint("cargo fmt")); + assert!(command_needs_light_checkpoint("pnpm lint --fix")); + assert!(command_needs_light_checkpoint("rm -rf target/tmp")); + assert!(!command_needs_light_checkpoint("cargo test")); + assert!(!command_needs_light_checkpoint("git status")); + } + #[test] fn truncate_output_preserving_tail_keeps_end_of_output() { let input = "BEGIN-".to_string() + &"x".repeat(120) + "-IMPORTANT-END"; diff --git a/src/crates/core/src/agentic/tools/implementations/code_review_tool.rs b/src/crates/core/src/agentic/tools/implementations/code_review_tool.rs index 075950dc0..293752a42 100644 --- a/src/crates/core/src/agentic/tools/implementations/code_review_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/code_review_tool.rs @@ -2,17 +2,32 @@ //! //! Used to get structured code review results. 
+use crate::agentic::agents::get_agent_registry; +use crate::agentic::context_profile::ContextProfilePolicy; +use crate::agentic::coordination::get_global_coordinator; +use crate::agentic::core::CompressionContract; +use crate::agentic::deep_review_policy::{ + deep_review_runtime_diagnostics_snapshot, DeepReviewIncrementalCache, + DeepReviewRuntimeDiagnostics, +}; use crate::agentic::tools::framework::{Tool, ToolResult, ToolUseContext}; use crate::service::config::get_app_language_code; use crate::service::i18n::code_review_copy_for_language; use crate::util::errors::BitFunResult; use async_trait::async_trait; -use log::warn; +use log::{debug, warn}; use serde_json::{json, Value}; +use std::collections::HashSet; /// Code review tool definition pub struct CodeReviewTool; +struct DeepReviewCacheUpdate { + value: Value, + hit_count: usize, + miss_count: usize, +} + impl CodeReviewTool { pub fn new() -> Self { Self @@ -204,6 +219,19 @@ impl CodeReviewTool { "type": "string", "description": reviewer_summary_desc }, + "partial_output": { + "type": "string", + "description": "Partial reviewer output captured before timeout or cancellation" + }, + "packet_id": { + "type": "string", + "description": "Deep Review work packet id associated with this reviewer output" + }, + "packet_status_source": { + "type": "string", + "enum": ["reported", "inferred", "missing"], + "description": "Whether packet_id/status was reported by the reviewer, inferred from scheduling metadata, or missing" + }, "issue_count": { "type": "integer", "description": "Validated issue count for this reviewer" @@ -333,6 +361,52 @@ impl CodeReviewTool { }, "additionalProperties": false }, + "reliability_signals": { + "type": "array", + "description": "Structured reliability/status signals for Deep Review report UI and export", + "items": { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "context_pressure", + "compression_preserved", + "cache_hit", + "cache_miss", + 
"concurrency_limited", + "partial_reviewer", + "retry_guidance", + "skipped_reviewers", + "token_budget_limited", + "user_decision" + ], + "description": "Reliability signal category" + }, + "severity": { + "type": "string", + "enum": ["info", "warning", "action"], + "description": "User-facing severity of this signal" + }, + "count": { + "type": "integer", + "minimum": 0, + "description": "Optional affected item count" + }, + "source": { + "type": "string", + "enum": ["runtime", "manifest", "report", "inferred"], + "description": "Where this reliability signal came from" + }, + "detail": { + "type": "string", + "description": "Short user-facing detail for this signal" + } + }, + "required": ["kind", "severity"], + "additionalProperties": false + } + }, "schema_version": { "type": "integer", "description": "Schema version for forward compatibility", @@ -351,10 +425,608 @@ impl CodeReviewTool { .is_some_and(|agent_type| agent_type == "DeepReview") } + fn normalized_non_empty_string(value: Option<&Value>) -> Option { + value + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::to_string) + } + + fn packet_string_field<'a>(packet: &'a Value, keys: &[&str]) -> Option<&'a str> { + keys.iter() + .find_map(|key| packet.get(*key).and_then(Value::as_str)) + .map(str::trim) + .filter(|value| !value.is_empty()) + } + + fn reviewer_match_tokens(reviewer: &Value) -> Vec { + ["name", "specialty"] + .iter() + .filter_map(|key| Self::normalized_non_empty_string(reviewer.get(*key))) + .map(|value| value.to_ascii_lowercase()) + .collect() + } + + fn packet_match_tokens(packet: &Value) -> Vec { + [ + &["subagentId", "subagent_id", "subagent_type"][..], + &["displayName", "display_name"][..], + &["roleName", "role"][..], + ] + .iter() + .filter_map(|keys| Self::packet_string_field(packet, keys)) + .map(|value| value.to_ascii_lowercase()) + .collect() + } + + fn infer_unique_packet_id_for_reviewer( + reviewer: &Value, + run_manifest: 
Option<&Value>, + ) -> Option { + let reviewer_tokens = Self::reviewer_match_tokens(reviewer); + if reviewer_tokens.is_empty() { + return None; + } + + let manifest = run_manifest?; + let packets = manifest + .get("workPackets") + .or_else(|| manifest.get("work_packets"))? + .as_array()?; + let mut matches = packets.iter().filter_map(|packet| { + let packet_id = Self::packet_string_field(packet, &["packetId", "packet_id"])?; + let packet_tokens = Self::packet_match_tokens(packet); + let matched = packet_tokens + .iter() + .any(|packet_token| reviewer_tokens.iter().any(|token| token == packet_token)); + matched.then(|| packet_id.to_string()) + }); + let first = matches.next()?; + if matches.next().is_some() { + None + } else { + Some(first) + } + } + + fn fill_deep_review_packet_metadata(input: &mut Value, run_manifest: Option<&Value>) { + let Some(reviewers) = input.get_mut("reviewers").and_then(Value::as_array_mut) else { + return; + }; + + for reviewer in reviewers { + let packet_id = Self::normalized_non_empty_string(reviewer.get("packet_id")); + let packet_status_source = + Self::normalized_non_empty_string(reviewer.get("packet_status_source")); + let inferred_packet_id = if packet_id.is_none() { + Self::infer_unique_packet_id_for_reviewer(reviewer, run_manifest) + } else { + None + }; + + let Some(object) = reviewer.as_object_mut() else { + continue; + }; + + if packet_id.is_some() { + if packet_status_source.is_none() { + object.insert("packet_status_source".to_string(), json!("reported")); + } + } else if let Some(inferred_packet_id) = inferred_packet_id { + object.insert("packet_id".to_string(), json!(inferred_packet_id)); + object.insert("packet_status_source".to_string(), json!("inferred")); + } else if packet_status_source.is_none() { + object.insert("packet_status_source".to_string(), json!("missing")); + } + } + } + + fn value_for_any_key<'a>(value: &'a Value, keys: &[&str]) -> Option<&'a Value> { + keys.iter().find_map(|key| value.get(*key)) + } + + 
fn bool_for_any_key(value: &Value, keys: &[&str]) -> bool { + Self::value_for_any_key(value, keys) + .and_then(Value::as_bool) + .unwrap_or(false) + } + + fn u64_for_any_key(value: &Value, keys: &[&str]) -> Option { + Self::value_for_any_key(value, keys).and_then(Value::as_u64) + } + + fn has_non_empty_array_for_any_key(value: &Value, keys: &[&str]) -> bool { + Self::value_for_any_key(value, keys) + .and_then(Value::as_array) + .is_some_and(|items| !items.is_empty()) + } + + fn count_partial_reviewers(input: &Value) -> usize { + input + .get("reviewers") + .and_then(Value::as_array) + .map(|reviewers| { + reviewers + .iter() + .filter(|reviewer| { + let status = reviewer + .get("status") + .and_then(Value::as_str) + .map(str::trim) + .unwrap_or_default(); + let has_partial_output = reviewer + .get("partial_output") + .and_then(Value::as_str) + .map(str::trim) + .is_some_and(|output| !output.is_empty()); + status == "partial_timeout" + || (matches!(status, "timed_out" | "cancelled_by_user") + && has_partial_output) + }) + .count() + }) + .unwrap_or(0) + } + + fn count_manifest_skipped_reviewers(run_manifest: Option<&Value>) -> usize { + run_manifest + .and_then(|manifest| { + Self::value_for_any_key(manifest, &["skippedReviewers", "skipped_reviewers"]) + }) + .and_then(Value::as_array) + .map(Vec::len) + .unwrap_or(0) + } + + fn count_token_budget_limited_reviewers(run_manifest: Option<&Value>) -> usize { + let Some(manifest) = run_manifest else { + return 0; + }; + let mut skipped_by_budget = HashSet::new(); + + if let Some(skipped_ids) = + Self::value_for_any_key(manifest, &["tokenBudget", "token_budget"]) + .and_then(|token_budget| { + Self::value_for_any_key( + token_budget, + &["skippedReviewerIds", "skipped_reviewer_ids"], + ) + }) + .and_then(Value::as_array) + { + for value in skipped_ids { + if let Some(id) = value.as_str().map(str::trim).filter(|id| !id.is_empty()) { + skipped_by_budget.insert(id.to_string()); + } + } + } + + if let Some(skipped_reviewers) 
= + Self::value_for_any_key(manifest, &["skippedReviewers", "skipped_reviewers"]) + .and_then(Value::as_array) + { + for reviewer in skipped_reviewers { + let reason = Self::packet_string_field(reviewer, &["reason"]); + if reason != Some("budget_limited") { + continue; + } + if let Some(id) = + Self::packet_string_field(reviewer, &["subagentId", "subagent_id"]) + { + skipped_by_budget.insert(id.to_string()); + } + } + } + + skipped_by_budget.len() + } + + fn count_decision_items(input: &Value) -> usize { + let needs_decision_count = input + .pointer("/report_sections/remediation_groups/needs_decision") + .and_then(Value::as_array) + .map(|items| { + items + .iter() + .filter_map(Value::as_str) + .map(str::trim) + .filter(|item| !item.is_empty()) + .count() + }) + .unwrap_or(0); + if needs_decision_count > 0 { + return needs_decision_count; + } + + let recommended_action = input + .pointer("/summary/recommended_action") + .and_then(Value::as_str) + .map(str::trim) + .unwrap_or_default(); + usize::from(recommended_action == "block") + } + + fn has_reliability_signal(input: &Value, kind: &str) -> bool { + input + .get("reliability_signals") + .and_then(Value::as_array) + .is_some_and(|signals| { + signals.iter().any(|signal| { + signal + .get("kind") + .and_then(Value::as_str) + .is_some_and(|value| value == kind) + }) + }) + } + + fn push_reliability_signal_if_missing(input: &mut Value, signal: Value) { + let Some(kind) = signal.get("kind").and_then(Value::as_str) else { + return; + }; + if Self::has_reliability_signal(input, kind) { + return; + } + if !input + .get("reliability_signals") + .is_some_and(Value::is_array) + { + input["reliability_signals"] = json!([]); + } + if let Some(signals) = input + .get_mut("reliability_signals") + .and_then(Value::as_array_mut) + { + signals.push(signal); + } + } + + fn compression_contract_for_context(context: &ToolUseContext) -> Option { + let session_id = context.session_id.as_deref()?; + let coordinator = 
get_global_coordinator()?; + let session = coordinator.get_session_manager().get_session(session_id)?; + let agent_type = Some(session.agent_type.as_str()); + let model_id = session.config.model_id.as_deref(); + let limit = Self::reliability_contract_limit(agent_type, model_id); + let contract = coordinator + .get_session_manager() + .compression_contract_for_session(session_id, limit)?; + Self::should_report_compression_preserved( + session.compression_state.compression_count, + Some(&contract), + ) + .then_some(contract) + } + + fn reliability_contract_limit(agent_type: Option<&str>, model_id: Option<&str>) -> usize { + let agent_type = agent_type + .map(str::trim) + .filter(|agent_type| !agent_type.is_empty()) + .unwrap_or("DeepReview"); + let model_id = model_id + .map(str::trim) + .filter(|model_id| !model_id.is_empty()) + .unwrap_or_default(); + let is_review_subagent = get_agent_registry() + .get_subagent_is_review(agent_type) + .unwrap_or(false); + + ContextProfilePolicy::for_agent_context_and_model( + agent_type, + is_review_subagent, + model_id, + model_id, + ) + .compression_contract_limit + } + + fn should_report_compression_preserved( + compression_count: usize, + compression_contract: Option<&CompressionContract>, + ) -> bool { + compression_count > 0 && compression_contract.is_some_and(|contract| !contract.is_empty()) + } + + fn compression_contract_signal_count(contract: &CompressionContract) -> usize { + contract.touched_files.len() + + contract.verification_commands.len() + + contract.blocking_failures.len() + + contract.subagent_statuses.len() + } + + fn fill_deep_review_reliability_signals( + input: &mut Value, + run_manifest: Option<&Value>, + compression_contract: Option<&CompressionContract>, + ) { + if let Some(token_budget) = run_manifest.and_then(|manifest| { + Self::value_for_any_key(manifest, &["tokenBudget", "token_budget"]) + }) { + let has_context_pressure = + Self::bool_for_any_key( + token_budget, + &["largeDiffSummaryFirst", 
"large_diff_summary_first"], + ) || Self::has_non_empty_array_for_any_key(token_budget, &["warnings"]); + if has_context_pressure { + let count = Self::u64_for_any_key( + token_budget, + &["estimatedReviewerCalls", "estimated_reviewer_calls"], + ) + .unwrap_or(0); + Self::push_reliability_signal_if_missing( + input, + json!({ + "kind": "context_pressure", + "severity": "info", + "count": count, + "source": "runtime" + }), + ); + } + } + + let skipped_reviewer_count = Self::count_manifest_skipped_reviewers(run_manifest); + if skipped_reviewer_count > 0 { + Self::push_reliability_signal_if_missing( + input, + json!({ + "kind": "skipped_reviewers", + "severity": "info", + "count": skipped_reviewer_count, + "source": "manifest" + }), + ); + } + + let token_budget_limited_reviewer_count = + Self::count_token_budget_limited_reviewers(run_manifest); + if token_budget_limited_reviewer_count > 0 { + Self::push_reliability_signal_if_missing( + input, + json!({ + "kind": "token_budget_limited", + "severity": "warning", + "count": token_budget_limited_reviewer_count, + "source": "manifest" + }), + ); + } + + if let Some(contract) = compression_contract.filter(|contract| !contract.is_empty()) { + let count = Self::compression_contract_signal_count(contract); + if count > 0 { + Self::push_reliability_signal_if_missing( + input, + json!({ + "kind": "compression_preserved", + "severity": "info", + "count": count, + "source": "runtime" + }), + ); + } + } + + let partial_reviewer_count = Self::count_partial_reviewers(input); + if partial_reviewer_count > 0 { + Self::push_reliability_signal_if_missing( + input, + json!({ + "kind": "partial_reviewer", + "severity": "warning", + "count": partial_reviewer_count, + "source": "runtime" + }), + ); + } + + if partial_reviewer_count > 0 { + Self::push_reliability_signal_if_missing( + input, + json!({ + "kind": "retry_guidance", + "severity": "warning", + "count": partial_reviewer_count, + "source": "runtime" + }), + ); + } + + let 
decision_item_count = Self::count_decision_items(input); + if decision_item_count > 0 { + Self::push_reliability_signal_if_missing( + input, + json!({ + "kind": "user_decision", + "severity": "action", + "count": decision_item_count, + "source": "report" + }), + ); + } + } + + fn fill_deep_review_runtime_tracker_signals(input: &mut Value, dialog_turn_id: Option<&str>) { + let Some(dialog_turn_id) = dialog_turn_id + .map(str::trim) + .filter(|value| !value.is_empty()) + else { + return; + }; + let count = + crate::agentic::deep_review_policy::deep_review_concurrency_cap_rejection_count( + dialog_turn_id, + ) + crate::agentic::deep_review_policy::deep_review_capacity_skip_count(dialog_turn_id); + if count > 0 { + Self::push_reliability_signal_if_missing( + input, + json!({ + "kind": "concurrency_limited", + "severity": "warning", + "count": count, + "source": "runtime" + }), + ); + } + } + + fn log_deep_review_runtime_diagnostics(dialog_turn_id: Option<&str>) { + let Some(dialog_turn_id) = dialog_turn_id + .map(str::trim) + .filter(|value| !value.is_empty()) + else { + return; + }; + let Some(DeepReviewRuntimeDiagnostics { + queue_wait_count, + queue_wait_total_ms, + queue_wait_max_ms, + provider_capacity_queue_count, + provider_capacity_retry_count, + provider_capacity_retry_success_count, + capacity_skip_count, + effective_parallel_min, + effective_parallel_final, + manual_queue_action_count, + manual_retry_count, + auto_retry_count, + auto_retry_suppressed_reason_counts, + shared_context_total_calls, + shared_context_duplicate_calls, + shared_context_duplicate_context_count, + }) = deep_review_runtime_diagnostics_snapshot(dialog_turn_id) + else { + return; + }; + let auto_retry_suppressed_reason_counts = + serde_json::to_string(&auto_retry_suppressed_reason_counts) + .unwrap_or_else(|_| "{}".to_string()); + + debug!( + "DeepReview runtime diagnostics: queue_wait_count={}, queue_wait_total_ms={}, queue_wait_max_ms={}, provider_capacity_queue_count={}, 
provider_capacity_retry_count={}, provider_capacity_retry_success_count={}, capacity_skip_count={}, effective_parallel_min={}, effective_parallel_final={}, manual_queue_action_count={}, manual_retry_count={}, auto_retry_count={}, auto_retry_suppressed_reason_counts={}, shared_context_total_calls={}, shared_context_duplicate_calls={}, shared_context_duplicate_context_count={}", + queue_wait_count, + queue_wait_total_ms, + queue_wait_max_ms, + provider_capacity_queue_count, + provider_capacity_retry_count, + provider_capacity_retry_success_count, + capacity_skip_count, + effective_parallel_min + .map(|value| value.to_string()) + .unwrap_or_else(|| "none".to_string()), + effective_parallel_final + .map(|value| value.to_string()) + .unwrap_or_else(|| "none".to_string()), + manual_queue_action_count, + manual_retry_count, + auto_retry_count, + auto_retry_suppressed_reason_counts, + shared_context_total_calls, + shared_context_duplicate_calls, + shared_context_duplicate_context_count + ); + } + + fn deep_review_cache_fingerprint(run_manifest: Option<&Value>) -> Option { + let manifest = run_manifest?; + let cache_config = Self::value_for_any_key( + manifest, + &["incrementalReviewCache", "incremental_review_cache"], + )?; + Self::packet_string_field(cache_config, &["fingerprint"]).map(str::to_string) + } + + fn deep_review_cache_from_completed_reviewers( + input: &Value, + run_manifest: Option<&Value>, + existing_cache: Option<&Value>, + ) -> Option { + let fingerprint = Self::deep_review_cache_fingerprint(run_manifest)?; + let matching_existing_cache = existing_cache + .map(DeepReviewIncrementalCache::from_value) + .filter(|cache| cache.fingerprint() == fingerprint); + let mut cache = matching_existing_cache + .clone() + .unwrap_or_else(|| DeepReviewIncrementalCache::new(&fingerprint)); + let mut stored_count = 0usize; + let mut hit_count = 0usize; + let mut miss_count = 0usize; + + if let Some(reviewers) = input.get("reviewers").and_then(Value::as_array) { + for 
reviewer in reviewers { + let is_completed = reviewer + .get("status") + .and_then(Value::as_str) + .map(str::trim) + .is_some_and(|status| status == "completed"); + if !is_completed { + continue; + } + let Some(packet_id) = Self::normalized_non_empty_string(reviewer.get("packet_id")) + else { + continue; + }; + if matching_existing_cache + .as_ref() + .and_then(|cache| cache.get_packet(&packet_id)) + .is_some() + { + hit_count += 1; + } else { + miss_count += 1; + } + let output = + serde_json::to_string(reviewer).unwrap_or_else(|_| reviewer.to_string()); + cache.store_packet(&packet_id, &output); + stored_count += 1; + } + } + + (stored_count > 0).then(|| DeepReviewCacheUpdate { + value: cache.to_value(), + hit_count, + miss_count, + }) + } + + async fn persist_deep_review_cache( + context: &ToolUseContext, + cache_value: Value, + ) -> BitFunResult<()> { + let Some(session_id) = context.session_id.as_deref() else { + return Ok(()); + }; + let Some(workspace) = context.workspace.as_ref() else { + return Ok(()); + }; + let Some(coordinator) = get_global_coordinator() else { + return Ok(()); + }; + let session_storage_path = workspace.session_storage_path(); + let session_manager = coordinator.get_session_manager(); + let Some(mut metadata) = session_manager + .load_session_metadata(&session_storage_path, session_id) + .await? 
+ else { + return Ok(()); + }; + + metadata.deep_review_cache = Some(cache_value); + session_manager + .save_session_metadata(&session_storage_path, &metadata) + .await + } + /// Validate and fill missing fields with default values /// /// When AI-returned data is missing certain fields, fill with default values to avoid entire review failure - fn validate_and_fill_defaults(input: &mut Value, deep_review: bool) { + fn validate_and_fill_defaults( + input: &mut Value, + deep_review: bool, + run_manifest: Option<&Value>, + compression_contract: Option<&CompressionContract>, + ) { // Fill summary default values if input.get("summary").is_none() { warn!("CodeReview tool missing summary field, using default values"); @@ -410,6 +1082,10 @@ impl CodeReviewTool { if input.get("reviewers").is_none() { input["reviewers"] = json!([]); } + if deep_review { + Self::fill_deep_review_packet_metadata(input, run_manifest); + Self::fill_deep_review_reliability_signals(input, run_manifest, compression_contract); + } if input.get("remediation_plan").is_none() { input["remediation_plan"] = json!([]); @@ -493,10 +1169,94 @@ impl Tool for CodeReviewTool { context: &ToolUseContext, ) -> BitFunResult> { let mut filled_input = input.clone(); + let deep_review = Self::is_deep_review_context(Some(context)); + let compression_contract = deep_review + .then(|| Self::compression_contract_for_context(context)) + .flatten(); + let mut run_manifest = context.custom_data.get("deep_review_run_manifest").cloned(); + let mut existing_cache = run_manifest + .as_ref() + .and_then(|manifest| manifest.get("deepReviewCache")) + .cloned(); + if deep_review && (run_manifest.is_none() || existing_cache.is_none()) { + if let (Some(session_id), Some(workspace), Some(coordinator)) = ( + context.session_id.as_deref(), + context.workspace.as_ref(), + get_global_coordinator(), + ) { + let session_storage_path = workspace.session_storage_path(); + match coordinator + .get_session_manager() + 
.load_session_metadata(&session_storage_path, session_id) + .await + { + Ok(Some(metadata)) => { + if run_manifest.is_none() { + run_manifest = metadata.deep_review_run_manifest; + } + if existing_cache.is_none() { + existing_cache = metadata.deep_review_cache; + } + } + Ok(None) => {} + Err(error) => { + warn!( + "Failed to load DeepReview session metadata for review cache: session_id={}, error={}", + session_id, error + ); + } + } + } + } Self::validate_and_fill_defaults( &mut filled_input, - Self::is_deep_review_context(Some(context)), + deep_review, + run_manifest.as_ref(), + compression_contract.as_ref(), ); + if deep_review { + Self::fill_deep_review_runtime_tracker_signals( + &mut filled_input, + context.dialog_turn_id.as_deref(), + ); + Self::log_deep_review_runtime_diagnostics(context.dialog_turn_id.as_deref()); + if let Some(cache_update) = Self::deep_review_cache_from_completed_reviewers( + &filled_input, + run_manifest.as_ref(), + existing_cache.as_ref(), + ) { + if cache_update.hit_count > 0 { + Self::push_reliability_signal_if_missing( + &mut filled_input, + json!({ + "kind": "cache_hit", + "severity": "info", + "count": cache_update.hit_count, + "source": "runtime" + }), + ); + } + if cache_update.miss_count > 0 { + Self::push_reliability_signal_if_missing( + &mut filled_input, + json!({ + "kind": "cache_miss", + "severity": "info", + "count": cache_update.miss_count, + "source": "runtime" + }), + ); + } + if let Err(error) = + Self::persist_deep_review_cache(context, cache_update.value).await + { + warn!( + "Failed to persist DeepReview incremental cache: error={}", + error + ); + } + } + } Ok(vec![ToolResult::Result { data: filled_input, @@ -509,6 +1269,7 @@ impl Tool for CodeReviewTool { #[cfg(test)] mod tests { use super::CodeReviewTool; + use crate::agentic::core::{CompressionContract, CompressionContractItem}; use crate::agentic::tools::framework::{Tool, ToolResult, ToolUseContext}; use serde_json::json; use std::collections::HashMap; @@ -550,6 
+1311,65 @@ mod tests { } } + #[tokio::test] + async fn deep_review_schema_accepts_reviewer_partial_output() { + let tool = CodeReviewTool::new(); + let context = tool_context(Some("DeepReview")); + let schema = tool + .input_schema_for_model_with_context(Some(&context)) + .await; + let reviewer_properties = &schema["properties"]["reviewers"]["items"]["properties"]; + + assert_eq!(reviewer_properties["partial_output"]["type"], "string"); + } + + #[tokio::test] + async fn deep_review_schema_accepts_reviewer_packet_fallback_metadata() { + let tool = CodeReviewTool::new(); + let context = tool_context(Some("DeepReview")); + let schema = tool + .input_schema_for_model_with_context(Some(&context)) + .await; + let reviewer_properties = &schema["properties"]["reviewers"]["items"]["properties"]; + + assert_eq!(reviewer_properties["packet_id"]["type"], "string"); + assert_eq!( + reviewer_properties["packet_status_source"]["enum"], + json!(["reported", "inferred", "missing"]) + ); + } + + #[tokio::test] + async fn deep_review_schema_accepts_structured_reliability_signals() { + let tool = CodeReviewTool::new(); + let context = tool_context(Some("DeepReview")); + let schema = tool + .input_schema_for_model_with_context(Some(&context)) + .await; + let reliability_properties = + &schema["properties"]["reliability_signals"]["items"]["properties"]; + + assert_eq!( + reliability_properties["kind"]["enum"], + json!([ + "context_pressure", + "compression_preserved", + "cache_hit", + "cache_miss", + "concurrency_limited", + "partial_reviewer", + "retry_guidance", + "skipped_reviewers", + "token_budget_limited", + "user_decision" + ]) + ); + assert_eq!( + reliability_properties["source"]["enum"], + json!(["runtime", "manifest", "report", "inferred"]) + ); + } + #[tokio::test] async fn deep_review_submission_defaults_missing_mode_to_deep() { let tool = CodeReviewTool::new(); @@ -577,4 +1397,628 @@ mod tests { assert!(data["reviewers"].as_array().is_some()); 
assert!(data["remediation_plan"].as_array().is_some()); } + + #[tokio::test] + async fn deep_review_submission_infers_unique_reviewer_packet_from_manifest() { + let tool = CodeReviewTool::new(); + let mut context = tool_context(Some("DeepReview")); + context.custom_data.insert( + "deep_review_run_manifest".to_string(), + json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "displayName": "Security Reviewer", + "roleName": "Security Reviewer" + } + ] + }), + ); + + let result = tool + .call_impl( + &json!({ + "summary": { + "overall_assessment": "No blocking issues", + "risk_level": "low", + "recommended_action": "approve" + }, + "issues": [], + "positive_points": [], + "reviewers": [ + { + "name": "Security Reviewer", + "specialty": "security", + "status": "completed", + "summary": "Checked the security packet." + } + ] + }), + &context, + ) + .await + .expect("submit review result"); + + let ToolResult::Result { data, .. } = &result[0] else { + panic!("expected tool result"); + }; + assert_eq!(data["reviewers"][0]["packet_id"], "reviewer:ReviewSecurity"); + assert_eq!(data["reviewers"][0]["packet_status_source"], "inferred"); + } + + #[tokio::test] + async fn deep_review_submission_marks_uninferable_packet_metadata_as_missing() { + let tool = CodeReviewTool::new(); + let context = tool_context(Some("DeepReview")); + let result = tool + .call_impl( + &json!({ + "summary": { + "overall_assessment": "No blocking issues", + "risk_level": "low", + "recommended_action": "approve" + }, + "issues": [], + "positive_points": [], + "reviewers": [ + { + "name": "Unknown Reviewer", + "specialty": "unknown", + "status": "completed", + "summary": "Packet was omitted." + } + ] + }), + &context, + ) + .await + .expect("submit review result"); + + let ToolResult::Result { data, .. 
} = &result[0] else { + panic!("expected tool result"); + }; + assert!(data["reviewers"][0].get("packet_id").is_none()); + assert_eq!(data["reviewers"][0]["packet_status_source"], "missing"); + } + + #[tokio::test] + async fn deep_review_submission_marks_existing_packet_metadata_as_reported() { + let tool = CodeReviewTool::new(); + let context = tool_context(Some("DeepReview")); + let result = tool + .call_impl( + &json!({ + "summary": { + "overall_assessment": "No blocking issues", + "risk_level": "low", + "recommended_action": "approve" + }, + "issues": [], + "positive_points": [], + "reviewers": [ + { + "name": "Security Reviewer", + "specialty": "security", + "status": "completed", + "summary": "Packet was reported.", + "packet_id": "reviewer:ReviewSecurity" + } + ] + }), + &context, + ) + .await + .expect("submit review result"); + + let ToolResult::Result { data, .. } = &result[0] else { + panic!("expected tool result"); + }; + assert_eq!(data["reviewers"][0]["packet_id"], "reviewer:ReviewSecurity"); + assert_eq!(data["reviewers"][0]["packet_status_source"], "reported"); + } + + #[tokio::test] + async fn deep_review_submission_fills_runtime_reliability_signals() { + let tool = CodeReviewTool::new(); + let mut context = tool_context(Some("DeepReview")); + context.custom_data.insert( + "deep_review_run_manifest".to_string(), + json!({ + "tokenBudget": { + "largeDiffSummaryFirst": true, + "warnings": [], + "estimatedReviewerCalls": 7, + "skippedReviewerIds": ["CustomPerf"] + }, + "skippedReviewers": [ + { + "subagentId": "ReviewFrontend", + "reason": "not_applicable" + }, + { + "subagentId": "CustomPerf", + "reason": "budget_limited" + } + ] + }), + ); + + let result = tool + .call_impl( + &json!({ + "summary": { + "overall_assessment": "Review completed with reduced confidence", + "risk_level": "medium", + "recommended_action": "request_changes" + }, + "issues": [], + "positive_points": [], + "reviewers": [ + { + "name": "Security Reviewer", + "specialty": 
"security", + "status": "partial_timeout", + "summary": "Timed out after partial evidence.", + "partial_output": "Found one likely issue before timeout." + } + ], + "report_sections": { + "remediation_groups": { + "needs_decision": [ + "Decide whether to block the release." + ] + } + } + }), + &context, + ) + .await + .expect("submit review result"); + + let ToolResult::Result { data, .. } = &result[0] else { + panic!("expected tool result"); + }; + assert_eq!( + data["reliability_signals"], + json!([ + { + "kind": "context_pressure", + "severity": "info", + "count": 7, + "source": "runtime" + }, + { + "kind": "skipped_reviewers", + "severity": "info", + "count": 2, + "source": "manifest" + }, + { + "kind": "token_budget_limited", + "severity": "warning", + "count": 1, + "source": "manifest" + }, + { + "kind": "partial_reviewer", + "severity": "warning", + "count": 1, + "source": "runtime" + }, + { + "kind": "retry_guidance", + "severity": "warning", + "count": 1, + "source": "runtime" + }, + { + "kind": "user_decision", + "severity": "action", + "count": 1, + "source": "report" + } + ]) + ); + } + + #[tokio::test] + async fn deep_review_submission_fills_concurrency_limited_from_runtime_tracker() { + use crate::agentic::deep_review_policy::record_deep_review_concurrency_cap_rejection; + + let tool = CodeReviewTool::new(); + let mut context = tool_context(Some("DeepReview")); + context.dialog_turn_id = Some("turn-code-review-cap-signal".to_string()); + record_deep_review_concurrency_cap_rejection("turn-code-review-cap-signal"); + + let result = tool + .call_impl( + &json!({ + "summary": { + "overall_assessment": "Review completed with launch backpressure", + "risk_level": "medium", + "recommended_action": "approve" + }, + "issues": [], + "positive_points": [] + }), + &context, + ) + .await + .expect("submit review result"); + + let ToolResult::Result { data, .. 
} = &result[0] else { + panic!("expected tool result"); + }; + assert_eq!( + data["reliability_signals"], + json!([ + { + "kind": "concurrency_limited", + "severity": "warning", + "count": 1, + "source": "runtime" + } + ]) + ); + } + + #[tokio::test] + async fn deep_review_shared_context_diagnostics_stays_out_of_report() { + use crate::agentic::deep_review_policy::{ + deep_review_runtime_diagnostics_snapshot, record_deep_review_shared_context_tool_use, + }; + + let turn_id = "turn-code-review-shared-context-diagnostics"; + record_deep_review_shared_context_tool_use(turn_id, "ReviewSecurity", "Read", "src/lib.rs"); + record_deep_review_shared_context_tool_use( + turn_id, + "ReviewPerformance", + "Read", + "src/lib.rs", + ); + record_deep_review_shared_context_tool_use( + turn_id, + "ReviewArchitecture", + "GetFileDiff", + "src/lib.rs", + ); + + let diagnostics = deep_review_runtime_diagnostics_snapshot(turn_id) + .expect("diagnostics should be available for measured turn"); + assert_eq!(diagnostics.shared_context_total_calls, 3); + assert_eq!(diagnostics.shared_context_duplicate_calls, 1); + assert_eq!(diagnostics.shared_context_duplicate_context_count, 1); + + let tool = CodeReviewTool::new(); + let mut context = tool_context(Some("DeepReview")); + context.dialog_turn_id = Some(turn_id.to_string()); + + let result = tool + .call_impl( + &json!({ + "summary": { + "overall_assessment": "Review completed", + "risk_level": "low", + "recommended_action": "approve" + }, + "issues": [], + "positive_points": [] + }), + &context, + ) + .await + .expect("submit review result"); + + let ToolResult::Result { data, .. 
} = &result[0] else { + panic!("expected tool result"); + }; + assert!(data.get("shared_context_measurement").is_none()); + assert!(data.get("runtime_diagnostics").is_none()); + assert!(data.get("reliability_signals").is_none()); + } + + #[tokio::test] + async fn deep_review_submission_folds_capacity_skips_into_concurrency_limited_signal() { + use crate::agentic::deep_review_policy::record_deep_review_capacity_skip; + + record_deep_review_capacity_skip("turn-code-review-capacity-skip"); + + let tool = CodeReviewTool::new(); + let mut context = tool_context(Some("DeepReview")); + context.dialog_turn_id = Some("turn-code-review-capacity-skip".to_string()); + + let result = tool + .call_impl( + &json!({ + "summary": { + "overall_assessment": "Review completed after queue skip", + "risk_level": "medium", + "recommended_action": "approve" + }, + "issues": [], + "positive_points": [] + }), + &context, + ) + .await + .expect("submit review result"); + + let ToolResult::Result { data, .. } = &result[0] else { + panic!("expected tool result"); + }; + + assert_eq!( + data["reliability_signals"], + json!([ + { + "kind": "concurrency_limited", + "severity": "warning", + "count": 1, + "source": "runtime" + } + ]) + ); + } + + #[test] + fn deep_review_defaults_include_compression_contract_reliability_signal() { + let contract = CompressionContract { + touched_files: vec!["src/web-ui/src/flow_chat/utils/codeReviewReport.ts".to_string()], + verification_commands: vec![CompressionContractItem { + target: "pnpm --dir src/web-ui run test:run".to_string(), + status: "succeeded".to_string(), + summary: "Frontend report tests passed.".to_string(), + error_kind: None, + }], + blocking_failures: vec![], + subagent_statuses: vec![], + }; + let mut input = json!({ + "summary": { + "overall_assessment": "No blocking issues", + "risk_level": "low", + "recommended_action": "approve" + }, + "issues": [], + "positive_points": [] + }); + + CodeReviewTool::validate_and_fill_defaults(&mut input, 
true, None, Some(&contract)); + + assert_eq!( + input["reliability_signals"], + json!([ + { + "kind": "compression_preserved", + "severity": "info", + "count": 2, + "source": "runtime" + } + ]) + ); + } + + #[test] + fn deep_review_reliability_contract_limit_uses_context_profile_policy() { + assert_eq!( + CodeReviewTool::reliability_contract_limit(Some("DeepReview"), Some("gpt-5")), + 8 + ); + assert_eq!( + CodeReviewTool::reliability_contract_limit(Some("DeepReview"), Some("gpt-5-mini")), + 4 + ); + } + + #[test] + fn deep_review_compression_signal_requires_completed_compression() { + let contract = CompressionContract { + touched_files: vec!["src/main.rs".to_string()], + verification_commands: vec![], + blocking_failures: vec![], + subagent_statuses: vec![], + }; + + assert!(!CodeReviewTool::should_report_compression_preserved( + 0, + Some(&contract) + )); + assert!(CodeReviewTool::should_report_compression_preserved( + 1, + Some(&contract) + )); + assert!(!CodeReviewTool::should_report_compression_preserved( + 1, + Some(&CompressionContract::default()) + )); + } + + #[test] + fn deep_review_incremental_cache_stores_completed_reviewers_by_packet_id() { + use crate::agentic::deep_review_policy::DeepReviewIncrementalCache; + + let manifest = json!({ + "incrementalReviewCache": { + "fingerprint": "fp-review-v2" + }, + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity:group-1-of-1", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "displayName": "Security Reviewer" + }, + { + "packetId": "reviewer:ReviewPerformance:group-1-of-1", + "phase": "reviewer", + "subagentId": "ReviewPerformance", + "displayName": "Performance Reviewer" + } + ] + }); + let mut input = json!({ + "summary": { + "overall_assessment": "Review completed", + "risk_level": "medium", + "recommended_action": "request_changes" + }, + "issues": [], + "positive_points": [], + "reviewers": [ + { + "name": "Security Reviewer", + "specialty": "security", + "status": "completed", + 
"summary": "Found one high-risk issue." + }, + { + "name": "Performance Reviewer", + "specialty": "performance", + "status": "partial_timeout", + "summary": "Timed out before completion.", + "partial_output": "Large render path was still being checked." + } + ] + }); + + CodeReviewTool::validate_and_fill_defaults(&mut input, true, Some(&manifest), None); + let cache_update = CodeReviewTool::deep_review_cache_from_completed_reviewers( + &input, + Some(&manifest), + None, + ) + .expect("completed reviewer should produce cache value"); + let cache = DeepReviewIncrementalCache::from_value(&cache_update.value); + + assert_eq!(cache.fingerprint(), "fp-review-v2"); + assert_eq!(cache_update.hit_count, 0); + assert_eq!(cache_update.miss_count, 1); + assert!(cache + .get_packet("reviewer:ReviewSecurity:group-1-of-1") + .is_some_and(|output| output.contains("Found one high-risk issue."))); + assert_eq!( + cache.get_packet("reviewer:ReviewPerformance:group-1-of-1"), + None + ); + } + + #[test] + fn deep_review_incremental_cache_replaces_stale_existing_cache() { + use crate::agentic::deep_review_policy::DeepReviewIncrementalCache; + + let manifest = json!({ + "incrementalReviewCache": { + "fingerprint": "fp-new" + }, + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "displayName": "Security Reviewer" + } + ] + }); + let mut stale_cache = DeepReviewIncrementalCache::new("fp-old"); + stale_cache.store_packet("reviewer:ReviewSecurity", "stale output"); + let mut input = json!({ + "summary": { + "overall_assessment": "Review completed", + "risk_level": "low", + "recommended_action": "approve" + }, + "issues": [], + "positive_points": [], + "reviewers": [ + { + "name": "Security Reviewer", + "specialty": "security", + "status": "completed", + "summary": "Fresh security output." 
+ } + ] + }); + + CodeReviewTool::validate_and_fill_defaults(&mut input, true, Some(&manifest), None); + let cache_update = CodeReviewTool::deep_review_cache_from_completed_reviewers( + &input, + Some(&manifest), + Some(&stale_cache.to_value()), + ) + .expect("completed reviewer should replace stale cache"); + let cache = DeepReviewIncrementalCache::from_value(&cache_update.value); + + assert_eq!(cache.fingerprint(), "fp-new"); + assert_eq!(cache_update.hit_count, 0); + assert_eq!(cache_update.miss_count, 1); + assert!(cache + .get_packet("reviewer:ReviewSecurity") + .is_some_and(|output| output.contains("Fresh security output."))); + assert!(!cache + .get_packet("reviewer:ReviewSecurity") + .is_some_and(|output| output.contains("stale output"))); + } + + #[test] + fn deep_review_incremental_cache_counts_existing_packet_hits() { + use crate::agentic::deep_review_policy::DeepReviewIncrementalCache; + + let manifest = json!({ + "incrementalReviewCache": { + "fingerprint": "fp-existing" + }, + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "displayName": "Security Reviewer" + }, + { + "packetId": "reviewer:ReviewPerformance", + "phase": "reviewer", + "subagentId": "ReviewPerformance", + "displayName": "Performance Reviewer" + } + ] + }); + let mut existing_cache = DeepReviewIncrementalCache::new("fp-existing"); + existing_cache.store_packet("reviewer:ReviewSecurity", "cached security output"); + let mut input = json!({ + "summary": { + "overall_assessment": "Review completed", + "risk_level": "medium", + "recommended_action": "request_changes" + }, + "issues": [], + "positive_points": [], + "reviewers": [ + { + "name": "Security Reviewer", + "specialty": "security", + "status": "completed", + "summary": "Reused security output." + }, + { + "name": "Performance Reviewer", + "specialty": "performance", + "status": "completed", + "summary": "Fresh performance output." 
+ } + ] + }); + + CodeReviewTool::validate_and_fill_defaults(&mut input, true, Some(&manifest), None); + let cache_update = CodeReviewTool::deep_review_cache_from_completed_reviewers( + &input, + Some(&manifest), + Some(&existing_cache.to_value()), + ) + .expect("completed reviewers should update cache"); + + assert_eq!(cache_update.hit_count, 1); + assert_eq!(cache_update.miss_count, 1); + } } diff --git a/src/crates/core/src/agentic/tools/implementations/delete_file_tool.rs b/src/crates/core/src/agentic/tools/implementations/delete_file_tool.rs index d27dc38e7..3a5495537 100644 --- a/src/crates/core/src/agentic/tools/implementations/delete_file_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/delete_file_tool.rs @@ -12,7 +12,7 @@ use tokio::fs; /// File deletion tool - provides safe file/directory deletion functionality /// -/// This tool automatically integrates with the snapshot system, all deletion operations are recorded and support rollback +/// This tool records a lightweight checkpoint before deletion. Rollback is not automatic. pub struct DeleteFileTool; impl Default for DeleteFileTool { @@ -34,7 +34,7 @@ impl Tool for DeleteFileTool { } async fn description(&self) -> BitFunResult { - Ok(r#"Deletes a file or directory from the filesystem. This operation is tracked by the snapshot system and can be rolled back if needed. + Ok(r#"Deletes a file or directory from the filesystem. This operation records a lightweight checkpoint before deletion, but rollback is not automatic. Usage guidelines: 1. **File Deletion**: @@ -53,8 +53,8 @@ Usage guidelines: - The path must exist in the filesystem 4. **Safety Features**: - - All deletions are tracked by the snapshot system - - Users can review and roll back deletions if needed + - Deletions record a lightweight checkpoint when session context is available + - The checkpoint captures Git branch/dirty-state metadata when cheap - The tool requires user confirmation for execution 5. 
**Best Practices**: @@ -79,8 +79,8 @@ Example for directory: Important notes: - NEVER use bash `rm` commands when this tool is available - - This tool provides better safety through the snapshot system - - All deletions can be rolled back through the snapshot interface + - This tool provides better safety through checkpoint metadata + - Rollback is not automatic; use the recorded checkpoint metadata to guide recovery - The tool will fail gracefully if permissions are insufficient"#.to_string()) } @@ -294,6 +294,13 @@ Important notes: let resolved = context.resolve_tool_path(path_str)?; context.enforce_path_operation(ToolPathOperation::Delete, &resolved)?; + context + .record_light_checkpoint( + "Delete", + &resolved.logical_path, + vec![resolved.logical_path.clone()], + ) + .await; // Remote workspace path: delete via shell command if resolved.uses_remote_workspace_backend() { diff --git a/src/crates/core/src/agentic/tools/implementations/file_edit_tool.rs b/src/crates/core/src/agentic/tools/implementations/file_edit_tool.rs index e8950ee6c..a23e42d52 100644 --- a/src/crates/core/src/agentic/tools/implementations/file_edit_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/file_edit_tool.rs @@ -200,6 +200,13 @@ Usage: let resolved = context.resolve_tool_path(file_path)?; context.enforce_path_operation(ToolPathOperation::Edit, &resolved)?; + context + .record_light_checkpoint( + "Edit", + &resolved.logical_path, + vec![resolved.logical_path.clone()], + ) + .await; // For remote workspace paths, use the abstract FS to read → edit in memory → write back. 
if resolved.uses_remote_workspace_backend() { diff --git a/src/crates/core/src/agentic/tools/implementations/file_write_tool.rs b/src/crates/core/src/agentic/tools/implementations/file_write_tool.rs index 3a77c1f45..8e34c4a3b 100644 --- a/src/crates/core/src/agentic/tools/implementations/file_write_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/file_write_tool.rs @@ -190,6 +190,13 @@ Usage: let resolved = context.resolve_tool_path(file_path)?; context.enforce_path_operation(ToolPathOperation::Write, &resolved)?; + context + .record_light_checkpoint( + "Write", + &resolved.logical_path, + vec![resolved.logical_path.clone()], + ) + .await; let content = input .get("content") diff --git a/src/crates/core/src/agentic/tools/implementations/git_tool.rs b/src/crates/core/src/agentic/tools/implementations/git_tool.rs index 007c2f1e7..6d9e27d95 100644 --- a/src/crates/core/src/agentic/tools/implementations/git_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/git_tool.rs @@ -1071,6 +1071,16 @@ When creating commits, use this format for the commit message: args.unwrap_or("") ); + if git_operation_needs_light_checkpoint(operation, args) { + context + .record_light_checkpoint( + "Git", + &format!("git {} {}", operation, args.unwrap_or("").trim()), + Vec::new(), + ) + .await; + } + let start_time = std::time::Instant::now(); // Remote SSH workspace: run git on the server (not libgit2 on the PC). 
@@ -1126,6 +1136,15 @@ When creating commits, use this format for the commit message: } } +fn git_operation_needs_light_checkpoint(operation: &str, args: Option<&str>) -> bool { + match operation { + "add" | "commit" | "pull" | "checkout" | "switch" | "merge" | "rebase" | "stash" + | "reset" | "restore" | "clean" | "cherry-pick" => true, + "branch" => args.is_some_and(|value| !value.trim().is_empty()), + _ => false, + } +} + impl Default for GitTool { fn default() -> Self { Self::new() @@ -1136,7 +1155,7 @@ impl Default for GitTool { mod tests { use crate::agentic::tools::framework::Tool; - use super::{GitTool, ParsedDiffArgs}; + use super::{git_operation_needs_light_checkpoint, GitTool, ParsedDiffArgs}; use serde_json::json; #[tokio::test] @@ -1165,6 +1184,28 @@ mod tests { .contains("operation is required")); } + #[test] + fn checkpoint_detection_flags_mutating_git_operations() { + assert!(git_operation_needs_light_checkpoint( + "checkout", + Some("main") + )); + assert!(git_operation_needs_light_checkpoint( + "reset", + Some("--hard HEAD") + )); + assert!(git_operation_needs_light_checkpoint( + "branch", + Some("-D old") + )); + assert!(!git_operation_needs_light_checkpoint("status", None)); + assert!(!git_operation_needs_light_checkpoint( + "diff", + Some("-- src/lib.rs") + )); + assert!(!git_operation_needs_light_checkpoint("branch", None)); + } + #[test] fn parse_diff_args_empty() { let r = GitTool::parse_diff_args(""); diff --git a/src/crates/core/src/agentic/tools/implementations/session_message_tool.rs b/src/crates/core/src/agentic/tools/implementations/session_message_tool.rs index b4718c6be..226cc8723 100644 --- a/src/crates/core/src/agentic/tools/implementations/session_message_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/session_message_tool.rs @@ -408,6 +408,7 @@ When overriding an existing session's agent_type, only switching between "agenti source_workspace_path: source_workspace, }), None, + None, ) .await 
.map_err(BitFunError::tool)?; diff --git a/src/crates/core/src/agentic/tools/implementations/task_tool.rs b/src/crates/core/src/agentic/tools/implementations/task_tool.rs index 6d5e1137d..74c80e753 100644 --- a/src/crates/core/src/agentic/tools/implementations/task_tool.rs +++ b/src/crates/core/src/agentic/tools/implementations/task_tool.rs @@ -1,6 +1,21 @@ use crate::agentic::agents::{get_agent_registry, AgentInfo}; use crate::agentic::coordination::get_global_coordinator; -use crate::agentic::deep_review_policy::{load_default_deep_review_policy, DEEP_REVIEW_AGENT_TYPE}; +use crate::agentic::deep_review_policy::{ + classify_deep_review_capacity_error, clear_deep_review_queue_control_for_tool, + deep_review_active_reviewer_count, deep_review_effective_concurrency_snapshot, + deep_review_effective_parallel_instances, deep_review_has_judge_been_launched, + deep_review_max_retries_per_role, deep_review_queue_control_snapshot, + load_default_deep_review_policy, record_deep_review_capacity_skip, + record_deep_review_effective_concurrency_capacity_error, + record_deep_review_effective_concurrency_success, record_deep_review_runtime_queue_wait, + record_deep_review_task_budget, try_begin_deep_review_active_reviewer, + DeepReviewActiveReviewerGuard, DeepReviewCapacityQueueReason, DeepReviewConcurrencyPolicy, + DeepReviewExecutionPolicy, DeepReviewIncrementalCache, DeepReviewPolicyViolation, + DeepReviewRunManifestGate, DeepReviewSubagentRole, DEEP_REVIEW_AGENT_TYPE, +}; +use crate::agentic::events::{ + DeepReviewQueueReason, DeepReviewQueueState, DeepReviewQueueStatus, ErrorCategory, +}; use crate::agentic::tools::framework::{ Tool, ToolRenderOptions, ToolResult, ToolUseContext, ValidationResult, }; @@ -8,13 +23,37 @@ use crate::agentic::tools::pipeline::SubagentParentInfo; use crate::agentic::tools::InputValidator; use crate::util::errors::{BitFunError, BitFunResult}; use async_trait::async_trait; +use log::warn; use serde_json::{json, Value}; +use 
std::collections::{HashMap, HashSet}; use std::path::Path; +use tokio::time::{sleep, Duration, Instant}; pub struct TaskTool; const LARGE_TASK_PROMPT_SOFT_LINE_LIMIT: usize = 180; const LARGE_TASK_PROMPT_SOFT_BYTE_LIMIT: usize = 16 * 1024; +#[cfg(test)] +const DEEP_REVIEW_QUEUE_POLL_INTERVAL: Duration = Duration::from_millis(10); +#[cfg(not(test))] +const DEEP_REVIEW_QUEUE_POLL_INTERVAL: Duration = Duration::from_secs(1); + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum DeepReviewQueueWaitSkipReason { + QueueExpired, + UserCancelled, + OptionalSkipped, +} + +enum DeepReviewQueueWaitOutcome { + Ready { + guard: DeepReviewActiveReviewerGuard<'static>, + }, + Skipped { + queue_elapsed_ms: u64, + skip_reason: DeepReviewQueueWaitSkipReason, + }, +} impl Default for TaskTool { fn default() -> Self { @@ -27,6 +66,639 @@ impl TaskTool { Self } + fn string_for_any_key<'a>(value: &'a Value, keys: &[&str]) -> Option<&'a str> { + keys.iter().find_map(|key| { + value + .get(*key) + .and_then(Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + }) + } + + fn value_for_any_key<'a>(value: &'a Value, keys: &[&str]) -> Option<&'a Value> { + keys.iter().find_map(|key| value.get(*key)) + } + + fn u64_for_any_key(value: &Value, keys: &[&str]) -> Option { + keys.iter() + .find_map(|key| value.get(*key).and_then(Value::as_u64)) + } + + fn string_array_for_any_key( + value: &Value, + keys: &[&str], + ) -> Result, DeepReviewPolicyViolation> { + let Some(array) = Self::value_for_any_key(value, keys).and_then(Value::as_array) else { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_missing_coverage", + format!("Retry coverage requires array field '{}'", keys[0]), + )); + }; + + let mut result = Vec::with_capacity(array.len()); + for item in array { + let Some(path) = item.as_str().map(str::trim).filter(|path| !path.is_empty()) else { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_invalid_coverage", + format!( + "Retry coverage field 
'{}' must contain non-empty strings", + keys[0] + ), + )); + }; + result.push(path.to_string()); + } + + Ok(result) + } + + fn work_packets_from_manifest(run_manifest: Option<&Value>) -> Option<&Vec> { + run_manifest? + .get("workPackets") + .or_else(|| run_manifest?.get("work_packets"))? + .as_array() + } + + fn packet_id_from_description(description: Option<&str>) -> Option { + let description = description?; + let start = description.find("[packet ")? + "[packet ".len(); + let packet_id = description[start..].split(']').next()?.trim(); + (!packet_id.is_empty()).then(|| packet_id.to_string()) + } + + fn packet_belongs_to_subagent(packet: &Value, subagent_type: &str) -> bool { + Self::string_for_any_key( + packet, + &["subagentId", "subagent_id", "subagentType", "subagent_type"], + ) + .is_some_and(|value| value == subagent_type) + } + + fn packet_id_for_manifest_packet(packet: &Value) -> Option<&str> { + Self::string_for_any_key(packet, &["packetId", "packet_id"]) + } + + fn deep_review_packet_id_for_cache( + subagent_type: &str, + description: Option<&str>, + run_manifest: Option<&Value>, + ) -> Option { + let packets = Self::work_packets_from_manifest(run_manifest)?; + + if let Some(description_packet_id) = Self::packet_id_from_description(description) { + return packets + .iter() + .any(|packet| { + Self::packet_id_for_manifest_packet(packet) + .is_some_and(|packet_id| packet_id == description_packet_id) + && Self::packet_belongs_to_subagent(packet, subagent_type) + }) + .then_some(description_packet_id); + } + + let mut matches = packets.iter().filter_map(|packet| { + if Self::packet_belongs_to_subagent(packet, subagent_type) { + Self::packet_id_for_manifest_packet(packet).map(str::to_string) + } else { + None + } + }); + let packet_id = matches.next()?; + if matches.next().is_some() { + None + } else { + Some(packet_id) + } + } + + fn attach_deep_review_cache(run_manifest: &mut Value, cache_value: Option) { + if run_manifest.get("deepReviewCache").is_some() 
{ + return; + } + let Some(cache_value) = cache_value else { + return; + }; + if let Some(object) = run_manifest.as_object_mut() { + object.insert("deepReviewCache".to_string(), cache_value); + } + } + + fn deep_review_retry_guidance_max_retries( + effective_policy: Option<&DeepReviewExecutionPolicy>, + dialog_turn_id: &str, + ) -> usize { + effective_policy + .map(|policy| policy.max_retries_per_role) + .unwrap_or_else(|| deep_review_max_retries_per_role(dialog_turn_id)) + } + + fn manifest_packet_by_id<'a>( + run_manifest: Option<&'a Value>, + packet_id: &str, + subagent_type: &str, + ) -> Option<&'a Value> { + Self::work_packets_from_manifest(run_manifest)? + .iter() + .find(|packet| { + Self::packet_id_for_manifest_packet(packet).is_some_and(|id| id == packet_id) + && Self::packet_belongs_to_subagent(packet, subagent_type) + }) + } + + fn assigned_scope_files_for_packet( + packet: &Value, + ) -> Result, DeepReviewPolicyViolation> { + let Some(scope) = Self::value_for_any_key(packet, &["assignedScope", "assigned_scope"]) + else { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_missing_packet_scope", + "Retry source packet is missing assigned_scope", + )); + }; + Self::string_array_for_any_key(scope, &["files"]) + } + + fn is_retryable_capacity_reason(reason: &str) -> bool { + matches!( + reason, + "local_concurrency_cap" + | "provider_rate_limit" + | "provider_concurrency_limit" + | "retry_after" + | "temporary_overload" + ) + } + + fn ensure_deep_review_retry_coverage( + input: &Value, + subagent_type: &str, + run_manifest: Option<&Value>, + ) -> Result, DeepReviewPolicyViolation> { + let Some(coverage) = Self::value_for_any_key(input, &["retry_coverage", "retryCoverage"]) + else { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_missing_coverage", + "DeepReview retry requires structured retry_coverage metadata", + )); + }; + let packet_id = Self::string_for_any_key(coverage, &["source_packet_id", "sourcePacketId"]) + 
.ok_or_else(|| { + DeepReviewPolicyViolation::new( + "deep_review_retry_missing_packet_id", + "DeepReview retry coverage requires source_packet_id", + ) + })?; + let source_status = Self::string_for_any_key(coverage, &["source_status", "sourceStatus"]) + .ok_or_else(|| { + DeepReviewPolicyViolation::new( + "deep_review_retry_missing_status", + "DeepReview retry coverage requires source_status", + ) + })?; + match source_status { + "partial_timeout" => {} + "capacity_skipped" => { + let capacity_reason = + Self::string_for_any_key(coverage, &["capacity_reason", "capacityReason"]) + .unwrap_or_default(); + if !Self::is_retryable_capacity_reason(capacity_reason) { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_non_retryable_status", + format!( + "DeepReview retry cannot redispatch non-transient capacity reason '{}'", + capacity_reason + ), + )); + } + } + other => { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_non_retryable_status", + format!( + "DeepReview retry only supports partial_timeout or transient capacity failures, not '{}'", + other + ), + )); + } + } + + let packet = Self::manifest_packet_by_id(run_manifest, packet_id, subagent_type) + .ok_or_else(|| { + DeepReviewPolicyViolation::new( + "deep_review_retry_unknown_packet", + format!( + "DeepReview retry source packet '{}' does not match reviewer '{}'", + packet_id, subagent_type + ), + ) + })?; + let original_files = Self::assigned_scope_files_for_packet(packet)?; + Self::ensure_deep_review_retry_timeout(input, packet)?; + let retry_scope_files = + Self::string_array_for_any_key(coverage, &["retry_scope_files", "retryScopeFiles"])?; + let covered_files = + Self::string_array_for_any_key(coverage, &["covered_files", "coveredFiles"])?; + if retry_scope_files.is_empty() { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_empty_scope", + "DeepReview retry requires at least one retry_scope_files entry", + )); + } + + let original_file_set: HashSet<&str> = 
original_files.iter().map(String::as_str).collect(); + let mut retry_file_set = HashSet::new(); + for file in &retry_scope_files { + if !retry_file_set.insert(file.as_str()) { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_duplicate_scope_file", + format!("DeepReview retry scope repeats file '{}'", file), + )); + } + if !original_file_set.contains(file.as_str()) { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_scope_outside_packet", + format!( + "DeepReview retry file '{}' is outside source packet '{}'", + file, packet_id + ), + )); + } + } + if retry_scope_files.len() >= original_files.len() { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_scope_not_reduced", + "DeepReview retry_scope_files must be smaller than the source packet scope", + )); + } + + for file in &covered_files { + if !original_file_set.contains(file.as_str()) { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_coverage_outside_packet", + format!( + "DeepReview retry covered file '{}' is outside source packet '{}'", + file, packet_id + ), + )); + } + if retry_file_set.contains(file.as_str()) { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_coverage_overlaps_scope", + format!( + "DeepReview retry covered file '{}' cannot also be in retry_scope_files", + file + ), + )); + } + } + + Ok(retry_scope_files) + } + + fn ensure_deep_review_retry_timeout( + input: &Value, + packet: &Value, + ) -> Result<(), DeepReviewPolicyViolation> { + let retry_timeout_seconds = + Self::u64_for_any_key(input, &["timeout_seconds", "timeoutSeconds"]).unwrap_or(0); + if retry_timeout_seconds == 0 { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_timeout_required", + "DeepReview retry requires a positive timeout_seconds value", + )); + } + + let source_timeout_seconds = + Self::u64_for_any_key(packet, &["timeoutSeconds", "timeout_seconds"]).unwrap_or(0); + if source_timeout_seconds > 0 && retry_timeout_seconds >= 
source_timeout_seconds { + return Err(DeepReviewPolicyViolation::new( + "deep_review_retry_timeout_not_reduced", + format!( + "DeepReview retry timeout_seconds ({}) must be lower than source timeout ({})", + retry_timeout_seconds, source_timeout_seconds + ), + )); + } + + Ok(()) + } + + fn prompt_with_deep_review_retry_scope(prompt: &str, retry_scope_files: &[String]) -> String { + let mut scoped_prompt = String::new(); + scoped_prompt.push_str("\n"); + scoped_prompt.push_str( + "This is a bounded DeepReview retry. Review only the following retry_scope_files and treat any other files as background context only:\n", + ); + for file in retry_scope_files { + scoped_prompt.push_str("- "); + scoped_prompt.push_str(file); + scoped_prompt.push('\n'); + } + scoped_prompt.push_str("\n\n"); + scoped_prompt.push_str(prompt); + scoped_prompt + } + + fn queue_reason_to_event_reason( + reason: DeepReviewCapacityQueueReason, + ) -> DeepReviewQueueReason { + match reason { + DeepReviewCapacityQueueReason::ProviderRateLimit => { + DeepReviewQueueReason::ProviderRateLimit + } + DeepReviewCapacityQueueReason::ProviderConcurrencyLimit => { + DeepReviewQueueReason::ProviderConcurrencyLimit + } + DeepReviewCapacityQueueReason::RetryAfter => DeepReviewQueueReason::RetryAfter, + DeepReviewCapacityQueueReason::LocalConcurrencyCap => { + DeepReviewQueueReason::LocalConcurrencyCap + } + DeepReviewCapacityQueueReason::TemporaryOverload => { + DeepReviewQueueReason::TemporaryOverload + } + } + } + + fn queue_reason_to_snake_case(reason: DeepReviewCapacityQueueReason) -> &'static str { + match reason { + DeepReviewCapacityQueueReason::ProviderRateLimit => "provider_rate_limit", + DeepReviewCapacityQueueReason::ProviderConcurrencyLimit => "provider_concurrency_limit", + DeepReviewCapacityQueueReason::RetryAfter => "retry_after", + DeepReviewCapacityQueueReason::LocalConcurrencyCap => "local_concurrency_cap", + DeepReviewCapacityQueueReason::TemporaryOverload => "temporary_overload", + } + } + + 
fn deep_review_capacity_reason_for_provider_error(
+        error: &BitFunError,
+    ) -> Option<DeepReviewCapacityQueueReason> {
+        let detail = error.error_detail();
+        let error_message = error.to_string();
+        let code = detail.provider_code.as_deref().unwrap_or_default();
+        let message = detail
+            .provider_message
+            .as_deref()
+            .unwrap_or(error_message.as_str());
+        let decision = classify_deep_review_capacity_error(code, message, None);
+        if decision.queueable {
+            return decision.reason;
+        }
+
+        match detail.category {
+            ErrorCategory::RateLimit => Some(DeepReviewCapacityQueueReason::ProviderRateLimit),
+            ErrorCategory::ProviderUnavailable => {
+                Some(DeepReviewCapacityQueueReason::TemporaryOverload)
+            }
+            _ => None,
+        }
+    }
+
+    fn deep_review_capacity_skip_result_for_provider_reason(
+        reason: DeepReviewCapacityQueueReason,
+        dialog_turn_id: &str,
+        subagent_type: &str,
+        conc_policy: &DeepReviewConcurrencyPolicy,
+        duration_ms: u128,
+    ) -> (Value, String) {
+        let snapshot = record_deep_review_effective_concurrency_capacity_error(
+            dialog_turn_id,
+            conc_policy.max_parallel_instances,
+            reason,
+            None,
+        );
+        record_deep_review_capacity_skip(dialog_turn_id);
+
+        let duration_ms = u64::try_from(duration_ms).unwrap_or(u64::MAX);
+        let reason_code = Self::queue_reason_to_snake_case(reason);
+        let assistant_message = format!(
+            "Subagent '{}' was skipped because the provider reported transient DeepReview capacity pressure ({}).\n",
+            subagent_type, reason_code
+        );
+        let data = json!({
+            "duration": duration_ms,
+            "status": "capacity_skipped",
+            "queue_elapsed_ms": 0,
+            "max_queue_wait_seconds": conc_policy.max_queue_wait_seconds,
+            "queue_skip_reason": reason_code,
+            "effective_parallel_instances": snapshot.effective_parallel_instances
+        });
+
+        (data, assistant_message)
+    }
+
+    async fn emit_deep_review_queue_state(
+        session_id: &str,
+        dialog_turn_id: &str,
+        tool_id: &str,
+        subagent_type: &str,
+        status: DeepReviewQueueStatus,
+        reason: Option<DeepReviewCapacityQueueReason>,
+        queued_reviewer_count: usize,
+        active_reviewer_count: 
usize, + optional_reviewer_count: Option, + effective_parallel_instances: Option, + queue_elapsed_ms: u64, + max_queue_wait_seconds: u64, + ) { + let run_elapsed_ms = matches!(&status, DeepReviewQueueStatus::Running).then_some(0); + if let Some(coordinator) = get_global_coordinator() { + coordinator + .emit_deep_review_queue_state_changed( + session_id, + dialog_turn_id, + DeepReviewQueueState { + tool_id: tool_id.to_string(), + subagent_type: subagent_type.to_string(), + status, + reason: reason.map(Self::queue_reason_to_event_reason), + queued_reviewer_count, + active_reviewer_count: Some(active_reviewer_count), + effective_parallel_instances, + optional_reviewer_count, + queue_elapsed_ms: Some(queue_elapsed_ms), + run_elapsed_ms, + max_queue_wait_seconds: Some(max_queue_wait_seconds), + session_concurrency_high: false, + }, + ) + .await; + } + } + + async fn wait_for_deep_review_reviewer_capacity( + session_id: &str, + dialog_turn_id: &str, + tool_id: &str, + subagent_type: &str, + conc_policy: &DeepReviewConcurrencyPolicy, + is_optional_reviewer: bool, + ) -> BitFunResult { + let decision = classify_deep_review_capacity_error( + "deep_review_concurrency_cap_reached", + "Maximum parallel reviewer instances reached", + None, + ); + let reason = decision + .reason + .unwrap_or(DeepReviewCapacityQueueReason::LocalConcurrencyCap); + let started_at = Instant::now(); + let max_wait = Duration::from_secs(conc_policy.max_queue_wait_seconds); + let mut paused_since: Option = None; + let mut paused_total = Duration::ZERO; + let optional_reviewer_count = is_optional_reviewer.then_some(1); + + loop { + let now = Instant::now(); + let current_pause_elapsed = paused_since + .map(|paused_at| now.saturating_duration_since(paused_at)) + .unwrap_or_default(); + let queue_elapsed = now + .saturating_duration_since(started_at) + .saturating_sub(paused_total) + .saturating_sub(current_pause_elapsed); + let queue_elapsed_ms = 
u64::try_from(queue_elapsed.as_millis()).unwrap_or(u64::MAX); + let active_reviewers = deep_review_active_reviewer_count(dialog_turn_id); + let effective_parallel_instances = deep_review_effective_parallel_instances( + dialog_turn_id, + conc_policy.max_parallel_instances, + ); + + let control_snapshot = deep_review_queue_control_snapshot(dialog_turn_id, tool_id); + if control_snapshot.cancelled + || (is_optional_reviewer && control_snapshot.skip_optional) + { + record_deep_review_runtime_queue_wait(dialog_turn_id, queue_elapsed_ms); + record_deep_review_capacity_skip(dialog_turn_id); + clear_deep_review_queue_control_for_tool(dialog_turn_id, tool_id); + Self::emit_deep_review_queue_state( + session_id, + dialog_turn_id, + tool_id, + subagent_type, + DeepReviewQueueStatus::CapacitySkipped, + Some(reason), + 0, + active_reviewers, + optional_reviewer_count, + Some(effective_parallel_instances), + queue_elapsed_ms, + conc_policy.max_queue_wait_seconds, + ) + .await; + return Ok(DeepReviewQueueWaitOutcome::Skipped { + queue_elapsed_ms, + skip_reason: if control_snapshot.cancelled { + DeepReviewQueueWaitSkipReason::UserCancelled + } else { + DeepReviewQueueWaitSkipReason::OptionalSkipped + }, + }); + } + + if control_snapshot.paused { + if paused_since.is_none() { + paused_since = Some(now); + } + Self::emit_deep_review_queue_state( + session_id, + dialog_turn_id, + tool_id, + subagent_type, + DeepReviewQueueStatus::PausedByUser, + Some(reason), + 1, + active_reviewers, + optional_reviewer_count, + Some(effective_parallel_instances), + queue_elapsed_ms, + conc_policy.max_queue_wait_seconds, + ) + .await; + sleep(DEEP_REVIEW_QUEUE_POLL_INTERVAL).await; + continue; + } + + if let Some(paused_at) = paused_since.take() { + paused_total += now.saturating_duration_since(paused_at); + } + + if let Some(guard) = + try_begin_deep_review_active_reviewer(dialog_turn_id, effective_parallel_instances) + { + let active_reviewer_count = 
deep_review_active_reviewer_count(dialog_turn_id); + record_deep_review_runtime_queue_wait(dialog_turn_id, queue_elapsed_ms); + clear_deep_review_queue_control_for_tool(dialog_turn_id, tool_id); + Self::emit_deep_review_queue_state( + session_id, + dialog_turn_id, + tool_id, + subagent_type, + DeepReviewQueueStatus::Running, + None, + 0, + active_reviewer_count, + optional_reviewer_count, + Some(effective_parallel_instances), + queue_elapsed_ms, + conc_policy.max_queue_wait_seconds, + ) + .await; + return Ok(DeepReviewQueueWaitOutcome::Ready { guard }); + } + + if queue_elapsed >= max_wait { + let snapshot = record_deep_review_effective_concurrency_capacity_error( + dialog_turn_id, + conc_policy.max_parallel_instances, + reason, + decision.retry_after_seconds.map(Duration::from_secs), + ); + record_deep_review_runtime_queue_wait(dialog_turn_id, queue_elapsed_ms); + record_deep_review_capacity_skip(dialog_turn_id); + clear_deep_review_queue_control_for_tool(dialog_turn_id, tool_id); + Self::emit_deep_review_queue_state( + session_id, + dialog_turn_id, + tool_id, + subagent_type, + DeepReviewQueueStatus::CapacitySkipped, + Some(reason), + 0, + active_reviewers, + optional_reviewer_count, + Some(snapshot.effective_parallel_instances), + queue_elapsed_ms, + conc_policy.max_queue_wait_seconds, + ) + .await; + return Ok(DeepReviewQueueWaitOutcome::Skipped { + queue_elapsed_ms, + skip_reason: DeepReviewQueueWaitSkipReason::QueueExpired, + }); + } + + Self::emit_deep_review_queue_state( + session_id, + dialog_turn_id, + tool_id, + subagent_type, + DeepReviewQueueStatus::QueuedForCapacity, + Some(reason), + 1, + active_reviewers, + optional_reviewer_count, + Some(effective_parallel_instances), + queue_elapsed_ms, + conc_policy.max_queue_wait_seconds, + ) + .await; + + let remaining = max_wait.saturating_sub(queue_elapsed); + sleep(DEEP_REVIEW_QUEUE_POLL_INTERVAL.min(remaining)).await; + } + } + fn format_agent_descriptions(&self, agents: &[AgentInfo]) -> String { if 
agents.is_empty() { return String::new(); @@ -76,6 +748,7 @@ Usage notes: - The 'workspace_path' parameter must still be provided explicitly for the Explore and FileFinder agent. - Use 'model_id' when a caller needs a specific model or model slot for the subagent. Omit it to use the agent default. - Use 'timeout_seconds' when you need a hard deadline for the subagent. Omit it or set it to 0 to disable the timeout. +- For DeepReview only, set 'retry' to true when re-dispatching a reviewer after that same reviewer returned partial_timeout or an explicit transient capacity failure in the current turn. Retry calls must include retry_coverage with source_packet_id, source_status, covered_files, and a smaller retry_scope_files list. - Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool calls - When the agent is done, it will return a single message back to you. - The agent's outputs should generally be trusted @@ -197,6 +870,45 @@ impl Tool for TaskTool { "type": "integer", "minimum": 0, "description": "Optional timeout for this subagent task in seconds. Use 0 or omit it to disable the timeout." + }, + "retry": { + "type": "boolean", + "description": "DeepReview only: true when this Task call is a retry for the same reviewer role after partial_timeout or an explicit transient capacity failure in the current turn." + }, + "retry_coverage": { + "type": "object", + "description": "DeepReview retry only: structured coverage metadata proving the retry is bounded. Required when retry=true.", + "properties": { + "source_packet_id": { + "type": "string", + "description": "The original reviewer packet_id being retried." + }, + "source_status": { + "type": "string", + "enum": ["partial_timeout", "capacity_skipped"], + "description": "The retryable source status." 
+ }, + "capacity_reason": { + "type": "string", + "description": "Required for capacity_skipped; must be a transient capacity reason such as local_concurrency_cap, provider_rate_limit, provider_concurrency_limit, retry_after, or temporary_overload." + }, + "covered_files": { + "type": "array", + "items": { "type": "string" }, + "description": "Files already covered by the source attempt." + }, + "retry_scope_files": { + "type": "array", + "items": { "type": "string" }, + "description": "Smaller file list to retry. Every entry must belong to the source packet and must not overlap covered_files." + } + }, + "required": [ + "source_packet_id", + "source_status", + "covered_files", + "retry_scope_files" + ] } }, "required": [ @@ -289,6 +1001,10 @@ impl Tool for TaskTool { let start_time = std::time::Instant::now(); // description is only used for frontend display + let description = input + .get("description") + .and_then(Value::as_str) + .map(str::to_string); let mut prompt = input .get("prompt") @@ -339,6 +1055,7 @@ impl Tool for TaskTool { } None => None, }; + let is_retry = input.get("retry").and_then(Value::as_bool).unwrap_or(false); let current_workspace_path = context .workspace_root() .map(|path| path.to_string_lossy().into_owned()); @@ -415,6 +1132,17 @@ impl Tool for TaskTool { "dialog_turn_id is required in context".to_string(), )); }; + let mut deep_review_effective_policy: Option = None; + let mut deep_review_active_guard: Option> = None; + let mut deep_review_reviewer_configured_max_parallel_instances: Option = None; + let mut deep_review_concurrency_policy: Option = None; + let mut deep_review_is_optional_reviewer = false; + let mut deep_review_retry_scope_files: Option> = None; + let mut deep_review_subagent_role: Option = None; + + // Get global coordinator + let coordinator = get_global_coordinator() + .ok_or_else(|| BitFunError::tool("coordinator not initialized".to_string()))?; if context .agent_type @@ -422,12 +1150,46 @@ impl Tool for TaskTool { 
.map(str::trim) .is_some_and(|agent_type| agent_type == DEEP_REVIEW_AGENT_TYPE) { - let policy = load_default_deep_review_policy().await.map_err(|error| { + let base_policy = load_default_deep_review_policy().await.map_err(|error| { BitFunError::tool(format!( "Failed to load DeepReview execution policy: {}", error )) })?; + let mut run_manifest = context.custom_data.get("deep_review_run_manifest").cloned(); + if let Some(workspace) = context.workspace.as_ref() { + let session_storage_path = workspace.session_storage_path(); + match coordinator + .get_session_manager() + .load_session_metadata(&session_storage_path, &session_id) + .await + { + Ok(Some(metadata)) => { + if run_manifest.is_none() { + run_manifest = metadata.deep_review_run_manifest; + } + if let Some(run_manifest) = run_manifest.as_mut() { + Self::attach_deep_review_cache( + run_manifest, + metadata.deep_review_cache, + ); + } + } + Ok(None) => {} + Err(error) => { + warn!( + "Failed to load DeepReview session metadata for run-manifest policy: session_id={}, error={}", + session_id, error + ); + } + } + } + let policy = if let Some(manifest) = run_manifest.as_ref() { + base_policy.with_run_manifest_execution_policy(manifest) + } else { + base_policy + }; + deep_review_effective_policy = Some(policy.clone()); let role = policy .classify_subagent(&subagent_type) .map_err(|violation| { @@ -436,6 +1198,33 @@ impl Tool for TaskTool { violation.to_tool_error_message() )) })?; + deep_review_subagent_role = Some(role); + if let Some(gate) = run_manifest + .as_ref() + .and_then(DeepReviewRunManifestGate::from_value) + { + gate.ensure_active(&subagent_type).map_err(|violation| { + BitFunError::tool(format!( + "DeepReview Task policy violation: {}", + violation.to_tool_error_message() + )) + })?; + } + if is_retry && role == DeepReviewSubagentRole::Reviewer { + deep_review_retry_scope_files = Some( + Self::ensure_deep_review_retry_coverage( + input, + &subagent_type, + run_manifest.as_ref(), + ) + 
.map_err(|violation| { + BitFunError::tool(format!( + "DeepReview Task policy violation: {}", + violation.to_tool_error_message() + )) + })?, + ); + } let is_readonly = get_agent_registry() .get_subagent_is_readonly(&subagent_type) .unwrap_or(false); @@ -467,38 +1256,302 @@ impl Tool for TaskTool { ))); } timeout_seconds = policy.effective_timeout_seconds(role, timeout_seconds); + + // Check incremental review cache before queueing. A cache hit does + // not consume runtime reviewer capacity or reviewer timeout. + if role == DeepReviewSubagentRole::Reviewer && !is_retry { + if let Some(cache_value) = + run_manifest.as_ref().and_then(|m| m.get("deepReviewCache")) + { + let cache = DeepReviewIncrementalCache::from_value(cache_value); + if cache.matches_manifest(run_manifest.as_ref().unwrap_or(&Value::Null)) { + if let Some(packet_id) = Self::deep_review_packet_id_for_cache( + &subagent_type, + description.as_deref(), + run_manifest.as_ref(), + ) { + if let Some(cached_output) = cache.get_packet(&packet_id) { + let cached_result = format!( + "Subagent '{}' result (from incremental review cache):\n\n{}\n", + subagent_type, cached_output + ); + return Ok(vec![ToolResult::ok( + json!({ "cached": true, "packet_id": packet_id }), + Some(cached_result), + )]); + } + } + } + } + } + + // Enforce dynamic concurrency policy from the run manifest. 
+ let conc_policy = policy + .concurrency_policy_from_manifest(run_manifest.as_ref().unwrap_or(&Value::Null)); + deep_review_concurrency_policy = Some(conc_policy.clone()); + match role { + DeepReviewSubagentRole::Reviewer => { + deep_review_reviewer_configured_max_parallel_instances = + Some(conc_policy.max_parallel_instances); + let effective_parallel_instances = deep_review_effective_parallel_instances( + &dialog_turn_id, + conc_policy.max_parallel_instances, + ); + let is_optional_reviewer = policy + .extra_subagent_ids + .iter() + .any(|id| id == &subagent_type); + deep_review_is_optional_reviewer = is_optional_reviewer; + if let Some(guard) = try_begin_deep_review_active_reviewer( + &dialog_turn_id, + effective_parallel_instances, + ) { + deep_review_active_guard = Some(guard); + } else { + match Self::wait_for_deep_review_reviewer_capacity( + &session_id, + &dialog_turn_id, + &tool_call_id, + &subagent_type, + &conc_policy, + is_optional_reviewer, + ) + .await? + { + DeepReviewQueueWaitOutcome::Ready { guard } => { + deep_review_active_guard = Some(guard); + } + DeepReviewQueueWaitOutcome::Skipped { + queue_elapsed_ms, + skip_reason, + } => { + let queue_skip_reason = match skip_reason { + DeepReviewQueueWaitSkipReason::QueueExpired => "queue_expired", + DeepReviewQueueWaitSkipReason::UserCancelled => { + "user_cancelled" + } + DeepReviewQueueWaitSkipReason::OptionalSkipped => { + "optional_skipped" + } + }; + let assistant_message = match skip_reason { + DeepReviewQueueWaitSkipReason::QueueExpired => format!( + "Subagent '{}' was skipped because the DeepReview capacity queue reached its maximum wait ({}s).\n", + subagent_type, + conc_policy.max_queue_wait_seconds, + queue_elapsed_ms + ), + DeepReviewQueueWaitSkipReason::UserCancelled => format!( + "Subagent '{}' was skipped because the DeepReview capacity queue was cancelled by the user.\n", + subagent_type, queue_elapsed_ms + ), + DeepReviewQueueWaitSkipReason::OptionalSkipped => format!( + "Subagent '{}' 
was skipped because optional DeepReview queued reviewers were skipped by the user.\n", + subagent_type, queue_elapsed_ms + ), + }; + return Ok(vec![ToolResult::Result { + data: json!({ + "duration": start_time.elapsed().as_millis(), + "status": "capacity_skipped", + "queue_elapsed_ms": queue_elapsed_ms, + "max_queue_wait_seconds": conc_policy.max_queue_wait_seconds, + "queue_skip_reason": queue_skip_reason, + "effective_parallel_instances": deep_review_effective_concurrency_snapshot( + &dialog_turn_id, + conc_policy.max_parallel_instances, + ).effective_parallel_instances + }), + result_for_assistant: Some(assistant_message), + image_attachments: None, + }]); + } + } + } + } + DeepReviewSubagentRole::Judge => { + let active_reviewers = deep_review_active_reviewer_count(&dialog_turn_id); + let judge_pending = deep_review_has_judge_been_launched(&dialog_turn_id); + conc_policy + .check_launch_allowed(active_reviewers, role, judge_pending) + .map_err(|violation| { + BitFunError::tool(format!( + "DeepReview concurrency policy violation: {}", + violation.to_tool_error_message() + )) + })?; + } + } + record_deep_review_task_budget( + &dialog_turn_id, + &policy, + role, + &subagent_type, + is_retry, + ) + .map_err(|violation| { + BitFunError::tool(format!( + "DeepReview Task policy violation: {}", + violation.to_tool_error_message() + )) + })?; } - // Get global coordinator - let coordinator = get_global_coordinator() - .ok_or_else(|| BitFunError::tool("coordinator not initialized".to_string()))?; + if let Some(retry_scope_files) = deep_review_retry_scope_files.as_ref() { + prompt = Self::prompt_with_deep_review_retry_scope(&prompt, retry_scope_files); + } let parent_info = SubagentParentInfo { - tool_call_id, - session_id, - dialog_turn_id, + tool_call_id: tool_call_id.clone(), + session_id: session_id.clone(), + dialog_turn_id: dialog_turn_id.clone(), }; - let result = coordinator + let subagent_context = deep_review_subagent_role.map(|role| { + let mut values = 
HashMap::new(); + values.insert( + "deep_review_subagent_role".to_string(), + match role { + DeepReviewSubagentRole::Reviewer => "reviewer", + DeepReviewSubagentRole::Judge => "judge", + } + .to_string(), + ); + values.insert( + "deep_review_subagent_type".to_string(), + subagent_type.clone(), + ); + values + }); + let result = match coordinator .execute_subagent( subagent_type.clone(), prompt, parent_info, Some(effective_workspace_path.clone()), - None, + subagent_context, context.cancellation_token.as_ref(), model_id, timeout_seconds, ) - .await?; + .await + { + Ok(result) => result, + Err(error) => { + if matches!( + deep_review_subagent_role, + Some(DeepReviewSubagentRole::Reviewer) + ) { + if let (Some(reason), Some(conc_policy)) = ( + Self::deep_review_capacity_reason_for_provider_error(&error), + deep_review_concurrency_policy.as_ref(), + ) { + drop(deep_review_active_guard.take()); + let (data, assistant_message) = + Self::deep_review_capacity_skip_result_for_provider_reason( + reason, + &dialog_turn_id, + &subagent_type, + conc_policy, + start_time.elapsed().as_millis(), + ); + let effective_parallel_instances = data + .get("effective_parallel_instances") + .and_then(Value::as_u64) + .and_then(|value| usize::try_from(value).ok()); + Self::emit_deep_review_queue_state( + &session_id, + &dialog_turn_id, + &tool_call_id, + &subagent_type, + DeepReviewQueueStatus::CapacitySkipped, + Some(reason), + 0, + deep_review_active_reviewer_count(&dialog_turn_id), + deep_review_is_optional_reviewer.then_some(1), + effective_parallel_instances, + 0, + conc_policy.max_queue_wait_seconds, + ) + .await; + return Ok(vec![ToolResult::Result { + data, + result_for_assistant: Some(assistant_message), + image_attachments: None, + }]); + } + } + return Err(error); + } + }; + if !result.is_partial_timeout() { + if let Some(configured_max_parallel_instances) = + deep_review_reviewer_configured_max_parallel_instances + { + record_deep_review_effective_concurrency_success( + 
&dialog_turn_id, + configured_max_parallel_instances, + ); + } + } + drop(deep_review_active_guard); let duration = start_time.elapsed().as_millis(); + let status = if result.is_partial_timeout() { + "partial_timeout" + } else { + "completed" + }; - Ok(vec![ToolResult::Result { - data: json!({"duration": duration}), - result_for_assistant: Some(format!( + // Build retry hint for deep review reviewer timeouts. + let retry_hint = if result.is_partial_timeout() && !is_retry { + let retries_used = crate::agentic::deep_review_policy::deep_review_retries_used( + &dialog_turn_id, + &subagent_type, + ); + let max_retries = Self::deep_review_retry_guidance_max_retries( + deep_review_effective_policy.as_ref(), + &dialog_turn_id, + ); + if max_retries > 0 && retries_used < max_retries { + format!( + "\n\nThis reviewer timed out. You may retry with 'retry: true' only if you can provide retry_coverage with source_packet_id, source_status='partial_timeout', covered_files, and a smaller retry_scope_files list. 
Retries used: {}/{}.", + retries_used, max_retries + ) + } else { + String::new() + } + } else { + String::new() + }; + + let result_for_assistant = if result.is_partial_timeout() { + format!( + "Subagent '{}' timed out with partial result:\n\n{}\n{}", + subagent_type, result.text, retry_hint + ) + } else { + format!( "Subagent '{}' completed successfully with result:\n\n{}\n", subagent_type, result.text - )), + ) + }; + let mut data = json!({ + "duration": duration, + "status": status + }); + if result.is_partial_timeout() { + data["partial_output"] = json!(result.text); + if let Some(reason) = result.reason.as_deref() { + data["reason"] = json!(reason); + } + if let Some(event_id) = result.ledger_event_id() { + data["ledger_event_id"] = json!(event_id); + } + } + + Ok(vec![ToolResult::Result { + data, + result_for_assistant: Some(result_for_assistant), image_attachments: None, }]) } @@ -507,7 +1560,9 @@ impl Tool for TaskTool { #[cfg(test)] mod tests { use super::TaskTool; - use crate::agentic::deep_review_policy::{DeepReviewExecutionPolicy, DeepReviewSubagentRole}; + use crate::agentic::deep_review_policy::{ + DeepReviewBudgetTracker, DeepReviewExecutionPolicy, DeepReviewSubagentRole, + }; use crate::agentic::tools::framework::Tool; use serde_json::json; @@ -592,4 +1647,784 @@ mod tests { Some(240) ); } + + #[test] + fn deep_review_policy_saturates_oversized_numeric_limits() { + let policy = DeepReviewExecutionPolicy::from_config_value(Some(&json!({ + "reviewer_timeout_seconds": u64::MAX, + "judge_timeout_seconds": u64::MAX + }))); + + assert_eq!(policy.reviewer_timeout_seconds, 3600); + assert_eq!(policy.judge_timeout_seconds, 3600); + } + + #[test] + fn deep_review_budget_tracker_caps_judge_per_turn() { + let policy = DeepReviewExecutionPolicy::default(); + let tracker = DeepReviewBudgetTracker::default(); + + tracker + .record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Judge, + "ReviewJudge", + false, + ) + .unwrap(); + assert!(tracker + 
.record_task( + "turn-1", + &policy, + DeepReviewSubagentRole::Judge, + "ReviewJudge", + false, + ) + .is_err()); + + tracker + .record_task( + "turn-2", + &policy, + DeepReviewSubagentRole::Judge, + "ReviewJudge", + false, + ) + .unwrap(); + } + + #[test] + fn deep_review_concurrency_policy_blocks_reviewer_at_cap() { + use crate::agentic::deep_review_policy::DeepReviewConcurrencyPolicy; + + let policy = DeepReviewConcurrencyPolicy { + max_parallel_instances: 2, + stagger_seconds: 0, + max_queue_wait_seconds: 60, + batch_extras_separately: true, + }; + // 0 active → allowed + assert!(policy + .check_launch_allowed(0, DeepReviewSubagentRole::Reviewer, false) + .is_ok()); + // 1 active → allowed + assert!(policy + .check_launch_allowed(1, DeepReviewSubagentRole::Reviewer, false) + .is_ok()); + // 2 active (at cap) → blocked + assert!(policy + .check_launch_allowed(2, DeepReviewSubagentRole::Reviewer, false) + .is_err()); + } + + #[test] + fn deep_review_concurrency_policy_returns_structured_cap_rejection() { + use crate::agentic::deep_review_policy::DeepReviewConcurrencyPolicy; + + let policy = DeepReviewConcurrencyPolicy { + max_parallel_instances: 2, + stagger_seconds: 0, + max_queue_wait_seconds: 60, + batch_extras_separately: true, + }; + let violation = policy + .check_launch_allowed(2, DeepReviewSubagentRole::Reviewer, false) + .expect_err("reviewer launch at cap should be rejected"); + let message = format!( + "DeepReview concurrency policy violation: {}", + violation.to_tool_error_message() + ); + + assert!(message.contains("deep_review_concurrency_cap_reached")); + assert!(message.contains("Maximum parallel reviewer instances reached")); + } + + #[tokio::test] + async fn deep_review_capacity_queue_skips_after_max_wait() { + use crate::agentic::deep_review_policy::{ + deep_review_capacity_skip_count, deep_review_concurrency_cap_rejection_count, + deep_review_effective_parallel_instances, try_begin_deep_review_active_reviewer, + DeepReviewConcurrencyPolicy, + 
}; + + let _occupied_a = try_begin_deep_review_active_reviewer("turn-queue-skip", 2) + .expect("precondition should occupy first reviewer capacity"); + let _occupied_b = try_begin_deep_review_active_reviewer("turn-queue-skip", 2) + .expect("precondition should occupy second reviewer capacity"); + let policy = DeepReviewConcurrencyPolicy { + max_parallel_instances: 2, + stagger_seconds: 0, + max_queue_wait_seconds: 0, + batch_extras_separately: true, + }; + + let outcome = TaskTool::wait_for_deep_review_reviewer_capacity( + "session-queue-skip", + "turn-queue-skip", + "tool-queue-skip", + "ReviewSecurity", + &policy, + false, + ) + .await + .expect("queue wait should resolve"); + + match outcome { + super::DeepReviewQueueWaitOutcome::Skipped { + queue_elapsed_ms, .. + } => { + assert!(queue_elapsed_ms < 100); + } + super::DeepReviewQueueWaitOutcome::Ready { .. } => { + panic!("occupied capacity should skip with maxQueueWaitSeconds=0"); + } + } + assert_eq!(deep_review_capacity_skip_count("turn-queue-skip"), 1); + assert_eq!( + deep_review_concurrency_cap_rejection_count("turn-queue-skip"), + 0 + ); + assert_eq!( + deep_review_effective_parallel_instances("turn-queue-skip", 2), + 1 + ); + } + + #[tokio::test] + async fn deep_review_capacity_queue_cancel_control_skips_waiting_reviewer() { + use crate::agentic::deep_review_policy::{ + apply_deep_review_queue_control, deep_review_capacity_skip_count, + try_begin_deep_review_active_reviewer, DeepReviewConcurrencyPolicy, + DeepReviewQueueControlAction, + }; + + let turn_id = "turn-queue-cancel"; + let tool_id = "tool-queue-cancel"; + let _occupied = try_begin_deep_review_active_reviewer(turn_id, 1) + .expect("precondition should occupy reviewer capacity"); + apply_deep_review_queue_control(turn_id, tool_id, DeepReviewQueueControlAction::Cancel); + let policy = DeepReviewConcurrencyPolicy { + max_parallel_instances: 1, + stagger_seconds: 0, + max_queue_wait_seconds: 60, + batch_extras_separately: true, + }; + + let outcome 
= TaskTool::wait_for_deep_review_reviewer_capacity( + "session-queue-cancel", + turn_id, + tool_id, + "ReviewSecurity", + &policy, + false, + ) + .await + .expect("queue wait should resolve"); + + match outcome { + super::DeepReviewQueueWaitOutcome::Skipped { + queue_elapsed_ms, .. + } => { + assert!(queue_elapsed_ms < 100); + } + super::DeepReviewQueueWaitOutcome::Ready { .. } => { + panic!("cancelled queue control should skip the waiting reviewer"); + } + } + assert_eq!(deep_review_capacity_skip_count(turn_id), 1); + } + + #[tokio::test] + async fn deep_review_capacity_queue_records_one_runtime_wait_when_ready() { + use crate::agentic::deep_review_policy::{ + deep_review_runtime_diagnostics_snapshot, try_begin_deep_review_active_reviewer, + DeepReviewConcurrencyPolicy, + }; + + let turn_id = "turn-queue-ready-diagnostics"; + let tool_id = "tool-queue-ready-diagnostics"; + let occupied = try_begin_deep_review_active_reviewer(turn_id, 1) + .expect("precondition should occupy reviewer capacity"); + let policy = DeepReviewConcurrencyPolicy { + max_parallel_instances: 1, + stagger_seconds: 0, + max_queue_wait_seconds: 1, + batch_extras_separately: true, + }; + let turn_id_owned = turn_id.to_string(); + let tool_id_owned = tool_id.to_string(); + + let handle = tokio::spawn(async move { + TaskTool::wait_for_deep_review_reviewer_capacity( + "session-queue-ready-diagnostics", + &turn_id_owned, + &tool_id_owned, + "ReviewSecurity", + &policy, + false, + ) + .await + }); + + tokio::time::sleep(tokio::time::Duration::from_millis(30)).await; + drop(occupied); + + let outcome = tokio::time::timeout(tokio::time::Duration::from_millis(500), handle) + .await + .expect("queue should become ready after capacity frees") + .expect("spawned wait should not panic") + .expect("queue wait should resolve"); + match outcome { + super::DeepReviewQueueWaitOutcome::Ready { .. } => {} + super::DeepReviewQueueWaitOutcome::Skipped { .. 
} => { + panic!("freed capacity should allow the queued reviewer to run"); + } + } + + let diagnostics = deep_review_runtime_diagnostics_snapshot(turn_id) + .expect("runtime diagnostics should record terminal queue wait"); + assert_eq!(diagnostics.queue_wait_count, 1); + assert!(diagnostics.queue_wait_total_ms >= 20); + } + + #[tokio::test] + async fn deep_review_capacity_queue_pause_does_not_expire_until_continued() { + use crate::agentic::deep_review_policy::{ + apply_deep_review_queue_control, try_begin_deep_review_active_reviewer, + DeepReviewConcurrencyPolicy, DeepReviewQueueControlAction, + }; + + let turn_id = "turn-queue-pause"; + let tool_id = "tool-queue-pause"; + let _occupied = try_begin_deep_review_active_reviewer(turn_id, 1) + .expect("precondition should occupy reviewer capacity"); + apply_deep_review_queue_control(turn_id, tool_id, DeepReviewQueueControlAction::Pause); + let policy = DeepReviewConcurrencyPolicy { + max_parallel_instances: 1, + stagger_seconds: 0, + max_queue_wait_seconds: 0, + batch_extras_separately: true, + }; + let turn_id_owned = turn_id.to_string(); + let tool_id_owned = tool_id.to_string(); + + let handle = tokio::spawn(async move { + TaskTool::wait_for_deep_review_reviewer_capacity( + "session-queue-pause", + &turn_id_owned, + &tool_id_owned, + "ReviewSecurity", + &policy, + false, + ) + .await + }); + + tokio::time::sleep(tokio::time::Duration::from_millis(30)).await; + assert!( + !handle.is_finished(), + "paused queue wait should not expire while user pause is active" + ); + + apply_deep_review_queue_control(turn_id, tool_id, DeepReviewQueueControlAction::Continue); + let outcome = tokio::time::timeout(tokio::time::Duration::from_millis(500), handle) + .await + .expect("continued queue wait should finish") + .expect("spawned wait should not panic") + .expect("queue wait should resolve"); + match outcome { + super::DeepReviewQueueWaitOutcome::Skipped { + queue_elapsed_ms, .. 
+ } => { + assert!(queue_elapsed_ms < 100); + } + super::DeepReviewQueueWaitOutcome::Ready { .. } => { + panic!("occupied capacity should skip after pause is continued"); + } + } + } + + #[tokio::test] + async fn deep_review_capacity_queue_skip_optional_skips_optional_waiter() { + use crate::agentic::deep_review_policy::{ + apply_deep_review_queue_control, try_begin_deep_review_active_reviewer, + DeepReviewConcurrencyPolicy, DeepReviewQueueControlAction, + }; + + let turn_id = "turn-queue-skip-optional"; + let tool_id = "tool-queue-skip-optional"; + let _occupied = try_begin_deep_review_active_reviewer(turn_id, 1) + .expect("precondition should occupy reviewer capacity"); + apply_deep_review_queue_control( + turn_id, + tool_id, + DeepReviewQueueControlAction::SkipOptional, + ); + let policy = DeepReviewConcurrencyPolicy { + max_parallel_instances: 1, + stagger_seconds: 0, + max_queue_wait_seconds: 60, + batch_extras_separately: true, + }; + + let outcome = TaskTool::wait_for_deep_review_reviewer_capacity( + "session-queue-skip-optional", + turn_id, + tool_id, + "ReviewCustom", + &policy, + true, + ) + .await + .expect("queue wait should resolve"); + + match outcome { + super::DeepReviewQueueWaitOutcome::Skipped { + queue_elapsed_ms, .. + } => { + assert!(queue_elapsed_ms < 100); + } + super::DeepReviewQueueWaitOutcome::Ready { .. 
} => { + panic!("optional queue control should skip optional reviewer"); + } + } + } + + #[test] + fn deep_review_concurrency_policy_blocks_judge_with_active_reviewers() { + use crate::agentic::deep_review_policy::DeepReviewConcurrencyPolicy; + + let policy = DeepReviewConcurrencyPolicy::default(); + // 1 active reviewer → judge blocked + assert!(policy + .check_launch_allowed(1, DeepReviewSubagentRole::Judge, false) + .is_err()); + // 0 active reviewers, no judge pending → judge allowed + assert!(policy + .check_launch_allowed(0, DeepReviewSubagentRole::Judge, false) + .is_ok()); + // 0 active reviewers, judge already pending → blocked + assert!(policy + .check_launch_allowed(0, DeepReviewSubagentRole::Judge, true) + .is_err()); + } + + #[test] + fn deep_review_incremental_cache_hit_returns_cached_result() { + use crate::agentic::deep_review_policy::DeepReviewIncrementalCache; + + let mut cache = DeepReviewIncrementalCache::new("fp-test-123"); + cache.store_packet("ReviewSecurity", "Found 2 security issues"); + + // Cache hit + let result = cache.get_packet("ReviewSecurity"); + assert_eq!(result, Some("Found 2 security issues")); + + // Cache miss + assert_eq!(cache.get_packet("ReviewPerformance"), None); + } + + #[test] + fn deep_review_incremental_cache_fingerprint_mismatch_skips() { + use crate::agentic::deep_review_policy::DeepReviewIncrementalCache; + + let cache = DeepReviewIncrementalCache::new("fp-old"); + let manifest = serde_json::json!({ + "incrementalReviewCache": { + "fingerprint": "fp-new" + } + }); + // Fingerprint mismatch → cache should not match + assert!(!cache.matches_manifest(&manifest)); + } + + #[test] + fn deep_review_cache_packet_id_prefers_task_description_packet() { + let manifest = serde_json::json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity:group-1-of-2", + "phase": "reviewer", + "subagentId": "ReviewSecurity" + }, + { + "packetId": "reviewer:ReviewSecurity:group-2-of-2", + "phase": "reviewer", + "subagentId": 
"ReviewSecurity" + } + ] + }); + + assert_eq!( + TaskTool::deep_review_packet_id_for_cache( + "ReviewSecurity", + Some("Security review [packet reviewer:ReviewSecurity:group-2-of-2]"), + Some(&manifest), + ), + Some("reviewer:ReviewSecurity:group-2-of-2".to_string()) + ); + } + + #[test] + fn deep_review_cache_packet_id_uses_unique_manifest_packet() { + let manifest = serde_json::json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewBusinessLogic", + "phase": "reviewer", + "subagentId": "ReviewBusinessLogic" + } + ] + }); + + assert_eq!( + TaskTool::deep_review_packet_id_for_cache( + "ReviewBusinessLogic", + Some("Logic review"), + Some(&manifest), + ), + Some("reviewer:ReviewBusinessLogic".to_string()) + ); + } + + #[test] + fn deep_review_cache_packet_id_does_not_guess_split_packets() { + let manifest = serde_json::json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewPerformance:group-1-of-2", + "phase": "reviewer", + "subagentId": "ReviewPerformance" + }, + { + "packetId": "reviewer:ReviewPerformance:group-2-of-2", + "phase": "reviewer", + "subagentId": "ReviewPerformance" + } + ] + }); + + assert_eq!( + TaskTool::deep_review_packet_id_for_cache( + "ReviewPerformance", + Some("Performance review"), + Some(&manifest), + ), + None + ); + } + + #[test] + fn deep_review_cache_packet_id_ignores_description_for_other_subagent() { + let manifest = serde_json::json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity:group-1-of-1", + "phase": "reviewer", + "subagentId": "ReviewSecurity" + } + ] + }); + + assert_eq!( + TaskTool::deep_review_packet_id_for_cache( + "ReviewPerformance", + Some("Performance review [packet reviewer:ReviewSecurity:group-1-of-1]"), + Some(&manifest), + ), + None + ); + } + + #[test] + fn deep_review_retry_guidance_includes_budget_info() { + // Verify that the retry budget tracking functions work correctly + // for the retry guidance injected in task_tool. 
+ use crate::agentic::deep_review_policy::{ + deep_review_max_retries_per_role, deep_review_retries_used, + }; + + // Default max retries should be 1 + assert_eq!(deep_review_max_retries_per_role("nonexistent-turn"), 1); + + // Retries used for a nonexistent turn should be 0 + assert_eq!( + deep_review_retries_used("nonexistent-turn", "ReviewSecurity"), + 0 + ); + } + + #[test] + fn deep_review_retry_guidance_uses_manifest_policy_limit() { + use crate::agentic::deep_review_policy::DeepReviewExecutionPolicy; + + let manifest = serde_json::json!({ + "reviewMode": "deep", + "executionPolicy": { + "maxRetriesPerRole": 2 + } + }); + let policy = + DeepReviewExecutionPolicy::default().with_run_manifest_execution_policy(&manifest); + + assert_eq!( + TaskTool::deep_review_retry_guidance_max_retries(Some(&policy), "nonexistent-turn"), + 2 + ); + } + + #[test] + fn deep_review_retry_rejects_missing_structured_coverage() { + let manifest = json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity:group-1-of-1", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "timeoutSeconds": 600, + "assignedScope": { + "files": [ + "src/crates/core/src/auth.rs", + "src/crates/core/src/token.rs" + ] + } + } + ] + }); + let input = json!({ + "retry": true + }); + + let violation = + TaskTool::ensure_deep_review_retry_coverage(&input, "ReviewSecurity", Some(&manifest)) + .expect_err("missing retry coverage should be rejected"); + + assert_eq!(violation.code, "deep_review_retry_missing_coverage"); + } + + #[test] + fn deep_review_retry_rejects_broad_scope() { + let manifest = json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity:group-1-of-1", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "timeoutSeconds": 600, + "assignedScope": { + "files": [ + "src/crates/core/src/auth.rs", + "src/crates/core/src/token.rs" + ] + } + } + ] + }); + let input = json!({ + "retry": true, + "timeout_seconds": 300, + "retry_coverage": { + "source_packet_id": 
"reviewer:ReviewSecurity:group-1-of-1", + "source_status": "partial_timeout", + "covered_files": [ + "src/crates/core/src/auth.rs" + ], + "retry_scope_files": [ + "src/crates/core/src/auth.rs", + "src/crates/core/src/token.rs" + ] + } + }); + + let violation = + TaskTool::ensure_deep_review_retry_coverage(&input, "ReviewSecurity", Some(&manifest)) + .expect_err("retrying the full packet should be rejected"); + + assert_eq!(violation.code, "deep_review_retry_scope_not_reduced"); + } + + #[test] + fn deep_review_retry_rejects_timeout_that_is_not_lowered() { + let manifest = json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity:group-1-of-1", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "timeoutSeconds": 600, + "assignedScope": { + "files": [ + "src/crates/core/src/auth.rs", + "src/crates/core/src/token.rs" + ] + } + } + ] + }); + let input = json!({ + "retry": true, + "timeout_seconds": 600, + "retry_coverage": { + "source_packet_id": "reviewer:ReviewSecurity:group-1-of-1", + "source_status": "partial_timeout", + "covered_files": [ + "src/crates/core/src/auth.rs" + ], + "retry_scope_files": [ + "src/crates/core/src/token.rs" + ] + } + }); + + let violation = + TaskTool::ensure_deep_review_retry_coverage(&input, "ReviewSecurity", Some(&manifest)) + .expect_err("retry timeout must be lower than source timeout"); + + assert_eq!(violation.code, "deep_review_retry_timeout_not_reduced"); + } + + #[test] + fn deep_review_retry_rejects_non_queueable_capacity_reason() { + let manifest = json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity:group-1-of-1", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "timeoutSeconds": 600, + "assignedScope": { + "files": [ + "src/crates/core/src/auth.rs", + "src/crates/core/src/token.rs" + ] + } + } + ] + }); + let input = json!({ + "retry": true, + "retry_coverage": { + "source_packet_id": "reviewer:ReviewSecurity:group-1-of-1", + "source_status": "capacity_skipped", + 
"capacity_reason": "auth_error", + "covered_files": [], + "retry_scope_files": [ + "src/crates/core/src/token.rs" + ] + } + }); + + let violation = + TaskTool::ensure_deep_review_retry_coverage(&input, "ReviewSecurity", Some(&manifest)) + .expect_err("non-queueable capacity failures must fail fast"); + + assert_eq!(violation.code, "deep_review_retry_non_retryable_status"); + } + + #[test] + fn deep_review_provider_capacity_error_builds_capacity_skipped_payload_and_lowers_effective_cap( + ) { + use crate::agentic::deep_review_policy::{ + deep_review_effective_concurrency_snapshot, DeepReviewConcurrencyPolicy, + }; + use crate::util::BitFunError; + + let policy = DeepReviewConcurrencyPolicy { + max_parallel_instances: 3, + stagger_seconds: 0, + max_queue_wait_seconds: 30, + batch_extras_separately: true, + }; + let turn_id = "turn-provider-capacity-skip"; + let reason = TaskTool::deep_review_capacity_reason_for_provider_error(&BitFunError::ai( + "Provider error: provider=openai, code=429, message=rate limit exceeded", + )) + .expect("provider rate limit should surface as capacity_skipped"); + let (data, assistant_message) = + TaskTool::deep_review_capacity_skip_result_for_provider_reason( + reason, + turn_id, + "ReviewSecurity", + &policy, + 42, + ); + + assert_eq!(data["status"], "capacity_skipped"); + assert_eq!(data["queue_skip_reason"], "provider_rate_limit"); + assert_eq!(data["effective_parallel_instances"], 2); + assert!(assistant_message.contains("status=\"capacity_skipped\"")); + assert!(assistant_message.contains("reason=\"provider_rate_limit\"")); + assert_eq!( + deep_review_effective_concurrency_snapshot(turn_id, 3).effective_parallel_instances, + 2 + ); + } + + #[test] + fn deep_review_provider_quota_error_is_not_capacity_skipped() { + use crate::util::BitFunError; + + let reason = TaskTool::deep_review_capacity_reason_for_provider_error(&BitFunError::ai( + "Provider error: provider=glm, code=1113, message=insufficient quota", + )); + + assert!( + 
reason.is_none(), + "quota errors should remain fail-fast instead of entering capacity queue flow" + ); + } + + #[test] + fn deep_review_retry_accepts_reduced_partial_timeout_scope() { + let manifest = json!({ + "workPackets": [ + { + "packetId": "reviewer:ReviewSecurity:group-1-of-1", + "phase": "reviewer", + "subagentId": "ReviewSecurity", + "timeoutSeconds": 600, + "assignedScope": { + "files": [ + "src/crates/core/src/auth.rs", + "src/crates/core/src/token.rs" + ] + } + } + ] + }); + let input = json!({ + "retry": true, + "timeout_seconds": 300, + "retry_coverage": { + "source_packet_id": "reviewer:ReviewSecurity:group-1-of-1", + "source_status": "partial_timeout", + "covered_files": [ + "src/crates/core/src/auth.rs" + ], + "retry_scope_files": [ + "src/crates/core/src/token.rs" + ] + } + }); + + let retry_scope = + TaskTool::ensure_deep_review_retry_coverage(&input, "ReviewSecurity", Some(&manifest)) + .expect("reduced retry scope should be accepted"); + + assert_eq!(retry_scope, vec!["src/crates/core/src/token.rs"]); + } + + #[test] + fn deep_review_retry_scope_prompt_prepend_bounds_review_files() { + let prompt = TaskTool::prompt_with_deep_review_retry_scope( + "Continue the security review.", + &["src/crates/core/src/token.rs".to_string()], + ); + + assert!(prompt.starts_with("")); + assert!(prompt.contains("Review only the following retry_scope_files")); + assert!(prompt.contains("- src/crates/core/src/token.rs")); + assert!(prompt.ends_with("Continue the security review.")); + } } diff --git a/src/crates/core/src/agentic/tools/pipeline/tool_pipeline.rs b/src/crates/core/src/agentic/tools/pipeline/tool_pipeline.rs index f8e3f31c5..ee82e093d 100644 --- a/src/crates/core/src/agentic/tools/pipeline/tool_pipeline.rs +++ b/src/crates/core/src/agentic/tools/pipeline/tool_pipeline.rs @@ -1265,6 +1265,51 @@ impl ToolPipeline { ); } } + if let Some(raw_manifest) = + task.context.context_vars.get("deep_review_run_manifest") + { + if let Ok(manifest) = 
serde_json::from_str::(raw_manifest) { + map.insert("deep_review_run_manifest".to_string(), manifest); + } + } + if let Some(role) = task.context.context_vars.get("deep_review_subagent_role") { + if !role.trim().is_empty() { + map.insert( + "deep_review_subagent_role".to_string(), + serde_json::json!(role.trim()), + ); + } + } + if let Some(subagent_type) = + task.context.context_vars.get("deep_review_subagent_type") + { + if !subagent_type.trim().is_empty() { + map.insert( + "deep_review_subagent_type".to_string(), + serde_json::json!(subagent_type.trim()), + ); + } + } + if map + .get("deep_review_subagent_role") + .and_then(serde_json::Value::as_str) + .is_some_and(|role| role == "reviewer") + { + if let Some(parent_info) = task.context.subagent_parent_info.as_ref() { + map.insert( + "deep_review_parent_tool_call_id".to_string(), + serde_json::json!(parent_info.tool_call_id.clone()), + ); + map.insert( + "deep_review_parent_session_id".to_string(), + serde_json::json!(parent_info.session_id.clone()), + ); + map.insert( + "deep_review_parent_dialog_turn_id".to_string(), + serde_json::json!(parent_info.dialog_turn_id.clone()), + ); + } + } map }, diff --git a/src/crates/core/src/service/config/types.rs b/src/crates/core/src/service/config/types.rs index 69a751c8a..a357272b7 100644 --- a/src/crates/core/src/service/config/types.rs +++ b/src/crates/core/src/service/config/types.rs @@ -560,6 +560,10 @@ pub struct AIConfig { /// Preferred browser for CDP browser control. Empty/default uses the system default browser. #[serde(default)] pub browser_control_preferred_browser: String, + + /// Maximum number of rounds per dialog turn before soft-pausing. 
+ #[serde(default = "default_max_rounds")] + pub max_rounds: usize, } impl AIConfig { @@ -703,6 +707,12 @@ fn default_subagent_max_concurrency() -> usize { 5 } +pub const DEFAULT_MAX_ROUNDS: usize = 200; + +fn default_max_rounds() -> usize { + DEFAULT_MAX_ROUNDS +} + impl Default for ModeConfig { fn default() -> Self { Self { @@ -1512,6 +1522,7 @@ impl Default for AIConfig { debug_mode_config: DebugModeConfig::default(), computer_use_enabled: false, browser_control_preferred_browser: String::new(), + max_rounds: default_max_rounds(), } } } diff --git a/src/crates/core/src/service/cron/service.rs b/src/crates/core/src/service/cron/service.rs index c7e74a09d..a8ac1060c 100644 --- a/src/crates/core/src/service/cron/service.rs +++ b/src/crates/core/src/service/cron/service.rs @@ -519,6 +519,7 @@ impl CronService { scheduled_job_policy(), None, None, + None, ) .await; diff --git a/src/crates/core/src/service/git/git_service.rs b/src/crates/core/src/service/git/git_service.rs index 177f016d6..e1523e1b7 100644 --- a/src/crates/core/src/service/git/git_service.rs +++ b/src/crates/core/src/service/git/git_service.rs @@ -14,6 +14,48 @@ pub struct GitService; type CommitStats = (Option, Option, Option); +fn parse_name_status_output(output: &str) -> Vec { + output + .lines() + .filter_map(|line| { + let mut parts = line.split('\t'); + let raw_status = parts.next()?.trim(); + if raw_status.is_empty() { + return None; + } + + let status = match raw_status.chars().next().unwrap_or_default() { + 'A' => GitChangedFileStatus::Added, + 'M' => GitChangedFileStatus::Modified, + 'D' => GitChangedFileStatus::Deleted, + 'R' => GitChangedFileStatus::Renamed, + 'C' => GitChangedFileStatus::Copied, + _ => GitChangedFileStatus::Unknown, + }; + + match status { + GitChangedFileStatus::Renamed | GitChangedFileStatus::Copied => { + let old_path = parts.next()?.to_string(); + let path = parts.next()?.to_string(); + Some(GitChangedFile { + path, + old_path: Some(old_path), + status, + }) + } + _ 
=> { + let path = parts.next()?.to_string(); + Some(GitChangedFile { + path, + old_path: None, + status, + }) + } + } + }) + .collect() +} + impl GitService { /// Checks whether the path is a Git repository. pub async fn is_repository>(path: P) -> Result { @@ -769,6 +811,38 @@ impl GitService { execute_git_command(&repo_path, &args).await } + /// Gets changed files using `git diff --name-status`. + pub async fn get_changed_files>( + path: P, + params: &GitChangedFilesParams, + ) -> Result, GitError> { + let repo_path = path.as_ref().to_string_lossy(); + + let mut args = vec!["diff", "--name-status"]; + let range; + + if params.staged.unwrap_or(false) { + args.push("--cached"); + } + + match (¶ms.source, ¶ms.target) { + (Some(src), Some(tgt)) => { + range = format!("{}..{}", src, tgt); + args.push(&range); + } + (Some(src), None) => { + args.push(src); + } + (None, Some(tgt)) => { + args.push(tgt); + } + (None, None) => {} + } + + let output = execute_git_command(&repo_path, &args).await?; + Ok(parse_name_status_output(&output)) + } + /// Gets file content. 
/// /// # Parameters @@ -1125,3 +1199,46 @@ impl GitService { }) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_name_status_output_for_common_statuses() { + let files = parse_name_status_output( + "M\tsrc/main.rs\nA\tsrc/new.rs\nD\tsrc/old.rs\nR100\tsrc/old_name.rs\tsrc/new_name.rs\nC087\tsrc/source.rs\tsrc/copy.rs\n", + ); + + assert_eq!( + files, + vec![ + GitChangedFile { + path: "src/main.rs".to_string(), + old_path: None, + status: GitChangedFileStatus::Modified, + }, + GitChangedFile { + path: "src/new.rs".to_string(), + old_path: None, + status: GitChangedFileStatus::Added, + }, + GitChangedFile { + path: "src/old.rs".to_string(), + old_path: None, + status: GitChangedFileStatus::Deleted, + }, + GitChangedFile { + path: "src/new_name.rs".to_string(), + old_path: Some("src/old_name.rs".to_string()), + status: GitChangedFileStatus::Renamed, + }, + GitChangedFile { + path: "src/copy.rs".to_string(), + old_path: Some("src/source.rs".to_string()), + status: GitChangedFileStatus::Copied, + }, + ], + ); + } +} diff --git a/src/crates/core/src/service/git/git_types.rs b/src/crates/core/src/service/git/git_types.rs index 4bd937317..7d2786ce3 100644 --- a/src/crates/core/src/service/git/git_types.rs +++ b/src/crates/core/src/service/git/git_types.rs @@ -163,6 +163,31 @@ pub struct GitDiffParams { pub stat: Option, } +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct GitChangedFilesParams { + pub source: Option, + pub target: Option, + pub staged: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum GitChangedFileStatus { + Added, + Modified, + Deleted, + Renamed, + Copied, + Unknown, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct GitChangedFile { + pub path: String, + pub old_path: Option, + pub status: GitChangedFileStatus, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GitOperationResult { pub success: 
bool, diff --git a/src/crates/core/src/service/remote_connect/remote_server.rs b/src/crates/core/src/service/remote_connect/remote_server.rs index 51b003ef7..9276304d0 100644 --- a/src/crates/core/src/service/remote_connect/remote_server.rs +++ b/src/crates/core/src/service/remote_connect/remote_server.rs @@ -1780,6 +1780,7 @@ impl RemoteExecutionDispatcher { binding_workspace, submission_policy, None, + None, image_payload, ) .await diff --git a/src/crates/core/src/service/session/types.rs b/src/crates/core/src/service/session/types.rs index 69eb05a34..c989562ac 100644 --- a/src/crates/core/src/service/session/types.rs +++ b/src/crates/core/src/service/session/types.rs @@ -78,6 +78,27 @@ pub struct SessionMetadata { #[serde(skip_serializing_if = "Option::is_none")] pub todos: Option, + /// Deep Review run manifest for this session, when the session was launched + /// from Code Review Team. + #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "deep_review_run_manifest", + alias = "deepReviewRunManifest" + )] + pub deep_review_run_manifest: Option, + + /// Cached reviewer outputs from previous deep review runs in this session. + /// Keyed by packet_id, value is the reviewer's output text. + /// Used for incremental review: when the fingerprint matches, skip re-dispatching. 
+ #[serde( + default, + skip_serializing_if = "Option::is_none", + alias = "deep_review_cache", + alias = "deepReviewCache" + )] + pub deep_review_cache: Option, + /// Workspace path this session belongs to (normalized source workspace root, not mirror dir) #[serde(skip_serializing_if = "Option::is_none", alias = "workspace_path")] pub workspace_path: Option, @@ -607,6 +628,8 @@ impl SessionMetadata { tags: Vec::new(), custom_metadata: None, todos: None, + deep_review_run_manifest: None, + deep_review_cache: None, workspace_path: None, workspace_hostname: None, unread_completion: None, @@ -924,4 +947,44 @@ mod tests { assert_eq!(encoded["queueWaitMs"], 7); assert_eq!(encoded["executionMs"], 69); } + + #[test] + fn session_metadata_preserves_deep_review_run_manifest() { + let payload = serde_json::json!({ + "sessionId": "session-1", + "sessionName": "Deep Review", + "agentType": "DeepReview", + "sessionKind": "standard", + "modelName": "fast", + "createdAt": 1, + "lastActiveAt": 1, + "turnCount": 0, + "messageCount": 0, + "toolCallCount": 0, + "status": "active", + "deep_review_run_manifest": { + "reviewMode": "deep", + "coreReviewers": [ + { "subagentId": "ReviewBusinessLogic" } + ], + "skippedReviewers": [ + { "subagentId": "ReviewFrontend", "reason": "not_applicable" } + ] + } + }); + + let metadata: SessionMetadata = + serde_json::from_value(payload).expect("metadata should deserialize"); + + assert_eq!( + metadata.deep_review_run_manifest.as_ref().unwrap()["reviewMode"], + "deep" + ); + + let serialized = serde_json::to_value(&metadata).expect("metadata should serialize"); + assert_eq!( + serialized["deepReviewRunManifest"]["coreReviewers"][0]["subagentId"], + "ReviewBusinessLogic" + ); + } } diff --git a/src/crates/core/tests/context_profile.rs b/src/crates/core/tests/context_profile.rs new file mode 100644 index 000000000..096a941d1 --- /dev/null +++ b/src/crates/core/tests/context_profile.rs @@ -0,0 +1,172 @@ +use bitfun_core::agentic::context_profile::{ + 
ContextProfile, ContextProfilePolicy, ModelCapabilityProfile, +}; +use bitfun_core::agentic::session::MicrocompactConfig; + +#[test] +fn context_profile_maps_long_running_agents_to_long_task_profile() { + for agent_type in [ + "agentic", + "DeepReview", + "DeepResearch", + "ComputerUse", + "Team", + "ReviewFrontend", + "ReviewSecurity", + ] { + assert_eq!( + ContextProfile::for_agent_type(agent_type), + ContextProfile::LongTask, + "{agent_type} should use the long-task profile" + ); + } +} + +#[test] +fn context_profile_maps_conversation_agents_to_conversation_profile() { + for agent_type in ["Cowork", "Plan", "Claw", "unknown-custom-agent"] { + assert_eq!( + ContextProfile::for_agent_type(agent_type), + ContextProfile::Conversation, + "{agent_type} should use the conversation profile" + ); + } +} + +#[test] +fn context_profile_review_custom_subagents_can_be_promoted_to_long_task_profile() { + assert_eq!( + ContextProfile::for_agent_context("legal-domain-reviewer", true), + ContextProfile::LongTask + ); + assert_eq!( + ContextProfile::for_agent_context("legal-domain-reviewer", false), + ContextProfile::Conversation + ); +} + +#[test] +fn context_profile_long_task_policy_preserves_current_context_defaults() { + let policy = ContextProfilePolicy::for_agent_context( + "DeepReview", + false, + ModelCapabilityProfile::Standard, + ); + let default_microcompact = MicrocompactConfig::default(); + + assert_eq!(policy.profile, ContextProfile::LongTask); + assert_eq!( + policy.microcompact_config().keep_recent, + default_microcompact.keep_recent + ); + assert!( + (policy.microcompact_config().trigger_ratio - default_microcompact.trigger_ratio).abs() + < f32::EPSILON + ); + assert_eq!(policy.compression_contract_limit, 8); + assert_eq!(policy.subagent_concurrency_cap, 5); + assert_eq!(policy.repeated_tool_signature_threshold, 3); + assert_eq!(policy.consecutive_failed_command_threshold, 2); +} + +#[test] +fn context_profile_conversation_policy_keeps_more_recent_chat_context() 
{ + let policy = + ContextProfilePolicy::for_agent_context("Cowork", false, ModelCapabilityProfile::Standard); + + assert_eq!(policy.profile, ContextProfile::Conversation); + assert_eq!(policy.microcompact_config().keep_recent, 12); + assert!((policy.microcompact_config().trigger_ratio - 0.65).abs() < f32::EPSILON); + assert_eq!(policy.compression_contract_limit, 4); + assert_eq!(policy.subagent_concurrency_cap, 2); + assert_eq!(policy.repeated_tool_signature_threshold, 4); + assert_eq!(policy.consecutive_failed_command_threshold, 3); +} + +#[test] +fn context_profile_weak_model_override_shortens_contract_and_caps_fanout() { + let standard = ContextProfilePolicy::for_agent_context( + "DeepReview", + false, + ModelCapabilityProfile::Standard, + ); + let weak = + ContextProfilePolicy::for_agent_context("DeepReview", false, ModelCapabilityProfile::Weak); + + assert_eq!(weak.profile, ContextProfile::LongTask); + assert!(weak.compression_contract_limit < standard.compression_contract_limit); + assert!(weak.subagent_concurrency_cap < standard.subagent_concurrency_cap); + assert!(weak.repeated_tool_signature_threshold < standard.repeated_tool_signature_threshold); + assert_eq!(weak.compression_contract_limit, 4); + assert_eq!(weak.subagent_concurrency_cap, 2); + assert_eq!(weak.repeated_tool_signature_threshold, 2); +} + +#[test] +fn context_profile_model_capability_profile_only_marks_explicit_weak_models() { + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("claude-3-haiku")), + ModelCapabilityProfile::Weak + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("gpt-5.4-mini")), + ModelCapabilityProfile::Weak + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(Some("fast")), + ModelCapabilityProfile::Standard, + "configured model slots should not be treated as weak before resolving" + ); + assert_eq!( + ModelCapabilityProfile::from_model_id(None), + ModelCapabilityProfile::Standard + ); +} + +#[test] +fn 
context_profile_configured_subagent_concurrency_is_capped_by_policy() { + let long_task = ContextProfilePolicy::for_agent_context( + "DeepReview", + false, + ModelCapabilityProfile::Standard, + ); + let conversation = + ContextProfilePolicy::for_agent_context("Cowork", false, ModelCapabilityProfile::Standard); + + assert_eq!(long_task.effective_subagent_max_concurrency(64), 5); + assert_eq!(long_task.effective_subagent_max_concurrency(3), 3); + assert_eq!(conversation.effective_subagent_max_concurrency(64), 2); + assert_eq!(conversation.effective_subagent_max_concurrency(1), 1); +} + +#[test] +fn context_profile_subagent_policy_combines_parent_workload_and_child_model() { + let policy = ContextProfilePolicy::for_subagent_context_and_models( + "custom-security-reviewer", + true, + Some("claude-3-haiku"), + Some("DeepReview"), + false, + Some("gpt-5"), + ); + + assert_eq!(policy.profile, ContextProfile::LongTask); + assert_eq!(policy.compression_contract_limit, 4); + assert_eq!(policy.subagent_concurrency_cap, 2); + assert_eq!(policy.repeated_tool_signature_threshold, 2); +} + +#[test] +fn context_profile_subagent_policy_inherits_parent_long_task_when_child_is_plain() { + let policy = ContextProfilePolicy::for_subagent_context_and_models( + "Explore", + false, + None, + Some("DeepReview"), + false, + Some("gpt-5"), + ); + + assert_eq!(policy.profile, ContextProfile::LongTask); + assert_eq!(policy.subagent_concurrency_cap, 5); +} diff --git a/src/crates/events/src/agentic.rs b/src/crates/events/src/agentic.rs index 2f576a301..069e34c4e 100644 --- a/src/crates/events/src/agentic.rs +++ b/src/crates/events/src/agentic.rs @@ -1,4 +1,4 @@ -//! Agentic Events Definition +//! 
Agentic Events Definition pub use bitfun_core_types::errors::{AiErrorDetail, ErrorCategory}; use serde::{Deserialize, Serialize}; use std::time::SystemTime; @@ -21,6 +21,49 @@ pub struct SubagentParentInfo { pub dialog_turn_id: String, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DeepReviewQueueStatus { + QueuedForCapacity, + PausedByUser, + Running, + CapacitySkipped, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum DeepReviewQueueReason { + ProviderRateLimit, + ProviderConcurrencyLimit, + RetryAfter, + LocalConcurrencyCap, + TemporaryOverload, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct DeepReviewQueueState { + pub tool_id: String, + pub subagent_type: String, + pub status: DeepReviewQueueStatus, + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, + pub queued_reviewer_count: usize, + #[serde(skip_serializing_if = "Option::is_none")] + pub active_reviewer_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub effective_parallel_instances: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub optional_reviewer_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub queue_elapsed_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub run_elapsed_ms: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub max_queue_wait_seconds: Option, + #[serde(default)] + pub session_concurrency_high: bool, +} + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type")] pub enum AgenticEvent { @@ -218,6 +261,13 @@ pub enum AgenticEvent { subagent_parent_info: Option, }, + DeepReviewQueueStateChanged { + session_id: String, + turn_id: String, + queue_state: DeepReviewQueueState, + subagent_parent_info: Option, + }, + SystemError { session_id: Option, error: String, @@ -377,7 +427,8 @@ impl PartialEq for 
AgenticEventEnvelope { #[cfg(test)] mod tests { - use super::{AgenticEvent, ToolEventData}; + use super::*; + use serde_json::json; #[test] fn model_round_completed_serializes_optional_timing_fields() { @@ -505,6 +556,49 @@ mod tests { assert_eq!(json["duration_ms"], 120); assert_eq!(json["execution_ms"], 90); } + + #[test] + fn deep_review_queue_state_event_serializes_stable_contract() { + let event = AgenticEvent::DeepReviewQueueStateChanged { + session_id: "review-session".to_string(), + turn_id: "turn-1".to_string(), + queue_state: DeepReviewQueueState { + tool_id: "task-1".to_string(), + subagent_type: "ReviewSecurity".to_string(), + status: DeepReviewQueueStatus::QueuedForCapacity, + reason: Some(DeepReviewQueueReason::ProviderConcurrencyLimit), + queued_reviewer_count: 2, + active_reviewer_count: Some(1), + effective_parallel_instances: Some(2), + optional_reviewer_count: Some(1), + queue_elapsed_ms: Some(1200), + run_elapsed_ms: None, + max_queue_wait_seconds: Some(60), + session_concurrency_high: true, + }, + subagent_parent_info: None, + }; + + assert_eq!(event.session_id(), Some("review-session")); + assert_eq!(event.default_priority(), AgenticEventPriority::High); + + let serialized = serde_json::to_value(event).expect("serialize event"); + assert_eq!(serialized["type"], "DeepReviewQueueStateChanged"); + assert_eq!(serialized["queue_state"]["status"], "queued_for_capacity"); + assert_eq!( + serialized["queue_state"]["reason"], + json!("provider_concurrency_limit") + ); + assert_eq!(serialized["queue_state"]["queue_elapsed_ms"], json!(1200)); + assert_eq!( + serialized["queue_state"]["effective_parallel_instances"], + json!(2) + ); + assert_eq!( + serialized["queue_state"]["run_elapsed_ms"], + serde_json::Value::Null + ); + } } impl Eq for AgenticEventEnvelope {} @@ -559,6 +653,7 @@ impl AgenticEvent { | Self::ModelRoundCompleted { session_id, .. } | Self::ToolEvent { session_id, .. } | Self::UserSteeringInjected { session_id, .. 
} + | Self::DeepReviewQueueStateChanged { session_id, .. } | Self::SessionModelAutoMigrated { session_id, .. } => Some(session_id), Self::SystemError { session_id, .. } => session_id.as_deref(), } @@ -574,6 +669,7 @@ impl AgenticEvent { Self::SessionStateChanged { .. } | Self::SessionTitleGenerated { .. } | Self::SessionModelAutoMigrated { .. } + | Self::DeepReviewQueueStateChanged { .. } | Self::ContextCompressionFailed { .. } => AgenticEventPriority::High, Self::ImageAnalysisStarted { .. } diff --git a/src/crates/events/src/lib.rs b/src/crates/events/src/lib.rs index cb983ace6..fe3884ac0 100644 --- a/src/crates/events/src/lib.rs +++ b/src/crates/events/src/lib.rs @@ -9,7 +9,8 @@ pub mod emitter; pub mod types; pub use agentic::{ - AgenticEvent, AgenticEventEnvelope, AgenticEventPriority, SubagentParentInfo, ToolEventData, + AgenticEvent, AgenticEventEnvelope, AgenticEventPriority, DeepReviewQueueReason, + DeepReviewQueueState, DeepReviewQueueStatus, SubagentParentInfo, ToolEventData, }; pub use emitter::EventEmitter; pub use types::*; diff --git a/src/crates/transport/src/adapters/tauri.rs b/src/crates/transport/src/adapters/tauri.rs index bada82692..5d48a2442 100644 --- a/src/crates/transport/src/adapters/tauri.rs +++ b/src/crates/transport/src/adapters/tauri.rs @@ -388,6 +388,35 @@ impl TransportAdapter for TauriTransportAdapter { }), )?; } + AgenticEvent::DeepReviewQueueStateChanged { + session_id, + turn_id, + queue_state, + subagent_parent_info, + } => { + self.app_handle.emit( + "agentic://deep-review-queue-state-changed", + json!({ + "sessionId": session_id, + "turnId": turn_id, + "queueState": { + "toolId": queue_state.tool_id, + "subagentType": queue_state.subagent_type, + "status": queue_state.status, + "reason": queue_state.reason, + "queuedReviewerCount": queue_state.queued_reviewer_count, + "activeReviewerCount": queue_state.active_reviewer_count, + "effectiveParallelInstances": queue_state.effective_parallel_instances, + "optionalReviewerCount": 
queue_state.optional_reviewer_count, + "queueElapsedMs": queue_state.queue_elapsed_ms, + "runElapsedMs": queue_state.run_elapsed_ms, + "maxQueueWaitSeconds": queue_state.max_queue_wait_seconds, + "sessionConcurrencyHigh": queue_state.session_concurrency_high, + }, + "subagentParentInfo": subagent_parent_info, + }), + )?; + } AgenticEvent::ModelRoundCompleted { session_id, turn_id, diff --git a/src/crates/transport/src/adapters/websocket.rs b/src/crates/transport/src/adapters/websocket.rs index c396c9d05..383469606 100644 --- a/src/crates/transport/src/adapters/websocket.rs +++ b/src/crates/transport/src/adapters/websocket.rs @@ -241,6 +241,33 @@ impl TransportAdapter for WebSocketTransportAdapter { "finishReason": finish_reason, }) } + AgenticEvent::DeepReviewQueueStateChanged { + session_id, + turn_id, + queue_state, + subagent_parent_info, + } => { + json!({ + "type": "deep-review-queue-state-changed", + "sessionId": session_id, + "turnId": turn_id, + "queueState": { + "toolId": queue_state.tool_id, + "subagentType": queue_state.subagent_type, + "status": queue_state.status, + "reason": queue_state.reason, + "queuedReviewerCount": queue_state.queued_reviewer_count, + "activeReviewerCount": queue_state.active_reviewer_count, + "effectiveParallelInstances": queue_state.effective_parallel_instances, + "optionalReviewerCount": queue_state.optional_reviewer_count, + "queueElapsedMs": queue_state.queue_elapsed_ms, + "runElapsedMs": queue_state.run_elapsed_ms, + "maxQueueWaitSeconds": queue_state.max_queue_wait_seconds, + "sessionConcurrencyHigh": queue_state.session_concurrency_high, + }, + "subagentParentInfo": subagent_parent_info, + }) + } _ => return Ok(()), }; diff --git a/src/web-ui/src/app/scenes/agents/AgentsScene.tsx b/src/web-ui/src/app/scenes/agents/AgentsScene.tsx index ee5f03eec..a23a4687f 100644 --- a/src/web-ui/src/app/scenes/agents/AgentsScene.tsx +++ b/src/web-ui/src/app/scenes/agents/AgentsScene.tsx @@ -198,6 +198,7 @@ const AgentsHomeView: React.FC = 
() => { availableTools, getModeSkills, counts, + hiddenAgentIds, loadAgents, getModeConfig, handleSetTools, @@ -262,8 +263,8 @@ const AgentsHomeView: React.FC = () => { const coreAgents = useMemo(() => allAgents.filter((agent) => CORE_AGENT_IDS.has(agent.id)), [allAgents]); const visibleAgents = useMemo( - () => filteredAgents.filter(isAgentInOverviewZone), - [filteredAgents], + () => filteredAgents.filter((agent) => isAgentInOverviewZone(agent, hiddenAgentIds)), + [filteredAgents, hiddenAgentIds], ); const scrollToZone = useCallback((targetId: string) => { diff --git a/src/web-ui/src/app/scenes/agents/agentVisibility.test.ts b/src/web-ui/src/app/scenes/agents/agentVisibility.test.ts new file mode 100644 index 000000000..73577d82a --- /dev/null +++ b/src/web-ui/src/app/scenes/agents/agentVisibility.test.ts @@ -0,0 +1,13 @@ +import { describe, expect, it } from 'vitest'; +import { isAgentInOverviewZone } from './agentVisibility'; + +describe('agentVisibility', () => { + it('hides review agents from backend-provided hidden ids', () => { + expect( + isAgentInOverviewZone( + { id: 'ReviewDocs' }, + new Set(['ReviewDocs']), + ), + ).toBe(false); + }); +}); diff --git a/src/web-ui/src/app/scenes/agents/agentVisibility.ts b/src/web-ui/src/app/scenes/agents/agentVisibility.ts index 65da93526..34031091a 100644 --- a/src/web-ui/src/app/scenes/agents/agentVisibility.ts +++ b/src/web-ui/src/app/scenes/agents/agentVisibility.ts @@ -1,6 +1,9 @@ /** Agent IDs hidden from the Agents overview UI (not listed, not counted). 
*/ -export const HIDDEN_AGENT_IDS = new Set([ +export const STATIC_HIDDEN_AGENT_IDS = new Set([ 'Claw', +]); + +export const FALLBACK_REVIEW_HIDDEN_AGENT_IDS = new Set([ 'DeepReview', 'ReviewBusinessLogic', 'ReviewPerformance', @@ -10,10 +13,18 @@ export const HIDDEN_AGENT_IDS = new Set([ 'ReviewJudge', ]); +export const HIDDEN_AGENT_IDS = new Set([ + ...STATIC_HIDDEN_AGENT_IDS, + ...FALLBACK_REVIEW_HIDDEN_AGENT_IDS, +]); + /** Core mode agents shown in the top zone only; excluded from overview zone list and counts. */ export const CORE_AGENT_IDS = new Set(['agentic', 'Cowork', 'ComputerUse']); /** Agents that appear in the bottom overview grid (same pool as filter chip counts). */ -export function isAgentInOverviewZone(agent: { id: string }): boolean { - return !HIDDEN_AGENT_IDS.has(agent.id) && !CORE_AGENT_IDS.has(agent.id); +export function isAgentInOverviewZone( + agent: { id: string }, + hiddenAgentIds: ReadonlySet = HIDDEN_AGENT_IDS, +): boolean { + return !hiddenAgentIds.has(agent.id) && !CORE_AGENT_IDS.has(agent.id); } diff --git a/src/web-ui/src/app/scenes/agents/components/AgentTeamCard.scss b/src/web-ui/src/app/scenes/agents/components/AgentTeamCard.scss index 291c6acb6..7a571c044 100644 --- a/src/web-ui/src/app/scenes/agents/components/AgentTeamCard.scss +++ b/src/web-ui/src/app/scenes/agents/components/AgentTeamCard.scss @@ -116,6 +116,8 @@ align-items: center; gap: 4px; flex-wrap: wrap; + min-width: 0; + max-width: 100%; position: relative; z-index: 1; } diff --git a/src/web-ui/src/app/scenes/agents/components/AgentTeamCard.test.tsx b/src/web-ui/src/app/scenes/agents/components/AgentTeamCard.test.tsx new file mode 100644 index 000000000..46409d5c8 --- /dev/null +++ b/src/web-ui/src/app/scenes/agents/components/AgentTeamCard.test.tsx @@ -0,0 +1,61 @@ +import React from 'react'; +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { renderToStaticMarkup } from 'react-dom/server'; +import { describe, expect, it } 
from 'vitest'; +import AgentTeamCard from './AgentTeamCard'; + +function readAgentTeamCardStylesheet(): string { + const stylesheet = readFileSync( + fileURLToPath(new URL('./AgentTeamCard.scss', import.meta.url)), + 'utf8', + ); + return stylesheet.replace(/\r\n/g, '\n'); +} + +function extractBlock(stylesheet: string, selector: string): string { + const escapedSelector = selector.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + const match = stylesheet.match(new RegExp(`${escapedSelector}\\s*\\{(?[\\s\\S]*?)\\n\\s*\\}`)); + return match?.groups?.body ?? ''; +} + +describe('AgentTeamCard', () => { + it('keeps role summary compact when the review team grows', () => { + const markup = renderToStaticMarkup( + undefined} + />, + ); + + const chipMatches = markup.match(/agent-team-card__tag-chip/g) ?? []; + expect(chipMatches).toHaveLength(3); + expect(markup).toContain('Business logic'); + expect(markup).toContain('Performance'); + expect(markup).toContain('Security'); + expect(markup).not.toContain('Architecture'); + expect(markup).not.toContain('Frontend'); + expect(markup).not.toContain('Judge'); + }); + + it('keeps role summary tags shrinkable and wrapping instead of clipping chips', () => { + const stylesheet = readAgentTeamCardStylesheet(); + const tagsBlock = extractBlock(stylesheet, '&__tags'); + const tagChipBlock = extractBlock(stylesheet, '&__tag-chip'); + + expect(tagsBlock).toContain('flex-wrap: wrap;'); + expect(tagsBlock).toContain('min-width: 0;'); + expect(tagsBlock).toContain('max-width: 100%;'); + expect(tagChipBlock).toContain('white-space: nowrap;'); + }); +}); diff --git a/src/web-ui/src/app/scenes/agents/components/ReviewTeamPage.tsx b/src/web-ui/src/app/scenes/agents/components/ReviewTeamPage.tsx index cc14943a3..0e6a096cf 100644 --- a/src/web-ui/src/app/scenes/agents/components/ReviewTeamPage.tsx +++ b/src/web-ui/src/app/scenes/agents/components/ReviewTeamPage.tsx @@ -31,8 +31,10 @@ import { useSettingsStore } from 
'@/app/scenes/settings/settingsStore'; import { useSceneStore } from '@/app/stores/sceneStore'; import { useAgentsStore } from '../agentsStore'; import { + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY, DEFAULT_REVIEW_TEAM_EXECUTION_POLICY, DEFAULT_REVIEW_TEAM_MODEL, + FALLBACK_REVIEW_TEAM_DEFINITION, loadDefaultReviewTeam, REVIEW_STRATEGY_DEFINITIONS, type ReviewStrategyLevel, @@ -166,6 +168,8 @@ const ReviewTeamPage: React.FC = () => { strategyLevel: 'normal', memberStrategyOverrides: {}, executionPolicy: { ...DEFAULT_REVIEW_TEAM_EXECUTION_POLICY }, + concurrencyPolicy: { ...DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY }, + definition: FALLBACK_REVIEW_TEAM_DEFINITION, members: [], coreMembers: [], extraMembers: [], diff --git a/src/web-ui/src/app/scenes/agents/components/subagentEditorUtils.test.ts b/src/web-ui/src/app/scenes/agents/components/subagentEditorUtils.test.ts index e606f0792..29307c7e9 100644 --- a/src/web-ui/src/app/scenes/agents/components/subagentEditorUtils.test.ts +++ b/src/web-ui/src/app/scenes/agents/components/subagentEditorUtils.test.ts @@ -1,13 +1,17 @@ import { describe, expect, it } from 'vitest'; import { + evaluateReviewSubagentToolReadiness, filterToolsForReviewMode, normalizeReviewModeState, type SubagentEditorToolInfo, } from './subagentEditorUtils'; const tools: SubagentEditorToolInfo[] = [ + { name: 'GetFileDiff', isReadonly: true }, { name: 'Read', isReadonly: true }, { name: 'Grep', isReadonly: true }, + { name: 'Glob', isReadonly: true }, + { name: 'LS', isReadonly: true }, { name: 'Write', isReadonly: false }, { name: 'Bash', isReadonly: false }, ]; @@ -15,12 +19,18 @@ const tools: SubagentEditorToolInfo[] = [ describe('subagentEditorUtils', () => { it('shows only readonly tools for review subagents', () => { expect(filterToolsForReviewMode(tools, true).map((tool) => tool.name)).toEqual([ + 'GetFileDiff', 'Read', 'Grep', + 'Glob', + 'LS', ]); expect(filterToolsForReviewMode(tools, false).map((tool) => tool.name)).toEqual([ + 'GetFileDiff', 
'Read', 'Grep', + 'Glob', + 'LS', 'Write', 'Bash', ]); @@ -38,4 +48,30 @@ describe('subagentEditorUtils', () => { expect(Array.from(next.selectedTools)).toEqual(['Read']); expect(next.removedToolNames).toEqual(['Write', 'Bash']); }); + + it('marks review subagent tooling invalid when the minimum diff or read tool is missing', () => { + expect(evaluateReviewSubagentToolReadiness(new Set(['Read']))).toMatchObject({ + readiness: 'invalid', + missingRequiredTools: ['GetFileDiff'], + }); + }); + + it('marks review subagent tooling degraded when only the minimum tools are present', () => { + expect(evaluateReviewSubagentToolReadiness(new Set(['GetFileDiff', 'Read']))).toMatchObject({ + readiness: 'degraded', + missingRecommendedTools: ['Grep', 'Glob', 'LS'], + }); + }); + + it('marks review subagent tooling ready when the standard review tools are present', () => { + expect( + evaluateReviewSubagentToolReadiness( + new Set(['GetFileDiff', 'Read', 'Grep', 'Glob', 'LS']), + ), + ).toMatchObject({ + readiness: 'ready', + missingRequiredTools: [], + missingRecommendedTools: [], + }); + }); }); diff --git a/src/web-ui/src/app/scenes/agents/components/subagentEditorUtils.ts b/src/web-ui/src/app/scenes/agents/components/subagentEditorUtils.ts index 806528f86..07c906d69 100644 --- a/src/web-ui/src/app/scenes/agents/components/subagentEditorUtils.ts +++ b/src/web-ui/src/app/scenes/agents/components/subagentEditorUtils.ts @@ -3,6 +3,15 @@ export interface SubagentEditorToolInfo { isReadonly: boolean; } +export { + REVIEW_SUBAGENT_OPTIONAL_TOOLS, + REVIEW_SUBAGENT_RECOMMENDED_TOOLS, + REVIEW_SUBAGENT_REQUIRED_TOOLS, + evaluateReviewSubagentToolReadiness, + type ReviewSubagentToolReadiness, + type ReviewSubagentToolReadinessResult, +} from '@/shared/services/reviewSubagentCapabilities'; + export function filterToolsForReviewMode( tools: SubagentEditorToolInfo[], review: boolean, diff --git a/src/web-ui/src/app/scenes/agents/hooks/useAgentsList.ts 
b/src/web-ui/src/app/scenes/agents/hooks/useAgentsList.ts index 2f77e7d72..c172540a2 100644 --- a/src/web-ui/src/app/scenes/agents/hooks/useAgentsList.ts +++ b/src/web-ui/src/app/scenes/agents/hooks/useAgentsList.ts @@ -7,8 +7,9 @@ import type { ModeConfigItem, ModeSkillInfo } from '@/infrastructure/config/type import { useNotification } from '@/shared/notification-system'; import type { AgentWithCapabilities } from '../agentsStore'; import { enrichCapabilities } from '../utils'; -import { isAgentInOverviewZone } from '../agentVisibility'; +import { STATIC_HIDDEN_AGENT_IDS, isAgentInOverviewZone } from '../agentVisibility'; import { useCurrentWorkspace } from '@/infrastructure/contexts/WorkspaceContext'; +import { loadDefaultReviewTeamDefinition } from '@/shared/services/reviewTeamService'; export type FilterLevel = 'all' | 'builtin' | 'user' | 'project'; export type FilterType = 'all' | 'mode' | 'subagent'; @@ -39,6 +40,9 @@ export function useAgentsList({ const [availableTools, setAvailableTools] = useState([]); const [modeSkills, setModeSkills] = useState>({}); const [modeConfigs, setModeConfigs] = useState>({}); + const [hiddenAgentIds, setHiddenAgentIds] = useState>( + () => new Set(STATIC_HIDDEN_AGENT_IDS), + ); const loadRequestIdRef = useRef(0); const loadAgents = useCallback(async () => { @@ -55,11 +59,12 @@ export function useAgentsList({ }; try { - const [modes, subagents, tools, configs] = await Promise.all([ + const [modes, subagents, tools, configs, reviewTeamDefinition] = await Promise.all([ agentAPI.getAvailableModes().catch(() => []), SubagentAPI.listSubagents({ workspacePath: workspacePath || undefined }).catch(() => []), fetchTools(), configAPI.getModeConfigs().catch(() => ({})), + loadDefaultReviewTeamDefinition().catch(() => undefined), ]); const skillEntries = await Promise.all( modes.map(async (mode) => [ @@ -101,6 +106,10 @@ export function useAgentsList({ setAvailableTools(tools); setModeSkills(Object.fromEntries(skillEntries)); 
setModeConfigs(configs as Record); + setHiddenAgentIds(new Set([ + ...STATIC_HIDDEN_AGENT_IDS, + ...(reviewTeamDefinition?.hiddenAgentIds ?? []), + ])); } finally { if (requestId === loadRequestIdRef.current) { setLoading(false); @@ -233,8 +242,8 @@ export function useAgentsList({ }), [allAgents, filterLevel, filterType, searchQuery]); const overviewAgents = useMemo( - () => allAgents.filter(isAgentInOverviewZone), - [allAgents], + () => allAgents.filter((agent) => isAgentInOverviewZone(agent, hiddenAgentIds)), + [allAgents, hiddenAgentIds], ); const counts = useMemo(() => ({ @@ -253,6 +262,7 @@ export function useAgentsList({ availableTools, getModeSkills, counts, + hiddenAgentIds, loadAgents, getModeConfig, handleSetTools, diff --git a/src/web-ui/src/flow_chat/components/ChatInput.tsx b/src/web-ui/src/flow_chat/components/ChatInput.tsx index 1483513d9..6f9b33d27 100644 --- a/src/web-ui/src/flow_chat/components/ChatInput.tsx +++ b/src/web-ui/src/flow_chat/components/ChatInput.tsx @@ -38,8 +38,9 @@ import { runUsageReportCommand } from '../services/usageReportService'; import { FlowChatManager } from '@/flow_chat'; import { DEEP_REVIEW_SLASH_COMMAND, - buildDeepReviewPromptFromSlashCommand, getDeepReviewLaunchErrorMessage, + buildDeepReviewLaunchFromSlashCommand, + buildDeepReviewPreviewFromSlashCommand, isDeepReviewSlashCommand, launchDeepReviewSession, } from '../services/DeepReviewService'; @@ -64,6 +65,7 @@ import { useDeepReviewConsent } from './DeepReviewConsentDialog'; import { useAgentCompanionActivity } from '../hooks/useAgentCompanionActivity'; import { useSessionReviewActivity } from '../hooks/useSessionReviewActivity'; import { shouldBlockDeepReviewCommand } from '../utils/deepReviewCommandGuard'; +import { deriveDeepReviewSessionConcurrencyGuard } from '../utils/deepReviewCapacityGuard'; import './ChatInput.scss'; const log = createLogger('ChatInput'); @@ -1553,24 +1555,34 @@ export const ChatInput: React.FC = ({ return; } - const confirmed = await 
confirmDeepReviewLaunch(); - if (!confirmed) { - return; - } - const originalPendingLargePastes = { ...pendingLargePastesRef.current }; - if (effectiveTargetSessionId) { - addToHistory(effectiveTargetSessionId, message); - } - setHistoryIndex(-1); - setSavedDraft(''); - dispatchInput({ type: 'CLEAR_VALUE' }); - clearPendingLargePastes(); - setQueuedInput(null); - setSlashCommandState({ isActive: false, kind: 'modes', query: '', selectedIndex: 0 }); try { - const prompt = await buildDeepReviewPromptFromSlashCommand( + const preview = await buildDeepReviewPreviewFromSlashCommand( + message, + effectiveTargetSession.workspacePath, + ); + const confirmed = await confirmDeepReviewLaunch(preview, { + sessionConcurrencyGuard: deriveDeepReviewSessionConcurrencyGuard( + flowChatState, + effectiveTargetSessionId, + ), + }); + if (!confirmed) { + return; + } + + if (effectiveTargetSessionId) { + addToHistory(effectiveTargetSessionId, message); + } + setHistoryIndex(-1); + setSavedDraft(''); + dispatchInput({ type: 'CLEAR_VALUE' }); + clearPendingLargePastes(); + setQueuedInput(null); + setSlashCommandState({ isActive: false, kind: 'modes', query: '', selectedIndex: 0 }); + + const { prompt, runManifest } = await buildDeepReviewLaunchFromSlashCommand( message, effectiveTargetSession.workspacePath, ); @@ -1580,6 +1592,7 @@ export const ChatInput: React.FC = ({ workspacePath: effectiveTargetSession.workspacePath, prompt, displayMessage: message, + runManifest, childSessionName: t('chatInput.deepreviewThreadTitle', { defaultValue: 'Deep review', }), @@ -1608,6 +1621,7 @@ export const ChatInput: React.FC = ({ currentReviewActivity, effectiveTargetSession, effectiveTargetSessionId, + flowChatState, inputState.value, isBtwSession, setQueuedInput, diff --git a/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.scss b/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.scss index ca709bd5d..c38a98d97 100644 --- 
a/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.scss +++ b/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.scss @@ -35,6 +35,12 @@ border: 1px solid color-mix(in srgb, var(--deep-review-accent) 22%, transparent); } +.deep-review-consent__fact-icon--warning { + color: color-mix(in srgb, var(--color-warning, #f59e0b) 82%, var(--color-text-primary)); + background: color-mix(in srgb, var(--color-warning, #f59e0b) 12%, transparent); + border-color: color-mix(in srgb, var(--color-warning, #f59e0b) 24%, transparent); +} + .deep-review-consent__heading { min-width: 0; @@ -43,7 +49,7 @@ color: var(--color-text-primary); font-size: 21px; font-weight: 680; - letter-spacing: -0.025em; + letter-spacing: 0; line-height: 1.22; } } @@ -52,7 +58,7 @@ color: color-mix(in srgb, var(--deep-review-accent) 58%, var(--color-text-muted)); font-size: 11px; font-weight: 720; - letter-spacing: 0.08em; + letter-spacing: 0; text-transform: uppercase; } @@ -92,8 +98,7 @@ line-height: 1.7; } -.deep-review-consent__safety-note, -.deep-review-consent__fact { +.deep-review-consent__safety-note { display: grid; grid-template-columns: auto minmax(0, 1fr); align-items: flex-start; @@ -118,16 +123,189 @@ color-mix(in srgb, var(--color-bg-elevated) 86%, transparent); } -.deep-review-consent__facts { +.deep-review-consent__capacity-note { display: grid; - grid-template-columns: repeat(2, minmax(0, 1fr)); + grid-template-columns: auto minmax(0, 1fr); + align-items: flex-start; gap: 12px; + padding: 12px 14px; + border: 1px solid color-mix(in srgb, var(--color-warning, #f59e0b) 28%, var(--border-subtle)); + border-radius: 8px; + background: color-mix(in srgb, var(--color-warning, #f59e0b) 8%, var(--color-bg-elevated)); + + p { + margin: 4px 0 0; + color: var(--color-text-secondary); + font-size: 12px; + line-height: 1.58; + } } -.deep-review-consent__fact { - min-height: 118px; +.deep-review-consent__summary { + display: flex; + flex-direction: column; + gap: 12px; padding: 14px; 
+ border: 1px solid var(--border-subtle); + border-radius: 8px; + background: color-mix(in srgb, var(--color-bg-elevated) 88%, transparent); +} + +.deep-review-consent__summary-header { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: 12px; + align-items: flex-start; + + p { + margin: 4px 0 0; + color: var(--color-text-secondary); + font-size: 12px; + line-height: 1.55; + } +} + +.deep-review-consent__summary-stats { + display: flex; + flex-wrap: wrap; + gap: 8px; + + span { + display: inline-flex; + align-items: center; + min-height: 24px; + padding: 2px 8px; + border: 1px solid color-mix(in srgb, var(--border-subtle) 88%, var(--deep-review-accent)); + border-radius: 999px; + color: var(--color-text-secondary); + background: color-mix(in srgb, var(--deep-review-accent) 7%, transparent); + font-size: 11px; + font-weight: 650; + line-height: 1.3; + } +} + +.deep-review-consent__summary-stats .deep-review-consent__summary-stat--warning { + border-color: color-mix(in srgb, var(--color-warning, #f59e0b) 34%, var(--border-subtle)); + background: color-mix(in srgb, var(--color-warning, #f59e0b) 9%, transparent); +} + +.deep-review-consent__reviewer-group { + display: flex; + flex-direction: column; + gap: 8px; +} + +.deep-review-consent__strategy-control { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: 12px; + align-items: center; + padding: 10px; + border: 1px solid var(--border-subtle); border-radius: 8px; + background: color-mix(in srgb, var(--color-bg-primary) 78%, transparent); + + p { + margin: 4px 0 0; + color: var(--color-text-secondary); + font-size: 12px; + line-height: 1.45; + } +} + +.deep-review-consent__strategy-options { + display: inline-flex; + flex-wrap: wrap; + justify-content: flex-end; + gap: 6px; +} + +.deep-review-consent__strategy-option { + min-height: 28px; + padding: 4px 9px; + border: 1px solid var(--border-subtle); + border-radius: 6px; + background: color-mix(in srgb, var(--color-bg-elevated) 90%, 
transparent); + color: var(--color-text-secondary); + cursor: pointer; + font-size: 12px; + font-weight: 650; + line-height: 1.25; + transition: + background 160ms ease, + border-color 160ms ease, + color 160ms ease; + + &:hover { + border-color: color-mix(in srgb, var(--deep-review-accent) 34%, var(--border-subtle)); + background: color-mix(in srgb, var(--deep-review-accent) 9%, var(--color-bg-elevated)); + color: var(--color-text-primary); + } +} + +.deep-review-consent__strategy-option--active { + border-color: color-mix(in srgb, var(--deep-review-accent) 58%, var(--border-base)); + background: color-mix(in srgb, var(--deep-review-accent) 15%, var(--color-bg-elevated)); + color: var(--color-text-primary); +} + +.deep-review-consent__reviewer-group-title { + display: inline-flex; + align-items: center; + gap: 6px; + color: var(--color-text-secondary); + font-size: 11px; + font-weight: 720; + text-transform: uppercase; +} + +.deep-review-consent__reviewer-group-title--warning { + color: color-mix(in srgb, var(--color-warning, #f59e0b) 78%, var(--color-text-primary)); +} + +.deep-review-consent__skipped-list { + display: flex; + flex-direction: column; + gap: 6px; + margin: 0; + padding: 0; + list-style: none; + + li { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: 10px; + align-items: center; + min-height: 28px; + padding: 6px 8px; + border: 1px solid color-mix(in srgb, var(--color-warning, #f59e0b) 20%, var(--border-subtle)); + border-radius: 6px; + background: color-mix(in srgb, var(--color-warning, #f59e0b) 6%, transparent); + color: var(--color-text-secondary); + font-size: 12px; + line-height: 1.35; + + span { + min-width: 0; + color: var(--color-text-primary); + overflow-wrap: anywhere; + } + + strong { + color: color-mix(in srgb, var(--color-warning, #f59e0b) 72%, var(--color-text-primary)); + font-size: 11px; + font-weight: 720; + text-align: right; + white-space: normal; + } + } +} + +li.deep-review-consent__skipped-more { + 
grid-template-columns: minmax(0, 1fr); + background: color-mix(in srgb, var(--color-bg-primary) 82%, transparent); + color: var(--color-text-muted); } .deep-review-consent__fact-icon { @@ -143,14 +321,6 @@ line-height: 1.35; } -.deep-review-consent__token-estimate { - margin: 6px 0 0; - color: var(--color-text-muted); - font-size: 11px; - font-weight: 500; - font-variant-numeric: tabular-nums; -} - .deep-review-consent__footer { display: flex; align-items: center; @@ -191,11 +361,22 @@ grid-template-columns: minmax(0, 1fr) auto; } - .deep-review-consent__facts, .deep-review-consent__footer { grid-template-columns: 1fr; } + .deep-review-consent__skipped-list li { + grid-template-columns: 1fr; + } + + .deep-review-consent__strategy-control { + grid-template-columns: 1fr; + } + + .deep-review-consent__strategy-options { + justify-content: flex-start; + } + .deep-review-consent__footer { flex-direction: column; align-items: stretch; diff --git a/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.test.tsx b/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.test.tsx new file mode 100644 index 000000000..a84ecbed5 --- /dev/null +++ b/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.test.tsx @@ -0,0 +1,367 @@ +import React from 'react'; +import { act } from 'react'; +import { createRoot, type Root } from 'react-dom/client'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { useDeepReviewConsent } from './DeepReviewConsentDialog'; +import type { ReviewTeamRunManifest } from '@/shared/services/reviewTeamService'; + +const mockSaveReviewTeamProjectStrategyOverride = vi.hoisted(() => vi.fn()); + +vi.mock('react-i18next', () => ({ + useTranslation: () => ({ + t: (_key: string, options?: Record) => { + const value = typeof options?.defaultValue === 'string' ? options.defaultValue : _key; + return value.replace(/\{\{(\w+)\}\}/g, (_match, key) => String(options?.[key] ?? 
'')); + }, + }), +})); + +vi.mock('@/component-library', () => ({ + Button: ({ + children, + onClick, + }: { + children: React.ReactNode; + onClick?: () => void; + }) => , + Checkbox: ({ + checked, + label, + onChange, + }: { + checked: boolean; + label: string; + onChange: (event: React.ChangeEvent) => void; + }) => ( + + ), + Modal: ({ + children, + isOpen, + }: { + children: React.ReactNode; + isOpen: boolean; + }) => (isOpen ?
<div role="dialog">{children}</div>
: null), +})); + +vi.mock('@/shared/services/reviewTeamService', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + saveReviewTeamProjectStrategyOverride: ( + ...args: Parameters + ) => mockSaveReviewTeamProjectStrategyOverride(...args), + }; +}); + +let JSDOMCtor: (new ( + html?: string, + options?: { pretendToBeVisual?: boolean; url?: string } +) => { window: Window & typeof globalThis }) | null = null; + +try { + const jsdom = await import('jsdom'); + JSDOMCtor = jsdom.JSDOM as typeof JSDOMCtor; +} catch { + JSDOMCtor = null; +} + +const describeWithJsdom = JSDOMCtor ? describe : describe.skip; + +function Harness({ + preview, + launchContext, + onResult, +}: { + preview?: ReviewTeamRunManifest; + launchContext?: unknown; + onResult: (confirmed: boolean) => void; +}) { + const { confirmDeepReviewLaunch, deepReviewConsentDialog } = useDeepReviewConsent(); + + return ( + <> + + {deepReviewConsentDialog} + + ); +} + +function buildPreview(): ReviewTeamRunManifest { + return { + reviewMode: 'deep', + workspacePath: '/test-fixtures/project-a', + policySource: 'default-review-team-config', + target: { + source: 'session_files', + resolution: 'resolved', + tags: ['backend_core'], + files: ['src/crates/core/src/service/config/types.rs'], + warnings: [], + }, + strategyLevel: 'normal', + strategyRecommendation: { + strategyLevel: 'deep', + score: 24, + rationale: 'Large/high-risk change (8 files, 900 lines; 2 security-sensitive files, 3 workspace areas). 
Deep review recommended.', + factors: { + fileCount: 8, + totalLinesChanged: 900, + lineCountSource: 'diff_stat', + securityFileCount: 2, + workspaceAreaCount: 3, + contractSurfaceChanged: true, + }, + }, + executionPolicy: { + reviewerTimeoutSeconds: 300, + judgeTimeoutSeconds: 240, + reviewerFileSplitThreshold: 20, + maxSameRoleInstances: 3, + }, + tokenBudget: { + mode: 'balanced', + estimatedReviewerCalls: 3, + maxReviewerCalls: 4, + maxExtraReviewers: 1, + largeDiffSummaryFirst: false, + skippedReviewerIds: [], + warnings: [], + }, + coreReviewers: [ + { + subagentId: 'ReviewBusinessLogic', + displayName: 'Logic reviewer', + roleName: 'Business Logic Reviewer', + model: 'fast', + configuredModel: 'fast', + defaultModelSlot: 'fast', + strategyLevel: 'normal', + strategySource: 'team', + strategyDirective: 'Review logic.', + locked: true, + source: 'core', + subagentSource: 'builtin', + }, + ], + qualityGateReviewer: { + subagentId: 'ReviewJudge', + displayName: 'Quality inspector', + roleName: 'Review Quality Inspector', + model: 'fast', + configuredModel: 'fast', + defaultModelSlot: 'fast', + strategyLevel: 'normal', + strategySource: 'team', + strategyDirective: 'Check report quality.', + locked: true, + source: 'core', + subagentSource: 'builtin', + }, + enabledExtraReviewers: [ + { + subagentId: 'CustomSecurity', + displayName: 'Custom security reviewer', + roleName: 'Additional Specialist Reviewer', + model: 'fast', + configuredModel: 'fast', + defaultModelSlot: 'fast', + strategyLevel: 'normal', + strategySource: 'team', + strategyDirective: 'Review security.', + locked: false, + source: 'extra', + subagentSource: 'user', + }, + ], + skippedReviewers: [ + { + subagentId: 'ReviewFrontend', + displayName: 'Frontend reviewer', + roleName: 'Frontend Reviewer', + model: 'fast', + configuredModel: 'fast', + defaultModelSlot: 'fast', + strategyLevel: 'normal', + strategySource: 'team', + strategyDirective: 'Review frontend.', + locked: true, + source: 'core', + 
subagentSource: 'builtin', + reason: 'not_applicable', + }, + { + subagentId: 'CustomInvalid', + displayName: 'Custom invalid reviewer', + roleName: 'Additional Specialist Reviewer', + model: 'fast', + configuredModel: 'fast', + defaultModelSlot: 'fast', + strategyLevel: 'normal', + strategySource: 'team', + strategyDirective: 'Review custom rules.', + locked: false, + source: 'extra', + subagentSource: 'user', + reason: 'invalid_tooling', + }, + ], + }; +} + +function buildPreviewWithoutSkippedReviewers(): ReviewTeamRunManifest { + return { + ...buildPreview(), + skippedReviewers: [], + }; +} + +describeWithJsdom('DeepReviewConsentDialog', () => { + let dom: { window: Window & typeof globalThis }; + let container: HTMLDivElement; + let root: Root; + + beforeEach(() => { + mockSaveReviewTeamProjectStrategyOverride.mockResolvedValue(undefined); + dom = new JSDOMCtor!('', { + pretendToBeVisual: true, + url: 'http://localhost', + }); + + const { window } = dom; + vi.stubGlobal('window', window); + vi.stubGlobal('document', window.document); + vi.stubGlobal('navigator', window.navigator); + vi.stubGlobal('HTMLElement', window.HTMLElement); + vi.stubGlobal('Event', window.Event); + vi.stubGlobal('localStorage', window.localStorage); + vi.stubGlobal('IS_REACT_ACT_ENVIRONMENT', true); + + container = document.createElement('div'); + document.body.appendChild(container); + root = createRoot(container); + }); + + afterEach(() => { + act(() => { + root.unmount(); + }); + container.remove(); + dom.window.close(); + vi.unstubAllGlobals(); + }); + + it('shows a compact launch summary with skipped reviewers only when needed', async () => { + const result = vi.fn(); + + await act(async () => { + root.render(); + }); + await act(async () => { + container.querySelector('button')?.dispatchEvent(new window.Event('click', { bubbles: true })); + }); + + expect(container.textContent).toContain('Launch summary'); + expect(container.textContent).toContain('1 file'); + 
expect(container.textContent).toContain('Risk areas: Backend core'); + expect(container.textContent).toContain('3 reviewer calls'); + expect(container.textContent).toContain('1 optional reviewer'); + expect(container.textContent).toContain('2 skipped'); + expect(container.textContent).toContain('Run strategy: Normal'); + expect(container.textContent).toContain('Frontend reviewer'); + expect(container.textContent).toContain('Not applicable to this target'); + expect(container.textContent).toContain('Custom invalid reviewer'); + expect(container.textContent).toContain('Configuration issue'); + expect(container.textContent).not.toContain('Logic reviewer'); + expect(container.textContent).not.toContain('Custom security reviewer'); + }); + + it('still opens when skip preference is set but reviewers are skipped', async () => { + localStorage.setItem('bitfun.deepReview.skipCostConfirmation', 'true'); + const result = vi.fn(); + + await act(async () => { + root.render(); + }); + await act(async () => { + container.querySelector('button')?.dispatchEvent(new window.Event('click', { bubbles: true })); + }); + + expect(container.querySelector('[role="dialog"]')).not.toBeNull(); + expect(result).not.toHaveBeenCalled(); + }); + + it('still opens when skip preference is set but the active session is busy', async () => { + localStorage.setItem('bitfun.deepReview.skipCostConfirmation', 'true'); + const result = vi.fn(); + + await act(async () => { + root.render( + , + ); + }); + await act(async () => { + container.querySelector('button')?.dispatchEvent(new window.Event('click', { bubbles: true })); + }); + + expect(container.querySelector('[role="dialog"]')).not.toBeNull(); + expect(container.textContent).toContain('Active session is busy'); + expect(container.textContent).toContain('2 running subagent tasks'); + expect(result).not.toHaveBeenCalled(); + }); + + it('persists a selected project strategy override before confirming', async () => { + const result = vi.fn(); + + await 
act(async () => { + root.render(); + }); + await act(async () => { + container.querySelector('button')?.dispatchEvent(new window.Event('click', { bubbles: true })); + }); + + const deepStrategyButton = Array.from(container.querySelectorAll('button')) + .find((button) => button.textContent === 'Deep'); + expect(deepStrategyButton).not.toBeUndefined(); + + await act(async () => { + deepStrategyButton?.dispatchEvent(new window.Event('click', { bubbles: true })); + }); + await act(async () => { + Array.from(container.querySelectorAll('button')) + .find((button) => button.textContent === 'Start Deep Review') + ?.dispatchEvent(new window.Event('click', { bubbles: true })); + }); + + expect(mockSaveReviewTeamProjectStrategyOverride).toHaveBeenCalledWith( + '/test-fixtures/project-a', + 'deep', + ); + expect(result).toHaveBeenCalledWith(true); + }); +}); diff --git a/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.tsx b/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.tsx index c743b2706..e44f31d10 100644 --- a/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.tsx +++ b/src/web-ui/src/flow_chat/components/DeepReviewConsentDialog.tsx @@ -1,31 +1,111 @@ import React, { useCallback, useState } from 'react'; -import { Clock, Coins, ShieldCheck, X } from 'lucide-react'; -import { estimateTokenConsumption, formatTokenCount } from '../utils/deepReviewExperience'; +import { AlertTriangle, ShieldCheck, X } from 'lucide-react'; import { useTranslation } from 'react-i18next'; import { Button, Checkbox, Modal } from '@/component-library'; import { createLogger } from '@/shared/utils/logger'; +import type { + ReviewStrategyLevel, + ReviewTeamManifestMember, + ReviewTeamManifestMemberReason, + ReviewTeamRunManifest, +} from '@/shared/services/reviewTeamService'; +import { + REVIEW_STRATEGY_LEVELS, + getReviewStrategyProfile, + saveReviewTeamProjectStrategyOverride, +} from '@/shared/services/reviewTeamService'; +import type { 
DeepReviewSessionConcurrencyGuard } from '../utils/deepReviewCapacityGuard'; import './DeepReviewConsentDialog.scss'; const log = createLogger('DeepReviewConsentDialog'); const SKIP_DEEP_REVIEW_CONFIRMATION_STORAGE_KEY = 'bitfun.deepReview.skipCostConfirmation'; +const MAX_VISIBLE_SKIPPED_REVIEWERS = 3; +const MAX_VISIBLE_TARGET_TAGS = 3; + +const TARGET_TAG_LABELS: Record = { + frontend_ui: { key: 'frontendUi', defaultValue: 'Frontend UI' }, + frontend_style: { key: 'frontendStyle', defaultValue: 'Frontend styles' }, + frontend_i18n: { key: 'frontendI18n', defaultValue: 'Frontend i18n' }, + frontend_contract: { key: 'frontendContract', defaultValue: 'Frontend contract' }, + desktop_contract: { key: 'desktopContract', defaultValue: 'Desktop contract' }, + web_server_contract: { key: 'webServerContract', defaultValue: 'Web server contract' }, + backend_core: { key: 'backendCore', defaultValue: 'Backend core' }, + transport: { key: 'transport', defaultValue: 'Transport' }, + api_layer: { key: 'apiLayer', defaultValue: 'API layer' }, + ai_adapter: { key: 'aiAdapter', defaultValue: 'AI adapter' }, + installer_ui: { key: 'installerUi', defaultValue: 'Installer UI' }, + test: { key: 'test', defaultValue: 'Tests' }, + docs: { key: 'docs', defaultValue: 'Docs' }, + config: { key: 'config', defaultValue: 'Config' }, + generated_or_lock: { key: 'generatedOrLock', defaultValue: 'Generated or lockfile' }, + unknown: { key: 'unknown', defaultValue: 'Unknown area' }, +}; interface PendingConsent { resolve: (confirmed: boolean) => void; + preview?: ReviewTeamRunManifest; + launchContext?: DeepReviewConsentLaunchContext; +} + +export interface DeepReviewConsentLaunchContext { + sessionConcurrencyGuard?: DeepReviewSessionConcurrencyGuard | null; } export interface DeepReviewConsentControls { - confirmDeepReviewLaunch: () => Promise; + confirmDeepReviewLaunch: ( + preview?: ReviewTeamRunManifest, + launchContext?: DeepReviewConsentLaunchContext, + ) => Promise; 
deepReviewConsentDialog: React.ReactNode; } +function hasSkippedReviewers(preview?: ReviewTeamRunManifest): boolean { + return Boolean(preview?.skippedReviewers?.length); +} + +function hasSessionConcurrencyWarning(launchContext?: DeepReviewConsentLaunchContext): boolean { + return Boolean(launchContext?.sessionConcurrencyGuard?.highActivity); +} + +function getReviewerLabel(member: ReviewTeamManifestMember): string { + return member.displayName || member.subagentId; +} + +function getReviewTargetFileCount(preview: ReviewTeamRunManifest): number { + return preview.target.files.filter((file) => { + if (typeof file === 'string') { + return true; + } + return !file.excluded; + }).length; +} + +function getFallbackTargetTagLabel(tag: string): string { + return tag + .split('_') + .filter(Boolean) + .map((part) => part.charAt(0).toUpperCase() + part.slice(1)) + .join(' '); +} + export function useDeepReviewConsent(): DeepReviewConsentControls { const { t } = useTranslation('flow-chat'); const [pendingConsent, setPendingConsent] = useState(null); const [dontShowAgain, setDontShowAgain] = useState(false); + const [selectedStrategyOverride, setSelectedStrategyOverride] = + useState(null); + const [strategySelectionTouched, setStrategySelectionTouched] = useState(false); - const confirmDeepReviewLaunch = useCallback(async () => { + const confirmDeepReviewLaunch = useCallback(async ( + preview?: ReviewTeamRunManifest, + launchContext?: DeepReviewConsentLaunchContext, + ) => { try { - if (localStorage.getItem(SKIP_DEEP_REVIEW_CONFIRMATION_STORAGE_KEY) === 'true') { + if ( + localStorage.getItem(SKIP_DEEP_REVIEW_CONFIRMATION_STORAGE_KEY) === 'true' && + !hasSkippedReviewers(preview) && + !hasSessionConcurrencyWarning(launchContext) + ) { return true; } } catch (error) { @@ -34,7 +114,9 @@ export function useDeepReviewConsent(): DeepReviewConsentControls { return new Promise((resolve) => { setDontShowAgain(false); - setPendingConsent({ resolve }); + 
setSelectedStrategyOverride(null); + setStrategySelectionTouched(false); + setPendingConsent({ resolve, preview, launchContext }); }); }, []); @@ -44,6 +126,21 @@ export function useDeepReviewConsent(): DeepReviewConsentControls { return; } + if ( + confirmed && + strategySelectionTouched && + pending.preview?.workspacePath + ) { + try { + await saveReviewTeamProjectStrategyOverride( + pending.preview.workspacePath, + selectedStrategyOverride ?? undefined, + ); + } catch (error) { + log.warn('Failed to persist Deep Review project strategy override', error); + } + } + if (confirmed && dontShowAgain) { try { localStorage.setItem(SKIP_DEEP_REVIEW_CONFIRMATION_STORAGE_KEY, 'true'); @@ -54,7 +151,229 @@ export function useDeepReviewConsent(): DeepReviewConsentControls { setPendingConsent(null); pending.resolve(confirmed); - }, [dontShowAgain, pendingConsent]); + }, [dontShowAgain, pendingConsent, selectedStrategyOverride, strategySelectionTouched]); + + const selectStrategyOverride = useCallback((strategyLevel: ReviewStrategyLevel | null) => { + setSelectedStrategyOverride(strategyLevel); + setStrategySelectionTouched(true); + }, []); + + const getSkippedReasonLabel = useCallback((reason?: ReviewTeamManifestMemberReason) => { + switch (reason) { + case 'not_applicable': + return t('deepReviewConsent.skippedReasons.notApplicable', { + defaultValue: 'Not applicable to this target', + }); + case 'budget_limited': + return t('deepReviewConsent.skippedReasons.budgetLimited', { + defaultValue: 'Limited by token budget', + }); + case 'invalid_tooling': + return t('deepReviewConsent.skippedReasons.invalidTooling', { + defaultValue: 'Configuration issue', + }); + case 'disabled': + return t('deepReviewConsent.skippedReasons.disabled', { + defaultValue: 'Disabled', + }); + case 'unavailable': + return t('deepReviewConsent.skippedReasons.unavailable', { + defaultValue: 'Unavailable', + }); + default: + return t('deepReviewConsent.skippedReasons.skipped', { + defaultValue: 
'Skipped', + }); + } + }, [t]); + + const renderLaunchSummary = useCallback((preview: ReviewTeamRunManifest) => { + const skippedReviewers = preview.skippedReviewers; + const skippedCount = skippedReviewers.length; + const visibleSkippedReviewers = skippedReviewers.slice(0, MAX_VISIBLE_SKIPPED_REVIEWERS); + const hiddenSkippedCount = Math.max(0, skippedCount - visibleSkippedReviewers.length); + const selectedStrategy = strategySelectionTouched + ? selectedStrategyOverride + : preview.strategyLevel; + const selectedStrategyLabel = selectedStrategy + ? t(`deepReviewConsent.strategyLabels.${selectedStrategy}`, { + defaultValue: getReviewStrategyProfile(selectedStrategy).label, + }) + : t('deepReviewConsent.teamDefaultStrategy', { + defaultValue: 'Team default', + }); + const targetFileCount = getReviewTargetFileCount(preview); + const visibleTargetTags = preview.target.tags.slice(0, MAX_VISIBLE_TARGET_TAGS); + const hiddenTargetTagCount = Math.max(0, preview.target.tags.length - visibleTargetTags.length); + const targetTagLabels = visibleTargetTags.map((tag) => { + const label = TARGET_TAG_LABELS[tag] ?? { + key: 'unknown', + defaultValue: getFallbackTargetTagLabel(tag), + }; + return t(`deepReviewConsent.targetTagLabels.${label.key}`, { + defaultValue: label.defaultValue, + }); + }); + const targetTagSummary = targetTagLabels.length > 0 + ? hiddenTargetTagCount > 0 + ? t('deepReviewConsent.targetTagsWithMore', { + tags: targetTagLabels.join(', '), + count: hiddenTargetTagCount, + defaultValue: '{{tags}} +{{count}} more', + }) + : targetTagLabels.join(', ') + : t('deepReviewConsent.targetTagLabels.unknown', { + defaultValue: 'Unknown area', + }); + const optionalReviewerCount = preview.enabledExtraReviewers.length; + + return ( +
+
+ + {t('deepReviewConsent.summaryTitle', { defaultValue: 'Launch summary' })} + +
+ +
+ + {t('deepReviewConsent.targetFiles', { + count: targetFileCount, + defaultValue: targetFileCount === 1 ? '{{count}} file' : '{{count}} files', + })} + + + {t('deepReviewConsent.targetRiskTags', { + tags: targetTagSummary, + defaultValue: 'Risk areas: {{tags}}', + })} + + + {t('deepReviewConsent.estimatedCalls', { + count: preview.tokenBudget.estimatedReviewerCalls, + defaultValue: '{{count}} reviewer calls', + })} + + {skippedCount > 0 && ( + + {t('deepReviewConsent.skippedReviewers', { + count: skippedCount, + defaultValue: '{{count}} skipped', + })} + + )} + {optionalReviewerCount > 0 && ( + + {t('deepReviewConsent.optionalReviewers', { + count: optionalReviewerCount, + defaultValue: optionalReviewerCount === 1 + ? '{{count}} optional reviewer' + : '{{count}} optional reviewers', + })} + + )} + {preview.tokenBudget.largeDiffSummaryFirst && ( + + {t('deepReviewConsent.summaryFirstReview', { + defaultValue: 'Summary-first coverage', + })} + + )} + + {t('deepReviewConsent.runStrategy', { + strategy: selectedStrategyLabel, + defaultValue: 'Run strategy: {{strategy}}', + })} + +
+ + {preview.workspacePath && ( +
+
+ {t('deepReviewConsent.strategyOverrideTitle', { + defaultValue: 'Run strategy', + })} +
+
+ + {REVIEW_STRATEGY_LEVELS.map((strategyLevel) => { + const isActive = selectedStrategy === strategyLevel; + return ( + + ); + })} +
+
+ )} + + {skippedReviewers.length > 0 && ( +
+
+ + {t('deepReviewConsent.skippedGroupTitle', { defaultValue: 'Skipped reviewers' })} +
+
    + {visibleSkippedReviewers.map((member) => ( +
  • + {getReviewerLabel(member)} + {getSkippedReasonLabel(member.reason)} +
  • + ))} + {hiddenSkippedCount > 0 && ( +
  • + + {t('deepReviewConsent.skippedMore', { + count: hiddenSkippedCount, + defaultValue: '+{{count}} more', + })} + +
  • + )} +
+
+ )} +
+ ); + }, [ + getSkippedReasonLabel, + selectStrategyOverride, + selectedStrategyOverride, + strategySelectionTouched, + t, + ]); const deepReviewConsentDialog = pendingConsent ? ( {t('deepReviewConsent.eyebrow', { defaultValue: 'Code review team' })} -

{t('deepReviewConsent.title')}

+

{t('deepReviewConsent.title', { defaultValue: 'Start Deep Review?' })}

-

{t('deepReviewConsent.body')}

+

+ {t('deepReviewConsent.body', { + defaultValue: 'Deep Review launches multiple reviewers and can take longer or use more tokens than a standard review.', + })} +

@@ -93,51 +416,46 @@ export function useDeepReviewConsent(): DeepReviewConsentControls { {t('deepReviewConsent.readonlyLabel', { defaultValue: 'Read-only first pass' })} -

{t('deepReviewConsent.readonly')}

+

+ {t('deepReviewConsent.readonly', { + defaultValue: 'The first pass reports findings and a remediation plan before any code changes.', + })} +

-
-
-
- + {pendingConsent.launchContext?.sessionConcurrencyGuard?.highActivity && ( +
+
+
- {t('deepReviewConsent.costLabel', { defaultValue: 'Higher token usage' })} + {t('deepReviewConsent.sessionConcurrencyTitle', { + defaultValue: 'Active session is busy', + })} -

{t('deepReviewConsent.cost')}

-

- {(() => { - const est = estimateTokenConsumption(5); - return t('deepReviewConsent.estimatedTokens', { - min: formatTokenCount(est.min), - max: formatTokenCount(est.max), - defaultValue: 'Estimated: {{min}} - {{max}} tokens', - }); - })()} +

+ {t('deepReviewConsent.sessionConcurrencyBody', { + count: pendingConsent.launchContext.sessionConcurrencyGuard.activeSubagentCount, + defaultValue: + 'The target session already has {{count}} running subagent tasks. Choose a lighter strategy, cancel for now, or continue manually when capacity is free.', + })}

-
-
- -
-
- - {t('deepReviewConsent.timeLabel', { defaultValue: 'Longer runtime' })} - -

{t('deepReviewConsent.time')}

-
-
-
+ )} + + {pendingConsent.preview && renderLaunchSummary(pendingConsent.preview)}
setDontShowAgain(event.target.checked)} - label={t('deepReviewConsent.dontShowAgain')} + label={t('deepReviewConsent.dontShowAgain', { + defaultValue: 'Do not show this again', + })} />
diff --git a/src/web-ui/src/flow_chat/components/btw/BtwSessionPanel.tsx b/src/web-ui/src/flow_chat/components/btw/BtwSessionPanel.tsx index 07b14fe76..98d04a60f 100644 --- a/src/web-ui/src/flow_chat/components/btw/BtwSessionPanel.tsx +++ b/src/web-ui/src/flow_chat/components/btw/BtwSessionPanel.tsx @@ -437,6 +437,14 @@ export const BtwSessionPanel: React.FC = ({ completedRemediationIds: store.completedRemediationIds, }); store.minimize(); + } else if (isComplete && store.phase === 'review_waiting_capacity') { + store.showActionBar({ + childSessionId, + parentSessionId: parentSessionId ?? null, + reviewData: latestReviewData, + reviewMode, + phase: 'review_completed', + }); } return; } diff --git a/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.i18n.test.ts b/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.i18n.test.ts index 17bafe44a..c76f66e69 100644 --- a/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.i18n.test.ts +++ b/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.i18n.test.ts @@ -34,6 +34,19 @@ const REQUIRED_ACTION_BAR_KEYS = [ 'deepReviewActionBar.continueFix', 'deepReviewActionBar.skipRemaining', 'deepReviewActionBar.switchModel', + 'deepReviewActionBar.capacityQueue.title', + 'deepReviewActionBar.capacityQueue.pausedTitle', + 'deepReviewActionBar.capacityQueue.detail', + 'deepReviewActionBar.capacityQueue.sessionBusy', + 'deepReviewActionBar.capacityQueue.pauseQueue', + 'deepReviewActionBar.capacityQueue.continueQueue', + 'deepReviewActionBar.capacityQueue.cancelQueued', + 'deepReviewActionBar.capacityQueue.skipOptionalQueued', + 'deepReviewActionBar.capacityQueue.runSlowerNextTime', + 'deepReviewActionBar.capacityQueue.openReviewSettings', + 'deepReviewActionBar.capacityQueue.runSlowerSaved', + 'deepReviewActionBar.capacityQueue.runSlowerFailed', + 'deepReviewActionBar.capacityQueue.controlFailed', 'reviewActionBar.noIssuesFound', ]; @@ -59,6 +72,11 @@ const REQUIRED_REVIEW_TEAM_PAGE_KEYS = [ 
'reviewTeams.detail.loading', ]; +const REQUIRED_DEEP_REVIEW_CONSENT_KEYS = [ + 'deepReviewConsent.sessionConcurrencyTitle', + 'deepReviewConsent.sessionConcurrencyBody', +]; + function getMessageValue(messages: unknown, key: string): unknown { return key .split('.') @@ -103,4 +121,15 @@ describe('DeepReviewActionBar i18n', () => { expect(missingKeys, `${locale} missing keys`).toEqual([]); } }); + + it('keeps Deep Review consent strings available in every bundled locale', () => { + for (const [locale, messages] of Object.entries(LOCALES)) { + const missingKeys = REQUIRED_DEEP_REVIEW_CONSENT_KEYS.filter((key) => { + const value = getMessageValue(messages, key); + return typeof value !== 'string' || value.trim().length === 0; + }); + + expect(missingKeys, `${locale} missing keys`).toEqual([]); + } + }); }); diff --git a/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.scss b/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.scss index 16f30c002..00844d416 100644 --- a/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.scss +++ b/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.scss @@ -594,6 +594,56 @@ font-variant-numeric: tabular-nums; } + /* Capacity queue */ + &__capacity-queue { + display: flex; + flex-wrap: wrap; + align-items: flex-start; + justify-content: space-between; + gap: 10px; + padding: 8px 10px; + border-radius: 6px; + border: 1px solid color-mix(in srgb, var(--color-warning, #f59e0b) 30%, var(--border-base)); + background: color-mix(in srgb, var(--color-warning, #f59e0b) 8%, var(--deep-review-action-bar-surface)); + } + + &__capacity-queue-main { + display: flex; + align-items: flex-start; + gap: 8px; + min-width: 0; + } + + &__capacity-queue-icon { + flex-shrink: 0; + margin-top: 2px; + color: var(--color-warning, #f59e0b); + } + + &__capacity-queue-copy { + display: flex; + flex-direction: column; + gap: 2px; + min-width: 0; + } + + &__capacity-queue-title { + color: var(--color-text-primary); + 
font-weight: 600; + } + + &__capacity-queue-detail { + color: var(--color-text-secondary); + } + + &__capacity-queue-actions { + display: flex; + flex-wrap: wrap; + justify-content: flex-end; + gap: 6px; + flex-shrink: 0; + } + /* Partial results */ &__partial-summary { display: flex; diff --git a/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.test.tsx b/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.test.tsx index f268716f4..10d84a20b 100644 --- a/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.test.tsx +++ b/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.test.tsx @@ -13,6 +13,8 @@ const buildRecoveryPlanMock = vi.hoisted(() => vi.fn(() => ({ willSkip: [], summaryText: '1 completed reviewer will be preserved; 1 reviewer will be rerun', }))); +const controlDeepReviewQueueMock = vi.hoisted(() => vi.fn()); +const lowerDefaultReviewTeamMaxParallelReviewersMock = vi.hoisted(() => vi.fn()); vi.mock('react-i18next', () => ({ initReactI18next: { @@ -59,6 +61,16 @@ vi.mock('../../services/FlowChatManager', () => ({ }, })); +vi.mock('@/infrastructure/api/service-api/AgentAPI', () => ({ + agentAPI: { + controlDeepReviewQueue: controlDeepReviewQueueMock, + }, +})); + +vi.mock('@/shared/services/reviewTeamService', () => ({ + lowerDefaultReviewTeamMaxParallelReviewers: lowerDefaultReviewTeamMaxParallelReviewersMock, +})); + vi.mock('@/infrastructure/event-bus', () => ({ globalEventBus: { emit: eventBusEmitMock, @@ -159,6 +171,13 @@ describeWithJsdom('DeepReviewActionBar', () => { confirmWarningMock.mockResolvedValue(true); eventBusEmitMock.mockReturnValue(false); continueDeepReviewSessionMock.mockResolvedValue(undefined); + lowerDefaultReviewTeamMaxParallelReviewersMock.mockResolvedValue({ + maxParallelInstances: 1, + maxQueueWaitSeconds: 120, + allowProviderCapacityQueue: true, + allowBoundedAutoRetry: false, + autoRetryElapsedGuardSeconds: 180, + }); useReviewActionBarStore.getState().reset(); }); @@ -360,6 +379,142 @@ 
describeWithJsdom('DeepReviewActionBar', () => { expect(state.minimized).toBe(true); }); + it('does not show capacity queue controls when there is no queue state', async () => { + const { DeepReviewActionBar } = await import('./DeepReviewActionBar'); + + useReviewActionBarStore.getState().showActionBar({ + childSessionId: 'child-session', + parentSessionId: 'parent-session', + reviewData: { + summary: { recommended_action: 'request_changes' }, + remediation_plan: ['Fix issue 1'], + }, + phase: 'review_completed', + }); + + await act(async () => { + root.render(); + }); + + expect(container.textContent).not.toContain('Reviewers waiting for capacity'); + expect(Array.from(container.querySelectorAll('button')).some((button) => ( + button.textContent?.includes('Pause queue') + ))).toBe(false); + }); + + it('shows compact capacity queue controls and keeps them locally adjustable', async () => { + const { DeepReviewActionBar } = await import('./DeepReviewActionBar'); + + useReviewActionBarStore.getState().showActionBar({ + childSessionId: 'child-session', + parentSessionId: 'parent-session', + reviewData: { + summary: { recommended_action: 'request_changes' }, + remediation_plan: ['Fix issue 1'], + }, + phase: 'review_completed', + }); + useReviewActionBarStore.setState({ + capacityQueueState: { + status: 'queued_for_capacity', + queuedReviewerCount: 2, + activeReviewerCount: 1, + optionalReviewerCount: 1, + sessionConcurrencyHigh: true, + }, + } as Partial>); + + await act(async () => { + root.render(); + }); + + expect(container.textContent).toContain('Reviewers waiting for capacity'); + expect(container.textContent).toContain('Queue wait does not count against reviewer runtime.'); + expect(container.textContent).toContain('Your active session is busy.'); + expect(container.textContent).toContain('Run slower next time'); + expect(container.textContent).toContain('Open Review settings'); + + const pauseButton = Array.from(container.querySelectorAll('button')) + 
.find((button) => button.textContent?.includes('Pause queue')); + expect(pauseButton).toBeTruthy(); + + await act(async () => { + pauseButton!.dispatchEvent(new dom.window.MouseEvent('click', { bubbles: true })); + await Promise.resolve(); + }); + + expect((useReviewActionBarStore.getState() as unknown as { + capacityQueueState: { status: string }; + }).capacityQueueState.status).toBe('paused_by_user'); + expect(container.textContent).toContain('Queue paused'); + + const runSlowerButton = Array.from(container.querySelectorAll('button')) + .find((button) => button.textContent?.includes('Run slower next time')); + expect(runSlowerButton).toBeTruthy(); + + await act(async () => { + runSlowerButton!.dispatchEvent(new dom.window.MouseEvent('click', { bubbles: true })); + await Promise.resolve(); + }); + + expect(lowerDefaultReviewTeamMaxParallelReviewersMock).toHaveBeenCalledTimes(1); + + const openSettingsButton = Array.from(container.querySelectorAll('button')) + .find((button) => button.textContent?.includes('Open Review settings')); + expect(openSettingsButton).toBeTruthy(); + + await act(async () => { + openSettingsButton!.dispatchEvent(new dom.window.MouseEvent('click', { bubbles: true })); + await Promise.resolve(); + }); + + const { useSettingsStore } = await import('@/app/scenes/settings/settingsStore'); + expect(useSettingsStore.getState().activeTab).toBe('review'); + }); + + it('sends backend queue control actions for event-driven capacity waits', async () => { + const { DeepReviewActionBar } = await import('./DeepReviewActionBar'); + controlDeepReviewQueueMock.mockResolvedValue(undefined); + + useReviewActionBarStore.getState().showCapacityQueueBar({ + childSessionId: 'child-session', + parentSessionId: 'parent-session', + capacityQueueState: { + toolId: 'task-queue-1', + subagentType: 'ReviewSecurity', + dialogTurnId: 'turn-queue-1', + status: 'queued_for_capacity', + queuedReviewerCount: 1, + activeReviewerCount: 1, + optionalReviewerCount: 1, + 
controlMode: 'backend', + }, + }); + + await act(async () => { + root.render(); + }); + + const pauseButton = Array.from(container.querySelectorAll('button')) + .find((button) => button.textContent?.includes('Pause queue')); + expect(pauseButton).toBeTruthy(); + + await act(async () => { + pauseButton!.dispatchEvent(new dom.window.MouseEvent('click', { bubbles: true })); + await Promise.resolve(); + }); + + expect(controlDeepReviewQueueMock).toHaveBeenCalledWith({ + sessionId: 'child-session', + dialogTurnId: 'turn-queue-1', + toolId: 'task-queue-1', + action: 'pause', + }); + expect((useReviewActionBarStore.getState() as unknown as { + capacityQueueState: { status: string }; + }).capacityQueueState.status).toBe('paused_by_user'); + }); + it('shows distinct progress text after starting fix and re-review', async () => { const { DeepReviewActionBar } = await import('./DeepReviewActionBar'); diff --git a/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.tsx b/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.tsx index a0584bff1..d1cc65db5 100644 --- a/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.tsx +++ b/src/web-ui/src/flow_chat/components/btw/DeepReviewActionBar.tsx @@ -10,15 +10,21 @@ import { ChevronUp, MessageSquare, Play, + Pause, Copy, Info, SkipForward, RotateCcw, Eye, Minus, + Settings, } from 'lucide-react'; import { Button, Checkbox, Tooltip } from '@/component-library'; -import { useReviewActionBarStore, type ReviewActionPhase } from '../../store/deepReviewActionBarStore'; +import { + useReviewActionBarStore, + type DeepReviewCapacityQueueAction, + type ReviewActionPhase, +} from '../../store/deepReviewActionBarStore'; import type { ReviewRemediationItem } from '../../utils/codeReviewRemediation'; import { buildSelectedReviewRemediationPrompt, REMEDIATION_GROUP_ORDER } from '../../utils/codeReviewRemediation'; import type { RemediationGroupId } from '../../utils/codeReviewReport'; @@ -40,10 +46,20 @@ import { } from 
'../../utils/deepReviewExperience'; import { flowChatStore } from '../../store/FlowChatStore'; import { CodeReviewReportExportActions } from '../../tool-cards/CodeReviewReportExportActions'; +import { agentAPI } from '@/infrastructure/api/service-api/AgentAPI'; +import { lowerDefaultReviewTeamMaxParallelReviewers } from '@/shared/services/reviewTeamService'; +import { useSettingsStore } from '@/app/scenes/settings/settingsStore'; +import { useSceneStore } from '@/app/stores/sceneStore'; +import type { ConfigTab } from '@/app/scenes/settings/settingsConfig'; import './DeepReviewActionBar.scss'; const log = createLogger('DeepReviewActionBar'); +function openSettingsTab(tab: ConfigTab) { + useSettingsStore.getState().setActiveTab(tab); + useSceneStore.getState().openScene('settings'); +} + const PHASE_CONFIG: Record; iconClass: string; @@ -56,6 +72,7 @@ const PHASE_CONFIG: Record { completedRemediationIds, remainingFixIds, decisionSelections, + capacityQueueState, } = store; const [showCustomInput, setShowCustomInput] = useState(false); @@ -112,6 +130,74 @@ export const ReviewActionBar: React.FC = () => { const isDeepReview = reviewMode === 'deep'; const hasInterruption = isDeepReview && Boolean(interruption); const isResumeRunning = phase === 'resume_running'; + const showCapacityQueueNotice = isDeepReview && + Boolean(capacityQueueState) && + capacityQueueState?.status !== 'running' && + capacityQueueState?.status !== 'capacity_skipped'; + const hasBackendQueueControlTarget = Boolean( + childSessionId && + capacityQueueState?.dialogTurnId && + capacityQueueState?.toolId, + ); + const supportsInlineQueueControls = + capacityQueueState?.controlMode === 'backend' + ? 
hasBackendQueueControlTarget + : capacityQueueState?.controlMode !== 'session_stop_only'; + + const handleCapacityQueueAction = useCallback(async ( + action: DeepReviewCapacityQueueAction, + applyLocalAction: () => void, + ) => { + if (!capacityQueueState) { + return; + } + + if (capacityQueueState.controlMode !== 'backend') { + applyLocalAction(); + return; + } + + if (!childSessionId || !capacityQueueState.dialogTurnId || !capacityQueueState.toolId) { + notificationService.error(t('deepReviewActionBar.capacityQueue.controlFailed', { + defaultValue: 'Queue control is unavailable for this reviewer.', + })); + return; + } + + try { + await agentAPI.controlDeepReviewQueue({ + sessionId: childSessionId, + dialogTurnId: capacityQueueState.dialogTurnId, + toolId: capacityQueueState.toolId, + action, + }); + applyLocalAction(); + } catch (error) { + log.warn('Failed to control DeepReview capacity queue', error); + notificationService.error(t('deepReviewActionBar.capacityQueue.controlFailed', { + defaultValue: 'Queue control failed. 
Please try again or stop the review.', + })); + } + }, [capacityQueueState, childSessionId, t]); + + const handleRunSlowerNextTime = useCallback(async () => { + try { + const nextPolicy = await lowerDefaultReviewTeamMaxParallelReviewers(); + notificationService.success(t('deepReviewActionBar.capacityQueue.runSlowerSaved', { + count: nextPolicy.maxParallelInstances, + defaultValue: `Next Deep Review will use up to ${nextPolicy.maxParallelInstances} parallel reviewers.`, + })); + } catch (error) { + log.warn('Failed to lower DeepReview max parallel reviewers', error); + notificationService.error(t('deepReviewActionBar.capacityQueue.runSlowerFailed', { + defaultValue: 'Failed to update Review settings.', + })); + } + }, [t]); + + const handleOpenReviewSettings = useCallback(() => { + openSettingsTab('review'); + }, []); // ---- progress tracking ---- const sessions = flowChatStore.getState().sessions; @@ -388,7 +474,7 @@ export const ReviewActionBar: React.FC = () => { const handleOpenModelSettings = useCallback(async () => { if (!interruption) return; - globalEventBus.emit('settings:open', { tab: 'models' }); + openSettingsTab('models'); }, [interruption]); const handleViewPartialResults = useCallback(() => { @@ -533,6 +619,10 @@ export const ReviewActionBar: React.FC = () => { return t('deepReviewActionBar.fixTimeout', { defaultValue: 'Fix timed out', }); + case 'review_waiting_capacity': + return t('deepReviewActionBar.reviewWaitingCapacity', { + defaultValue: 'Review queue waiting', + }); case 'review_interrupted': return t('deepReviewActionBar.reviewInterrupted', { defaultValue: 'Deep review interrupted', @@ -613,6 +703,126 @@ export const ReviewActionBar: React.FC = () => {
)} + {/* Capacity queue notice */} + {showCapacityQueueNotice && capacityQueueState && ( +
+
+ +
+ + {capacityQueueState.status === 'paused_by_user' + ? t('deepReviewActionBar.capacityQueue.pausedTitle', { + defaultValue: 'Queue paused', + }) + : t('deepReviewActionBar.capacityQueue.title', { + defaultValue: 'Reviewers waiting for capacity', + })} + + + {t('deepReviewActionBar.capacityQueue.detail', { + defaultValue: 'Queue wait does not count against reviewer runtime.', + })} + + {capacityQueueState.sessionConcurrencyHigh && ( + + {t('deepReviewActionBar.capacityQueue.sessionBusy', { + defaultValue: 'Your active session is busy. Pause Deep Review or continue later.', + })} + + )} + {!supportsInlineQueueControls && ( + + {t('deepReviewActionBar.capacityQueue.stopHint', { + defaultValue: 'Use Stop to interrupt this review queue.', + })} + + )} +
+
+
+ {supportsInlineQueueControls && ( + <> + {capacityQueueState.status === 'paused_by_user' ? ( + + ) : ( + + )} + {(capacityQueueState.optionalReviewerCount ?? 0) > 0 && ( + + )} + + + )} + + +
+
+ )} + {/* Partial results summary on interruption */} {hasInterruption && progressSummary && progressSummary.completed > 0 && (
@@ -668,6 +878,32 @@ export const ReviewActionBar: React.FC = () => { {t(errorAttribution.description, { defaultValue: '' })} + {errorAttribution.actions.length > 0 && ( +
+ {errorAttribution.actions.map((action) => ( + + ))} +
+ )}
)} diff --git a/src/web-ui/src/flow_chat/components/modern/SessionFilesBadge.tsx b/src/web-ui/src/flow_chat/components/modern/SessionFilesBadge.tsx index b60acb156..5f363d133 100644 --- a/src/web-ui/src/flow_chat/components/modern/SessionFilesBadge.tsx +++ b/src/web-ui/src/flow_chat/components/modern/SessionFilesBadge.tsx @@ -27,7 +27,8 @@ import { runWithConcurrencyLimit } from '@/shared/utils/runWithConcurrencyLimit' import { createBtwChildSession } from '../../services/BtwThreadService'; import { openBtwSessionInAuxPane } from '../../services/openBtwSession'; import { - buildDeepReviewPromptFromSessionFiles, + buildDeepReviewLaunchFromSessionFiles, + buildDeepReviewPreviewFromSessionFiles, launchDeepReviewSession, } from '../../services/DeepReviewService'; import { insertReviewSessionSummaryMarker } from '../../services/ReviewSessionMarkerService'; @@ -48,6 +49,7 @@ import { type QuickAction, } from '@/infrastructure/config/services/AIExperienceConfigService'; import { resolveQuickActionText } from '@/infrastructure/config/services/quickActionLocalization'; +import { deriveDeepReviewSessionConcurrencyGuard } from '../../utils/deepReviewCapacityGuard'; import './SessionFilesBadge.scss'; const log = createLogger('SessionFilesBadge'); @@ -672,24 +674,6 @@ export const SessionFilesBadge: React.FC = ({ return; } - const confirmed = await confirmDeepReviewLaunch(); - if (!confirmed) { - return; - } - setLaunchingReviewMode('deep_review'); - - if (skippedCount > 0) { - notificationService.info( - t('sessionFilesBadge.review.filteredNotice', { - included: reviewableFilePaths.length, - skipped: skippedCount, - defaultValue: - 'Review will analyze {{included}} files and skip {{skipped}} excluded files such as lock, generated, or binary assets.', - }), - { duration: 3500 } - ); - } - const fileList = reviewableFilePaths.map(p => `- ${p}`).join('\n'); const displayMessage = skippedCount > 0 ? 
t('sessionFilesBadge.deepReview.displayMessageFiltered', { @@ -704,7 +688,34 @@ export const SessionFilesBadge: React.FC = ({ }); try { - const prompt = await buildDeepReviewPromptFromSessionFiles( + const preview = await buildDeepReviewPreviewFromSessionFiles( + reviewableFilePaths, + currentWorkspace?.rootPath, + ); + const confirmed = await confirmDeepReviewLaunch(preview, { + sessionConcurrencyGuard: deriveDeepReviewSessionConcurrencyGuard( + flowChatStore.getState(), + sessionId, + ), + }); + if (!confirmed) { + return; + } + setLaunchingReviewMode('deep_review'); + + if (skippedCount > 0) { + notificationService.info( + t('sessionFilesBadge.review.filteredNotice', { + included: reviewableFilePaths.length, + skipped: skippedCount, + defaultValue: + 'Review will analyze {{included}} files and skip {{skipped}} excluded files such as lock, generated, or binary assets.', + }), + { duration: 3500 } + ); + } + + const { prompt, runManifest } = await buildDeepReviewLaunchFromSessionFiles( reviewableFilePaths, undefined, currentWorkspace?.rootPath, @@ -715,6 +726,7 @@ export const SessionFilesBadge: React.FC = ({ workspacePath: currentWorkspace?.rootPath, prompt, displayMessage, + runManifest, childSessionName: t('sessionFilesBadge.deepReview.threadTitle', { defaultValue: 'Deep review', }), diff --git a/src/web-ui/src/flow_chat/services/AgenticEventListener.ts b/src/web-ui/src/flow_chat/services/AgenticEventListener.ts index 3a712fac5..e6ca4da1b 100644 --- a/src/web-ui/src/flow_chat/services/AgenticEventListener.ts +++ b/src/web-ui/src/flow_chat/services/AgenticEventListener.ts @@ -17,6 +17,7 @@ import type { ImageAnalysisEvent, ModelRoundCompletedEvent, UserSteeringInjectedEvent, + DeepReviewQueueStateChangedEvent, } from '@/infrastructure/api/service-api/AgentAPI'; import { createLogger } from '@/shared/utils/logger'; @@ -35,6 +36,7 @@ export interface AgenticEventCallbacks { onModelRoundCompleted?: (event: ModelRoundCompletedEvent) => void; onTextChunk?: (event: 
TextChunkEvent) => void; onToolEvent?: (event: ToolEvent) => void; + onDeepReviewQueueStateChanged?: (event: DeepReviewQueueStateChangedEvent) => void; onDialogTurnCompleted?: (event: AgenticEvent) => void; onDialogTurnFailed?: (event: AgenticEvent) => void; onDialogTurnCancelled?: (event: AgenticEvent) => void; @@ -138,6 +140,14 @@ export class AgenticEventListener { this.unlistenFunctions.push(unlisten); } + if (callbacks.onDeepReviewQueueStateChanged) { + const unlisten = agentAPI.onDeepReviewQueueStateChanged((event) => { + logger.debug('Deep Review queue state changed:', event); + callbacks.onDeepReviewQueueStateChanged?.(event); + }); + this.unlistenFunctions.push(unlisten); + } + if (callbacks.onDialogTurnCompleted) { const unlisten = agentAPI.onDialogTurnCompleted((event) => { logger.debug('Dialog turn completed:', event); diff --git a/src/web-ui/src/flow_chat/services/BtwThreadService.ts b/src/web-ui/src/flow_chat/services/BtwThreadService.ts index 1e002b409..2fa0e0ee9 100644 --- a/src/web-ui/src/flow_chat/services/BtwThreadService.ts +++ b/src/web-ui/src/flow_chat/services/BtwThreadService.ts @@ -6,6 +6,7 @@ import { stateMachineManager } from '../state-machine'; import { flowChatManager } from './FlowChatManager'; import type { Session } from '../types/flow-chat'; import type { SessionKind } from '@/shared/types/session-history'; +import type { ReviewTeamRunManifest } from '@/shared/services/reviewTeamService'; import { buildSessionMetadata } from '../utils/sessionMetadata'; const log = createLogger('BtwThreadService'); @@ -93,6 +94,7 @@ export async function createBtwChildSession(params: { requestId?: string; addMarker?: boolean; sessionKind?: Extract; + deepReviewRunManifest?: ReviewTeamRunManifest; }): Promise<{ requestId: string; childSessionId: string; @@ -149,6 +151,7 @@ export async function createBtwChildSession(params: { parentDialogTurnId, parentTurnIndex, }, + deepReviewRunManifest: params.deepReviewRunManifest, isTransient: false, }, 
remoteConnectionId, diff --git a/src/web-ui/src/flow_chat/services/DeepReviewService.test.ts b/src/web-ui/src/flow_chat/services/DeepReviewService.test.ts index a2abd05aa..9b8cdf35b 100644 --- a/src/web-ui/src/flow_chat/services/DeepReviewService.test.ts +++ b/src/web-ui/src/flow_chat/services/DeepReviewService.test.ts @@ -1,11 +1,15 @@ import { describe, expect, it, vi, beforeEach } from 'vitest'; import { DEEP_REVIEW_SLASH_COMMAND, + buildDeepReviewLaunchFromSlashCommand, + buildDeepReviewPreviewFromSessionFiles, + buildDeepReviewPromptFromSessionFiles, buildDeepReviewPromptFromSlashCommand, getDeepReviewLaunchErrorMessage, isDeepReviewSlashCommand, launchDeepReviewSession, } from './DeepReviewService'; +import { buildEffectiveReviewTeamManifest } from '@/shared/services/reviewTeamService'; const mockDeleteSession = vi.fn(); const mockCreateBtwChildSession = vi.fn(); @@ -14,11 +18,23 @@ const mockCloseBtwSessionInAuxPane = vi.fn(); const mockSendMessage = vi.fn(); const mockDiscardLocalSession = vi.fn(); const mockInsertReviewSessionSummaryMarker = vi.fn(); +const mockGitGetStatus = vi.fn(); +const mockGitGetChangedFiles = vi.fn(); +const mockGitGetDiff = vi.fn(); +const mockLoadDefaultReviewTeam = vi.fn(); +const mockPrepareDefaultReviewTeamForLaunch = vi.fn(); +const mockLoadReviewTeamRateLimitStatus = vi.fn(); +const mockLoadReviewTeamProjectStrategyOverride = vi.fn(); vi.mock('@/infrastructure/api', () => ({ agentAPI: { deleteSession: (...args: any[]) => mockDeleteSession(...args), }, + gitAPI: { + getStatus: (...args: any[]) => mockGitGetStatus(...args), + getChangedFiles: (...args: any[]) => mockGitGetChangedFiles(...args), + getDiff: (...args: any[]) => mockGitGetDiff(...args), + }, })); vi.mock('./BtwThreadService', () => ({ @@ -51,12 +67,34 @@ vi.mock('./ReviewSessionMarkerService', () => ({ })); vi.mock('@/shared/services/reviewTeamService', () => ({ - prepareDefaultReviewTeamForLaunch: vi.fn(async () => ({ members: [] })), + loadDefaultReviewTeam: 
(...args: any[]) => mockLoadDefaultReviewTeam(...args), + prepareDefaultReviewTeamForLaunch: (...args: any[]) => mockPrepareDefaultReviewTeamForLaunch(...args), + loadReviewTeamRateLimitStatus: (...args: any[]) => mockLoadReviewTeamRateLimitStatus(...args), + loadReviewTeamProjectStrategyOverride: (...args: any[]) => mockLoadReviewTeamProjectStrategyOverride(...args), buildEffectiveReviewTeamManifest: vi.fn(() => ({ reviewers: [] })), buildReviewTeamPromptBlock: vi.fn(() => 'Review team manifest.'), })); describe('DeepReviewService slash command', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockLoadDefaultReviewTeam.mockResolvedValue({ members: [] }); + mockPrepareDefaultReviewTeamForLaunch.mockResolvedValue({ members: [] }); + mockLoadReviewTeamRateLimitStatus.mockResolvedValue(null); + mockLoadReviewTeamProjectStrategyOverride.mockResolvedValue(undefined); + mockGitGetStatus.mockResolvedValue({ + staged: [], + unstaged: [], + untracked: [], + conflicts: [], + current_branch: 'main', + ahead: 0, + behind: 0, + }); + mockGitGetChangedFiles.mockResolvedValue([]); + mockGitGetDiff.mockResolvedValue(''); + }); + it('uses /DeepReview as the canonical command', () => { expect(DEEP_REVIEW_SLASH_COMMAND).toBe('/DeepReview'); }); @@ -78,6 +116,362 @@ describe('DeepReviewService slash command', () => { expect(prompt).toContain('User-provided focus or target:\nreview commit abc123 for security'); expect(prompt).not.toContain('User-provided focus or target:\n/DeepReview'); }); + + it('classifies explicit slash-command file paths before building the review team manifest', async () => { + await buildDeepReviewPromptFromSlashCommand( + '/DeepReview src/web-ui/src/App.tsx src/crates/core/src/service/config/types.rs for regressions', + 'D:\\workspace\\repo', + ); + + expect(buildEffectiveReviewTeamManifest).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + workspacePath: 'D:\\workspace\\repo', + target: expect.objectContaining({ + source: 
'slash_command_explicit_files', + resolution: 'resolved', + tags: expect.arrayContaining(['frontend_ui', 'backend_core']), + }), + }), + ); + }); + + it('classifies workspace diff files for a slash command without an explicit target', async () => { + mockGitGetStatus.mockResolvedValueOnce({ + staged: [{ path: 'src/web-ui/src/App.tsx', status: 'modified' }], + unstaged: [{ path: 'src/crates/core/src/service/config/types.rs', status: 'modified' }], + untracked: ['src/web-ui/src/newFeature.tsx'], + conflicts: [], + current_branch: 'main', + ahead: 0, + behind: 0, + }); + + await buildDeepReviewPromptFromSlashCommand( + '/DeepReview', + 'D:\\workspace\\repo', + ); + + expect(mockGitGetStatus).toHaveBeenCalledWith('D:\\workspace\\repo'); + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ + workspacePath: 'D:\\workspace\\repo', + target: expect.objectContaining({ + source: 'workspace_diff', + resolution: 'resolved', + tags: expect.arrayContaining(['frontend_ui', 'backend_core']), + }), + }), + ); + }); + + it('passes workspace diff line stats into the review manifest', async () => { + mockGitGetStatus.mockResolvedValueOnce({ + staged: [{ path: 'src/web-ui/src/App.tsx', status: 'modified' }], + unstaged: [{ path: 'src/crates/core/src/service/config/types.rs', status: 'modified' }], + untracked: [], + conflicts: [], + current_branch: 'main', + ahead: 0, + behind: 0, + }); + mockGitGetDiff.mockResolvedValueOnce([ + 'diff --git a/src/crates/core/src/service/config/types.rs b/src/crates/core/src/service/config/types.rs', + '@@ -1,2 +1,3 @@', + '-old core line', + '+new core line', + '+another core line', + 'diff --git a/src/web-ui/src/App.tsx b/src/web-ui/src/App.tsx', + '@@ -5,3 +5,2 @@', + '-removed ui line', + '+added ui line', + ].join('\n')); + + await buildDeepReviewPromptFromSlashCommand( + '/DeepReview', + 'D:\\workspace\\repo', + ); + + 
expect(mockGitGetDiff).toHaveBeenCalledWith('D:\\workspace\\repo', { + source: 'HEAD', + }); + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ + changeStats: expect.objectContaining({ + fileCount: 2, + totalLinesChanged: 5, + lineCountSource: 'diff_stat', + }), + }), + ); + }); + + it('passes cached rate limit status into slash-command launch manifests', async () => { + mockLoadReviewTeamRateLimitStatus.mockResolvedValueOnce({ remaining: 2 }); + + await buildDeepReviewLaunchFromSlashCommand( + '/DeepReview', + 'D:\\workspace\\repo', + ); + + expect(mockLoadReviewTeamRateLimitStatus).toHaveBeenCalled(); + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ + workspacePath: 'D:\\workspace\\repo', + rateLimitStatus: { remaining: 2 }, + }), + ); + }); + + it('does not block slash-command launch manifests when rate limit status is unavailable', async () => { + mockLoadReviewTeamRateLimitStatus.mockRejectedValueOnce(new Error('rate status unavailable')); + + await buildDeepReviewLaunchFromSlashCommand( + '/DeepReview', + 'D:\\workspace\\repo', + ); + + const lastCall = vi.mocked(buildEffectiveReviewTeamManifest).mock.calls.at(-1); + expect(lastCall?.[1]).not.toHaveProperty('rateLimitStatus'); + }); + + it('passes project strategy overrides into slash-command launch manifests', async () => { + mockLoadReviewTeamProjectStrategyOverride.mockResolvedValueOnce('deep'); + + await buildDeepReviewLaunchFromSlashCommand( + '/DeepReview', + 'D:\\workspace\\repo', + ); + + expect(mockLoadReviewTeamProjectStrategyOverride).toHaveBeenCalledWith( + 'D:\\workspace\\repo', + ); + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ + workspacePath: 'D:\\workspace\\repo', + strategyOverride: 'deep', + }), + ); + }); + + it('does not block slash-command launch manifests when project strategy 
overrides are unavailable', async () => { + mockLoadReviewTeamProjectStrategyOverride.mockRejectedValueOnce(new Error('strategy unavailable')); + + await buildDeepReviewLaunchFromSlashCommand( + '/DeepReview', + 'D:\\workspace\\repo', + ); + + const lastCall = vi.mocked(buildEffectiveReviewTeamManifest).mock.calls.at(-1); + expect(lastCall?.[1]).not.toHaveProperty('strategyOverride'); + }); + + it('classifies commit target files through the git changed-files API', async () => { + mockGitGetChangedFiles.mockResolvedValueOnce([ + { + path: 'src/web-ui/src/App.tsx', + old_path: undefined, + status: 'modified', + }, + ]); + + await buildDeepReviewPromptFromSlashCommand( + '/DeepReview review commit abc123', + 'D:\\workspace\\repo', + ); + + expect(mockGitGetChangedFiles).toHaveBeenCalledWith('D:\\workspace\\repo', { + source: 'abc123^', + target: 'abc123', + }); + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ + target: expect.objectContaining({ + source: 'slash_command_git_ref', + resolution: 'resolved', + tags: expect.arrayContaining(['frontend_ui']), + }), + }), + ); + }); + + it('passes git ref diff line stats into the review manifest', async () => { + mockGitGetChangedFiles.mockResolvedValueOnce([ + { + path: 'src/web-ui/src/App.tsx', + old_path: undefined, + status: 'modified', + }, + ]); + mockGitGetDiff.mockResolvedValueOnce([ + 'diff --git a/src/web-ui/src/App.tsx b/src/web-ui/src/App.tsx', + '--- a/src/web-ui/src/App.tsx', + '+++ b/src/web-ui/src/App.tsx', + '@@ -10,2 +10,3 @@', + '-old line', + '+new line', + '+new second line', + ].join('\n')); + + await buildDeepReviewPromptFromSlashCommand( + '/DeepReview review commit abc123', + 'D:\\workspace\\repo', + ); + + expect(mockGitGetDiff).toHaveBeenCalledWith('D:\\workspace\\repo', { + source: 'abc123^', + target: 'abc123', + }); + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + 
expect.objectContaining({ + changeStats: expect.objectContaining({ + fileCount: 1, + totalLinesChanged: 3, + lineCountSource: 'diff_stat', + }), + }), + ); + }); + + it('keeps line stats unknown when git diff stats fail', async () => { + mockGitGetChangedFiles.mockResolvedValueOnce([ + { + path: 'src/web-ui/src/App.tsx', + old_path: undefined, + status: 'modified', + }, + ]); + mockGitGetDiff.mockRejectedValueOnce(new Error('diff unavailable')); + + await buildDeepReviewPromptFromSlashCommand( + '/DeepReview review commit abc123', + 'D:\\workspace\\repo', + ); + + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ + changeStats: expect.objectContaining({ + fileCount: 1, + lineCountSource: 'unknown', + }), + }), + ); + }); + + it('classifies explicit ref ranges through the git changed-files API', async () => { + mockGitGetChangedFiles.mockResolvedValueOnce([ + { + path: 'src/crates/core/src/service/config/types.rs', + old_path: undefined, + status: 'modified', + }, + ]); + + await buildDeepReviewPromptFromSlashCommand( + '/DeepReview review main..feature/deep-review', + 'D:\\workspace\\repo', + ); + + expect(mockGitGetChangedFiles).toHaveBeenCalledWith('D:\\workspace\\repo', { + source: 'main', + target: 'feature/deep-review', + }); + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ + target: expect.objectContaining({ + source: 'slash_command_git_ref', + resolution: 'resolved', + tags: expect.arrayContaining(['backend_core']), + }), + }), + ); + }); + + it('keeps git targets conservative when no workspace is available', async () => { + await buildDeepReviewPromptFromSlashCommand( + '/DeepReview review commit abc123', + ); + + expect(mockGitGetChangedFiles).not.toHaveBeenCalled(); + expect(buildEffectiveReviewTeamManifest).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ + target: expect.objectContaining({ + 
source: 'slash_command_git_ref', + resolution: 'unknown', + tags: ['unknown'], + }), + }), + ); + }); + + it('returns the run manifest with the slash-command launch prompt', async () => { + const runManifest = { reviewMode: 'deep', skippedReviewers: [] }; + vi.mocked(buildEffectiveReviewTeamManifest).mockReturnValueOnce(runManifest as any); + + const result = await buildDeepReviewLaunchFromSlashCommand( + '/DeepReview review commit abc123', + 'D:\\workspace\\repo', + ); + + expect(result.prompt).toContain('Original command:\n/DeepReview review commit abc123'); + expect(result.runManifest).toBe(runManifest); + }); + + it('classifies session files before building the review team manifest', async () => { + await buildDeepReviewPromptFromSessionFiles( + ['src/web-ui/src/App.tsx'], + undefined, + 'D:\\workspace\\repo', + ); + + expect(buildEffectiveReviewTeamManifest).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + workspacePath: 'D:\\workspace\\repo', + target: expect.objectContaining({ + resolution: 'resolved', + tags: expect.arrayContaining(['frontend_ui']), + }), + }), + ); + }); + + it('builds a read-only session-file preview without preparing launch state', async () => { + const runManifest = { + reviewMode: 'deep', + skippedReviewers: [{ subagentId: 'ReviewFrontend', reason: 'not_applicable' }], + }; + vi.mocked(buildEffectiveReviewTeamManifest).mockReturnValueOnce(runManifest as any); + + const result = await buildDeepReviewPreviewFromSessionFiles( + ['src/crates/core/src/service/config/types.rs'], + 'D:\\workspace\\repo', + ); + + expect(result).toBe(runManifest); + expect(mockLoadDefaultReviewTeam).toHaveBeenCalledWith('D:\\workspace\\repo'); + expect(mockPrepareDefaultReviewTeamForLaunch).not.toHaveBeenCalled(); + expect(buildEffectiveReviewTeamManifest).toHaveBeenCalledWith( + expect.anything(), + expect.objectContaining({ + workspacePath: 'D:\\workspace\\repo', + target: expect.objectContaining({ + source: 'session_files', + 
resolution: 'resolved', + tags: expect.arrayContaining(['backend_core']), + }), + }), + ); + }); }); describe('launchDeepReviewSession', () => { @@ -126,6 +520,59 @@ describe('launchDeepReviewSession', () => { ); }); + it('passes the run manifest into child session creation', async () => { + const runManifest = { reviewMode: 'deep', skippedReviewers: [] }; + mockCreateBtwChildSession.mockResolvedValue({ + childSessionId: 'child-123', + parentDialogTurnId: 'turn-456', + }); + mockSendMessage.mockResolvedValue(undefined); + + await launchDeepReviewSession({ + parentSessionId: 'parent-123', + workspacePath: 'D:\\workspace\\repo', + prompt: 'Review these files', + displayMessage: 'Deep review started', + runManifest: runManifest as any, + }); + + expect(mockCreateBtwChildSession).toHaveBeenCalledWith( + expect.objectContaining({ + deepReviewRunManifest: runManifest, + }), + ); + }); + + it('passes the run manifest as first-turn message metadata', async () => { + const runManifest = { reviewMode: 'deep', skippedReviewers: [] }; + mockCreateBtwChildSession.mockResolvedValue({ + childSessionId: 'child-123', + parentDialogTurnId: 'turn-456', + }); + mockSendMessage.mockResolvedValue(undefined); + + await launchDeepReviewSession({ + parentSessionId: 'parent-123', + workspacePath: 'D:\\workspace\\repo', + prompt: 'Review these files', + displayMessage: 'Deep review started', + runManifest: runManifest as any, + }); + + expect(mockSendMessage).toHaveBeenCalledWith( + 'Review these files', + 'child-123', + 'Deep review started', + undefined, + undefined, + { + userMessageMetadata: { + deepReviewRunManifest: runManifest, + }, + }, + ); + }); + it('throws and does not cleanup when createBtwChildSession fails', async () => { mockCreateBtwChildSession.mockRejectedValue(new Error('Session creation failed')); diff --git a/src/web-ui/src/flow_chat/services/DeepReviewService.ts b/src/web-ui/src/flow_chat/services/DeepReviewService.ts index c25d1d221..880403bd6 100644 --- 
a/src/web-ui/src/flow_chat/services/DeepReviewService.ts +++ b/src/web-ui/src/flow_chat/services/DeepReviewService.ts @@ -1,4 +1,10 @@ -import { agentAPI } from '@/infrastructure/api'; +import { agentAPI, gitAPI } from '@/infrastructure/api'; +import type { + GitChangedFile, + GitChangedFilesParams, + GitDiffParams, + GitStatus, +} from '@/infrastructure/api/service-api/GitAPI'; import { createLogger } from '@/shared/utils/logger'; import { createBtwChildSession } from './BtwThreadService'; import { closeBtwSessionInAuxPane, openBtwSessionInAuxPane } from './openBtwSession'; @@ -8,8 +14,19 @@ import { insertReviewSessionSummaryMarker } from './ReviewSessionMarkerService'; import { buildEffectiveReviewTeamManifest, buildReviewTeamPromptBlock, + loadDefaultReviewTeam, + loadReviewTeamProjectStrategyOverride, + loadReviewTeamRateLimitStatus, prepareDefaultReviewTeamForLaunch, + type ReviewTeamChangeStats, + type ReviewTeamRunManifest, } from '@/shared/services/reviewTeamService'; +import { + classifyReviewTargetFromFiles, + createUnknownReviewTargetClassification, + normalizeReviewPath, + type ReviewTargetClassification, +} from '@/shared/services/reviewTargetClassifier'; import { DEEP_REVIEW_COMMAND_RE } from '../utils/deepReviewConstants'; import { classifyLaunchError } from '../utils/deepReviewExperience'; @@ -24,6 +41,17 @@ interface LaunchDeepReviewSessionParams { displayMessage: string; childSessionName?: string; requestedFiles?: string[]; + runManifest?: ReviewTeamRunManifest; +} + +export interface DeepReviewLaunchPrompt { + prompt: string; + runManifest: ReviewTeamRunManifest; +} + +interface ResolvedDeepReviewTarget { + target: ReviewTargetClassification; + changeStats: ReviewTeamChangeStats; } type DeepReviewLaunchStep = @@ -220,24 +248,294 @@ function getDeepReviewCommandFocus(commandText: string): string { return commandText.trim().replace(/^\/DeepReview\b/, '').trim(); } -export async function buildDeepReviewPromptFromSessionFiles( +const 
EXPLICIT_REVIEW_FILE_EXTENSIONS = new Set([ + '.ts', + '.tsx', + '.js', + '.jsx', + '.rs', + '.json', + '.scss', + '.css', + '.md', + '.toml', + '.yaml', + '.yml', +]); + +function cleanPotentialFileToken(token: string): string { + return token + .trim() + .replace(/^[`"']+/, '') + .replace(/[`"',;:]+$/, ''); +} + +function getPathExtension(path: string): string { + const lastSlash = path.lastIndexOf('/'); + const lastDot = path.lastIndexOf('.'); + if (lastDot <= lastSlash) { + return ''; + } + return path.slice(lastDot); +} + +function looksLikeExplicitReviewPath(token: string): boolean { + const normalizedPath = normalizeReviewPath(token); + return ( + normalizedPath.includes('/') && + !normalizedPath.startsWith('-') && + EXPLICIT_REVIEW_FILE_EXTENSIONS.has(getPathExtension(normalizedPath)) + ); +} + +function extractExplicitReviewFilePaths(commandFocus: string): string[] { + const paths = commandFocus + .split(/\s+/) + .map(cleanPotentialFileToken) + .filter(Boolean) + .filter(looksLikeExplicitReviewPath); + + return Array.from(new Set(paths)); +} + +function parseSlashCommandGitTarget(commandFocus: string): GitChangedFilesParams | null { + const tokens = commandFocus + .split(/\s+/) + .map(cleanPotentialFileToken) + .filter(Boolean); + + const commitKeywordIndex = tokens.findIndex((token) => token.toLowerCase() === 'commit'); + const commitRef = commitKeywordIndex >= 0 ? 
tokens[commitKeywordIndex + 1] : undefined; + if (commitRef && !commitRef.startsWith('-')) { + return { + source: `${commitRef}^`, + target: commitRef, + }; + } + + const rangeToken = tokens.find((token) => { + if (token.startsWith('-') || !token.includes('..')) { + return false; + } + + const parts = token.split('..'); + return parts.length === 2 && Boolean(parts[0]) && Boolean(parts[1]); + }); + + if (!rangeToken) { + return null; + } + + const [source, target] = rangeToken.split('..'); + return { source, target }; +} + +function collectChangedFilePaths(changedFiles: GitChangedFile[]): string[] { + return Array.from( + new Set( + changedFiles + .flatMap((file) => [file.path, file.old_path]) + .filter((path): path is string => Boolean(path)), + ), + ); +} + +function collectWorkspaceDiffFilePaths(status: GitStatus): string[] { + return Array.from( + new Set([ + ...status.staged.map((file) => file.path), + ...status.unstaged.map((file) => file.path), + ...status.untracked, + ...status.conflicts, + ].filter(Boolean)), + ); +} + +function countReviewTargetFiles(target: ReviewTargetClassification): number { + return target.files.filter((file) => !file.excluded).length; +} + +function buildUnknownChangeStats(target: ReviewTargetClassification): ReviewTeamChangeStats { + return { + fileCount: countReviewTargetFiles(target), + lineCountSource: 'unknown', + }; +} + +function countChangedLinesFromUnifiedDiff(diff: string): number | undefined { + if (!diff.trim()) { + return undefined; + } + + let changedLines = 0; + for (const line of diff.split(/\r?\n/)) { + if ( + (line.startsWith('+') && !/^\+\+\+\s/.test(line)) || + (line.startsWith('-') && !/^---\s/.test(line)) + ) { + changedLines += 1; + } + } + + return changedLines; +} + +function buildDiffChangeStats( + target: ReviewTargetClassification, + totalLinesChanged: number | undefined, +): ReviewTeamChangeStats { + if (totalLinesChanged === undefined) { + return buildUnknownChangeStats(target); + } + + return { + 
fileCount: countReviewTargetFiles(target), + totalLinesChanged, + lineCountSource: 'diff_stat', + }; +} + +async function resolveGitDiffChangeStats( + workspacePath: string, + params: GitDiffParams, + target: ReviewTargetClassification, +): Promise { + try { + const diff = await gitAPI.getDiff(workspacePath, params); + return buildDiffChangeStats(target, countChangedLinesFromUnifiedDiff(diff)); + } catch (error) { + log.warn('Failed to resolve Git diff stats for Deep Review target', { + workspacePath, + params, + error, + }); + return buildUnknownChangeStats(target); + } +} + +async function resolveWorkspaceDiffChangeStats( + workspacePath: string, + target: ReviewTargetClassification, +): Promise { + return resolveGitDiffChangeStats(workspacePath, { source: 'HEAD' }, target); +} + +async function resolveSlashCommandReviewTarget( + commandFocus: string, + workspacePath?: string, +): Promise { + const explicitFilePaths = extractExplicitReviewFilePaths(commandFocus); + if (explicitFilePaths.length > 0) { + const target = classifyReviewTargetFromFiles( + explicitFilePaths, + 'slash_command_explicit_files', + ); + return { target, changeStats: buildUnknownChangeStats(target) }; + } + + const gitTarget = parseSlashCommandGitTarget(commandFocus); + if (gitTarget) { + if (!workspacePath) { + const target = createUnknownReviewTargetClassification('slash_command_git_ref'); + return { target, changeStats: buildUnknownChangeStats(target) }; + } + + try { + const changedFiles = await gitAPI.getChangedFiles(workspacePath, gitTarget); + const target = classifyReviewTargetFromFiles( + collectChangedFilePaths(changedFiles), + 'slash_command_git_ref', + ); + const changeStats = await resolveGitDiffChangeStats( + workspacePath, + gitTarget, + target, + ); + return { target, changeStats }; + } catch (error) { + log.warn('Failed to resolve Git target for Deep Review target', { + workspacePath, + gitTarget, + error, + }); + const target = 
createUnknownReviewTargetClassification('slash_command_git_ref'); + return { target, changeStats: buildUnknownChangeStats(target) }; + } + } + + if (!commandFocus && workspacePath) { + try { + const status = await gitAPI.getStatus(workspacePath); + const target = classifyReviewTargetFromFiles( + collectWorkspaceDiffFilePaths(status), + 'workspace_diff', + ); + const changeStats = await resolveWorkspaceDiffChangeStats( + workspacePath, + target, + ); + return { target, changeStats }; + } catch (error) { + log.warn('Failed to resolve workspace diff for Deep Review target', { + workspacePath, + error, + }); + } + } + + const target = createUnknownReviewTargetClassification( + commandFocus ? 'manual_prompt' : 'unknown', + ); + return { target, changeStats: buildUnknownChangeStats(target) }; +} + +async function buildReviewTeamManifestWithRuntimeSignals( + team: Parameters[0], + options: Parameters[1], +): Promise { + const manifestOptions = options ?? {}; + const [rateLimitStatus, strategyOverride] = await Promise.all([ + loadReviewTeamRateLimitStatus().catch((error) => { + log.warn('Failed to load Deep Review rate limit status', { error }); + return null; + }), + manifestOptions.workspacePath + ? loadReviewTeamProjectStrategyOverride(manifestOptions.workspacePath).catch((error) => { + log.warn('Failed to load Deep Review project strategy override', { error }); + return undefined; + }) + : Promise.resolve(undefined), + ]); + + return buildEffectiveReviewTeamManifest(team, { + ...manifestOptions, + ...(rateLimitStatus ? { rateLimitStatus } : {}), + ...(strategyOverride ? 
{ strategyOverride } : {}), + }); +} + +export async function buildDeepReviewLaunchFromSessionFiles( filePaths: string[], extraContext?: string, workspacePath?: string, -): Promise { +): Promise { + const target = classifyReviewTargetFromFiles(filePaths, 'session_files'); + const changeStats = buildUnknownChangeStats(target); const team = await prepareDefaultReviewTeamForLaunch(workspacePath, { reviewTargetFilePaths: filePaths, + target, }); - const manifest = buildEffectiveReviewTeamManifest(team, { + const manifest = await buildReviewTeamManifestWithRuntimeSignals(team, { workspacePath, - reviewTargetFilePaths: filePaths, + target, + changeStats, }); const fileList = formatFileList(filePaths); const contextBlock = extraContext?.trim() ? `User-provided focus:\n${extraContext.trim()}` : 'User-provided focus:\nNone.'; - return [ + const prompt = [ 'Run a deep code review using the parallel Code Review Team.', 'Review scope: ONLY inspect the following files modified in this session.', fileList, @@ -245,21 +543,54 @@ export async function buildDeepReviewPromptFromSessionFiles( buildReviewTeamPromptBlock(team, manifest), 'Keep the scope tight to the listed files unless a directly-related dependency must be read to confirm a finding.', ].join('\n\n'); + + return { prompt, runManifest: manifest }; } -export async function buildDeepReviewPromptFromSlashCommand( - commandText: string, +export async function buildDeepReviewPreviewFromSessionFiles( + filePaths: string[], + workspacePath?: string, +): Promise { + const team = await loadDefaultReviewTeam(workspacePath); + const target = classifyReviewTargetFromFiles(filePaths, 'session_files'); + const changeStats = buildUnknownChangeStats(target); + return buildReviewTeamManifestWithRuntimeSignals(team, { + workspacePath, + target, + changeStats, + }); +} + +export async function buildDeepReviewPromptFromSessionFiles( + filePaths: string[], + extraContext?: string, workspacePath?: string, ): Promise { + return (await 
buildDeepReviewLaunchFromSessionFiles( + filePaths, + extraContext, + workspacePath, + )).prompt; +} + +export async function buildDeepReviewLaunchFromSlashCommand( + commandText: string, + workspacePath?: string, +): Promise { const team = await prepareDefaultReviewTeamForLaunch(workspacePath); - const manifest = buildEffectiveReviewTeamManifest(team, { workspacePath }); const trimmed = commandText.trim(); const extraContext = getDeepReviewCommandFocus(trimmed); + const { target, changeStats } = await resolveSlashCommandReviewTarget(extraContext, workspacePath); + const manifest = await buildReviewTeamManifestWithRuntimeSignals(team, { + workspacePath, + target, + changeStats, + }); const contextBlock = extraContext ? `User-provided focus or target:\n${extraContext}` : 'User-provided focus or target:\nNone. If no explicit target is given, review the current workspace changes relative to HEAD.'; - return [ + const prompt = [ 'Run a deep code review using the parallel Code Review Team.', 'Interpret the user command below to determine the review target.', 'If the user mentions a commit, ref, branch, or explicit file set, review that target.', @@ -268,6 +599,30 @@ export async function buildDeepReviewPromptFromSlashCommand( contextBlock, buildReviewTeamPromptBlock(team, manifest), ].join('\n\n'); + + return { prompt, runManifest: manifest }; +} + +export async function buildDeepReviewPreviewFromSlashCommand( + commandText: string, + workspacePath?: string, +): Promise { + const team = await loadDefaultReviewTeam(workspacePath); + const trimmed = commandText.trim(); + const extraContext = getDeepReviewCommandFocus(trimmed); + const { target, changeStats } = await resolveSlashCommandReviewTarget(extraContext, workspacePath); + return buildReviewTeamManifestWithRuntimeSignals(team, { + workspacePath, + target, + changeStats, + }); +} + +export async function buildDeepReviewPromptFromSlashCommand( + commandText: string, + workspacePath?: string, +): Promise { + return 
(await buildDeepReviewLaunchFromSlashCommand(commandText, workspacePath)).prompt; } export async function launchDeepReviewSession({ @@ -277,6 +632,7 @@ export async function launchDeepReviewSession({ displayMessage, childSessionName = 'Deep review', requestedFiles = [], + runManifest, }: LaunchDeepReviewSessionParams): Promise<{ childSessionId: string }> { let childSessionId: string | null = null; let launchStep: DeepReviewLaunchStep = 'create_child_session'; @@ -293,6 +649,7 @@ export async function launchDeepReviewSession({ autoCompact: true, enableContextCompression: true, addMarker: false, + deepReviewRunManifest: runManifest, }); childSessionId = created.childSessionId; @@ -306,11 +663,26 @@ export async function launchDeepReviewSession({ launchStep = 'send_start_message'; const flowChatManager = FlowChatManager.getInstance(); - await flowChatManager.sendMessage( - prompt, - childSessionId, - displayMessage, - ); + if (runManifest) { + await flowChatManager.sendMessage( + prompt, + childSessionId, + displayMessage, + undefined, + undefined, + { + userMessageMetadata: { + deepReviewRunManifest: runManifest, + }, + }, + ); + } else { + await flowChatManager.sendMessage( + prompt, + childSessionId, + displayMessage, + ); + } insertReviewSessionSummaryMarker({ parentSessionId, diff --git a/src/web-ui/src/flow_chat/services/FlowChatManager.ts b/src/web-ui/src/flow_chat/services/FlowChatManager.ts index c90cd1c58..a7c050cb4 100644 --- a/src/web-ui/src/flow_chat/services/FlowChatManager.ts +++ b/src/web-ui/src/flow_chat/services/FlowChatManager.ts @@ -368,6 +368,7 @@ export class FlowChatManager { options?: { imageContexts?: import('@/infrastructure/api/service-api/ImageContextTypes').ImageContextData[]; imageDisplayData?: Array<{ id: string; name: string; dataUrl?: string; imagePath?: string; mimeType?: string }>; + userMessageMetadata?: Record; } ): Promise { const targetSessionId = sessionId || this.context.flowChatStore.getState().activeSessionId; diff --git 
a/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts index 638cdc83c..51d9cd2ed 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/EventHandlerModule.ts @@ -21,6 +21,7 @@ import { notificationService } from '../../../shared/notification-system/service import type { NotificationAction } from '../../../shared/notification-system/types'; import { createLogger } from '@/shared/utils/logger'; import type { + DeepReviewQueueStateChangedEvent, ImageAnalysisEvent, ModelRoundCompletedEvent, SessionModelAutoMigratedEvent, @@ -40,6 +41,8 @@ import { type AiErrorPresentation, type AiErrorDetail, } from '@/shared/ai-errors/aiErrorPresenter'; +import { useReviewActionBarStore } from '../../store/deepReviewActionBarStore'; +import { buildDeepReviewCapacityQueueStateFromEvent } from '../../utils/deepReviewQueueStateEvents'; const pendingImageAnalysisTurns = new Map(); // `restore_session` and assistant bootstrap can race on the same historical @@ -107,6 +110,34 @@ function logDroppedDataEvent( }); } +function handleDeepReviewQueueStateChanged(event: DeepReviewQueueStateChangedEvent): void { + const store = FlowChatStore.getInstance(); + const session = store.getState().sessions.get(event.sessionId); + const queueState = buildDeepReviewCapacityQueueStateFromEvent(event, session); + if (!queueState) { + return; + } + + const actionBar = useReviewActionBarStore.getState(); + if (actionBar.childSessionId === event.sessionId) { + actionBar.setCapacityQueueState(queueState); + if (actionBar.phase === 'idle') { + actionBar.updatePhase('review_waiting_capacity'); + } + return; + } + + if (queueState.status === 'running' || queueState.status === 'capacity_skipped') { + return; + } + + actionBar.showCapacityQueueBar({ + childSessionId: event.sessionId, + parentSessionId: session?.parentSessionId ?? 
null, + capacityQueueState: queueState, + }); +} + function attachSubagentSessionToParentTool( parentInfo: SubagentParentInfo, subagentSessionId: string, @@ -392,6 +423,9 @@ export async function initializeEventListeners( onToolEvent: (event) => { handleToolEvent(context, event, onTodoWriteResult); }, + onDeepReviewQueueStateChanged: (event) => { + handleDeepReviewQueueStateChanged(event); + }, onModelRoundStarted: (event) => { handleModelRoundStart(context, event); }, diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/MessageModule.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/MessageModule.ts index 34cdb3260..7f08be09e 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/MessageModule.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/MessageModule.ts @@ -122,6 +122,7 @@ export async function sendMessage( * Callers should not set this directly. */ bypassPendingQueue?: boolean; + userMessageMetadata?: Record; } ): Promise { const session = context.flowChatStore.getState().sessions.get(sessionId); @@ -235,6 +236,7 @@ export async function sendMessage( timestamp: Date.now(), hasImages, images: options?.imageDisplayData, + metadata: options?.userMessageMetadata, }, modelRounds: [], // Images are attached for multimodal primary models or reduced to text placeholders for text-only models. 
@@ -304,6 +306,8 @@ export async function sendMessage( originalUserInput: displayMessage || message, turnId: dialogTurnId, workspacePath, + imageContexts: options?.imageContexts, + userMessageMetadata: options?.userMessageMetadata, remoteConnectionId: updatedSession.remoteConnectionId, remoteSshHost: updatedSession.remoteSshHost, }); @@ -317,6 +321,7 @@ export async function sendMessage( agentType: currentAgentType, workspacePath, imageContexts: options?.imageContexts, + userMessageMetadata: options?.userMessageMetadata, }); } catch (error: any) { if (error?.message?.includes('Session does not exist') || error?.message?.includes('Not found')) { diff --git a/src/web-ui/src/flow_chat/services/flow-chat-manager/TextChunkModule.test.ts b/src/web-ui/src/flow_chat/services/flow-chat-manager/TextChunkModule.test.ts index 86dff03ad..4881667a5 100644 --- a/src/web-ui/src/flow_chat/services/flow-chat-manager/TextChunkModule.test.ts +++ b/src/web-ui/src/flow_chat/services/flow-chat-manager/TextChunkModule.test.ts @@ -54,6 +54,8 @@ function makeContext(session: Session): any { turnSaveInFlight: new Map(), turnSavePending: new Set(), runtimeStatusTimers: new Map(), + userCancelledSessionIds: new Set(), + currentWorkspacePath: null, }; } diff --git a/src/web-ui/src/flow_chat/store/FlowChatStore.ts b/src/web-ui/src/flow_chat/store/FlowChatStore.ts index 192f3a7c1..c0ff90b84 100644 --- a/src/web-ui/src/flow_chat/store/FlowChatStore.ts +++ b/src/web-ui/src/flow_chat/store/FlowChatStore.ts @@ -286,6 +286,7 @@ export class FlowChatStore { sessionKind?: SessionKind; btwOrigin?: Session['btwOrigin']; isTransient?: boolean; + deepReviewRunManifest?: Session['deepReviewRunManifest']; }, remoteConnectionId?: string, remoteSshHost?: string @@ -324,6 +325,7 @@ export class FlowChatStore { sessionKind: relationship.sessionKind, btwThreads: [], btwOrigin: relationship.btwOrigin, + deepReviewRunManifest: meta?.deepReviewRunManifest, isTransient: meta?.isTransient ?? 
false, }; @@ -1768,6 +1770,7 @@ export class FlowChatStore { btwOrigin: relationship.btwOrigin, hasUnreadCompletion: metadata.unreadCompletion, needsUserAttention: metadata.needsUserAttention, + deepReviewRunManifest: metadata.deepReviewRunManifest, isTransient: false, }; diff --git a/src/web-ui/src/flow_chat/store/deepReviewActionBarStore.test.ts b/src/web-ui/src/flow_chat/store/deepReviewActionBarStore.test.ts index f52e9e29b..f6884ddb0 100644 --- a/src/web-ui/src/flow_chat/store/deepReviewActionBarStore.test.ts +++ b/src/web-ui/src/flow_chat/store/deepReviewActionBarStore.test.ts @@ -188,6 +188,97 @@ describe('deepReviewActionBarStore', () => { }); }); + describe('capacity queue controls', () => { + it('can bind a visible queue state before the review report is available', () => { + bar().showCapacityQueueBar({ + childSessionId: 'child-1', + parentSessionId: 'parent-1', + capacityQueueState: { + status: 'queued_for_capacity', + queuedReviewerCount: 2, + activeReviewerCount: 1, + }, + }); + + expect(bar().childSessionId).toBe('child-1'); + expect(bar().reviewMode).toBe('deep'); + expect(bar().phase).toBe('review_waiting_capacity'); + expect(bar().reviewData).toBeNull(); + expect(bar().capacityQueueState?.queuedReviewerCount).toBe(2); + }); + + it('pauses and resumes capacity queue state without clearing completed remediation', () => { + bar().showActionBar({ + childSessionId: 'child-1', + parentSessionId: 'parent-1', + reviewData: { + summary: { recommended_action: 'request_changes' }, + remediation_plan: ['Fix issue 1', 'Fix issue 2'], + }, + completedRemediationIds: new Set(['remediation-0']), + }); + + const queueActions = bar() as unknown as { + setCapacityQueueState: (state: { + status: string; + queuedReviewerCount: number; + optionalReviewerCount: number; + }) => void; + pauseCapacityQueue: () => void; + continueCapacityQueue: () => void; + }; + + queueActions.setCapacityQueueState({ + status: 'queued_for_capacity', + queuedReviewerCount: 2, + 
optionalReviewerCount: 1, + }); + queueActions.pauseCapacityQueue(); + + expect((bar() as unknown as { capacityQueueState: { status: string } }).capacityQueueState.status).toBe('paused_by_user'); + expect(bar().completedRemediationIds.has('remediation-0')).toBe(true); + + queueActions.continueCapacityQueue(); + + expect((bar() as unknown as { capacityQueueState: { status: string } }).capacityQueueState.status).toBe('queued_for_capacity'); + expect(bar().completedRemediationIds.has('remediation-0')).toBe(true); + }); + + it('can skip optional queued reviewers without cancelling required queued work', () => { + bar().showActionBar({ + childSessionId: 'child-1', + parentSessionId: 'parent-1', + reviewData: { + summary: { recommended_action: 'request_changes' }, + remediation_plan: ['Fix issue 1'], + }, + }); + + const queueActions = bar() as unknown as { + setCapacityQueueState: (state: { + status: string; + queuedReviewerCount: number; + optionalReviewerCount: number; + }) => void; + skipOptionalQueuedReviewers: () => void; + }; + + queueActions.setCapacityQueueState({ + status: 'queued_for_capacity', + queuedReviewerCount: 3, + optionalReviewerCount: 2, + }); + queueActions.skipOptionalQueuedReviewers(); + + const state = (bar() as unknown as { + capacityQueueState: { status: string; queuedReviewerCount: number; optionalReviewerCount: number }; + }).capacityQueueState; + expect(state.status).toBe('queued_for_capacity'); + expect(state.queuedReviewerCount).toBe(1); + expect(state.optionalReviewerCount).toBe(0); + }); + }); + describe('toggleRemediation with completed items', () => { it('does not allow toggling completed items', () => { bar().showActionBar({ diff --git a/src/web-ui/src/flow_chat/store/deepReviewActionBarStore.ts b/src/web-ui/src/flow_chat/store/deepReviewActionBarStore.ts index be6a2d855..cdf984a29 100644 --- a/src/web-ui/src/flow_chat/store/deepReviewActionBarStore.ts +++ b/src/web-ui/src/flow_chat/store/deepReviewActionBarStore.ts @@ -28,6 +28,7 @@ 
export type ReviewActionPhase = | 'fix_failed' | 'fix_timeout' | 'fix_interrupted' + | 'review_waiting_capacity' | 'review_interrupted' | 'resume_blocked' | 'resume_running' @@ -36,6 +37,34 @@ export type ReviewActionPhase = export type DeepReviewActionPhase = ReviewActionPhase; +export type DeepReviewCapacityQueueStatus = + | 'queued_for_capacity' + | 'paused_by_user' + | 'running' + | 'capacity_skipped'; + +export type DeepReviewCapacityQueueAction = + | 'pause' + | 'continue' + | 'cancel' + | 'skip_optional'; + +export interface DeepReviewCapacityQueueState { + toolId?: string; + subagentType?: string; + dialogTurnId?: string; + status: DeepReviewCapacityQueueStatus; + queuedReviewerCount: number; + activeReviewerCount?: number; + effectiveParallelInstances?: number; + optionalReviewerCount?: number; + queueElapsedMs?: number; + runElapsedMs?: number; + maxQueueWaitSeconds?: number; + sessionConcurrencyHigh?: boolean; + controlMode?: 'local' | 'session_stop_only' | 'backend'; +} + export interface ReviewActionBarState { /** Which child session this bar belongs to */ childSessionId: string | null; @@ -73,6 +102,10 @@ export interface ReviewActionBarState { remainingFixIds: string[]; /** User's option choice for needs_decision items: map of item id -> option index */ decisionSelections: Record; + /** Visible Deep Review capacity queue state. Automatic queue execution is not enabled here. 
*/ + capacityQueueState: DeepReviewCapacityQueueState | null; + /** Last local queue-control action selected by the user */ + lastCapacityQueueAction: DeepReviewCapacityQueueAction | null; // ---- actions ---- showActionBar: (params: { @@ -89,6 +122,11 @@ export interface ReviewActionBarState { interruption: DeepReviewInterruption; phase?: Extract; }) => void; + showCapacityQueueBar: (params: { + childSessionId: string; + parentSessionId: string | null; + capacityQueueState: DeepReviewCapacityQueueState; + }) => void; updatePhase: (phase: ReviewActionPhase, errorMessage?: string | null) => void; toggleRemediation: (id: string) => void; toggleAllRemediation: () => void; @@ -100,6 +138,11 @@ export interface ReviewActionBarState { minimize: () => void; restore: () => void; skipRemainingFixes: () => void; + setCapacityQueueState: (state: DeepReviewCapacityQueueState | null) => void; + pauseCapacityQueue: () => void; + continueCapacityQueue: () => void; + cancelQueuedReviewers: () => void; + skipOptionalQueuedReviewers: () => void; setDecisionSelection: (itemId: string, optionIndex: number) => void; reset: () => void; } @@ -125,6 +168,8 @@ const initialState = { fixingRemediationIds: new Set(), remainingFixIds: [] as string[], decisionSelections: {} as Record, + capacityQueueState: null as DeepReviewCapacityQueueState | null, + lastCapacityQueueAction: null as DeepReviewCapacityQueueAction | null, }; export const useReviewActionBarStore = create((set, get) => ({ @@ -164,6 +209,8 @@ export const useReviewActionBarStore = create((set, get) = fixingRemediationIds: new Set(), remainingFixIds: [], decisionSelections: {}, + capacityQueueState: null, + lastCapacityQueueAction: null, }); }, @@ -187,6 +234,35 @@ export const useReviewActionBarStore = create((set, get) = fixingRemediationIds: new Set(), remainingFixIds: [], decisionSelections: {}, + capacityQueueState: null, + lastCapacityQueueAction: null, + }); + }, + + showCapacityQueueBar: ({ childSessionId, parentSessionId, 
capacityQueueState }) => { + set({ + childSessionId, + parentSessionId, + reviewMode: 'deep', + reviewData: null, + remediationItems: [], + selectedRemediationIds: new Set(), + phase: 'review_waiting_capacity', + dismissed: false, + minimized: false, + activeAction: null, + lastSubmittedAction: null, + customInstructions: '', + errorMessage: null, + interruption: null, + completedRemediationIds: get().childSessionId === childSessionId + ? get().completedRemediationIds + : new Set(), + fixingRemediationIds: new Set(), + remainingFixIds: [], + decisionSelections: {}, + capacityQueueState, + lastCapacityQueueAction: null, }); }, @@ -283,6 +359,57 @@ export const useReviewActionBarStore = create((set, get) = activeAction: null, lastSubmittedAction: null, }), + setCapacityQueueState: (capacityQueueState) => set({ + capacityQueueState, + lastCapacityQueueAction: null, + }), + pauseCapacityQueue: () => { + const current = get().capacityQueueState; + if (!current || current.status === 'capacity_skipped') return; + set({ + capacityQueueState: { ...current, status: 'paused_by_user' }, + lastCapacityQueueAction: 'pause', + }); + }, + continueCapacityQueue: () => { + const current = get().capacityQueueState; + if (!current || current.status !== 'paused_by_user') return; + set({ + capacityQueueState: { ...current, status: 'queued_for_capacity' }, + lastCapacityQueueAction: 'continue', + }); + }, + cancelQueuedReviewers: () => { + const current = get().capacityQueueState; + if (!current) return; + set({ + capacityQueueState: { + ...current, + status: 'capacity_skipped', + queuedReviewerCount: 0, + optionalReviewerCount: 0, + }, + lastCapacityQueueAction: 'cancel', + }); + }, + skipOptionalQueuedReviewers: () => { + const current = get().capacityQueueState; + if (!current) return; + const optionalCount = current.optionalReviewerCount ?? 
0; + if (optionalCount <= 0) return; + + const skippedCount = Math.min(optionalCount, current.queuedReviewerCount); + const queuedReviewerCount = Math.max(0, current.queuedReviewerCount - skippedCount); + set({ + capacityQueueState: { + ...current, + status: queuedReviewerCount > 0 ? current.status : 'capacity_skipped', + queuedReviewerCount, + optionalReviewerCount: 0, + }, + lastCapacityQueueAction: 'skip_optional', + }); + }, reset: () => set({ ...initialState, selectedRemediationIds: new Set() }), })); diff --git a/src/web-ui/src/flow_chat/tool-cards/CodeReviewReportExportActions.tsx b/src/web-ui/src/flow_chat/tool-cards/CodeReviewReportExportActions.tsx index b8efcd7f6..0e0173c76 100644 --- a/src/web-ui/src/flow_chat/tool-cards/CodeReviewReportExportActions.tsx +++ b/src/web-ui/src/flow_chat/tool-cards/CodeReviewReportExportActions.tsx @@ -9,9 +9,11 @@ import { type CodeReviewReportData, type CodeReviewReportMarkdownLabels, } from '../utils/codeReviewReport'; +import type { ReviewTeamRunManifest } from '@/shared/services/reviewTeamService'; interface CodeReviewReportExportActionsProps { reviewData: CodeReviewReportData; + runManifest?: ReviewTeamRunManifest; } function timestampForFileName(): string { @@ -38,6 +40,7 @@ function downloadMarkdownInBrowser(fileName: string, markdown: string): void { export const CodeReviewReportExportActions: React.FC = ({ reviewData, + runManifest, }) => { const { t } = useTranslation('flow-chat'); const [copied, setCopied] = useState(false); @@ -48,16 +51,25 @@ export const CodeReviewReportExportActions: React.FC formatCodeReviewReportMarkdown(reviewData, markdownLabels), - [markdownLabels, reviewData], + () => formatCodeReviewReportMarkdown( + reviewData, + markdownLabels, + { runManifest }, + ), + [markdownLabels, reviewData, runManifest], ); const fileName = useMemo(() => { diff --git a/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.scss b/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.scss index 
87eb6f1e6..db4e83f72 100644 --- a/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.scss +++ b/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.scss @@ -79,6 +79,72 @@ flex-direction: column; gap: 10px; + .review-reliability-status { + display: grid; + gap: 7px; + padding: 2px 0 8px; + border-bottom: 1px dashed var(--border-base); + } + + .review-reliability-status__title { + color: var(--color-text-muted); + font-size: 11px; + font-weight: 650; + } + + .review-reliability-status__items { + display: flex; + flex-wrap: wrap; + gap: 6px; + } + + .review-reliability-status__item { + display: grid; + grid-template-columns: auto minmax(0, 1fr); + align-items: start; + gap: 6px; + max-width: 100%; + padding: 6px 8px; + border-left: 2px solid var(--color-accent-500, #60a5fa); + border-radius: 4px; + background: color-mix(in srgb, var(--color-bg-elevated) 70%, transparent); + color: var(--color-text-secondary); + font-size: 11px; + line-height: 1.4; + + &--warning { + border-left-color: var(--color-warning, #f59e0b); + } + + &--action { + border-left-color: var(--color-danger, #ef4444); + } + } + + .review-reliability-status__icon { + display: inline-flex; + align-items: center; + justify-content: center; + margin-top: 1px; + color: var(--color-text-muted); + } + + .review-reliability-status__text { + display: grid; + gap: 1px; + min-width: 0; + } + + .review-reliability-status__label { + color: var(--color-text-primary); + font-weight: 650; + } + + .review-reliability-status__detail { + color: var(--color-text-muted); + overflow-wrap: anywhere; + } + /* ---------- Summary section — vertical layout ---------- */ .review-summary { padding-bottom: 10px; @@ -278,6 +344,140 @@ font-style: italic; } + .run-manifest { + display: grid; + gap: 10px; + } + + .run-manifest__facts { + display: grid; + grid-template-columns: repeat(3, minmax(0, 1fr)); + gap: 6px; + } + + .run-manifest__fact { + display: grid; + gap: 3px; + min-width: 0; + padding: 8px; + border: 1px solid 
var(--border-base); + border-radius: 6px; + background: var(--color-bg-elevated, rgba(255, 255, 255, 0.03)); + + span { + color: var(--color-text-muted); + font-size: 11px; + } + + strong { + overflow: hidden; + color: var(--color-text-primary); + font-size: 12px; + font-weight: 650; + text-overflow: ellipsis; + white-space: nowrap; + } + } + + .run-manifest__group { + display: grid; + gap: 6px; + } + + .run-manifest__group-title { + color: var(--color-text-muted); + font-size: 11px; + font-weight: 650; + + &--warning { + color: var(--color-warning, #f59e0b); + } + } + + .run-manifest__chips { + display: flex; + flex-wrap: wrap; + gap: 6px; + } + + .run-manifest__chip { + display: inline-grid; + max-width: 100%; + gap: 2px; + padding: 6px 8px; + border: 1px solid var(--border-base); + border-radius: 6px; + background: var(--color-bg-elevated, rgba(255, 255, 255, 0.03)); + } + + .run-manifest__chip-name { + overflow: hidden; + color: var(--color-text-primary); + font-size: 12px; + font-weight: 600; + text-overflow: ellipsis; + white-space: nowrap; + } + + .run-manifest__chip-meta { + overflow: hidden; + color: var(--color-text-muted); + font-size: 11px; + text-overflow: ellipsis; + white-space: nowrap; + } + + .run-manifest__skipped-list { + display: grid; + gap: 5px; + margin: 0; + padding: 0; + list-style: none; + + li { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + align-items: center; + gap: 8px; + min-width: 0; + padding: 6px 8px; + border: 1px solid var(--border-base); + border-radius: 6px; + background: var(--color-bg-elevated, rgba(255, 255, 255, 0.03)); + color: var(--color-text-secondary); + font-size: 12px; + } + + span { + overflow: hidden; + min-width: 0; + text-overflow: ellipsis; + white-space: nowrap; + } + + strong { + color: var(--color-text-muted); + font-size: 11px; + font-weight: 650; + white-space: nowrap; + } + } + + @media (max-width: 560px) { + .run-manifest__facts { + grid-template-columns: 1fr; + } + + 
.run-manifest__skipped-list li { + grid-template-columns: 1fr; + align-items: start; + } + + .run-manifest__skipped-list strong { + white-space: normal; + } + } + .team-list { display: flex; flex-direction: column; diff --git a/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.test.tsx b/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.test.tsx new file mode 100644 index 000000000..d66a5eb8b --- /dev/null +++ b/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.test.tsx @@ -0,0 +1,381 @@ +import React, { act } from 'react'; +import { createRoot, type Root } from 'react-dom/client'; +import { JSDOM } from 'jsdom'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { CodeReviewToolCard } from './CodeReviewToolCard'; +import type { FlowToolItem, ToolCardConfig } from '../types/flow-chat'; +import type { ReviewTeamManifestMember, ReviewTeamRunManifest } from '@/shared/services/reviewTeamService'; + +globalThis.IS_REACT_ACT_ENVIRONMENT = true; + +const flowState = vi.hoisted(() => ({ + current: { + sessions: new Map(), + activeSessionId: null, + }, + listeners: new Set<(state: { sessions: Map; activeSessionId: string | null }) => void>(), +})); + +vi.mock('react-i18next', () => ({ + initReactI18next: { + type: '3rdParty', + init: vi.fn(), + }, + useTranslation: () => ({ + t: (key: string, options?: Record) => { + const value = typeof options?.defaultValue === 'string' ? options.defaultValue : key; + return value.replace(/\{\{(\w+)\}\}/g, (_match, name) => String(options?.[name] ?? 
'')); + }, + }), +})); + +vi.mock('@/component-library', () => ({ + Tooltip: ({ children }: { children: React.ReactNode }) => <>{children}, +})); + +vi.mock('./CodeReviewReportExportActions', () => ({ + CodeReviewReportExportActions: () => null, +})); + +vi.mock('../store/FlowChatStore', () => ({ + flowChatStore: { + getState: () => flowState.current, + subscribe: (listener: (state: typeof flowState.current) => void) => { + flowState.listeners.add(listener); + return () => flowState.listeners.delete(listener); + }, + }, + FlowChatStore: { + getInstance: () => ({ + getState: () => flowState.current, + subscribe: (listener: (state: typeof flowState.current) => void) => { + flowState.listeners.add(listener); + return () => flowState.listeners.delete(listener); + }, + }), + }, +})); + +function buildManifestMember( + subagentId: string, + displayName: string, + source: ReviewTeamManifestMember['source'], + reason?: ReviewTeamManifestMember['reason'], +): ReviewTeamManifestMember { + return { + subagentId, + displayName, + roleName: displayName, + model: 'fast-model', + configuredModel: 'fast-model', + defaultModelSlot: 'fast', + strategyLevel: 'normal', + strategySource: 'team', + strategyDirective: 'Review the target.', + locked: source === 'core', + source, + subagentSource: source === 'extra' ? 'user' : 'builtin', + ...(reason ? { reason } : {}), + }; +} + +function buildManifest(): ReviewTeamRunManifest { + return { + reviewMode: 'deep', + workspacePath: 'C:/repo/project', + policySource: 'default-review-team-config', + target: { + source: 'session_files', + resolution: 'resolved', + tags: ['frontend'], + files: ['src/App.tsx'], + warnings: [], + }, + strategyLevel: 'normal', + strategyRecommendation: { + strategyLevel: 'deep', + score: 24, + rationale: 'Large/high-risk change (8 files, 900 lines; 2 security-sensitive files, 3 workspace areas). 
Deep review recommended.', + factors: { + fileCount: 8, + totalLinesChanged: 900, + lineCountSource: 'diff_stat', + securityFileCount: 2, + workspaceAreaCount: 3, + contractSurfaceChanged: true, + }, + }, + executionPolicy: { + reviewerTimeoutSeconds: 300, + judgeTimeoutSeconds: 240, + reviewerFileSplitThreshold: 20, + maxSameRoleInstances: 3, + }, + tokenBudget: { + mode: 'balanced', + estimatedReviewerCalls: 3, + maxReviewerCalls: 4, + maxExtraReviewers: 1, + largeDiffSummaryFirst: false, + skippedReviewerIds: ['CustomInvalid'], + warnings: [], + }, + coreReviewers: [ + buildManifestMember('ReviewBusinessLogic', 'Logic reviewer', 'core'), + ], + qualityGateReviewer: buildManifestMember('ReviewJudge', 'Quality inspector', 'core'), + enabledExtraReviewers: [ + buildManifestMember('CustomSecurity', 'Custom security reviewer', 'extra'), + ], + skippedReviewers: [ + buildManifestMember('ReviewFrontend', 'Frontend reviewer', 'core', 'not_applicable'), + buildManifestMember('CustomInvalid', 'Custom invalid reviewer', 'extra', 'invalid_tooling'), + ], + }; +} + +function notifyFlowState(): void { + for (const listener of flowState.listeners) { + listener(flowState.current); + } +} + +describe('CodeReviewToolCard', () => { + let dom: JSDOM; + let container: HTMLDivElement; + let root: Root; + + beforeEach(() => { + dom = new JSDOM('
', { + pretendToBeVisual: true, + url: 'http://localhost', + }); + vi.stubGlobal('window', dom.window); + vi.stubGlobal('document', dom.window.document); + vi.stubGlobal('navigator', dom.window.navigator); + vi.stubGlobal('HTMLElement', dom.window.HTMLElement); + vi.stubGlobal('CustomEvent', dom.window.CustomEvent); + + flowState.current = { + sessions: new Map([ + ['review-session', { id: 'review-session', deepReviewRunManifest: buildManifest() }], + ]), + activeSessionId: 'review-session', + }; + flowState.listeners.clear(); + container = dom.window.document.getElementById('root') as HTMLDivElement; + root = createRoot(container); + }); + + afterEach(() => { + act(() => { + root.unmount(); + }); + vi.unstubAllGlobals(); + dom.window.close(); + }); + + it('echoes the deep review run manifest from the review session', () => { + const toolItem: FlowToolItem = { + id: 'tool-1', + type: 'tool', + timestamp: Date.now(), + toolName: 'submit_code_review', + status: 'completed', + toolCall: { + id: 'call-1', + input: {}, + }, + toolResult: { + success: true, + result: { + review_mode: 'deep', + summary: { + overall_assessment: 'No validated issues.', + risk_level: 'low', + recommended_action: 'approve', + }, + issues: [], + reviewers: [], + }, + }, + }; + const config: ToolCardConfig = { + toolName: 'submit_code_review', + displayName: 'Code Review', + icon: 'REVIEW', + requiresConfirmation: false, + resultDisplayType: 'detailed', + }; + + act(() => { + root.render( + , + ); + }); + act(() => { + container.querySelector('.preview-toggle-btn')?.dispatchEvent( + new window.Event('click', { bubbles: true }), + ); + }); + + expect(container.textContent).toContain('Run manifest'); + expect(container.textContent).toContain('3 active'); + expect(container.textContent).toContain('2 skipped'); + + const manifestSectionButton = Array.from(container.querySelectorAll('button')) + .find((button) => button.textContent?.includes('Run manifest')); + + act(() => { + 
manifestSectionButton?.dispatchEvent(new window.Event('click', { bubbles: true })); + }); + + expect(container.textContent).toContain('Logic reviewer'); + expect(container.textContent).toContain('Quality inspector'); + expect(container.textContent).toContain('Custom security reviewer'); + expect(container.textContent).toContain('Frontend reviewer'); + expect(container.textContent).toContain('Not applicable to this target'); + expect(container.textContent).toContain('Custom invalid reviewer'); + expect(container.textContent).toContain('Configuration issue'); + expect(container.textContent).toContain('Recommended strategy'); + expect(container.textContent).toContain('deep'); + expect(container.textContent).toContain('Large/high-risk change'); + }); + + it('updates the manifest echo when session metadata arrives after render', () => { + flowState.current = { + sessions: new Map([ + ['review-session', { id: 'review-session' }], + ]), + activeSessionId: 'review-session', + }; + + const toolItem: FlowToolItem = { + id: 'tool-1', + type: 'tool', + timestamp: Date.now(), + toolName: 'submit_code_review', + status: 'completed', + toolCall: { + id: 'call-1', + input: {}, + }, + toolResult: { + success: true, + result: { + review_mode: 'deep', + summary: { + overall_assessment: 'No validated issues.', + risk_level: 'low', + recommended_action: 'approve', + }, + issues: [], + reviewers: [], + }, + }, + }; + const config: ToolCardConfig = { + toolName: 'submit_code_review', + displayName: 'Code Review', + icon: 'REVIEW', + requiresConfirmation: false, + resultDisplayType: 'detailed', + }; + + act(() => { + root.render( + , + ); + }); + act(() => { + container.querySelector('.preview-toggle-btn')?.dispatchEvent( + new window.Event('click', { bubbles: true }), + ); + }); + + expect(container.textContent).not.toContain('Run manifest'); + + act(() => { + flowState.current = { + sessions: new Map([ + ['review-session', { id: 'review-session', deepReviewRunManifest: buildManifest() 
}], + ]), + activeSessionId: 'review-session', + }; + notifyFlowState(); + }); + + expect(container.textContent).toContain('Run manifest'); + expect(container.textContent).toContain('3 active'); + }); + + it('renders compact reliability status when a reviewer timed out with partial evidence', () => { + const toolItem: FlowToolItem = { + id: 'tool-1', + type: 'tool', + timestamp: Date.now(), + toolName: 'submit_code_review', + status: 'completed', + toolCall: { + id: 'call-1', + input: {}, + }, + toolResult: { + success: true, + result: { + review_mode: 'deep', + summary: { + overall_assessment: 'Review completed with reduced confidence.', + risk_level: 'medium', + recommended_action: 'request_changes', + }, + issues: [], + reviewers: [ + { + name: 'Security Reviewer', + specialty: 'security', + status: 'partial_timeout', + summary: 'Timed out after producing partial evidence.', + partial_output: 'Found likely token logging in src/auth.ts before timeout.', + }, + ], + }, + }, + }; + const config: ToolCardConfig = { + toolName: 'submit_code_review', + displayName: 'Code Review', + icon: 'REVIEW', + requiresConfirmation: false, + resultDisplayType: 'detailed', + }; + + act(() => { + root.render( + , + ); + }); + act(() => { + container.querySelector('.preview-toggle-btn')?.dispatchEvent( + new window.Event('click', { bubbles: true }), + ); + }); + + expect(container.textContent).toContain('Review status'); + expect(container.textContent).toContain('Reviewer timed out with partial result'); + expect(container.textContent).toContain('1 reviewer result is partial; confidence is reduced.'); + }); +}); diff --git a/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.tsx b/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.tsx index 8ef9acb76..ebf88866d 100644 --- a/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.tsx +++ b/src/web-ui/src/flow_chat/tool-cards/CodeReviewToolCard.tsx @@ -9,6 +9,7 @@ import { Loader2, AlertTriangle, AlertCircle, + Clock, Info, 
ChevronDown, ChevronUp, @@ -16,6 +17,7 @@ import { import { useTranslation } from 'react-i18next'; import { Tooltip, ToolProcessingDots } from '@/component-library'; import type { ToolCardProps } from '../types/flow-chat'; +import { flowChatStore } from '../store/FlowChatStore'; import { BaseToolCard, ToolCardHeader } from './BaseToolCard'; import { createLogger } from '@/shared/utils/logger'; import { useToolCardHeightContract } from './useToolCardHeightContract'; @@ -23,10 +25,12 @@ import { buildReviewRemediationItems, } from '../utils/codeReviewRemediation'; import { + buildCodeReviewReliabilityNotices, buildCodeReviewReportSections, getDefaultExpandedCodeReviewSectionIds, type CodeReviewReportData, type CodeReviewReviewer, + type ReviewReliabilityNotice, type RemediationGroupId, type ReviewReportGroup, type ReviewSectionId, @@ -34,8 +38,14 @@ import { } from '../utils/codeReviewReport'; import { CodeReviewReportExportActions } from './CodeReviewReportExportActions'; import { DEEP_REVIEW_SCROLL_TO_EVENT, type DeepReviewScrollToRequest } from '../events/flowchatNavigation'; -import { globalEventBus } from '@/infrastructure'; +import { globalEventBus } from '@/infrastructure/event-bus'; import { normalizeDecisionEntry, type DecisionContext } from '../utils/codeReviewReport'; +import { + getActiveReviewTeamManifestMembers, + type ReviewTeamManifestMember, + type ReviewTeamManifestMemberReason, + type ReviewTeamRunManifest, +} from '@/shared/services/reviewTeamService'; import './CodeReviewToolCard.scss'; const log = createLogger('CodeReviewToolCard'); @@ -126,6 +136,122 @@ function formatReviewerStatus(status: string, t: Translate): string { }); } +function getReliabilityNoticeLabel(notice: ReviewReliabilityNotice, t: Translate): string { + return t(`toolCards.codeReview.reliabilityStatus.${notice.kind}.label`, { + defaultValue: { + context_pressure: 'Context pressure rising', + compression_preserved: 'Compression preserved key facts', + cache_hit: 'Incremental 
cache reused reviewer output', + cache_miss: 'Incremental cache missed or refreshed', + concurrency_limited: 'Reviewer launch was concurrency-limited', + partial_reviewer: 'Reviewer timed out with partial result', + retry_guidance: 'Retry guidance emitted', + skipped_reviewers: 'Skipped reviewers', + token_budget_limited: 'Token budget limited reviewer coverage', + user_decision: 'User decision needed', + }[notice.kind], + }); +} + +function getReliabilityNoticeDetail(notice: ReviewReliabilityNotice, t: Translate): string { + if (notice.detail?.trim()) { + return notice.detail.trim(); + } + + return t(`toolCards.codeReview.reliabilityStatus.${notice.kind}.detail`, { + count: notice.count ?? 0, + defaultValue: { + context_pressure: '{{count}} reviewer calls planned for a large or constrained target.', + compression_preserved: 'Coverage notes include preserved context from compression.', + cache_hit: '{{count}} reviewer packet reused matching cached output.', + cache_miss: '{{count}} reviewer packet ran fresh or refreshed stale cache.', + concurrency_limited: '{{count}} reviewer launch hit a concurrency cap.', + partial_reviewer: '{{count}} reviewer result is partial; confidence is reduced.', + retry_guidance: '{{count}} retry guidance item was emitted for partial review coverage.', + skipped_reviewers: '{{count}} reviewer was skipped by applicability, configuration, or budget.', + token_budget_limited: '{{count}} reviewer was skipped by token budget mode.', + user_decision: '{{count}} review item needs your decision before fixing.', + }[notice.kind], + }); +} + +function getReliabilityNoticeIcon(notice: ReviewReliabilityNotice): React.ReactNode { + if (notice.kind === 'partial_reviewer' || notice.kind === 'retry_guidance') { + return ; + } + if ( + notice.kind === 'user_decision' || + notice.kind === 'concurrency_limited' || + notice.kind === 'token_budget_limited' + ) { + return ; + } + return ; +} + +function getDeepReviewRunManifestForSession(sessionId?: string): 
ReviewTeamRunManifest | undefined { + if (!sessionId) { + return undefined; + } + + return flowChatStore.getState().sessions.get(sessionId)?.deepReviewRunManifest; +} + +function getReviewerLabel(member: ReviewTeamManifestMember): string { + return member.displayName || member.subagentId; +} + +function getSkippedReasonLabel( + reason: ReviewTeamManifestMemberReason | undefined, + t: Translate, +): string { + switch (reason) { + case 'not_applicable': + return t('toolCards.codeReview.runManifest.skippedReasons.notApplicable', { + defaultValue: 'Not applicable to this target', + }); + case 'budget_limited': + return t('toolCards.codeReview.runManifest.skippedReasons.budgetLimited', { + defaultValue: 'Limited by token budget', + }); + case 'invalid_tooling': + return t('toolCards.codeReview.runManifest.skippedReasons.invalidTooling', { + defaultValue: 'Configuration issue', + }); + case 'disabled': + return t('toolCards.codeReview.runManifest.skippedReasons.disabled', { + defaultValue: 'Disabled', + }); + case 'unavailable': + return t('toolCards.codeReview.runManifest.skippedReasons.unavailable', { + defaultValue: 'Unavailable', + }); + default: + return t('toolCards.codeReview.runManifest.skippedReasons.skipped', { + defaultValue: 'Skipped', + }); + } +} + +function formatRunManifestSummary( + manifest: ReviewTeamRunManifest, + activeReviewers: ReviewTeamManifestMember[], + t: Translate, +): string { + return t('toolCards.codeReview.runManifest.summary', { + active: activeReviewers.length, + skipped: manifest.skippedReviewers.length, + calls: manifest.tokenBudget.estimatedReviewerCalls, + defaultValue: '{{active}} active / {{skipped}} skipped / {{calls}} calls', + }); +} + +function formatRunManifestTarget(manifest: ReviewTeamRunManifest): string { + return manifest.target.tags.length > 0 + ? 
manifest.target.tags.join(', ') + : manifest.target.source; +} + function renderReportGroupList( groups: Array>, titleForGroup: (id: TId) => string, @@ -144,7 +270,7 @@ function renderReportGroupList export const CodeReviewToolCard: React.FC = React.memo(({ toolItem, - sessionId: _sessionId, + sessionId, }) => { const { t } = useTranslation('flow-chat'); const { toolResult, status } = toolItem; @@ -157,6 +283,21 @@ export const CodeReviewToolCard: React.FC = React.memo(({ toolId, toolName: toolItem.toolName, }); + const [sessionRunManifest, setSessionRunManifest] = useState( + () => getDeepReviewRunManifestForSession(sessionId), + ); + + useEffect(() => { + setSessionRunManifest(getDeepReviewRunManifestForSession(sessionId)); + + if (!sessionId) { + return undefined; + } + + return flowChatStore.subscribe((state) => { + setSessionRunManifest(state.sessions.get(sessionId)?.deepReviewRunManifest); + }); + }, [sessionId]); const getStatusIcon = () => { switch (status) { @@ -445,7 +586,10 @@ export const CodeReviewToolCard: React.FC = React.memo(({ extra={( <> {hasData && reviewData && ( - + )} {hasData && ( = React.memo(({ const review_mode = reviewData.review_mode; const review_scope = reviewData.review_scope; const reviewers = reviewData.reviewers ?? []; + const runManifest = review_mode === 'deep' + ? sessionRunManifest + : undefined; + const activeRunManifestReviewers = runManifest + ? getActiveReviewTeamManifestMembers(runManifest) + : []; const reportSections = buildCodeReviewReportSections(reviewData); + const reliabilityNotices = buildCodeReviewReliabilityNotices(reviewData, runManifest); const riskLevel = summary.risk_level ?? 'low'; const recommendedAction = summary.recommended_action ?? 
'approve'; const remediationItemCount = reportSections.remediationGroups @@ -485,11 +636,47 @@ export const CodeReviewToolCard: React.FC = React.memo(({ const remediationExpanded = expandedReportSectionIds.has('remediation'); const issuesExpanded = expandedReportSectionIds.has('issues'); const strengthsExpanded = expandedReportSectionIds.has('strengths'); + const runManifestExpanded = expandedReportSectionIds.has('runManifest'); const teamExpanded = expandedReportSectionIds.has('team'); const coverageExpanded = expandedReportSectionIds.has('coverage'); return (
+ {reliabilityNotices.length > 0 && ( +
+
+ {t('toolCards.codeReview.reliabilityStatus.title', { + defaultValue: 'Review status', + })} +
+
+ {reliabilityNotices.map((notice) => ( +
+ + {getReliabilityNoticeIcon(notice)} + + + + {getReliabilityNoticeLabel(notice, t)} + + + {getReliabilityNoticeDetail(notice, t)} + + +
+ ))} +
+
+ )} +
{t('toolCards.codeReview.overallAssessment')}
@@ -538,6 +725,85 @@ export const CodeReviewToolCard: React.FC = React.memo(({
+ {runManifest && ( + +
+
+
+ {t('toolCards.codeReview.runManifest.target', { defaultValue: 'Target' })} + {formatRunManifestTarget(runManifest)} +
+
+ {t('toolCards.codeReview.runManifest.budget', { defaultValue: 'Budget' })} + {runManifest.tokenBudget.mode} +
+
+ {t('toolCards.codeReview.runManifest.estimatedCalls', { defaultValue: 'Estimated calls' })} + {runManifest.tokenBudget.estimatedReviewerCalls} +
+ {runManifest.strategyRecommendation && ( +
+ + {t('toolCards.codeReview.runManifest.recommendedStrategy', { + defaultValue: 'Recommended strategy', + })} + + {runManifest.strategyRecommendation.strategyLevel} +
+ )} +
+ + {runManifest.strategyRecommendation && ( +
+
+ {t('toolCards.codeReview.runManifest.riskRecommendationTitle', { + defaultValue: 'Risk recommendation', + })} +
+

{runManifest.strategyRecommendation.rationale}

+
+ )} + + {activeRunManifestReviewers.length > 0 && ( +
+
+ {t('toolCards.codeReview.runManifest.activeGroupTitle', { defaultValue: 'Will run' })} +
+
+ {activeRunManifestReviewers.map((member) => ( + + {getReviewerLabel(member)} + {member.roleName} + + ))} +
+
+ )} + + {runManifest.skippedReviewers.length > 0 && ( +
+
+ {t('toolCards.codeReview.runManifest.skippedGroupTitle', { defaultValue: 'Skipped reviewers' })} +
+
    + {runManifest.skippedReviewers.map((member) => ( +
  • + {getReviewerLabel(member)} + {getSkippedReasonLabel(member.reason, t)} +
  • + ))} +
+
+ )} +
+
+ )} + {issues.length > 0 && ( = React.memo(({ handleToggleReportSection, remediationItems, reviewData, + sessionRunManifest, t, ]); diff --git a/src/web-ui/src/flow_chat/types/flow-chat.ts b/src/web-ui/src/flow_chat/types/flow-chat.ts index 67776545a..75f56f5c3 100644 --- a/src/web-ui/src/flow_chat/types/flow-chat.ts +++ b/src/web-ui/src/flow_chat/types/flow-chat.ts @@ -8,6 +8,7 @@ import type { SessionKind, SessionTitleSource, } from '@/shared/types/session-history'; +import type { ReviewTeamRunManifest } from '@/shared/services/reviewTeamService'; // Base type for streaming items. export interface FlowItem { @@ -332,6 +333,9 @@ export interface Session { */ needsUserAttention?: 'ask_user' | 'tool_confirm'; + /** Per-run reviewer manifest for Deep Review child sessions. */ + deepReviewRunManifest?: ReviewTeamRunManifest; + /** * Runtime-only session that should stay in memory but never be persisted or * shown in the main session navigation. diff --git a/src/web-ui/src/flow_chat/utils/codeReviewReport.test.ts b/src/web-ui/src/flow_chat/utils/codeReviewReport.test.ts index df17cfe39..824f75e3a 100644 --- a/src/web-ui/src/flow_chat/utils/codeReviewReport.test.ts +++ b/src/web-ui/src/flow_chat/utils/codeReviewReport.test.ts @@ -1,9 +1,148 @@ import { describe, expect, it } from 'vitest'; import { buildCodeReviewReportSections, + buildCodeReviewReliabilityNotices, formatCodeReviewReportMarkdown, getDefaultExpandedCodeReviewSectionIds, } from './codeReviewReport'; +import type { ReviewTeamManifestMember, ReviewTeamRunManifest } from '@/shared/services/reviewTeamService'; + +function manifestMember( + subagentId: string, + displayName: string, + reason?: ReviewTeamManifestMember['reason'], +): ReviewTeamManifestMember { + return { + subagentId, + displayName, + roleName: displayName, + model: 'fast', + configuredModel: 'fast', + defaultModelSlot: 'fast', + strategyLevel: 'normal', + strategySource: 'team', + strategyDirective: 'Review the target.', + locked: 
!subagentId.startsWith('Custom'), + source: subagentId.startsWith('Custom') ? 'extra' : 'core', + subagentSource: subagentId.startsWith('Custom') ? 'user' : 'builtin', + ...(reason ? { reason } : {}), + }; +} + +function buildRunManifest(): ReviewTeamRunManifest { + return { + reviewMode: 'deep', + workspacePath: '/test-fixtures/project-a', + policySource: 'default-review-team-config', + target: { + source: 'session_files', + resolution: 'resolved', + tags: ['frontend'], + files: ['src/App.tsx'], + warnings: [], + }, + strategyLevel: 'normal', + strategyRecommendation: { + strategyLevel: 'deep', + score: 24, + rationale: 'Large/high-risk change (8 files, 900 lines; 2 security-sensitive files, 3 workspace areas). Deep review recommended.', + factors: { + fileCount: 8, + totalLinesChanged: 900, + lineCountSource: 'diff_stat', + securityFileCount: 2, + workspaceAreaCount: 3, + contractSurfaceChanged: true, + }, + }, + executionPolicy: { + reviewerTimeoutSeconds: 300, + judgeTimeoutSeconds: 240, + reviewerFileSplitThreshold: 20, + maxSameRoleInstances: 3, + maxRetriesPerRole: 1, + }, + concurrencyPolicy: { + maxParallelInstances: 4, + staggerSeconds: 0, + batchExtrasSeparately: true, + }, + preReviewSummary: { + source: 'target_manifest', + summary: '1 file, 12 changed lines across 1 workspace area: web-ui (1)', + fileCount: 1, + excludedFileCount: 0, + lineCount: 12, + lineCountSource: 'diff_stat', + targetTags: ['frontend'], + workspaceAreas: [ + { + key: 'web-ui', + fileCount: 1, + sampleFiles: ['src/App.tsx'], + }, + ], + warnings: [], + }, + sharedContextCache: { + source: 'work_packets', + strategy: 'reuse_readonly_file_context_by_cache_key', + entries: [ + { + cacheKey: 'shared-context:1', + path: 'src/App.tsx', + workspaceArea: 'web-ui', + recommendedTools: ['GetFileDiff', 'Read'], + consumerPacketIds: [ + 'reviewer:ReviewBusinessLogic', + 'reviewer:CustomSecurity', + ], + }, + ], + omittedEntryCount: 0, + }, + incrementalReviewCache: { + source: 
'target_manifest', + strategy: 'reuse_completed_packets_when_fingerprint_matches', + cacheKey: 'incremental-review:abc12345', + fingerprint: 'abc12345', + filePaths: ['src/App.tsx'], + workspaceAreas: ['web-ui'], + targetTags: ['frontend'], + reviewerPacketIds: [ + 'reviewer:ReviewBusinessLogic', + 'reviewer:CustomSecurity', + ], + lineCount: 12, + lineCountSource: 'diff_stat', + invalidatesOn: [ + 'target_file_set_changed', + 'target_line_count_changed', + 'reviewer_roster_changed', + ], + }, + tokenBudget: { + mode: 'balanced', + estimatedReviewerCalls: 3, + maxReviewerCalls: 4, + maxExtraReviewers: 1, + largeDiffSummaryFirst: false, + skippedReviewerIds: ['CustomInvalid'], + warnings: [], + }, + coreReviewers: [ + manifestMember('ReviewBusinessLogic', 'Logic reviewer'), + ], + qualityGateReviewer: manifestMember('ReviewJudge', 'Quality inspector'), + enabledExtraReviewers: [ + manifestMember('CustomSecurity', 'Custom security reviewer'), + ], + skippedReviewers: [ + manifestMember('ReviewFrontend', 'Frontend reviewer', 'not_applicable'), + manifestMember('CustomInvalid', 'Custom invalid reviewer', 'invalid_tooling'), + ], + }; +} describe('codeReviewReport', () => { it('uses structured report sections when present', () => { @@ -99,6 +238,256 @@ describe('codeReviewReport', () => { ]); }); + it('surfaces partial reviewer output in coverage notes', () => { + const sections = buildCodeReviewReportSections({ + summary: { + overall_assessment: 'Review completed with reduced confidence.', + risk_level: 'medium' as const, + recommended_action: 'request_changes' as const, + }, + reviewers: [ + { + name: 'Security Reviewer', + specialty: 'security', + status: 'partial_timeout', + summary: 'Timed out after finding one likely issue.', + partial_output: 'Found likely token logging in src/auth.ts before timeout.', + }, + ], + }); + + expect(sections.reviewerStats).toMatchObject({ total: 1, completed: 0, degraded: 1 }); + expect(sections.coverageNotes).toEqual([ + 'Security 
Reviewer timed out after producing partial output: Found likely token logging in src/auth.ts before timeout.', + ]); + }); + + it('builds compact reliability notices only when review attention is needed', () => { + expect(buildCodeReviewReliabilityNotices({ + summary: { + overall_assessment: 'No issues found.', + risk_level: 'low' as const, + recommended_action: 'approve' as const, + }, + reviewers: [{ name: 'Reviewer', specialty: 'logic', status: 'completed', summary: 'Done.' }], + })).toEqual([]); + + const manifest = { + ...buildRunManifest(), + tokenBudget: { + ...buildRunManifest().tokenBudget, + largeDiffSummaryFirst: true, + warnings: ['Large target; reviewers will receive compact scopes.'], + }, + }; + const notices = buildCodeReviewReliabilityNotices({ + summary: { + overall_assessment: 'Review completed with reduced confidence.', + risk_level: 'medium' as const, + recommended_action: 'request_changes' as const, + }, + reviewers: [ + { + name: 'Security Reviewer', + specialty: 'security', + status: 'partial_timeout', + summary: 'Timed out after producing partial evidence.', + partial_output: 'Found likely token logging in src/auth.ts before timeout.', + }, + ], + report_sections: { + coverage_notes: ['Context compression preserved key file and test facts.'], + remediation_groups: { + needs_decision: ['Decide whether to block the release or isolate the feature.'], + }, + }, + }, manifest); + + expect(notices.map((notice) => notice.kind)).toEqual([ + 'context_pressure', + 'skipped_reviewers', + 'token_budget_limited', + 'compression_preserved', + 'partial_reviewer', + 'retry_guidance', + 'user_decision', + ]); + expect(notices.find((notice) => notice.kind === 'partial_reviewer')).toMatchObject({ + severity: 'warning', + count: 1, + }); + }); + + it('prefers structured reliability signals for status and markdown export', () => { + const report = { + summary: { + overall_assessment: 'Review completed with runtime reliability signals.', + risk_level: 'medium' 
as const, + recommended_action: 'request_changes' as const, + }, + review_mode: 'deep' as const, + reviewers: [ + { + name: 'Security Reviewer', + specialty: 'security', + status: 'completed', + summary: 'Completed.', + }, + ], + reliability_signals: [ + { + kind: 'context_pressure', + severity: 'warning', + count: 7, + source: 'runtime', + detail: 'Runtime profile capped reviewer fan-out for this large target.', + }, + { + kind: 'compression_preserved', + severity: 'info', + source: 'runtime', + detail: 'Compression contract retained modified files and failed commands.', + }, + { + kind: 'cache_hit', + severity: 'info', + count: 2, + source: 'runtime', + detail: 'Two reviewer packets reused matching cached output.', + }, + { + kind: 'cache_miss', + severity: 'info', + count: 1, + source: 'runtime', + detail: 'One reviewer packet ran fresh and updated the cache.', + }, + { + kind: 'concurrency_limited', + severity: 'warning', + count: 1, + source: 'runtime', + detail: 'One reviewer launch hit the configured concurrency cap.', + }, + { + kind: 'retry_guidance', + severity: 'warning', + count: 1, + source: 'runtime', + detail: 'Retry guidance was emitted for a partial reviewer.', + }, + ], + }; + + const notices = buildCodeReviewReliabilityNotices(report); + + expect(notices).toEqual([ + { + kind: 'context_pressure', + severity: 'warning', + count: 7, + source: 'runtime', + detail: 'Runtime profile capped reviewer fan-out for this large target.', + }, + { + kind: 'compression_preserved', + severity: 'info', + source: 'runtime', + detail: 'Compression contract retained modified files and failed commands.', + }, + { + kind: 'cache_hit', + severity: 'info', + count: 2, + source: 'runtime', + detail: 'Two reviewer packets reused matching cached output.', + }, + { + kind: 'cache_miss', + severity: 'info', + count: 1, + source: 'runtime', + detail: 'One reviewer packet ran fresh and updated the cache.', + }, + { + kind: 'concurrency_limited', + severity: 'warning', + 
count: 1, + source: 'runtime', + detail: 'One reviewer launch hit the configured concurrency cap.', + }, + { + kind: 'retry_guidance', + severity: 'warning', + count: 1, + source: 'runtime', + detail: 'Retry guidance was emitted for a partial reviewer.', + }, + ]); + + const markdown = formatCodeReviewReportMarkdown(report); + + expect(markdown).toContain('## Review Reliability'); + expect(markdown).toContain( + '- Context pressure rising [warning/runtime]: Runtime profile capped reviewer fan-out for this large target.', + ); + expect(markdown).toContain( + '- Compression preserved key facts [info/runtime]: Compression contract retained modified files and failed commands.', + ); + expect(markdown).toContain( + '- Incremental cache reused reviewer output [info/runtime]: Two reviewer packets reused matching cached output.', + ); + expect(markdown).toContain( + '- Incremental cache missed or refreshed [info/runtime]: One reviewer packet ran fresh and updated the cache.', + ); + expect(markdown).toContain( + '- Reviewer launch was concurrency-limited [warning/runtime]: One reviewer launch hit the configured concurrency cap.', + ); + expect(markdown).toContain( + '- Retry guidance emitted [warning/runtime]: Retry guidance was emitted for a partial reviewer.', + ); + }); + + it('summarizes skipped reviewer and token budget tradeoffs from the run manifest', () => { + const report = { + summary: { + overall_assessment: 'Review completed with one skipped reviewer.', + risk_level: 'medium' as const, + recommended_action: 'request_changes' as const, + }, + review_mode: 'deep' as const, + reviewers: [ + { + name: 'Business Logic Reviewer', + specialty: 'logic', + status: 'completed', + summary: 'Done.', + }, + ], + }; + const notices = buildCodeReviewReliabilityNotices(report, buildRunManifest()); + + expect(notices).toEqual([ + { + kind: 'skipped_reviewers', + severity: 'info', + count: 2, + source: 'manifest', + }, + { + kind: 'token_budget_limited', + severity: 'warning', + 
count: 1, + source: 'manifest', + }, + ]); + + const markdown = formatCodeReviewReportMarkdown(report, undefined, { runManifest: buildRunManifest() }); + + expect(markdown).toContain('- Skipped reviewers [info/manifest]: Count: 2'); + expect(markdown).toContain('- Token budget limited reviewer coverage [warning/manifest]: Count: 1'); + }); + it('keeps team and issue details collapsed by default while leaving remediation visible', () => { const report = { summary: { @@ -149,4 +538,94 @@ describe('codeReviewReport', () => { expect(markdown).toContain('## Remediation Plan'); expect(markdown).toContain('## Code Review Team'); }); + + it('exports partial reviewer output in markdown', () => { + const markdown = formatCodeReviewReportMarkdown({ + summary: { + overall_assessment: 'Review completed with partial security evidence.', + risk_level: 'medium' as const, + recommended_action: 'request_changes' as const, + }, + review_mode: 'deep' as const, + issues: [], + reviewers: [ + { + name: 'Security Reviewer', + specialty: 'security', + status: 'partial_timeout', + summary: 'Timed out after producing partial evidence.', + partial_output: 'Found likely token logging in src/auth.ts before timeout.', + }, + ], + }); + + expect(markdown).toContain('Security Reviewer (security; Status: partial_timeout)'); + expect(markdown).toContain('Partial output: Found likely token logging in src/auth.ts before timeout.'); + expect(markdown).toContain( + 'Security Reviewer timed out after producing partial output: Found likely token logging in src/auth.ts before timeout.', + ); + }); + + it('exports reviewer packet fallback metadata in markdown', () => { + const markdown = formatCodeReviewReportMarkdown({ + summary: { + overall_assessment: 'Review completed with inferred packet metadata.', + risk_level: 'low' as const, + recommended_action: 'approve' as const, + }, + review_mode: 'deep' as const, + issues: [], + reviewers: [ + { + name: 'Security Reviewer', + specialty: 'security', + status: 
'completed', + summary: 'Checked the first security split.', + packet_id: 'reviewer:ReviewSecurity:group-1-of-3', + packet_status_source: 'inferred', + }, + ], + }); + + expect(markdown).toContain('Packet: reviewer:ReviewSecurity:group-1-of-3 (inferred)'); + }); + + it('includes the run manifest when exporting a deep review report', () => { + const markdown = formatCodeReviewReportMarkdown( + { + summary: { + overall_assessment: 'No validated issues.', + risk_level: 'low' as const, + recommended_action: 'approve' as const, + }, + review_mode: 'deep' as const, + issues: [], + reviewers: [], + }, + undefined, + { runManifest: buildRunManifest() }, + ); + + expect(markdown).toContain('## Run manifest'); + expect(markdown).toContain('- Target: frontend'); + expect(markdown).toContain('- Budget: balanced'); + expect(markdown).toContain('- Estimated calls: 3'); + expect(markdown).toContain('- Recommended strategy: deep'); + expect(markdown).toContain('- Recommendation score: 24'); + expect(markdown).toContain('- Recommendation rationale: Large/high-risk change'); + expect(markdown).toContain('- Logic reviewer (ReviewBusinessLogic)'); + expect(markdown).toContain('- Custom security reviewer (CustomSecurity)'); + expect(markdown).toContain('- Quality inspector (ReviewJudge)'); + expect(markdown).toContain('- Frontend reviewer (ReviewFrontend): not_applicable'); + expect(markdown).toContain('- Custom invalid reviewer (CustomInvalid): invalid_tooling'); + expect(markdown).toContain('### Pre-review summary'); + expect(markdown).toContain('- 1 file, 12 changed lines across 1 workspace area: web-ui (1)'); + expect(markdown).toContain('- web-ui: 1 file (src/App.tsx)'); + expect(markdown).toContain('### Shared context cache'); + expect(markdown).toContain('- shared-context:1: src/App.tsx -> reviewer:ReviewBusinessLogic, reviewer:CustomSecurity'); + expect(markdown).toContain('### Incremental review cache'); + expect(markdown).toContain('- Cache key: incremental-review:abc12345'); 
+ expect(markdown).toContain('- Fingerprint: abc12345'); + expect(markdown).toContain('- Invalidates on: target_file_set_changed, target_line_count_changed, reviewer_roster_changed'); + }); }); diff --git a/src/web-ui/src/flow_chat/utils/codeReviewReport.ts b/src/web-ui/src/flow_chat/utils/codeReviewReport.ts index f34c57889..0c70e082b 100644 --- a/src/web-ui/src/flow_chat/utils/codeReviewReport.ts +++ b/src/web-ui/src/flow_chat/utils/codeReviewReport.ts @@ -1,9 +1,23 @@ +import { + getActiveReviewTeamManifestMembers, + type ReviewTeamManifestMember, + type ReviewTeamRunManifest, +} from '@/shared/services/reviewTeamService'; + export type ReviewRiskLevel = 'low' | 'medium' | 'high' | 'critical'; export type ReviewAction = 'approve' | 'approve_with_suggestions' | 'request_changes' | 'block'; export type ReviewMode = 'standard' | 'deep'; export type ReviewIssueSeverity = 'critical' | 'high' | 'medium' | 'low' | 'info'; export type ReviewIssueCertainty = 'confirmed' | 'likely' | 'possible'; -export type ReviewSectionId = 'summary' | 'issues' | 'remediation' | 'strengths' | 'team' | 'coverage'; +export type ReviewPacketStatusSource = 'reported' | 'inferred' | 'missing'; +export type ReviewSectionId = + | 'summary' + | 'issues' + | 'remediation' + | 'strengths' + | 'runManifest' + | 'team' + | 'coverage'; export type RemediationGroupId = 'must_fix' | 'should_improve' | 'needs_decision' | 'verification'; export type StrengthGroupId = | 'architecture' @@ -39,6 +53,9 @@ export interface CodeReviewReviewer { specialty: string; status: string; summary: string; + partial_output?: string; + packet_id?: string; + packet_status_source?: ReviewPacketStatusSource; issue_count?: number; } @@ -80,6 +97,7 @@ export interface CodeReviewReportData { reviewers?: CodeReviewReviewer[]; remediation_plan?: string[]; report_sections?: CodeReviewReportSectionsData; + reliability_signals?: CodeReviewReliabilitySignal[]; } export interface ReviewReportGroup { @@ -111,27 +129,72 @@ export 
interface ReviewReportSections { reviewerStats: ReviewReviewerStats; } +export type ReviewReliabilityNoticeKind = + | 'context_pressure' + | 'compression_preserved' + | 'cache_hit' + | 'cache_miss' + | 'concurrency_limited' + | 'partial_reviewer' + | 'retry_guidance' + | 'skipped_reviewers' + | 'token_budget_limited' + | 'user_decision'; + +export type ReviewReliabilityNoticeSeverity = 'info' | 'warning' | 'action'; +export type ReviewReliabilitySignalSource = 'runtime' | 'manifest' | 'report' | 'inferred'; + +export interface ReviewReliabilityNotice { + kind: ReviewReliabilityNoticeKind; + severity: ReviewReliabilityNoticeSeverity; + count?: number; + source?: ReviewReliabilitySignalSource; + detail?: string; +} + +export interface CodeReviewReliabilitySignal { + kind: ReviewReliabilityNoticeKind; + severity?: ReviewReliabilityNoticeSeverity; + count?: number; + source?: ReviewReliabilitySignalSource; + detail?: string; +} + export interface CodeReviewReportMarkdownLabels { titleStandard: string; titleDeep: string; executiveSummary: string; reviewDecision: string; + runManifest: string; riskLevel: string; recommendedAction: string; scope: string; + target: string; + budget: string; + estimatedCalls: string; + activeReviewers: string; + skippedReviewers: string; issues: string; noIssues: string; remediationPlan: string; strengths: string; reviewTeam: string; + reliabilitySignals: string; coverageNotes: string; status: string; + packet: string; + partialOutput: string; findings: string; validation: string; suggestion: string; source: string; noItems: string; groupTitles: Record; + reliabilityNoticeLabels: Record; +} + +export interface CodeReviewReportMarkdownOptions { + runManifest?: ReviewTeamRunManifest; } const REMEDIATION_GROUP_ORDER: RemediationGroupId[] = [ @@ -152,27 +215,74 @@ const STRENGTH_GROUP_ORDER: StrengthGroupId[] = [ ]; const DEGRADED_REVIEWER_STATUSES = new Set(['timed_out', 'cancelled_by_user', 'failed', 'skipped']); +const 
PARTIAL_TIMEOUT_REVIEWER_STATUSES = new Set(['partial_timeout', 'timed_out', 'cancelled_by_user']); +const RELIABILITY_NOTICE_ORDER: ReviewReliabilityNoticeKind[] = [ + 'context_pressure', + 'skipped_reviewers', + 'token_budget_limited', + 'compression_preserved', + 'cache_hit', + 'cache_miss', + 'concurrency_limited', + 'partial_reviewer', + 'retry_guidance', + 'user_decision', +]; +const RELIABILITY_NOTICE_FALLBACK_LABELS: Record = { + context_pressure: 'Context pressure rising', + compression_preserved: 'Compression preserved key facts', + cache_hit: 'Incremental cache reused reviewer output', + cache_miss: 'Incremental cache missed or refreshed', + concurrency_limited: 'Reviewer launch was concurrency-limited', + partial_reviewer: 'Reviewer timed out with partial result', + retry_guidance: 'Retry guidance emitted', + skipped_reviewers: 'Skipped reviewers', + token_budget_limited: 'Token budget limited reviewer coverage', + user_decision: 'User decision needed', +}; +const RELIABILITY_NOTICE_SEVERITY_BY_KIND: Record = { + context_pressure: 'info', + compression_preserved: 'info', + cache_hit: 'info', + cache_miss: 'info', + concurrency_limited: 'warning', + partial_reviewer: 'warning', + retry_guidance: 'warning', + skipped_reviewers: 'info', + token_budget_limited: 'warning', + user_decision: 'action', +}; export const DEFAULT_CODE_REVIEW_MARKDOWN_LABELS: CodeReviewReportMarkdownLabels = { titleStandard: 'Code Review Report', titleDeep: 'Deep Review Report', executiveSummary: 'Executive Summary', reviewDecision: 'Review Decision', + runManifest: 'Run manifest', riskLevel: 'Risk Level', recommendedAction: 'Recommended Action', scope: 'Scope', + target: 'Target', + budget: 'Budget', + estimatedCalls: 'Estimated calls', + activeReviewers: 'Active reviewers', + skippedReviewers: 'Skipped reviewers', issues: 'Issues', noIssues: 'No validated issues.', remediationPlan: 'Remediation Plan', strengths: 'Strengths', reviewTeam: 'Code Review Team', + reliabilitySignals: 
'Review Reliability', coverageNotes: 'Coverage Notes', status: 'Status', + packet: 'Packet', + partialOutput: 'Partial output', findings: 'Findings', validation: 'Validation', suggestion: 'Suggestion', source: 'Source', noItems: 'None.', + reliabilityNoticeLabels: RELIABILITY_NOTICE_FALLBACK_LABELS, groupTitles: { must_fix: 'Must Fix', should_improve: 'Should Improve', @@ -259,7 +369,10 @@ function buildReviewerStats(reviewers: CodeReviewReviewer[] = []): ReviewReviewe for (const reviewer of reviewers) { if (reviewer.status === 'completed') { completed += 1; - } else if (DEGRADED_REVIEWER_STATUSES.has(reviewer.status)) { + } else if ( + DEGRADED_REVIEWER_STATUSES.has(reviewer.status) || + reviewer.status === 'partial_timeout' + ) { degraded += 1; } } @@ -271,6 +384,251 @@ function buildReviewerStats(reviewers: CodeReviewReviewer[] = []): ReviewReviewe }; } +function buildPartialReviewerCoverageNotes(reviewers: CodeReviewReviewer[] = []): string[] { + return reviewers + .map((reviewer) => { + const partialOutput = reviewer.partial_output?.trim(); + if (!partialOutput || !PARTIAL_TIMEOUT_REVIEWER_STATUSES.has(reviewer.status)) { + return null; + } + return `${reviewer.name} timed out after producing partial output: ${partialOutput}`; + }) + .filter((note): note is string => Boolean(note)); +} + +function hasCompressionPreservationNote(report: CodeReviewReportData): boolean { + const notes = [ + ...(report.report_sections?.coverage_notes ?? []), + report.summary?.confidence_note, + ]; + + return notes.some((note) => { + const normalized = note?.toLowerCase() ?? 
''; + return normalized.includes('compress') && normalized.includes('preserv'); + }); +} + +function countPartialReviewers(reviewers: CodeReviewReviewer[] = []): number { + return reviewers.filter((reviewer) => + reviewer.status === 'partial_timeout' || + ( + PARTIAL_TIMEOUT_REVIEWER_STATUSES.has(reviewer.status) && + Boolean(reviewer.partial_output?.trim()) + ) + ).length; +} + +function countSkippedReviewers(runManifest?: ReviewTeamRunManifest): number { + return runManifest?.skippedReviewers.length ?? 0; +} + +function countTokenBudgetLimitedReviewers(runManifest?: ReviewTeamRunManifest): number { + if (!runManifest) { + return 0; + } + const skippedByBudget = new Set(runManifest.tokenBudget.skippedReviewerIds); + for (const reviewer of runManifest.skippedReviewers) { + if (reviewer.reason === 'budget_limited') { + skippedByBudget.add(reviewer.subagentId); + } + } + return skippedByBudget.size; +} + +function countDecisionItems(report: CodeReviewReportData): number { + const structuredDecisionItems = report.report_sections?.remediation_groups?.needs_decision ?? []; + if (structuredDecisionItems.length > 0) { + const stringItems = structuredDecisionItems.filter((item): item is string => typeof item === 'string'); + return nonEmpty(stringItems).length; + } + + return report.summary?.recommended_action === 'block' ? 
1 : 0; +} + +function isReliabilityNoticeKind(value: string): value is ReviewReliabilityNoticeKind { + return RELIABILITY_NOTICE_ORDER.includes(value as ReviewReliabilityNoticeKind); +} + +function isReliabilitySeverity(value: string): value is ReviewReliabilityNoticeSeverity { + return value === 'info' || value === 'warning' || value === 'action'; +} + +function isReliabilitySignalSource(value: string): value is ReviewReliabilitySignalSource { + return value === 'runtime' || value === 'manifest' || value === 'report' || value === 'inferred'; +} + +function normalizeStructuredReliabilityNotice( + signal: CodeReviewReliabilitySignal, +): ReviewReliabilityNotice | null { + if (!isReliabilityNoticeKind(signal.kind)) { + return null; + } + + const detail = signal.detail?.trim(); + return { + kind: signal.kind, + severity: signal.severity && isReliabilitySeverity(signal.severity) + ? signal.severity + : RELIABILITY_NOTICE_SEVERITY_BY_KIND[signal.kind], + ...(typeof signal.count === 'number' ? { count: signal.count } : {}), + ...(signal.source && isReliabilitySignalSource(signal.source) + ? { source: signal.source } + : {}), + ...(detail ? { detail } : {}), + }; +} + +function structuredReliabilityNoticeMap( + report: CodeReviewReportData, +): Map { + const notices = new Map(); + for (const signal of report.reliability_signals ?? []) { + const notice = normalizeStructuredReliabilityNotice(signal); + if (notice && !notices.has(notice.kind)) { + notices.set(notice.kind, notice); + } + } + return notices; +} + +function reliabilityNoticeLabel( + kind: ReviewReliabilityNoticeKind, + labels: CodeReviewReportMarkdownLabels, +): string { + return labels.reliabilityNoticeLabels[kind] ?? 
RELIABILITY_NOTICE_FALLBACK_LABELS[kind]; +} + +function reliabilityNoticeMarkdownDetail(notice: ReviewReliabilityNotice): string { + if (notice.detail?.trim()) { + return notice.detail.trim(); + } + if (typeof notice.count === 'number') { + return `Count: ${notice.count}`; + } + return ''; +} + +function reliabilityNoticeMarkdownLine( + notice: ReviewReliabilityNotice, + labels: CodeReviewReportMarkdownLabels, +): string { + const tags = [notice.severity, notice.source].filter(Boolean).join('/'); + const detail = reliabilityNoticeMarkdownDetail(notice); + const tagText = tags ? ` [${tags}]` : ''; + return detail + ? `- ${reliabilityNoticeLabel(notice.kind, labels)}${tagText}: ${detail}` + : `- ${reliabilityNoticeLabel(notice.kind, labels)}${tagText}`; +} + +export function buildCodeReviewReliabilityNotices( + report: CodeReviewReportData, + runManifest?: ReviewTeamRunManifest, +): ReviewReliabilityNotice[] { + const notices: ReviewReliabilityNotice[] = []; + const structuredNotices = structuredReliabilityNoticeMap(report); + const hasContextPressure = runManifest + ? 
runManifest.tokenBudget.largeDiffSummaryFirst || runManifest.tokenBudget.warnings.length > 0 + : false; + + const structuredContextPressure = structuredNotices.get('context_pressure'); + if (structuredContextPressure) { + notices.push(structuredContextPressure); + } else if (hasContextPressure && runManifest) { + notices.push({ + kind: 'context_pressure', + severity: 'info', + count: runManifest.tokenBudget.estimatedReviewerCalls, + source: 'manifest', + }); + } + + const structuredCompressionPreserved = structuredNotices.get('compression_preserved'); + if (structuredCompressionPreserved) { + notices.push(structuredCompressionPreserved); + } else if (hasCompressionPreservationNote(report)) { + notices.push({ + kind: 'compression_preserved', + severity: 'info', + source: 'inferred', + }); + } + + for (const kind of ['cache_hit', 'cache_miss', 'concurrency_limited'] as const) { + const structuredNotice = structuredNotices.get(kind); + if (structuredNotice) { + notices.push(structuredNotice); + } + } + + const partialReviewerCount = countPartialReviewers(report.reviewers); + const structuredPartialReviewer = structuredNotices.get('partial_reviewer'); + if (structuredPartialReviewer) { + notices.push(structuredPartialReviewer); + } else if (partialReviewerCount > 0) { + notices.push({ + kind: 'partial_reviewer', + severity: 'warning', + count: partialReviewerCount, + source: 'runtime', + }); + } + + const structuredRetryGuidance = structuredNotices.get('retry_guidance'); + if (structuredRetryGuidance) { + notices.push(structuredRetryGuidance); + } else if (partialReviewerCount > 0) { + notices.push({ + kind: 'retry_guidance', + severity: 'warning', + count: partialReviewerCount, + source: 'runtime', + }); + } + + const skippedReviewerCount = countSkippedReviewers(runManifest); + const structuredSkippedReviewers = structuredNotices.get('skipped_reviewers'); + if (structuredSkippedReviewers) { + notices.push(structuredSkippedReviewers); + } else if (skippedReviewerCount 
> 0) { + notices.push({ + kind: 'skipped_reviewers', + severity: 'info', + count: skippedReviewerCount, + source: 'manifest', + }); + } + + const tokenBudgetLimitedReviewerCount = countTokenBudgetLimitedReviewers(runManifest); + const structuredTokenBudgetLimited = structuredNotices.get('token_budget_limited'); + if (structuredTokenBudgetLimited) { + notices.push(structuredTokenBudgetLimited); + } else if (tokenBudgetLimitedReviewerCount > 0) { + notices.push({ + kind: 'token_budget_limited', + severity: 'warning', + count: tokenBudgetLimitedReviewerCount, + source: 'manifest', + }); + } + + const decisionItemCount = countDecisionItems(report); + const structuredUserDecision = structuredNotices.get('user_decision'); + if (structuredUserDecision) { + notices.push(structuredUserDecision); + } else if (decisionItemCount > 0) { + notices.push({ + kind: 'user_decision', + severity: 'action', + count: decisionItemCount, + source: 'report', + }); + } + + return RELIABILITY_NOTICE_ORDER + .map((kind) => notices.find((notice) => notice.kind === kind)) + .filter((notice): notice is ReviewReliabilityNotice => Boolean(notice)); +} + export function buildCodeReviewReportSections(report: CodeReviewReportData): ReviewReportSections { const structuredSections = report.report_sections; @@ -291,6 +649,7 @@ export function buildCodeReviewReportSections(report: CodeReviewReportData): Rev const strengthGroups = buildGroups(STRENGTH_GROUP_ORDER, structuredSections?.strength_groups); const executiveSummary = nonEmpty(structuredSections?.executive_summary); const coverageNotes = nonEmpty(structuredSections?.coverage_notes); + const partialReviewerCoverageNotes = buildPartialReviewerCoverageNotes(report.reviewers); const confidenceNote = report.summary?.confidence_note?.trim(); return { @@ -304,8 +663,8 @@ export function buildCodeReviewReportSections(report: CodeReviewReportData): Rev ? strengthGroups : buildLegacyStrengthGroups(report), coverageNotes: coverageNotes.length > 0 - ? 
coverageNotes - : nonEmpty([confidenceNote]), + ? nonEmpty([...coverageNotes, ...partialReviewerCoverageNotes]) + : nonEmpty([confidenceNote, ...partialReviewerCoverageNotes]), issueStats: buildIssueStats(report.issues), reviewerStats: buildReviewerStats(report.reviewers), }; @@ -330,6 +689,10 @@ function mergeLabels(labels?: Partial): CodeRevi ...DEFAULT_CODE_REVIEW_MARKDOWN_LABELS.groupTitles, ...labels?.groupTitles, }, + reliabilityNoticeLabels: { + ...DEFAULT_CODE_REVIEW_MARKDOWN_LABELS.reliabilityNoticeLabels, + ...labels?.reliabilityNoticeLabels, + }, }; } @@ -352,9 +715,137 @@ function issueLocation(issue: CodeReviewIssue): string { return issue.line ? `${issue.file}:${issue.line}` : issue.file; } +function manifestTarget(manifest: ReviewTeamRunManifest): string { + return manifest.target.tags.length > 0 + ? manifest.target.tags.join(', ') + : manifest.target.source; +} + +function manifestMemberLabel(member: ReviewTeamManifestMember): string { + return member.displayName || member.subagentId; +} + +function manifestMemberLine(member: ReviewTeamManifestMember): string { + return `${manifestMemberLabel(member)} (${member.subagentId})`; +} + +function pluralize(count: number, singular: string): string { + return `${count} ${singular}${count === 1 ? '' : 's'}`; +} + +function pushPreReviewSummarySection( + lines: string[], + manifest: ReviewTeamRunManifest, +): void { + const summary = manifest.preReviewSummary; + if (!summary) { + return; + } + + lines.push(`### Pre-review summary`); + lines.push(`- ${summary.summary}`); + lines.push(`- Files: ${summary.fileCount}`); + if (summary.lineCount !== undefined) { + lines.push(`- Lines changed: ${summary.lineCount} (${summary.lineCountSource})`); + } else { + lines.push(`- Lines changed: unknown (${summary.lineCountSource})`); + } + if (summary.workspaceAreas.length > 0) { + for (const area of summary.workspaceAreas) { + const sampleFiles = area.sampleFiles.length > 0 + ? 
` (${area.sampleFiles.join(', ')})` + : ''; + lines.push(`- ${area.key}: ${pluralize(area.fileCount, 'file')}${sampleFiles}`); + } + } + lines.push(''); +} + +function pushSharedContextCacheSection( + lines: string[], + manifest: ReviewTeamRunManifest, +): void { + const cachePlan = manifest.sharedContextCache; + if (!cachePlan) { + return; + } + + lines.push(`### Shared context cache`); + if (cachePlan.entries.length === 0) { + lines.push('- None.'); + } else { + for (const entry of cachePlan.entries) { + lines.push( + `- ${entry.cacheKey}: ${entry.path} -> ${entry.consumerPacketIds.join(', ')}`, + ); + } + } + if (cachePlan.omittedEntryCount > 0) { + lines.push(`- Omitted entries: ${cachePlan.omittedEntryCount}`); + } + lines.push(''); +} + +function pushIncrementalReviewCacheSection( + lines: string[], + manifest: ReviewTeamRunManifest, +): void { + const cachePlan = manifest.incrementalReviewCache; + if (!cachePlan) { + return; + } + + lines.push(`### Incremental review cache`); + lines.push(`- Cache key: ${cachePlan.cacheKey}`); + lines.push(`- Fingerprint: ${cachePlan.fingerprint}`); + lines.push(`- Strategy: ${cachePlan.strategy}`); + lines.push(`- Reviewer packets: ${cachePlan.reviewerPacketIds.join(', ') || 'none'}`); + lines.push(`- Invalidates on: ${cachePlan.invalidatesOn.join(', ') || 'none'}`); + lines.push(''); +} + +function pushRunManifestSection( + lines: string[], + manifest: ReviewTeamRunManifest, + labels: CodeReviewReportMarkdownLabels, +): void { + const activeReviewers = getActiveReviewTeamManifestMembers(manifest); + + lines.push(`## ${labels.runManifest}`); + lines.push(`- ${labels.target}: ${manifestTarget(manifest)}`); + lines.push(`- ${labels.budget}: ${manifest.tokenBudget.mode}`); + lines.push(`- ${labels.estimatedCalls}: ${manifest.tokenBudget.estimatedReviewerCalls}`); + if (manifest.strategyRecommendation) { + lines.push(`- Recommended strategy: ${manifest.strategyRecommendation.strategyLevel}`); + lines.push(`- Recommendation 
score: ${manifest.strategyRecommendation.score}`); + lines.push(`- Recommendation rationale: ${manifest.strategyRecommendation.rationale}`); + } + lines.push(''); + lines.push(`### ${labels.activeReviewers}`); + pushList( + lines, + activeReviewers.map((member) => manifestMemberLine(member)), + labels.noItems, + ); + lines.push(''); + lines.push(`### ${labels.skippedReviewers}`); + pushList( + lines, + manifest.skippedReviewers.map((member) => + `${manifestMemberLine(member)}: ${member.reason ?? 'skipped'}`, + ), + labels.noItems, + ); + lines.push(''); + pushPreReviewSummarySection(lines, manifest); + pushSharedContextCacheSection(lines, manifest); + pushIncrementalReviewCacheSection(lines, manifest); +} + export function formatCodeReviewReportMarkdown( report: CodeReviewReportData, labels?: Partial, + options?: CodeReviewReportMarkdownOptions, ): string { const mergedLabels = mergeLabels(labels); const sections = buildCodeReviewReportSections(report); @@ -374,6 +865,17 @@ export function formatCodeReviewReportMarkdown( lines.push(`- ${mergedLabels.scope}: ${report.review_scope.trim()}`); } lines.push(''); + if (report.review_mode === 'deep' && options?.runManifest) { + pushRunManifestSection(lines, options.runManifest, mergedLabels); + } + const reliabilityNotices = buildCodeReviewReliabilityNotices(report, options?.runManifest); + if (reliabilityNotices.length > 0) { + lines.push(`## ${mergedLabels.reliabilitySignals}`); + reliabilityNotices.forEach((notice) => { + lines.push(reliabilityNoticeMarkdownLine(notice, mergedLabels)); + }); + lines.push(''); + } lines.push(`## ${mergedLabels.issues}`); if (issues.length === 0) { lines.push(`- ${mergedLabels.noIssues}`); @@ -438,6 +940,17 @@ export function formatCodeReviewReportMarkdown( if (reviewer.summary) { lines.push(` - ${reviewer.summary}`); } + const packetId = reviewer.packet_id?.trim(); + if (packetId || reviewer.packet_status_source) { + const packetLabel = packetId || 'missing'; + const sourceLabel = 
reviewer.packet_status_source + ? ` (${reviewer.packet_status_source})` + : ''; + lines.push(` - ${mergedLabels.packet}: ${packetLabel}${sourceLabel}`); + } + if (reviewer.partial_output?.trim()) { + lines.push(` - ${mergedLabels.partialOutput}: ${reviewer.partial_output.trim()}`); + } } } lines.push(''); diff --git a/src/web-ui/src/flow_chat/utils/deepReviewCapacityGuard.test.ts b/src/web-ui/src/flow_chat/utils/deepReviewCapacityGuard.test.ts new file mode 100644 index 000000000..855400492 --- /dev/null +++ b/src/web-ui/src/flow_chat/utils/deepReviewCapacityGuard.test.ts @@ -0,0 +1,86 @@ +import { describe, expect, it } from 'vitest'; +import { + DEEP_REVIEW_SESSION_CONCURRENCY_WARNING_THRESHOLD, + deriveDeepReviewSessionConcurrencyGuard, +} from './deepReviewCapacityGuard'; +import type { FlowChatState, FlowToolItem, Session } from '../types/flow-chat'; + +function createTaskItem(id: string, status: FlowToolItem['status']): FlowToolItem { + return { + id, + type: 'tool', + toolName: 'Task', + timestamp: 1000, + status, + toolCall: { + id, + input: { subagent_type: 'ReviewSecurity' }, + }, + }; +} + +function createSession(items: FlowToolItem[]): Session { + return { + sessionId: 'parent-session', + sessionKind: 'normal', + status: 'active', + createdAt: 1000, + updatedAt: 2000, + lastActiveAt: 2000, + dialogTurns: [ + { + id: 'turn-1', + status: 'processing', + modelRounds: [ + { + id: 'round-1', + items, + }, + ], + } as any, + ], + } as Session; +} + +function createState(session: Session): FlowChatState { + return { + sessions: new Map([[session.sessionId, session]]), + activeSessionId: session.sessionId, + } as FlowChatState; +} + +describe('deriveDeepReviewSessionConcurrencyGuard', () => { + it('warns when the target session already has multiple active Task subagents', () => { + const state = createState(createSession([ + createTaskItem('task-1', 'running'), + createTaskItem('task-2', 'streaming'), + ])); + + const guard = 
deriveDeepReviewSessionConcurrencyGuard(state, 'parent-session'); + + expect(guard.activeSubagentCount).toBe(DEEP_REVIEW_SESSION_CONCURRENCY_WARNING_THRESHOLD); + expect(guard.highActivity).toBe(true); + }); + + it('ignores completed Task subagents and unrelated sessions', () => { + const targetSession = createSession([ + createTaskItem('task-1', 'completed'), + ]); + const unrelatedSession = { + ...createSession([createTaskItem('task-2', 'running')]), + sessionId: 'unrelated-session', + } as Session; + const state = { + sessions: new Map([ + [targetSession.sessionId, targetSession], + [unrelatedSession.sessionId, unrelatedSession], + ]), + activeSessionId: targetSession.sessionId, + } as FlowChatState; + + const guard = deriveDeepReviewSessionConcurrencyGuard(state, 'parent-session'); + + expect(guard.activeSubagentCount).toBe(0); + expect(guard.highActivity).toBe(false); + }); +}); diff --git a/src/web-ui/src/flow_chat/utils/deepReviewCapacityGuard.ts b/src/web-ui/src/flow_chat/utils/deepReviewCapacityGuard.ts new file mode 100644 index 000000000..8a8a8b0ba --- /dev/null +++ b/src/web-ui/src/flow_chat/utils/deepReviewCapacityGuard.ts @@ -0,0 +1,67 @@ +import type { FlowChatState, FlowToolItem, Session } from '../types/flow-chat'; + +export const DEEP_REVIEW_SESSION_CONCURRENCY_WARNING_THRESHOLD = 2; + +export interface DeepReviewSessionConcurrencyGuard { + activeSubagentCount: number; + highActivity: boolean; +} + +const ACTIVE_TOOL_STATUSES = new Set([ + 'pending', + 'preparing', + 'running', + 'streaming', + 'receiving', + 'analyzing', +]); + +function isActiveSubagentTask(item: unknown): item is FlowToolItem { + if (!item || typeof item !== 'object') { + return false; + } + const toolItem = item as FlowToolItem; + if ( + toolItem.type !== 'tool' || + toolItem.toolName !== 'Task' || + !ACTIVE_TOOL_STATUSES.has(toolItem.status) + ) { + return false; + } + + const input = toolItem.toolCall?.input ?? {}; + const subagentType = input.subagent_type ?? 
input.subagentType ?? input.agent_type ?? input.agentType; + return typeof subagentType === 'string' && subagentType.trim().length > 0; +} + +function countActiveSubagentTasks(session?: Session): number { + if (!session) { + return 0; + } + + let count = 0; + for (const turn of session.dialogTurns ?? []) { + for (const round of turn.modelRounds ?? []) { + for (const item of round.items ?? []) { + if (isActiveSubagentTask(item)) { + count += 1; + } + } + } + } + return count; +} + +export function deriveDeepReviewSessionConcurrencyGuard( + state: FlowChatState, + parentSessionId?: string | null, +): DeepReviewSessionConcurrencyGuard { + const activeSubagentCount = countActiveSubagentTasks( + parentSessionId ? state.sessions.get(parentSessionId) : undefined, + ); + + return { + activeSubagentCount, + highActivity: activeSubagentCount >= DEEP_REVIEW_SESSION_CONCURRENCY_WARNING_THRESHOLD, + }; +} diff --git a/src/web-ui/src/flow_chat/utils/deepReviewContinuation.test.ts b/src/web-ui/src/flow_chat/utils/deepReviewContinuation.test.ts index 7080526a5..e41f83849 100644 --- a/src/web-ui/src/flow_chat/utils/deepReviewContinuation.test.ts +++ b/src/web-ui/src/flow_chat/utils/deepReviewContinuation.test.ts @@ -168,6 +168,72 @@ describe('deepReviewContinuation', () => { expect(prompt).toContain('ReviewSecurity: timed_out'); }); + it('tracks reviewer partial timeout output when available', () => { + const session = createDeepReviewSession({ + error: 'Timeout', + dialogTurns: [ + { + id: 'turn-1', + sessionId: 'deep-review-session', + timestamp: 1, + status: 'error', + userMessage: { + id: 'user-1', + content: 'Original command:\n/DeepReview review latest commit', + timestamp: 1, + }, + startTime: 1, + modelRounds: [ + { + id: 'round-1', + index: 0, + startTime: 1, + isStreaming: false, + isComplete: true, + status: 'completed', + items: [ + { + id: 'tool-1', + type: 'tool', + toolName: 'Task', + toolCall: { + id: 'call-security', + input: { subagent_type: 'ReviewSecurity' }, + 
}, + toolResult: { + result: { + status: 'partial_timeout', + partial_output: 'Found one likely token logging issue before timeout.', + }, + success: true, + resultForAssistant: + "Subagent 'ReviewSecurity' timed out with partial result.", + }, + startTime: 1, + timestamp: 1, + status: 'completed', + }, + ], + }, + ], + }, + ], + }); + + const interruption = deriveDeepReviewInterruption(session, { category: 'timeout' }); + const prompt = buildDeepReviewContinuationPrompt(interruption!); + + expect(interruption?.reviewers).toEqual([ + expect.objectContaining({ + reviewer: 'ReviewSecurity', + status: 'partial_timeout', + partialOutput: 'Found one likely token logging issue before timeout.', + }), + ]); + expect(prompt).toContain('ReviewSecurity: partial_timeout'); + expect(prompt).toContain('partial output: Found one likely token logging issue before timeout.'); + }); + it('marks policy-ineligible reviewers as skipped so continuation does not re-run them', () => { const session = createDeepReviewSession({ dialogTurns: [ @@ -287,4 +353,160 @@ describe('deepReviewContinuation', () => { expect(actionCodes).not.toContain('switch_model'); expect(actionCodes).not.toContain('wait_and_retry'); }); + + it('includes retry budget constraints from the persisted run manifest', () => { + const session = createDeepReviewSession({ + error: 'Timeout', + deepReviewRunManifest: { + executionPolicy: { + maxRetriesPerRole: 1, + }, + skippedReviewers: [], + }, + dialogTurns: [ + { + id: 'turn-1', + sessionId: 'deep-review-session', + timestamp: 1, + status: 'error', + userMessage: { + id: 'user-1', + content: 'Original command:\n/DeepReview review latest commit', + timestamp: 1, + }, + startTime: 1, + modelRounds: [ + { + id: 'round-1', + index: 0, + startTime: 1, + isStreaming: false, + isComplete: true, + status: 'completed', + items: [ + { + id: 'tool-1', + type: 'tool', + toolName: 'Task', + toolCall: { + id: 'call-security', + input: { subagent_type: 'ReviewSecurity' }, + }, + 
toolResult: { + result: { status: 'timed_out' }, + success: false, + error: 'Reviewer timed out', + }, + startTime: 1, + timestamp: 1, + status: 'error', + }, + ], + }, + ], + error: 'Timeout', + }, + ], + } as Partial); + + const interruption = deriveDeepReviewInterruption(session, { category: 'timeout' }); + const prompt = buildDeepReviewContinuationPrompt(interruption!); + + expect(prompt).toContain('max_retries_per_role = 1'); + expect(prompt).toContain('retry = true'); + expect(prompt).toContain('reduce the scope'); + }); + + it('includes persisted manifest skips when continuing an interrupted review', () => { + const session = createDeepReviewSession({ + error: 'Timeout', + deepReviewRunManifest: { + skippedReviewers: [ + { + subagentId: 'ReviewFrontend', + displayName: 'Frontend Reviewer', + reason: 'not_applicable', + }, + ], + }, + dialogTurns: [ + { + id: 'turn-1', + sessionId: 'deep-review-session', + timestamp: 1, + status: 'error', + userMessage: { + id: 'user-1', + content: 'Original command:\n/DeepReview review latest commit', + timestamp: 1, + }, + startTime: 1, + modelRounds: [], + error: 'Timeout', + }, + ], + } as Partial); + + const interruption = deriveDeepReviewInterruption(session, { category: 'timeout' }); + const prompt = buildDeepReviewContinuationPrompt(interruption!); + + expect(prompt).toContain('Do not run reviewers skipped as not_applicable.'); + expect(prompt).toContain('ReviewFrontend: skipped (not_applicable)'); + }); + + it('includes incremental cache guidance from the persisted run manifest', () => { + const session = createDeepReviewSession({ + error: 'Timeout', + deepReviewRunManifest: { + incrementalReviewCache: { + source: 'target_manifest', + strategy: 'reuse_completed_packets_when_fingerprint_matches', + cacheKey: 'incremental-review:abc12345', + fingerprint: 'abc12345', + filePaths: [ + 'src/web-ui/src/shared/services/reviewTeamService.ts', + ], + workspaceAreas: ['web-ui'], + reviewerPacketIds: [ + 
'reviewer:ReviewBusinessLogic', + 'reviewer:ReviewSecurity', + ], + lineCount: 128, + lineCountSource: 'diff_stat', + invalidatesOn: [ + 'target_file_set_changed', + 'target_line_count_changed', + 'reviewer_roster_changed', + ], + }, + skippedReviewers: [], + }, + dialogTurns: [ + { + id: 'turn-1', + sessionId: 'deep-review-session', + timestamp: 1, + status: 'error', + userMessage: { + id: 'user-1', + content: 'Original command:\n/DeepReview review latest commit', + timestamp: 1, + }, + startTime: 1, + modelRounds: [], + error: 'Timeout', + }, + ], + } as Partial); + + const interruption = deriveDeepReviewInterruption(session, { category: 'timeout' }); + const prompt = buildDeepReviewContinuationPrompt(interruption!); + + expect(prompt).toContain('Incremental review cache guidance:'); + expect(prompt).toContain('cache_key: incremental-review:abc12345'); + expect(prompt).toContain('fingerprint: abc12345'); + expect(prompt).toContain('Only reuse completed reviewer outputs when the current review target fingerprint still matches.'); + expect(prompt).toContain('reviewer:ReviewBusinessLogic'); + expect(prompt).toContain('target_file_set_changed'); + }); }); diff --git a/src/web-ui/src/flow_chat/utils/deepReviewContinuation.ts b/src/web-ui/src/flow_chat/utils/deepReviewContinuation.ts index 0bdc62579..c30d53206 100644 --- a/src/web-ui/src/flow_chat/utils/deepReviewContinuation.ts +++ b/src/web-ui/src/flow_chat/utils/deepReviewContinuation.ts @@ -6,13 +6,21 @@ import { import type { FlowToolItem, Session } from '../types/flow-chat'; export type DeepReviewContinuationPhase = 'review_interrupted' | 'resume_blocked'; -export type DeepReviewReviewerStatus = 'completed' | 'timed_out' | 'failed' | 'cancelled' | 'skipped' | 'unknown'; +export type DeepReviewReviewerStatus = + | 'completed' + | 'partial_timeout' + | 'timed_out' + | 'failed' + | 'cancelled' + | 'skipped' + | 'unknown'; export interface DeepReviewReviewerProgress { reviewer: string; status: 
DeepReviewReviewerStatus; toolCallId?: string; error?: string; + partialOutput?: string; } export interface DeepReviewInterruption { @@ -24,6 +32,7 @@ export interface DeepReviewInterruption { canResume: boolean; recommendedActions: AiErrorAction[]; reviewers: DeepReviewReviewerProgress[]; + runManifest?: Session['deepReviewRunManifest']; } const RESUME_BLOCKING_CATEGORIES = new Set([ @@ -75,6 +84,7 @@ export function deriveDeepReviewInterruption( canResume, recommendedActions: presentation.actions, reviewers: collectReviewerProgress(session), + runManifest: session.deepReviewRunManifest, }; } @@ -83,10 +93,31 @@ export function buildDeepReviewContinuationPrompt(interruption: DeepReviewInterr ? interruption.reviewers .map((reviewer) => { const suffix = reviewer.error ? ` (${reviewer.error})` : ''; - return `- ${reviewer.reviewer}: ${reviewer.status}${suffix}`; + const partialOutput = reviewer.partialOutput + ? `; partial output: ${reviewer.partialOutput}` + : ''; + return `- ${reviewer.reviewer}: ${reviewer.status}${suffix}${partialOutput}`; }) .join('\n') : '- No reliable reviewer progress was detected. Reconstruct progress from this session before deciding what to rerun.'; + const skippedReviewers = interruption.runManifest?.skippedReviewers ?? []; + const manifestSkippedReviewers = formatManifestSkippedReviewers(skippedReviewers); + const manifestRules = skippedReviewers.some((reviewer) => reviewer.reason === 'not_applicable') + ? [ + '- Do not run reviewers skipped as not_applicable.', + ] + : []; + const manifestBlock = manifestSkippedReviewers.length + ? 
[ + '', + 'Run manifest reviewer skips:', + manifestSkippedReviewers.join('\n'), + ] + : []; + const retryBudgetRules = formatRetryBudgetRules(interruption.runManifest); + const incrementalCacheBlock = formatIncrementalReviewCacheGuidance( + interruption.runManifest, + ); return [ 'Continue the interrupted Deep Review in this same session.', @@ -94,6 +125,8 @@ export function buildDeepReviewContinuationPrompt(interruption: DeepReviewInterr 'Recovery rules:', '- Do not restart completed reviewer work unless the existing result is clearly incomplete or unusable.', '- Do not re-run skipped, non-applicable, or policy-ineligible reviewers; keep them recorded as skipped coverage.', + ...retryBudgetRules, + ...manifestRules, '- Re-run only missing, failed, timed-out, or cancelled reviewers when enough context exists.', '- If reviewer coverage remains incomplete, say that explicitly and mark the final report as lower confidence.', '- Run ReviewJudge before the final submit_code_review result when reviewer findings exist.', @@ -103,6 +136,8 @@ export function buildDeepReviewContinuationPrompt(interruption: DeepReviewInterr '', 'Known reviewer progress:', reviewerLines, + ...manifestBlock, + ...incrementalCacheBlock, '', 'Last error:', `- category: ${interruption.errorDetail.category ?? 
'unknown'}`, @@ -111,6 +146,66 @@ export function buildDeepReviewContinuationPrompt(interruption: DeepReviewInterr ].join('\n'); } +function formatIncrementalReviewCacheGuidance( + runManifest: Session['deepReviewRunManifest'] | undefined, +): string[] { + const cachePlan = runManifest?.incrementalReviewCache; + if (!cachePlan) { + return []; + } + + return [ + '', + 'Incremental review cache guidance:', + `- cache_key: ${cachePlan.cacheKey}`, + `- fingerprint: ${cachePlan.fingerprint}`, + `- strategy: ${cachePlan.strategy}`, + `- reviewer_packet_ids: ${cachePlan.reviewerPacketIds.join(', ') || 'none'}`, + `- invalidates_on: ${cachePlan.invalidatesOn.join(', ') || 'none'}`, + '- Only reuse completed reviewer outputs when the current review target fingerprint still matches.', + '- If any invalidates_on condition changed, rerun affected reviewer packets and explain the fresh review boundary.', + ]; +} + +function formatRetryBudgetRules( + runManifest: Session['deepReviewRunManifest'] | undefined, +): string[] { + const maxRetriesPerRole = runManifest?.executionPolicy?.maxRetriesPerRole; + const baseRules = [ + '- Treat partial_timeout reviewers as preserved partial evidence. Re-run them only when useful evidence is missing or unusable.', + ]; + + if (typeof maxRetriesPerRole !== 'number') { + return [ + ...baseRules, + '- Respect the original retry budget if it is recoverable from context; do not retry the same reviewer repeatedly.', + ]; + } + + if (maxRetriesPerRole <= 0) { + return [ + ...baseRules, + '- Retry budget from manifest: max_retries_per_role = 0. 
Do not re-run failed, timed-out, or partial reviewers automatically; report remaining gaps instead.', + ]; + } + + return [ + ...baseRules, + `- Retry budget from manifest: max_retries_per_role = ${maxRetriesPerRole}.`, + '- For each retry, use the same subagent_type with retry = true, reduce the scope to missing evidence, downgrade strategy when possible, and use a shorter timeout.', + ]; +} + +function formatManifestSkippedReviewers( + skippedReviewers: NonNullable['skippedReviewers'], +): string[] { + return skippedReviewers.map((reviewer) => { + const reviewerName = reviewer.subagentId || reviewer.displayName; + const reason = reviewer.reason ?? 'unknown'; + return `- ${reviewerName}: skipped (${reason})`; + }); +} + function findOriginalTarget(session: Session): string { const firstTurn = session.dialogTurns[0]; return firstTurn?.userMessage?.content?.trim() || 'Unknown Deep Review target.'; @@ -151,8 +246,12 @@ function getReviewerProgressFromTask(item: FlowToolItem): DeepReviewReviewerProg } const error = item.toolResult?.error; + const resultStatus = String(item.toolResult?.result?.status ?? '').trim(); + const partialOutput = getPartialOutput(item); let status: DeepReviewReviewerStatus = 'unknown'; - if (item.toolResult?.success === true || item.status === 'completed') { + if (resultStatus === 'partial_timeout' || /partial[_ -]?timeout/i.test(error ?? '')) { + status = 'partial_timeout'; + } else if (item.toolResult?.success === true || item.status === 'completed') { status = 'completed'; } else if (/timeout|timed out/i.test(error ?? 
'')) { status = 'timed_out'; @@ -172,6 +271,7 @@ function getReviewerProgressFromTask(item: FlowToolItem): DeepReviewReviewerProg status, toolCallId: item.toolCall.id, error, + partialOutput, }; } @@ -181,3 +281,9 @@ function isPolicyIneligibleReviewerError(error?: string): boolean { } return /DeepReview Task policy violation|deep_review_subagent_(?:not_review|not_allowed|not_readonly)/i.test(error); } + +function getPartialOutput(item: FlowToolItem): string | undefined { + const result = item.toolResult?.result; + const value = result?.partial_output ?? result?.partialOutput; + return typeof value === 'string' && value.trim() ? value.trim() : undefined; +} diff --git a/src/web-ui/src/flow_chat/utils/deepReviewQueueStateEvents.test.ts b/src/web-ui/src/flow_chat/utils/deepReviewQueueStateEvents.test.ts new file mode 100644 index 000000000..39b3bfb95 --- /dev/null +++ b/src/web-ui/src/flow_chat/utils/deepReviewQueueStateEvents.test.ts @@ -0,0 +1,73 @@ +import { describe, expect, it } from 'vitest'; +import type { DeepReviewQueueStateChangedEvent } from '@/infrastructure/api/service-api/AgentAPI'; +import type { Session } from '../types/flow-chat'; +import { buildDeepReviewCapacityQueueStateFromEvent } from './deepReviewQueueStateEvents'; + +function createQueueEvent( + overrides: Partial = {}, +): DeepReviewQueueStateChangedEvent { + return { + sessionId: 'review-child', + turnId: 'turn-1', + queueState: { + toolId: 'task-1', + subagentType: 'ReviewSecurity', + status: 'queued_for_capacity', + reason: 'provider_concurrency_limit', + queuedReviewerCount: 2, + activeReviewerCount: 1, + effectiveParallelInstances: 2, + optionalReviewerCount: 1, + queueElapsedMs: 1200, + maxQueueWaitSeconds: 60, + sessionConcurrencyHigh: true, + }, + ...overrides, + }; +} + +function createSession(sessionKind: Session['sessionKind']): Session { + return { + sessionId: 'review-child', + sessionKind, + status: 'active', + createdAt: 1000, + updatedAt: 1000, + lastActiveAt: 1000, + 
dialogTurns: [], + } as Session; +} + +describe('buildDeepReviewCapacityQueueStateFromEvent', () => { + it('maps backend queue events into the action bar queue state for Deep Review sessions', () => { + const state = buildDeepReviewCapacityQueueStateFromEvent( + createQueueEvent(), + createSession('deep_review'), + ); + + expect(state).toEqual({ + toolId: 'task-1', + subagentType: 'ReviewSecurity', + dialogTurnId: 'turn-1', + status: 'queued_for_capacity', + queuedReviewerCount: 2, + activeReviewerCount: 1, + effectiveParallelInstances: 2, + optionalReviewerCount: 1, + queueElapsedMs: 1200, + runElapsedMs: undefined, + maxQueueWaitSeconds: 60, + sessionConcurrencyHigh: true, + controlMode: 'backend', + }); + }); + + it('ignores queue events for non-Deep Review sessions', () => { + const state = buildDeepReviewCapacityQueueStateFromEvent( + createQueueEvent(), + createSession('normal'), + ); + + expect(state).toBeNull(); + }); +}); diff --git a/src/web-ui/src/flow_chat/utils/deepReviewQueueStateEvents.ts b/src/web-ui/src/flow_chat/utils/deepReviewQueueStateEvents.ts new file mode 100644 index 000000000..4d4019a7c --- /dev/null +++ b/src/web-ui/src/flow_chat/utils/deepReviewQueueStateEvents.ts @@ -0,0 +1,33 @@ +import type { DeepReviewQueueStateChangedEvent } from '@/infrastructure/api/service-api/AgentAPI'; +import type { DeepReviewCapacityQueueState } from '../store/deepReviewActionBarStore'; +import type { Session } from '../types/flow-chat'; + +export function buildDeepReviewCapacityQueueStateFromEvent( + event: DeepReviewQueueStateChangedEvent, + session: Session | undefined, +): DeepReviewCapacityQueueState | null { + if (session?.sessionKind !== 'deep_review') { + return null; + } + + const queueState = event.queueState; + if (!queueState) { + return null; + } + + return { + toolId: queueState.toolId, + subagentType: queueState.subagentType, + dialogTurnId: event.turnId, + status: queueState.status, + queuedReviewerCount: Math.max(0, 
queueState.queuedReviewerCount ?? 0), + activeReviewerCount: queueState.activeReviewerCount, + effectiveParallelInstances: queueState.effectiveParallelInstances, + optionalReviewerCount: queueState.optionalReviewerCount, + queueElapsedMs: queueState.queueElapsedMs, + runElapsedMs: queueState.runElapsedMs, + maxQueueWaitSeconds: queueState.maxQueueWaitSeconds, + sessionConcurrencyHigh: queueState.sessionConcurrencyHigh, + controlMode: 'backend', + }; +} diff --git a/src/web-ui/src/flow_chat/utils/sessionMetadata.test.ts b/src/web-ui/src/flow_chat/utils/sessionMetadata.test.ts index dd4c721a6..3d1f5ff5f 100644 --- a/src/web-ui/src/flow_chat/utils/sessionMetadata.test.ts +++ b/src/web-ui/src/flow_chat/utils/sessionMetadata.test.ts @@ -357,6 +357,27 @@ describe('sessionMetadata', () => { }); }); + it('persists the Deep Review run manifest from the runtime session', () => { + const runManifest = { + reviewMode: 'deep', + skippedReviewers: [ + { + subagentId: 'ReviewFrontend', + displayName: 'Frontend Reviewer', + reason: 'not_applicable', + }, + ], + }; + const session = createSession({ + sessionKind: 'deep_review', + deepReviewRunManifest: runManifest, + } as Partial); + + const metadata = buildSessionMetadata(session); + + expect(metadata.deepReviewRunManifest).toBe(runManifest); + }); + describe('unread completion persistence', () => { it('persists unreadCompletion from session to metadata', () => { const session = createSession({ diff --git a/src/web-ui/src/flow_chat/utils/sessionMetadata.ts b/src/web-ui/src/flow_chat/utils/sessionMetadata.ts index 84dc33b43..f32882734 100644 --- a/src/web-ui/src/flow_chat/utils/sessionMetadata.ts +++ b/src/web-ui/src/flow_chat/utils/sessionMetadata.ts @@ -261,6 +261,7 @@ export function buildSessionMetadata( | 'titleI18nParams' | 'hasUnreadCompletion' | 'needsUserAttention' + | 'deepReviewRunManifest' >, existingMetadata?: SessionMetadata | null ): SessionMetadata { @@ -317,5 +318,7 @@ export function buildSessionMetadata( // 
`undefined ?? existingMetadata.unreadCompletion` would restore the old value. unreadCompletion: session.hasUnreadCompletion, needsUserAttention: session.needsUserAttention, + deepReviewRunManifest: + session.deepReviewRunManifest ?? existingMetadata?.deepReviewRunManifest, }; } diff --git a/src/web-ui/src/infrastructure/api/service-api/ACPClientAPI.ts b/src/web-ui/src/infrastructure/api/service-api/ACPClientAPI.ts index 9db847132..05a9d1d89 100644 --- a/src/web-ui/src/infrastructure/api/service-api/ACPClientAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/ACPClientAPI.ts @@ -1,4 +1,5 @@ import { api } from './ApiClient'; +import type { ImageContextData as ImageInputContextData } from './ImageContextTypes'; export type AcpClientPermissionMode = 'ask' | 'allow_once' | 'reject_once'; export type AcpClientStatus = 'configured' | 'starting' | 'running' | 'stopped' | 'failed'; @@ -60,6 +61,8 @@ export interface StartAcpDialogTurnRequest { remoteConnectionId?: string; remoteSshHost?: string; timeoutSeconds?: number; + imageContexts?: ImageInputContextData[]; + userMessageMetadata?: Record; } export interface CancelAcpDialogTurnRequest { diff --git a/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts b/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts index 3e86d3bc9..66e16b519 100644 --- a/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/AgentAPI.ts @@ -62,6 +62,7 @@ export interface StartDialogTurnRequest { workspacePath?: string; /** Optional multimodal image contexts (snake_case fields, aligned with backend ImageContextData). 
*/ imageContexts?: ImageInputContextData[]; + userMessageMetadata?: Record; } export interface CompactSessionRequest { @@ -155,6 +156,51 @@ export interface ToolEvent extends AgenticEvent { subagentParentInfo?: SubagentParentInfo; } +export type DeepReviewQueueStatus = + | 'queued_for_capacity' + | 'paused_by_user' + | 'running' + | 'capacity_skipped'; + +export type DeepReviewQueueReason = + | 'provider_rate_limit' + | 'provider_concurrency_limit' + | 'retry_after' + | 'local_concurrency_cap' + | 'temporary_overload'; + +export interface DeepReviewQueueStateEventData { + toolId: string; + subagentType: string; + status: DeepReviewQueueStatus; + reason?: DeepReviewQueueReason; + queuedReviewerCount: number; + activeReviewerCount?: number; + effectiveParallelInstances?: number; + optionalReviewerCount?: number; + queueElapsedMs?: number; + runElapsedMs?: number; + maxQueueWaitSeconds?: number; + sessionConcurrencyHigh?: boolean; +} + +export interface DeepReviewQueueStateChangedEvent extends AgenticEvent { + queueState: DeepReviewQueueStateEventData; +} + +export type DeepReviewQueueControlAction = + | 'pause' + | 'continue' + | 'cancel' + | 'skip_optional'; + +export interface DeepReviewQueueControlRequest { + sessionId: string; + dialogTurnId: string; + toolId: string; + action: DeepReviewQueueControlAction; +} + export interface ImageAnalysisEvent extends AgenticEvent { imageCount?: number; @@ -283,6 +329,14 @@ export class AgentAPI { } } + async controlDeepReviewQueue(request: DeepReviewQueueControlRequest): Promise { + try { + await api.invoke('control_deep_review_queue', { request }); + } catch (error) { + throw createTauriCommandError('control_deep_review_queue', error, request); + } + } + async deleteSession( sessionId: string, @@ -439,6 +493,15 @@ export class AgentAPI { return api.listen('agentic://tool-event', callback); } + onDeepReviewQueueStateChanged( + callback: (event: DeepReviewQueueStateChangedEvent) => void + ): () => void { + return api.listen( 
+ 'agentic://deep-review-queue-state-changed', + callback + ); + } + onDialogTurnCompleted(callback: (event: AgenticEvent) => void): () => void { return api.listen('agentic://dialog-turn-completed', callback); @@ -497,7 +560,14 @@ export class AgentAPI { } } - + async getDefaultReviewTeamDefinition(): Promise { + try { + return await api.invoke('get_default_review_team_definition'); + } catch (error) { + throw createTauriCommandError('get_default_review_team_definition', error); + } + } + async generateSessionTitle( sessionId: string, userMessage: string, diff --git a/src/web-ui/src/infrastructure/api/service-api/GitAPI.ts b/src/web-ui/src/infrastructure/api/service-api/GitAPI.ts index 543971783..fd5591fae 100644 --- a/src/web-ui/src/infrastructure/api/service-api/GitAPI.ts +++ b/src/web-ui/src/infrastructure/api/service-api/GitAPI.ts @@ -89,11 +89,35 @@ export interface GitPullParams { } export interface GitDiffParams { + source?: string; + target?: string; + files?: string[]; + stat?: boolean; filePath?: string; staged?: boolean; commit?: string; } +export interface GitChangedFilesParams { + source?: string; + target?: string; + staged?: boolean; +} + +export type GitChangedFileStatus = + | 'added' + | 'modified' + | 'deleted' + | 'renamed' + | 'copied' + | 'unknown'; + +export interface GitChangedFile { + path: string; + old_path?: string; + status: GitChangedFileStatus; +} + export interface GitLogParams { maxCount?: number; since?: string; @@ -330,6 +354,17 @@ export class GitAPI { } + async getChangedFiles(repositoryPath: string, params: GitChangedFilesParams): Promise { + try { + return await api.invoke('git_get_changed_files', { + request: { repositoryPath, params } + }); + } catch (error) { + throw createTauriCommandError('git_get_changed_files', error, { repositoryPath, params }); + } + } + + async resetFiles(repositoryPath: string, files: string[], staged: boolean = false): Promise { try { return await api.invoke('git_reset_files', { @@ -435,4 +470,4 @@ 
export class GitAPI { } -export const gitAPI = new GitAPI(); \ No newline at end of file +export const gitAPI = new GitAPI(); diff --git a/src/web-ui/src/infrastructure/config/components/ReviewConfig.tsx b/src/web-ui/src/infrastructure/config/components/ReviewConfig.tsx index 753c7cc32..832a10555 100644 --- a/src/web-ui/src/infrastructure/config/components/ReviewConfig.tsx +++ b/src/web-ui/src/infrastructure/config/components/ReviewConfig.tsx @@ -23,12 +23,14 @@ import { removeDefaultReviewTeamMember, REVIEW_STRATEGY_DEFINITIONS, REVIEW_STRATEGY_LEVELS, + saveDefaultReviewTeamConcurrencyPolicy, saveDefaultReviewTeamExecutionPolicy, saveDefaultReviewTeamMemberStrategyOverride, saveDefaultReviewTeamStrategyLevel, type ReviewMemberStrategyLevel, type ReviewStrategyLevel, type ReviewTeam, + type ReviewTeamConcurrencyPolicy, type ReviewTeamExecutionPolicy, type ReviewTeamMember, } from '@/shared/services/reviewTeamService'; @@ -111,6 +113,7 @@ const ReviewConfig: React.FC = () => { const [subagents, setSubagents] = useState([]); const [candidateId, setCandidateId] = useState(''); const [savingPolicyKey, setSavingPolicyKey] = useState(null); + const [savingConcurrencyKey, setSavingConcurrencyKey] = useState(null); const [savingMemberId, setSavingMemberId] = useState(null); const [savingStrategyTarget, setSavingStrategyTarget] = useState(null); const [addingMember, setAddingMember] = useState(false); @@ -263,6 +266,29 @@ const ReviewConfig: React.FC = () => { } }, [loadData, notifyError, notifySuccess, t, team]); + const handleConcurrencyPolicyChange = useCallback(async ( + key: keyof ReviewTeamConcurrencyPolicy, + value: ReviewTeamConcurrencyPolicy[keyof ReviewTeamConcurrencyPolicy], + ) => { + if (!team) return; + + const nextPolicy = { + ...team.concurrencyPolicy, + [key]: value, + } as ReviewTeamConcurrencyPolicy; + setSavingConcurrencyKey(key); + setTeam({ ...team, concurrencyPolicy: nextPolicy }); + try { + await saveDefaultReviewTeamConcurrencyPolicy(nextPolicy); + 
notifySuccess(t('messages.saved')); + } catch (error) { + await loadData(); + notifyError(error instanceof Error ? error.message : t('messages.saveFailed')); + } finally { + setSavingConcurrencyKey(null); + } + }, [loadData, notifyError, notifySuccess, t, team]); + const handleModelChange = useCallback(async (member: ReviewTeamMember, modelId: string) => { if (!team) return; @@ -444,6 +470,34 @@ const ReviewConfig: React.FC = () => { + + + void handleConcurrencyPolicyChange('maxParallelInstances', value)} + min={1} + max={16} + step={1} + size="small" + disabled={savingConcurrencyKey === 'maxParallelInstances'} + /> + + + + void handleConcurrencyPolicyChange('maxQueueWaitSeconds', value)} + min={0} + max={600} + step={15} + unit="s" + size="small" + disabled={savingConcurrencyKey === 'maxQueueWaitSeconds'} + /> + + + + , +): ReviewSubagentToolReadinessResult { + const selectedToolNames = new Set(selectedTools); + const missingRequiredTools = REVIEW_SUBAGENT_REQUIRED_TOOLS.filter( + (toolName) => !selectedToolNames.has(toolName), + ); + const missingRecommendedTools = REVIEW_SUBAGENT_RECOMMENDED_TOOLS.filter( + (toolName) => !selectedToolNames.has(toolName), + ); + const readiness: ReviewSubagentToolReadiness = + missingRequiredTools.length > 0 + ? 'invalid' + : missingRecommendedTools.length > 0 + ? 
'degraded' + : 'ready'; + + return { + readiness, + requiredTools: [...REVIEW_SUBAGENT_REQUIRED_TOOLS], + recommendedTools: [...REVIEW_SUBAGENT_RECOMMENDED_TOOLS], + optionalTools: [...REVIEW_SUBAGENT_OPTIONAL_TOOLS], + missingRequiredTools, + missingRecommendedTools, + }; +} diff --git a/src/web-ui/src/shared/services/reviewTargetClassifier.test.ts b/src/web-ui/src/shared/services/reviewTargetClassifier.test.ts new file mode 100644 index 000000000..0f6e383da --- /dev/null +++ b/src/web-ui/src/shared/services/reviewTargetClassifier.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from 'vitest'; +import { + classifyReviewTargetFromFiles, + createUnknownReviewTargetClassification, + getReviewerApplicabilityRule, + normalizeReviewPath, + shouldRunReviewerForTarget, +} from './reviewTargetClassifier'; + +describe('reviewTargetClassifier', () => { + it('normalizes Windows and relative paths for review classification', () => { + expect(normalizeReviewPath('.\\src\\web-ui\\src\\App.tsx')).toBe( + 'src/web-ui/src/App.tsx', + ); + }); + + it('classifies frontend source, style, locale, and contract files', () => { + const target = classifyReviewTargetFromFiles( + [ + 'src/web-ui/src/App.tsx', + 'src/web-ui/src/app/App.scss', + 'src/web-ui/src/locales/en-US/flow-chat.json', + 'src/apps/desktop/src/api/agentic_api.rs', + ], + 'session_files', + ); + + expect(target.resolution).toBe('resolved'); + expect(target.tags).toEqual( + expect.arrayContaining([ + 'frontend_ui', + 'frontend_style', + 'frontend_i18n', + 'desktop_contract', + 'frontend_contract', + ]), + ); + expect(target.files[0]).toMatchObject({ + path: 'src/web-ui/src/App.tsx', + normalizedPath: 'src/web-ui/src/App.tsx', + source: 'session_files', + tags: expect.arrayContaining(['frontend_ui']), + }); + }); + + it('classifies backend core files without frontend tags', () => { + const target = classifyReviewTargetFromFiles( + ['src/crates/core/src/service/config/types.rs'], + 'session_files', + ); + + 
expect(target.resolution).toBe('resolved'); + expect(target.tags).toEqual(['backend_core']); + }); + + it('returns an unknown target when no file list is available', () => { + const target = createUnknownReviewTargetClassification('unknown'); + + expect(target.resolution).toBe('unknown'); + expect(target.tags).toEqual(['unknown']); + expect(target.warnings).toEqual([ + expect.objectContaining({ code: 'target_unknown' }), + ]); + }); + + it('keeps frontend reviewer applicability in a reusable registry', () => { + const rule = getReviewerApplicabilityRule('ReviewFrontend'); + + expect(rule).toEqual( + expect.objectContaining({ + subagentId: 'ReviewFrontend', + runWhenTargetUnknown: true, + matchingTags: expect.arrayContaining([ + 'frontend_ui', + 'frontend_contract', + ]), + }), + ); + }); + + it('evaluates conditional reviewer applicability from registry tags', () => { + const backendTarget = classifyReviewTargetFromFiles( + ['src/crates/core/src/service/config/types.rs'], + 'session_files', + ); + const frontendTarget = classifyReviewTargetFromFiles( + ['src/web-ui/src/App.tsx'], + 'session_files', + ); + const unknownTarget = createUnknownReviewTargetClassification('manual_prompt'); + + expect(shouldRunReviewerForTarget('ReviewFrontend', backendTarget)).toBe(false); + expect(shouldRunReviewerForTarget('ReviewFrontend', frontendTarget)).toBe(true); + expect(shouldRunReviewerForTarget('ReviewFrontend', unknownTarget)).toBe(true); + expect(shouldRunReviewerForTarget('ReviewSecurity', backendTarget)).toBe(true); + }); +}); diff --git a/src/web-ui/src/shared/services/reviewTargetClassifier.ts b/src/web-ui/src/shared/services/reviewTargetClassifier.ts new file mode 100644 index 000000000..fc85fedcf --- /dev/null +++ b/src/web-ui/src/shared/services/reviewTargetClassifier.ts @@ -0,0 +1,344 @@ +export type ReviewTargetSource = + | 'session_files' + | 'slash_command_explicit_files' + | 'slash_command_git_ref' + | 'workspace_diff' + | 'manual_prompt' + | 'unknown'; + 
+export type ReviewDomainTag = + | 'frontend_ui' + | 'frontend_style' + | 'frontend_i18n' + | 'frontend_contract' + | 'desktop_contract' + | 'web_server_contract' + | 'backend_core' + | 'transport' + | 'api_layer' + | 'ai_adapter' + | 'installer_ui' + | 'test' + | 'docs' + | 'config' + | 'generated_or_lock' + | 'unknown'; + +export interface ReviewTargetFile { + path: string; + normalizedPath: string; + oldPath?: string; + normalizedOldPath?: string; + status: 'added' | 'modified' | 'deleted' | 'renamed' | 'copied' | 'unknown'; + source: ReviewTargetSource; + tags: ReviewDomainTag[]; + excluded?: boolean; + excludeReason?: 'lockfile' | 'generated' | 'binary' | 'too_large' | 'unsupported'; +} + +export interface ReviewTargetWarning { + code: + | 'target_unknown' + | 'git_ref_unresolved' + | 'file_list_empty' + | 'remote_resolution_unavailable' + | 'excluded_files_present' + | 'contract_surface_detected' + | 'classification_partial'; + message: string; +} + +export interface ReviewTargetClassification { + source: ReviewTargetSource; + resolution: 'resolved' | 'partial' | 'unknown'; + files: ReviewTargetFile[]; + tags: ReviewDomainTag[]; + evidence: string[]; + warnings: ReviewTargetWarning[]; +} + +interface PathTagRule { + id: string; + tags: ReviewDomainTag[]; + match: { + pathPrefixes?: string[]; + extensions?: string[]; + exactFiles?: string[]; + }; + evidence: string; +} + +export const FRONTEND_REVIEW_DOMAIN_TAGS: ReviewDomainTag[] = [ + 'frontend_ui', + 'frontend_style', + 'frontend_i18n', + 'frontend_contract', + 'desktop_contract', + 'web_server_contract', +]; + +export interface ReviewerApplicabilityRule { + subagentId: string; + matchingTags: ReviewDomainTag[]; + runWhenTargetUnknown: boolean; +} + +const REVIEWER_APPLICABILITY_RULES: ReviewerApplicabilityRule[] = [ + { + subagentId: 'ReviewFrontend', + matchingTags: FRONTEND_REVIEW_DOMAIN_TAGS, + runWhenTargetUnknown: true, + }, +]; + +export function getReviewerApplicabilityRule( + subagentId: string, 
+): ReviewerApplicabilityRule | undefined { + return REVIEWER_APPLICABILITY_RULES.find((rule) => rule.subagentId === subagentId); +} + +export function shouldRunReviewerForTarget( + subagentId: string, + target: ReviewTargetClassification, +): boolean { + const rule = getReviewerApplicabilityRule(subagentId); + if (!rule) { + return true; + } + if (target.resolution === 'unknown') { + return rule.runWhenTargetUnknown; + } + return rule.matchingTags.some((tag) => target.tags.includes(tag)); +} + +const PATH_TAG_RULES: PathTagRule[] = [ + { + id: 'web-ui-locales', + tags: ['frontend_i18n'], + match: { pathPrefixes: ['src/web-ui/src/locales/'] }, + evidence: 'Frontend locale file changed', + }, + { + id: 'web-ui-style', + tags: ['frontend_style'], + match: { + pathPrefixes: ['src/web-ui/'], + extensions: ['.scss', '.css', '.sass', '.less'], + }, + evidence: 'Frontend stylesheet changed', + }, + { + id: 'web-ui-source', + tags: ['frontend_ui'], + match: { + pathPrefixes: ['src/web-ui/src/'], + extensions: ['.ts', '.tsx', '.js', '.jsx'], + }, + evidence: 'File is under src/web-ui/src', + }, + { + id: 'desktop-api-contract', + tags: ['desktop_contract', 'frontend_contract'], + match: { pathPrefixes: ['src/apps/desktop/src/api/'] }, + evidence: 'Desktop API surface may affect frontend invoke contract', + }, + { + id: 'api-layer-contract', + tags: ['api_layer', 'frontend_contract'], + match: { pathPrefixes: ['src/crates/api-layer/'] }, + evidence: 'API layer may affect frontend/backend contract', + }, + { + id: 'server-contract', + tags: ['web_server_contract', 'frontend_contract'], + match: { pathPrefixes: ['src/apps/server/src/routes/'] }, + evidence: 'Server route surface may affect frontend communication contract', + }, + { + id: 'transport', + tags: ['transport'], + match: { pathPrefixes: ['src/crates/transport/'] }, + evidence: 'Transport layer changed', + }, + { + id: 'core', + tags: ['backend_core'], + match: { pathPrefixes: ['src/crates/core/'] }, + evidence: 
'Core product logic changed', + }, + { + id: 'ai-adapter', + tags: ['ai_adapter'], + match: { pathPrefixes: ['src/crates/ai-adapters/'] }, + evidence: 'AI adapter changed', + }, + { + id: 'installer-ui', + tags: ['installer_ui'], + match: { pathPrefixes: ['BitFun-Installer/'] }, + evidence: 'Installer UI changed', + }, + { + id: 'docs', + tags: ['docs'], + match: { + pathPrefixes: ['docs/'], + extensions: ['.md'], + }, + evidence: 'Documentation changed', + }, + { + id: 'lockfile', + tags: ['generated_or_lock'], + match: { + exactFiles: ['pnpm-lock.yaml', 'package-lock.json', 'yarn.lock', 'Cargo.lock'], + }, + evidence: 'Lockfile changed', + }, +]; + +export function normalizeReviewPath(path: string): string { + return path.trim().replace(/\\/g, '/').replace(/^\.\/+/, ''); +} + +function dedupe(values: T[]): T[] { + return Array.from(new Set(values)); +} + +function getExtension(path: string): string { + const lastSlash = path.lastIndexOf('/'); + const lastDot = path.lastIndexOf('.'); + if (lastDot <= lastSlash) { + return ''; + } + return path.slice(lastDot); +} + +function matchesRule(path: string, rule: PathTagRule): boolean { + const { pathPrefixes, extensions, exactFiles } = rule.match; + const extension = getExtension(path); + return Boolean( + exactFiles?.includes(path) || + pathPrefixes?.some((prefix) => path.startsWith(prefix)) && + (!extensions || extensions.includes(extension)) || + !pathPrefixes && + extensions?.includes(extension), + ); +} + +function inferSupplementalTags(path: string): ReviewDomainTag[] { + const tags: ReviewDomainTag[] = []; + if ( + path.includes('/tests/') || + path.endsWith('.test.ts') || + path.endsWith('.test.tsx') || + path.endsWith('.spec.ts') || + path.endsWith('.spec.tsx') + ) { + tags.push('test'); + } + if ( + path === 'package.json' || + path.endsWith('/package.json') || + path.endsWith('.config.ts') || + path.endsWith('.config.js') || + path.startsWith('.github/workflows/') + ) { + tags.push('config'); + } + return 
tags; +} + +function classifyPath( + originalPath: string, + source: ReviewTargetSource, +): { file: ReviewTargetFile; evidence: string[] } { + const normalizedPath = normalizeReviewPath(originalPath); + const matchedRules = PATH_TAG_RULES.filter((rule) => + matchesRule(normalizedPath, rule), + ); + const ruleTags = matchedRules.flatMap((rule) => rule.tags); + const tags = dedupe([...ruleTags, ...inferSupplementalTags(normalizedPath)]); + const finalTags = tags.length > 0 ? tags : ['unknown' as const]; + + return { + file: { + path: originalPath, + normalizedPath, + status: 'unknown', + source, + tags: finalTags, + }, + evidence: matchedRules.map((rule) => rule.evidence), + }; +} + +export function createUnknownReviewTargetClassification( + source: ReviewTargetSource, +): ReviewTargetClassification { + return { + source, + resolution: 'unknown', + files: [], + tags: ['unknown'], + evidence: ['Review target could not be resolved before launch.'], + warnings: [ + { + code: 'target_unknown', + message: 'Review target could not be resolved before launch.', + }, + ], + }; +} + +export function classifyReviewTargetFromFiles( + filePaths: string[], + source: ReviewTargetSource, +): ReviewTargetClassification { + const normalizedInputs = filePaths + .map((path) => path.trim()) + .filter(Boolean); + + if (normalizedInputs.length === 0) { + return { + ...createUnknownReviewTargetClassification(source), + warnings: [ + { + code: 'file_list_empty', + message: 'No reviewable files were provided for target classification.', + }, + ], + }; + } + + const classified = normalizedInputs.map((path) => classifyPath(path, source)); + const files = classified.map((item) => item.file); + const tags = dedupe(files.flatMap((file) => file.tags)); + const hasUnknown = tags.includes('unknown'); + const hasKnown = tags.some((tag) => tag !== 'unknown'); + const resolution = hasUnknown ? (hasKnown ? 
'partial' : 'unknown') : 'resolved'; + const warnings: ReviewTargetWarning[] = []; + + if (resolution === 'partial') { + warnings.push({ + code: 'classification_partial', + message: 'Some review target files could not be classified.', + }); + } + + if (tags.includes('frontend_contract')) { + warnings.push({ + code: 'contract_surface_detected', + message: 'A frontend-facing contract surface changed.', + }); + } + + return { + source, + resolution, + files, + tags, + evidence: dedupe(classified.flatMap((item) => item.evidence)), + warnings, + }; +} diff --git a/src/web-ui/src/shared/services/reviewTeamLocaleCompleteness.test.ts b/src/web-ui/src/shared/services/reviewTeamLocaleCompleteness.test.ts new file mode 100644 index 000000000..4f370aed4 --- /dev/null +++ b/src/web-ui/src/shared/services/reviewTeamLocaleCompleteness.test.ts @@ -0,0 +1,83 @@ +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; +import { describe, expect, it } from 'vitest'; +import { FALLBACK_REVIEW_TEAM_DEFINITION } from './reviewTeamService'; + +const REVIEW_TEAM_LOCALES = ['en-US', 'zh-CN', 'zh-TW'] as const; + +type Locale = (typeof REVIEW_TEAM_LOCALES)[number]; +type JsonObject = Record; + +const REVIEW_TEAM_FLOW_CHAT_KEYS = [ + 'deepReviewConsent.runStrategy', + 'deepReviewConsent.recommendedStrategy', + 'deepReviewConsent.recommendationTitle', + 'deepReviewConsent.strategyOverrideTitle', + 'deepReviewConsent.strategyOverrideBody', + 'deepReviewConsent.teamDefaultStrategy', + 'deepReviewConsent.strategyLabels.quick', + 'deepReviewConsent.strategyLabels.normal', + 'deepReviewConsent.strategyLabels.deep', + 'toolCards.codeReview.runManifest.recommendedStrategy', + 'toolCards.codeReview.runManifest.riskRecommendationTitle', +] as const; + +function readLocaleJson( + locale: Locale, + namespace: 'flow-chat.json' | 'scenes/agents.json' | 'settings/review.json', +) { + const filePath = fileURLToPath(new URL(`../../locales/${locale}/${namespace}`, import.meta.url)); 
+ return JSON.parse(readFileSync(filePath, 'utf8')) as JsonObject; +} + +function getPathValue(source: JsonObject, path: string): unknown { + return path.split('.').reduce((current, segment) => { + if (!current || typeof current !== 'object') { + return undefined; + } + return (current as JsonObject)[segment]; + }, source); +} + +function expectNonEmptyLocaleString(source: JsonObject, path: string) { + const value = getPathValue(source, path); + expect(value, path).toEqual(expect.any(String)); + expect((value as string).trim(), path).not.toBe(''); +} + +describe('review team locale completeness', () => { + it.each(REVIEW_TEAM_LOCALES)( + 'keeps core review roles translated in %s settings and agents namespaces', + (locale) => { + const settingsReview = readLocaleJson(locale, 'settings/review.json'); + const scenesAgents = readLocaleJson(locale, 'scenes/agents.json'); + + for (const role of FALLBACK_REVIEW_TEAM_DEFINITION.coreRoles) { + expectNonEmptyLocaleString(settingsReview, `members.${role.key}.name`); + expectNonEmptyLocaleString(settingsReview, `members.${role.key}.role`); + + expectNonEmptyLocaleString(scenesAgents, `reviewTeams.members.${role.key}.funName`); + expectNonEmptyLocaleString(scenesAgents, `reviewTeams.members.${role.key}.role`); + expectNonEmptyLocaleString(scenesAgents, `reviewTeams.members.${role.key}.description`); + + role.responsibilities.forEach((_, index) => { + expectNonEmptyLocaleString( + scenesAgents, + `reviewTeams.members.${role.key}.responsibilities.${index}`, + ); + }); + } + }, + ); + + it.each(REVIEW_TEAM_LOCALES)( + 'keeps Deep Review strategy recommendation UI translated in %s flow chat namespace', + (locale) => { + const flowChat = readLocaleJson(locale, 'flow-chat.json'); + + for (const path of REVIEW_TEAM_FLOW_CHAT_KEYS) { + expectNonEmptyLocaleString(flowChat, path); + } + }, + ); +}); diff --git a/src/web-ui/src/shared/services/reviewTeamService.test.ts b/src/web-ui/src/shared/services/reviewTeamService.test.ts index 
073ebbeb4..8769fd50f 100644 --- a/src/web-ui/src/shared/services/reviewTeamService.test.ts +++ b/src/web-ui/src/shared/services/reviewTeamService.test.ts @@ -1,21 +1,34 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; import { configAPI } from '@/infrastructure/api/service-api/ConfigAPI'; import { + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY, DEFAULT_REVIEW_TEAM_EXECUTION_POLICY, DEFAULT_REVIEW_TEAM_STRATEGY_LEVEL, + FALLBACK_REVIEW_TEAM_DEFINITION, REVIEW_STRATEGY_DEFINITIONS, buildEffectiveReviewTeamManifest, buildReviewTeamPromptBlock, canUseSubagentAsReviewTeamMember, + loadDefaultReviewTeamDefinition, loadDefaultReviewTeamConfig, + loadReviewTeamProjectStrategyOverride, + loadReviewTeamRateLimitStatus, + lowerDefaultReviewTeamMaxParallelReviewers, prepareDefaultReviewTeamForLaunch, resolveDefaultReviewTeam, + saveDefaultReviewTeamConcurrencyPolicy, + saveReviewTeamProjectStrategyOverride, type ReviewTeamStoredConfig, } from './reviewTeamService'; +import { agentAPI } from '@/infrastructure/api/service-api/AgentAPI'; import { SubagentAPI, type SubagentInfo, } from '@/infrastructure/api/service-api/SubagentAPI'; +import { + classifyReviewTargetFromFiles, + createUnknownReviewTargetClassification, +} from './reviewTargetClassifier'; vi.mock('@/infrastructure/api/service-api/ConfigAPI', () => ({ configAPI: { @@ -31,11 +44,19 @@ vi.mock('@/infrastructure/api/service-api/SubagentAPI', () => ({ }, })); +vi.mock('@/infrastructure/api/service-api/AgentAPI', () => ({ + agentAPI: { + getDefaultReviewTeamDefinition: vi.fn(), + }, +})); + describe('reviewTeamService', () => { beforeEach(() => { vi.clearAllMocks(); }); + const WORKSPACE_PATH = '/test-fixtures/project-a'; + const storedConfigWithExtra = ( extraSubagentIds: string[] = [], overrides: Partial = {}, @@ -47,6 +68,13 @@ describe('reviewTeamService', () => { judge_timeout_seconds: DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.judgeTimeoutSeconds, reviewer_file_split_threshold: 
DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.reviewerFileSplitThreshold, max_same_role_instances: DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.maxSameRoleInstances, + max_retries_per_role: DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.maxRetriesPerRole, + max_parallel_reviewers: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.maxParallelInstances, + max_queue_wait_seconds: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.maxQueueWaitSeconds, + allow_provider_capacity_queue: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.allowProviderCapacityQueue, + allow_bounded_auto_retry: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.allowBoundedAutoRetry, + auto_retry_elapsed_guard_seconds: + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.autoRetryElapsedGuardSeconds, ...overrides, }); @@ -57,14 +85,15 @@ describe('reviewTeamService', () => { model = 'fast', isReadonly = true, isReview = id.startsWith('Review'), + defaultTools = ['GetFileDiff', 'Read', 'Grep', 'Glob', 'LS'], ): SubagentInfo => ({ id, name: id, description: `${id} description`, isReadonly, isReview, - toolCount: 1, - defaultTools: ['Read'], + toolCount: defaultTools.length, + defaultTools, enabled, subagentSource, model, @@ -92,6 +121,13 @@ describe('reviewTeamService', () => { judge_timeout_seconds: DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.judgeTimeoutSeconds, reviewer_file_split_threshold: DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.reviewerFileSplitThreshold, max_same_role_instances: DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.maxSameRoleInstances, + max_retries_per_role: DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.maxRetriesPerRole, + max_parallel_reviewers: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.maxParallelInstances, + max_queue_wait_seconds: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.maxQueueWaitSeconds, + allow_provider_capacity_queue: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.allowProviderCapacityQueue, + allow_bounded_auto_retry: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.allowBoundedAutoRetry, + auto_retry_elapsed_guard_seconds: + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.autoRetryElapsedGuardSeconds, }); 
}); @@ -127,6 +163,91 @@ describe('reviewTeamService', () => { }); }); + it('normalizes persisted capacity and retry settings into the team concurrency policy', async () => { + vi.mocked(configAPI.getConfig).mockResolvedValueOnce({ + extra_subagent_ids: [], + strategy_level: 'normal', + member_strategy_overrides: {}, + max_parallel_reviewers: 99, + max_queue_wait_seconds: 999, + allow_provider_capacity_queue: false, + allow_bounded_auto_retry: true, + auto_retry_elapsed_guard_seconds: 1, + }); + + const config = await loadDefaultReviewTeamConfig(); + const team = resolveDefaultReviewTeam(coreSubagents(), config); + + expect(team.concurrencyPolicy).toEqual({ + maxParallelInstances: 16, + staggerSeconds: 0, + maxQueueWaitSeconds: 600, + batchExtrasSeparately: true, + allowProviderCapacityQueue: false, + allowBoundedAutoRetry: true, + autoRetryElapsedGuardSeconds: 30, + }); + }); + + it('saves capacity and retry settings without changing unrelated review team config', async () => { + vi.mocked(configAPI.getConfig).mockResolvedValueOnce( + storedConfigWithExtra(['ExtraReviewer'], { + strategy_level: 'deep', + member_strategy_overrides: { ReviewSecurity: 'quick' }, + reviewer_timeout_seconds: 300, + }), + ); + + await saveDefaultReviewTeamConcurrencyPolicy({ + maxParallelInstances: 2, + staggerSeconds: 20, + maxQueueWaitSeconds: 45, + batchExtrasSeparately: false, + allowProviderCapacityQueue: false, + allowBoundedAutoRetry: true, + autoRetryElapsedGuardSeconds: 240, + }); + + expect(configAPI.setConfig).toHaveBeenCalledWith( + 'ai.review_teams.default', + expect.objectContaining({ + extra_subagent_ids: ['ExtraReviewer'], + strategy_level: 'deep', + member_strategy_overrides: { ReviewSecurity: 'quick' }, + reviewer_timeout_seconds: 300, + max_parallel_reviewers: 2, + max_queue_wait_seconds: 45, + allow_provider_capacity_queue: false, + allow_bounded_auto_retry: true, + auto_retry_elapsed_guard_seconds: 240, + }), + ); + }); + + it('lowers the next review max parallel 
reviewers without going below one', async () => { + vi.mocked(configAPI.getConfig) + .mockResolvedValueOnce(storedConfigWithExtra([], { max_parallel_reviewers: 3 })) + .mockResolvedValueOnce(storedConfigWithExtra([], { max_parallel_reviewers: 1 })); + + await expect(lowerDefaultReviewTeamMaxParallelReviewers()).resolves.toMatchObject({ + maxParallelInstances: 2, + }); + expect(configAPI.setConfig).toHaveBeenNthCalledWith( + 1, + 'ai.review_teams.default', + expect.objectContaining({ max_parallel_reviewers: 2 }), + ); + + await expect(lowerDefaultReviewTeamMaxParallelReviewers()).resolves.toMatchObject({ + maxParallelInstances: 1, + }); + expect(configAPI.setConfig).toHaveBeenNthCalledWith( + 2, + 'ai.review_teams.default', + expect.objectContaining({ max_parallel_reviewers: 1 }), + ); + }); + it('propagates config errors that are not missing review team config paths', async () => { const error = new Error('Config service unavailable'); vi.mocked(configAPI.getConfig).mockRejectedValueOnce(error); @@ -134,6 +255,78 @@ describe('reviewTeamService', () => { await expect(loadDefaultReviewTeamConfig()).rejects.toThrow(error.message); }); + it('loads cached review team rate limit status when available', async () => { + vi.mocked(configAPI.getConfig).mockResolvedValueOnce({ + remaining: 3.8, + }); + + await expect(loadReviewTeamRateLimitStatus()).resolves.toEqual({ + remaining: 3, + }); + expect(configAPI.getConfig).toHaveBeenCalledWith( + 'ai.review_teams.rate_limit_status', + { skipRetryOnNotFound: true }, + ); + }); + + it('ignores missing or invalid cached review team rate limit status', async () => { + vi.mocked(configAPI.getConfig) + .mockResolvedValueOnce(undefined) + .mockResolvedValueOnce({ remaining: 'not-a-number' }) + .mockRejectedValueOnce(new Error('rate status unavailable')); + + await expect(loadReviewTeamRateLimitStatus()).resolves.toBeNull(); + await expect(loadReviewTeamRateLimitStatus()).resolves.toBeNull(); + await 
expect(loadReviewTeamRateLimitStatus()).resolves.toBeNull(); + }); + + it('loads project strategy overrides by normalized workspace path', async () => { + vi.mocked(configAPI.getConfig).mockResolvedValueOnce({ + 'd:/workspace/repo': 'deep', + '/test-fixtures/project-a': 'quick', + invalid: 'invalid', + }); + + await expect( + loadReviewTeamProjectStrategyOverride('D:\\workspace\\repo'), + ).resolves.toBe('deep'); + expect(configAPI.getConfig).toHaveBeenCalledWith( + 'ai.review_teams.project_strategy_overrides', + { skipRetryOnNotFound: true }, + ); + }); + + it('saves and clears project strategy overrides by normalized workspace path', async () => { + vi.mocked(configAPI.getConfig) + .mockResolvedValueOnce({ + 'd:/workspace/repo': 'quick', + '/test-fixtures/project-a': 'normal', + }) + .mockResolvedValueOnce({ + 'd:/workspace/repo': 'deep', + '/test-fixtures/project-a': 'normal', + }); + + await saveReviewTeamProjectStrategyOverride('D:\\workspace\\repo', 'deep'); + expect(configAPI.setConfig).toHaveBeenNthCalledWith( + 1, + 'ai.review_teams.project_strategy_overrides', + { + 'd:/workspace/repo': 'deep', + '/test-fixtures/project-a': 'normal', + }, + ); + + await saveReviewTeamProjectStrategyOverride('D:\\workspace\\repo'); + expect(configAPI.setConfig).toHaveBeenNthCalledWith( + 2, + 'ai.review_teams.project_strategy_overrides', + { + '/test-fixtures/project-a': 'normal', + }, + ); + }); + it('only force-enables locked core members before launch', async () => { vi.mocked(configAPI.getConfig).mockResolvedValue( storedConfigWithExtra(['ExtraEnabled', 'ExtraDisabled']), @@ -144,38 +337,38 @@ describe('reviewTeamService', () => { subagent('ExtraDisabled', false, 'project', 'fast', true, true), ]); - await prepareDefaultReviewTeamForLaunch('D:/workspace/project-a'); + await prepareDefaultReviewTeamForLaunch(WORKSPACE_PATH); expect(SubagentAPI.updateSubagentConfig).toHaveBeenCalledTimes(6); expect(SubagentAPI.updateSubagentConfig).toHaveBeenCalledWith({ subagentId: 
'ReviewBusinessLogic', enabled: true, - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, }); expect(SubagentAPI.updateSubagentConfig).toHaveBeenCalledWith({ subagentId: 'ReviewPerformance', enabled: true, - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, }); expect(SubagentAPI.updateSubagentConfig).toHaveBeenCalledWith({ subagentId: 'ReviewSecurity', enabled: true, - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, }); expect(SubagentAPI.updateSubagentConfig).toHaveBeenCalledWith({ subagentId: 'ReviewArchitecture', enabled: true, - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, }); expect(SubagentAPI.updateSubagentConfig).toHaveBeenCalledWith({ subagentId: 'ReviewFrontend', enabled: true, - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, }); expect(SubagentAPI.updateSubagentConfig).toHaveBeenCalledWith({ subagentId: 'ReviewJudge', enabled: true, - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, }); expect(SubagentAPI.updateSubagentConfig).not.toHaveBeenCalledWith( expect.objectContaining({ subagentId: 'ExtraEnabled' }), @@ -199,11 +392,142 @@ describe('reviewTeamService', () => { expect(promptBlock).toContain('subagent_type: ExtraEnabled'); expect(promptBlock).not.toContain('subagent_type: ExtraDisabled'); - expect(promptBlock).toContain('Always run the four locked core reviewer roles'); + expect(promptBlock).toContain('Run the active core reviewer roles first'); expect(promptBlock).not.toContain('Always run the three locked reviewer roles'); }); - it('requires extra members to be explicitly marked for review and readonly', () => { + it('can resolve the team from a backend-provided reviewer definition', () => { + const team = resolveDefaultReviewTeam( + [ + ...coreSubagents(), + subagent('ReviewDocs'), + ], + storedConfigWithExtra(['ReviewDocs']), + { + definition: { + id: 'default-review-team', + name: 'Code 
Review Team', + description: 'Backend-defined team', + warning: 'Review may take longer.', + defaultModel: 'fast', + defaultStrategyLevel: 'normal', + defaultExecutionPolicy: { + reviewerTimeoutSeconds: 300, + judgeTimeoutSeconds: 240, + reviewerFileSplitThreshold: 20, + maxSameRoleInstances: 3, + maxRetriesPerRole: 1, + }, + disallowedExtraSubagentIds: [ + 'ReviewBusinessLogic', + 'ReviewPerformance', + 'ReviewSecurity', + 'ReviewArchitecture', + 'ReviewFrontend', + 'ReviewDocs', + 'ReviewJudge', + 'DeepReview', + 'ReviewFixer', + ], + hiddenAgentIds: [ + 'DeepReview', + 'ReviewBusinessLogic', + 'ReviewPerformance', + 'ReviewSecurity', + 'ReviewArchitecture', + 'ReviewFrontend', + 'ReviewDocs', + 'ReviewJudge', + ], + coreRoles: [ + ...[ + 'ReviewBusinessLogic', + 'ReviewPerformance', + 'ReviewSecurity', + 'ReviewArchitecture', + 'ReviewFrontend', + 'ReviewJudge', + ].map((id) => ({ + key: id === 'ReviewJudge' ? 'judge' : id.replace(/^Review/, '').replace(/^BusinessLogic$/, 'businessLogic').toLowerCase(), + subagentId: id, + funName: id, + roleName: id, + description: `${id} description`, + responsibilities: [`${id} responsibility`], + accentColor: '#64748b', + conditional: id === 'ReviewFrontend', + })), + { + key: 'docs', + subagentId: 'ReviewDocs', + funName: 'Docs Reviewer', + roleName: 'Documentation Reviewer', + description: 'Checks docs and release notes.', + responsibilities: ['Verify documentation stays aligned.'], + accentColor: '#0f766e', + }, + ], + strategyProfiles: { + ...REVIEW_STRATEGY_DEFINITIONS, + quick: { + ...REVIEW_STRATEGY_DEFINITIONS.quick, + roleDirectives: { + ...REVIEW_STRATEGY_DEFINITIONS.quick.roleDirectives, + ReviewDocs: 'Only check changed docs.', + }, + }, + }, + }, + }, + ); + + expect(team.coreMembers.map((member) => member.subagentId)).toContain('ReviewDocs'); + expect(team.extraMembers.map((member) => member.subagentId)).not.toContain('ReviewDocs'); + + const manifest = buildEffectiveReviewTeamManifest(team, { + 
tokenBudgetMode: 'balanced', + }); + expect(manifest.coreReviewers).toContainEqual( + expect.objectContaining({ + subagentId: 'ReviewDocs', + strategyDirective: REVIEW_STRATEGY_DEFINITIONS.normal.promptDirective, + }), + ); + }); + + it('falls back safely when backend reviewer definition fields are malformed', async () => { + vi.mocked(agentAPI.getDefaultReviewTeamDefinition).mockResolvedValue({ + id: 42, + name: null, + description: ['bad'], + warning: {}, + defaultModel: 99, + defaultStrategyLevel: 'normal', + defaultExecutionPolicy: { + reviewerTimeoutSeconds: 300, + judgeTimeoutSeconds: 240, + reviewerFileSplitThreshold: 20, + maxSameRoleInstances: 3, + maxRetriesPerRole: 1, + }, + coreRoles: [], + strategyProfiles: {}, + disallowedExtraSubagentIds: ['ReviewDocs', 42], + hiddenAgentIds: ['ReviewDocs', null], + }); + + await expect(loadDefaultReviewTeamDefinition()).resolves.toMatchObject({ + id: FALLBACK_REVIEW_TEAM_DEFINITION.id, + name: FALLBACK_REVIEW_TEAM_DEFINITION.name, + description: FALLBACK_REVIEW_TEAM_DEFINITION.description, + warning: FALLBACK_REVIEW_TEAM_DEFINITION.warning, + defaultModel: FALLBACK_REVIEW_TEAM_DEFINITION.defaultModel, + disallowedExtraSubagentIds: ['ReviewDocs'], + hiddenAgentIds: ['ReviewDocs'], + }); + }); + + it('keeps invalid configured extra members explainable in the run manifest', () => { const readonlyReviewExtra = subagent('ExtraReadonlyReview', true, 'user', 'fast', true, true); const readonlyPlainExtra = subagent('ExtraReadonlyPlain', true, 'user', 'fast', true, false); const writableReviewExtra = subagent('ExtraWritableReview', true, 'project', 'fast', false, true); @@ -219,15 +543,113 @@ describe('reviewTeamService', () => { readonlyPlainExtra, writableReviewExtra, ], - storedConfigWithExtra(['ExtraReadonlyReview', 'ExtraReadonlyPlain', 'ExtraWritableReview']), + storedConfigWithExtra([ + 'ExtraReadonlyReview', + 'ExtraReadonlyPlain', + 'ExtraWritableReview', + 'ExtraMissingReviewer', + ]), ); - 
expect(team.extraMembers.map((member) => member.subagentId)).toEqual(['ExtraReadonlyReview']); + expect( + team.extraMembers + .filter((member) => member.available) + .map((member) => member.subagentId), + ).toEqual(['ExtraReadonlyReview']); - const promptBlock = buildReviewTeamPromptBlock(team); + const manifest = buildEffectiveReviewTeamManifest(team); + + expect(manifest.skippedReviewers).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + subagentId: 'ExtraReadonlyPlain', + reason: 'invalid_tooling', + }), + expect.objectContaining({ + subagentId: 'ExtraWritableReview', + reason: 'invalid_tooling', + }), + expect.objectContaining({ + subagentId: 'ExtraMissingReviewer', + reason: 'unavailable', + }), + ]), + ); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); expect(promptBlock).toContain('subagent_type: ExtraReadonlyReview'); - expect(promptBlock).not.toContain('ExtraReadonlyPlain'); - expect(promptBlock).not.toContain('ExtraWritableReview'); + expect(promptBlock).toContain('- ExtraReadonlyPlain: invalid_tooling'); + expect(promptBlock).toContain('- ExtraWritableReview: invalid_tooling'); + expect(promptBlock).toContain('- ExtraMissingReviewer: unavailable'); + expect(promptBlock).not.toContain('subagent_type: ExtraReadonlyPlain'); + expect(promptBlock).not.toContain('subagent_type: ExtraWritableReview'); + expect(promptBlock).not.toContain('subagent_type: ExtraMissingReviewer'); + }); + + it('requires extra review members to have the minimum review tools', () => { + const readyReviewExtra = subagent('ExtraReadyReview', true, 'user', 'fast', true, true); + const missingDiffExtra = subagent( + 'ExtraMissingDiff', + true, + 'user', + 'fast', + true, + true, + ['Read', 'Grep'], + ); + const missingReadExtra = subagent( + 'ExtraMissingRead', + true, + 'project', + 'fast', + true, + true, + ['GetFileDiff', 'Grep'], + ); + + expect(canUseSubagentAsReviewTeamMember(readyReviewExtra)).toBe(true); + 
expect(canUseSubagentAsReviewTeamMember(missingDiffExtra)).toBe(false); + expect(canUseSubagentAsReviewTeamMember(missingReadExtra)).toBe(false); + + const team = resolveDefaultReviewTeam( + [ + ...coreSubagents(), + readyReviewExtra, + missingDiffExtra, + missingReadExtra, + ], + storedConfigWithExtra(['ExtraReadyReview', 'ExtraMissingDiff', 'ExtraMissingRead']), + ); + + expect( + team.extraMembers + .filter((member) => member.available) + .map((member) => member.subagentId), + ).toEqual(['ExtraReadyReview']); + + const manifest = buildEffectiveReviewTeamManifest(team); + + expect(manifest.enabledExtraReviewers.map((member) => member.subagentId)).toEqual([ + 'ExtraReadyReview', + ]); + expect(manifest.skippedReviewers).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + subagentId: 'ExtraMissingDiff', + reason: 'invalid_tooling', + }), + expect.objectContaining({ + subagentId: 'ExtraMissingRead', + reason: 'invalid_tooling', + }), + ]), + ); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- ExtraMissingDiff: invalid_tooling'); + expect(promptBlock).toContain('- ExtraMissingRead: invalid_tooling'); + expect(promptBlock).not.toContain('subagent_type: ExtraMissingDiff'); + expect(promptBlock).not.toContain('subagent_type: ExtraMissingRead'); }); it('builds an explicit run manifest for enabled, skipped, and quality-gate reviewers', () => { @@ -241,13 +663,13 @@ describe('reviewTeamService', () => { ); const manifest = buildEffectiveReviewTeamManifest(team, { - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, policySource: 'default-review-team-config', }); expect(manifest.reviewMode).toBe('deep'); expect(manifest.strategyLevel).toBe('normal'); - expect(manifest.workspacePath).toBe('D:/workspace/project-a'); + expect(manifest.workspacePath).toBe(WORKSPACE_PATH); expect(manifest.policySource).toBe('default-review-team-config'); expect(manifest.coreReviewers.map((member) => 
member.subagentId)).toEqual([ 'ReviewBusinessLogic', @@ -268,14 +690,481 @@ describe('reviewTeamService', () => { ]); }); - it('skips the conditional frontend reviewer when an explicit target has no frontend files', () => { + it('generates structured work packets for active reviewers and the judge', () => { + const team = resolveDefaultReviewTeam( + [ + ...coreSubagents(), + subagent('ExtraEnabled', true, 'user', 'fast', true, true), + ], + storedConfigWithExtra(['ExtraEnabled']), + ); + const target = classifyReviewTargetFromFiles( + ['src/web-ui/src/components/ReviewPanel.tsx'], + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + workspacePath: WORKSPACE_PATH, + target, + }); + + const logicPacket = manifest.workPackets?.find( + (packet) => packet.subagentId === 'ReviewBusinessLogic', + ); + const judgePacket = manifest.workPackets?.find( + (packet) => packet.subagentId === 'ReviewJudge', + ); + + expect(logicPacket).toMatchObject({ + packetId: 'reviewer:ReviewBusinessLogic', + phase: 'reviewer', + subagentId: 'ReviewBusinessLogic', + roleName: 'Business Logic Reviewer', + assignedScope: { + kind: 'review_target', + fileCount: 1, + files: ['src/web-ui/src/components/ReviewPanel.tsx'], + }, + allowedTools: ['GetFileDiff', 'Read', 'Grep', 'Glob', 'LS', 'Git'], + timeoutSeconds: manifest.executionPolicy.reviewerTimeoutSeconds, + requiredOutputFields: expect.arrayContaining([ + 'packet_id', + 'status', + 'findings', + ]), + }); + expect(judgePacket).toMatchObject({ + packetId: 'judge:ReviewJudge', + phase: 'judge', + subagentId: 'ReviewJudge', + timeoutSeconds: manifest.executionPolicy.judgeTimeoutSeconds, + requiredOutputFields: expect.arrayContaining([ + 'packet_id', + 'status', + 'validated_findings', + ]), + }); + expect(manifest.workPackets?.map((packet) => packet.subagentId)).not.toContain( + 'ExtraDisabled', + ); + expect(manifest.executionPolicy.maxRetriesPerRole).toBe(1); + + const promptBlock = 
buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('Review work packets:'); + expect(promptBlock).toContain('"packet_id": "reviewer:ReviewBusinessLogic"'); + expect(promptBlock).toContain('"allowed_tools"'); + expect(promptBlock).toContain('- max_retries_per_role: 1'); + expect(promptBlock).toContain('set retry to true'); + expect(promptBlock).toContain('Each reviewer Task prompt must include the matching work packet verbatim.'); + expect(promptBlock).toContain('If the reviewer omits packet_id but the Task was launched from a packet, infer the packet_id from the Task description or work packet and mark packet_status_source as inferred.'); + }); + + it('pre-generates a compact diff summary for reviewer orientation', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + const target = classifyReviewTargetFromFiles( + [ + 'src/web-ui/src/shared/services/reviewTeamService.ts', + 'src/web-ui/src/app/scenes/agents/components/ReviewTeamPage.tsx', + 'src/web-ui/src/locales/en-US/scenes/agents.json', + 'src/crates/core/src/agentic/deep_review_policy.rs', + 'src/crates/core/src/agentic/tools/implementations/task_tool.rs', + ], + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + changeStats: { + totalLinesChanged: 420, + lineCountSource: 'diff_stat', + }, + }); + + expect(manifest.preReviewSummary).toMatchObject({ + source: 'target_manifest', + fileCount: 5, + lineCount: 420, + lineCountSource: 'diff_stat', + workspaceAreas: [ + { + key: 'web-ui', + fileCount: 3, + sampleFiles: [ + 'src/web-ui/src/shared/services/reviewTeamService.ts', + 'src/web-ui/src/app/scenes/agents/components/ReviewTeamPage.tsx', + 'src/web-ui/src/locales/en-US/scenes/agents.json', + ], + }, + { + key: 'crate:core', + fileCount: 2, + sampleFiles: [ + 'src/crates/core/src/agentic/deep_review_policy.rs', + 'src/crates/core/src/agentic/tools/implementations/task_tool.rs', + ], + }, + ], + 
}); + expect(manifest.preReviewSummary.summary).toContain( + '5 files, 420 changed lines across 2 workspace areas', + ); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('Pre-generated diff summary:'); + expect(promptBlock).toContain('"key": "web-ui"'); + expect(promptBlock).toContain('Use the pre-generated diff summary'); + }); + + it('builds a shared context cache plan for files consumed by multiple reviewers', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + const target = classifyReviewTargetFromFiles( + [ + 'src/web-ui/src/shared/services/reviewTeamService.ts', + 'src/crates/core/src/agentic/deep_review_policy.rs', + ], + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { target }); + const webUiCacheEntry = manifest.sharedContextCache.entries.find( + (entry) => entry.path === 'src/web-ui/src/shared/services/reviewTeamService.ts', + ); + + expect(manifest.sharedContextCache).toMatchObject({ + source: 'work_packets', + strategy: 'reuse_readonly_file_context_by_cache_key', + omittedEntryCount: 0, + }); + expect(webUiCacheEntry).toMatchObject({ + cacheKey: 'shared-context:1', + workspaceArea: 'web-ui', + recommendedTools: ['GetFileDiff', 'Read'], + consumerPacketIds: expect.arrayContaining([ + 'reviewer:ReviewBusinessLogic', + 'reviewer:ReviewPerformance', + 'reviewer:ReviewSecurity', + 'reviewer:ReviewArchitecture', + 'reviewer:ReviewFrontend', + ]), + }); + expect(webUiCacheEntry?.consumerPacketIds).not.toContain('judge:ReviewJudge'); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('Shared context cache plan:'); + expect(promptBlock).toContain('"cache_key": "shared-context:1"'); + expect(promptBlock).toContain('Use shared_context_cache entries'); + }); + + it('builds an incremental review cache plan for follow-up reviews', () => { const team = resolveDefaultReviewTeam( 
coreSubagents(), storedConfigWithExtra(), ); + const target = classifyReviewTargetFromFiles( + [ + 'src/web-ui/src/shared/services/reviewTeamService.ts', + 'src/crates/core/src/agentic/deep_review_policy.rs', + ], + 'session_files', + ); const manifest = buildEffectiveReviewTeamManifest(team, { - workspacePath: 'D:/workspace/project-a', + target, + changeStats: { + totalLinesChanged: 128, + lineCountSource: 'diff_stat', + }, + }); + + expect(manifest.incrementalReviewCache).toMatchObject({ + source: 'target_manifest', + strategy: 'reuse_completed_packets_when_fingerprint_matches', + filePaths: [ + 'src/crates/core/src/agentic/deep_review_policy.rs', + 'src/web-ui/src/shared/services/reviewTeamService.ts', + ], + workspaceAreas: ['crate:core', 'web-ui'], + lineCount: 128, + lineCountSource: 'diff_stat', + reviewerPacketIds: expect.arrayContaining([ + 'reviewer:ReviewBusinessLogic', + 'reviewer:ReviewSecurity', + 'reviewer:ReviewFrontend', + ]), + invalidatesOn: expect.arrayContaining([ + 'target_file_set_changed', + 'target_line_count_changed', + 'reviewer_roster_changed', + ]), + }); + expect(manifest.incrementalReviewCache.cacheKey).toMatch(/^incremental-review:/); + expect(manifest.incrementalReviewCache.fingerprint).toHaveLength(8); + expect(manifest.incrementalReviewCache.reviewerPacketIds).not.toContain('judge:ReviewJudge'); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('Incremental review cache plan:'); + expect(promptBlock).toContain('"strategy": "reuse_completed_packets_when_fingerprint_matches"'); + expect(promptBlock).toContain('Use incremental_review_cache only when the target fingerprint matches'); + }); + + it('splits reviewer work packets across file groups for large targets', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra([], { + reviewer_file_split_threshold: 10, + max_same_role_instances: 3, + }), + ); + const target = classifyReviewTargetFromFiles( + 
Array.from( + { length: 25 }, + (_, index) => `src/web-ui/src/components/ReviewPanel${index}.tsx`, + ), + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + concurrencyPolicy: { + maxParallelInstances: 16, + }, + }); + const logicPackets = manifest.workPackets?.filter( + (packet) => packet.subagentId === 'ReviewBusinessLogic', + ); + const judgePackets = manifest.workPackets?.filter( + (packet) => packet.subagentId === 'ReviewJudge', + ); + + expect(logicPackets).toHaveLength(3); + expect(logicPackets?.map((packet) => packet.packetId)).toEqual([ + 'reviewer:ReviewBusinessLogic:group-1-of-3', + 'reviewer:ReviewBusinessLogic:group-2-of-3', + 'reviewer:ReviewBusinessLogic:group-3-of-3', + ]); + expect(logicPackets?.map((packet) => packet.assignedScope.fileCount)).toEqual([ + 9, + 8, + 8, + ]); + expect(logicPackets?.[0].assignedScope).toMatchObject({ + groupIndex: 1, + groupCount: 3, + }); + expect(logicPackets?.[0].assignedScope.files.slice(0, 2)).toEqual([ + 'src/web-ui/src/components/ReviewPanel0.tsx', + 'src/web-ui/src/components/ReviewPanel1.tsx', + ]); + expect(logicPackets?.[0].assignedScope.files.at(-1)).toBe( + 'src/web-ui/src/components/ReviewPanel8.tsx', + ); + expect(judgePackets).toHaveLength(1); + expect(judgePackets?.[0].assignedScope).toMatchObject({ + fileCount: 25, + }); + expect(judgePackets?.[0].assignedScope.groupCount).toBeUndefined(); + expect(manifest.tokenBudget).toMatchObject({ + estimatedReviewerCalls: 16, + maxFilesPerReviewer: 10, + largeDiffSummaryFirst: true, + }); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('"packet_id": "reviewer:ReviewBusinessLogic:group-1-of-3"'); + expect(promptBlock).toContain('"group_index": 1'); + expect(promptBlock).toContain('"group_count": 3'); + }); + + it('keeps split reviewer work packets grouped by workspace area when possible', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + 
storedConfigWithExtra([], { + reviewer_file_split_threshold: 4, + max_same_role_instances: 3, + }), + ); + const target = classifyReviewTargetFromFiles( + [ + 'src/web-ui/src/components/ReviewPanel.tsx', + 'src/crates/core/src/agentic/deep_review_policy.rs', + 'src/apps/desktop/src/api/review.rs', + 'src/web-ui/src/shared/services/reviewTeamService.ts', + 'src/crates/core/src/agentic/tools/implementations/task_tool.rs', + 'src/apps/desktop/src/api/agent.rs', + 'src/web-ui/src/app/scenes/agents/components/ReviewTeamPage.tsx', + 'src/crates/core/src/agentic/agents/deep_review_agent.rs', + 'src/apps/desktop/src/api/config.rs', + 'src/web-ui/src/locales/en-US/scenes/agents.json', + 'src/crates/core/src/agentic/agents/prompts/deep_review_agent.md', + 'src/apps/desktop/src/api/subagent.rs', + ], + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + concurrencyPolicy: { + maxParallelInstances: 16, + }, + }); + const logicPackets = manifest.workPackets?.filter( + (packet) => packet.subagentId === 'ReviewBusinessLogic', + ); + + expect(logicPackets).toHaveLength(3); + expect(logicPackets?.map((packet) => packet.assignedScope.files)).toEqual([ + [ + 'src/web-ui/src/components/ReviewPanel.tsx', + 'src/web-ui/src/shared/services/reviewTeamService.ts', + 'src/web-ui/src/app/scenes/agents/components/ReviewTeamPage.tsx', + 'src/web-ui/src/locales/en-US/scenes/agents.json', + ], + [ + 'src/crates/core/src/agentic/deep_review_policy.rs', + 'src/crates/core/src/agentic/tools/implementations/task_tool.rs', + 'src/crates/core/src/agentic/agents/deep_review_agent.rs', + 'src/crates/core/src/agentic/agents/prompts/deep_review_agent.md', + ], + [ + 'src/apps/desktop/src/api/review.rs', + 'src/apps/desktop/src/api/agent.rs', + 'src/apps/desktop/src/api/config.rs', + 'src/apps/desktop/src/api/subagent.rs', + ], + ]); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('Prefer module/workspace-area 
coherent file groups'); + }); + + it('caps file splitting and launch batches by concurrency policy', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra([], { + reviewer_file_split_threshold: 10, + max_same_role_instances: 3, + }), + ); + const target = classifyReviewTargetFromFiles( + Array.from( + { length: 25 }, + (_, index) => `src/web-ui/src/components/ReviewPanel${index}.tsx`, + ), + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { target }); + const reviewerPackets = manifest.workPackets?.filter( + (packet) => packet.phase === 'reviewer', + ) ?? []; + const logicPackets = reviewerPackets.filter( + (packet) => packet.subagentId === 'ReviewBusinessLogic', + ); + + expect(manifest.concurrencyPolicy).toMatchObject({ + maxParallelInstances: 4, + staggerSeconds: 0, + maxQueueWaitSeconds: 60, + batchExtrasSeparately: true, + }); + expect(logicPackets).toHaveLength(1); + expect(logicPackets[0].assignedScope.groupCount).toBeUndefined(); + expect(reviewerPackets).toHaveLength(5); + expect(reviewerPackets.slice(0, 4).map((packet) => packet.launchBatch)).toEqual([1, 1, 1, 1]); + expect(reviewerPackets[4].launchBatch).toBe(2); + expect(manifest.qualityGateReviewer && manifest.workPackets?.find( + (packet) => packet.subagentId === manifest.qualityGateReviewer?.subagentId, + )?.launchBatch).toBe(3); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- max_parallel_instances: 4'); + expect(promptBlock).toContain('- max_queue_wait_seconds: 60'); + expect(promptBlock).toContain('Launch reviewer Tasks by launch_batch'); + expect(promptBlock).toContain('"launch_batch": 2'); + }); + + it('reduces reviewer concurrency when rate limit remaining is tight', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra([], { + reviewer_file_split_threshold: 10, + max_same_role_instances: 3, + }), + ); + const target = 
classifyReviewTargetFromFiles( + Array.from( + { length: 25 }, + (_, index) => `src/web-ui/src/components/ReviewPanel${index}.tsx`, + ), + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + rateLimitStatus: { remaining: 2 }, + }); + const reviewerPackets = manifest.workPackets?.filter( + (packet) => packet.phase === 'reviewer', + ) ?? []; + + expect(manifest.concurrencyPolicy).toMatchObject({ + maxParallelInstances: 2, + staggerSeconds: 10, + batchExtrasSeparately: true, + }); + expect(reviewerPackets.map((packet) => packet.launchBatch)).toEqual([1, 1, 2, 2, 3]); + expect(manifest.qualityGateReviewer && manifest.workPackets?.find( + (packet) => packet.subagentId === manifest.qualityGateReviewer?.subagentId, + )?.launchBatch).toBe(4); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- max_parallel_instances: 2'); + expect(promptBlock).toContain('- stagger_seconds: 10'); + }); + + it('skips the frontend reviewer when the resolved target has no frontend tags', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target: classifyReviewTargetFromFiles( + ['src/crates/core/src/service/config/types.rs'], + 'session_files', + ), + }); + + expect(manifest.target.resolution).toBe('resolved'); + expect(manifest.target.tags).toEqual(['backend_core']); + expect(manifest.coreReviewers.map((member) => member.subagentId)).toEqual([ + 'ReviewBusinessLogic', + 'ReviewPerformance', + 'ReviewSecurity', + 'ReviewArchitecture', + ]); + expect(manifest.skippedReviewers).toEqual([ + expect.objectContaining({ + subagentId: 'ReviewFrontend', + reason: 'not_applicable', + }), + ]); + }); + + it('keeps explicit file-path targets compatible with conditional frontend reviewer gating', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + + 
const manifest = buildEffectiveReviewTeamManifest(team, { + workspacePath: WORKSPACE_PATH, reviewTargetFilePaths: ['src/crates/core/src/agentic/deep_review_policy.rs'], }); @@ -288,11 +1177,432 @@ describe('reviewTeamService', () => { expect(manifest.skippedReviewers).toEqual([ expect.objectContaining({ subagentId: 'ReviewFrontend', - reason: 'non_applicable', + reason: 'not_applicable', }), ]); }); + it('runs the frontend reviewer for frontend and contract targets', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target: classifyReviewTargetFromFiles( + ['src/apps/desktop/src/api/agentic_api.rs'], + 'session_files', + ), + }); + + expect(manifest.target.tags).toEqual( + expect.arrayContaining(['desktop_contract', 'frontend_contract']), + ); + expect(manifest.coreReviewers.map((member) => member.subagentId)).toContain( + 'ReviewFrontend', + ); + expect(manifest.skippedReviewers).not.toEqual([ + expect.objectContaining({ subagentId: 'ReviewFrontend' }), + ]); + }); + + it('runs conditional reviewers conservatively for unknown targets', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target: createUnknownReviewTargetClassification('manual_prompt'), + }); + + expect(manifest.target.resolution).toBe('unknown'); + expect(manifest.coreReviewers.map((member) => member.subagentId)).toContain( + 'ReviewFrontend', + ); + }); + + it('adds a balanced token budget to the run manifest by default', () => { + const team = resolveDefaultReviewTeam( + [ + ...coreSubagents(), + subagent('ExtraEnabled', true, 'user', 'fast', true, true), + ], + storedConfigWithExtra(['ExtraEnabled']), + ); + + const manifest = buildEffectiveReviewTeamManifest(team); + + expect(manifest.tokenBudget).toMatchObject({ + mode: 'balanced', + estimatedReviewerCalls: 7, + 
maxExtraReviewers: 1, + skippedReviewerIds: [], + }); + }); + + it('enables summary-first from prompt-byte pressure without hiding assigned files', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + const files = Array.from( + { length: 6 }, + (_, index) => `src/crates/core/src/agentic/large_change_${index}.rs`, + ); + const target = classifyReviewTargetFromFiles(files, 'workspace_diff'); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + changeStats: { + fileCount: files.length, + totalLinesChanged: 5000, + lineCountSource: 'diff_stat', + }, + }); + + expect(manifest.tokenBudget).toMatchObject({ + maxPromptBytesPerReviewer: 96_000, + promptByteEstimateSource: 'manifest_heuristic', + promptByteLimitExceeded: true, + largeDiffSummaryFirst: true, + }); + expect(manifest.tokenBudget.estimatedPromptBytesPerReviewer).toBeGreaterThan( + manifest.tokenBudget.maxPromptBytesPerReviewer ?? 0, + ); + expect(manifest.tokenBudget.decisions).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + kind: 'summary_first_full_scope', + reason: 'prompt_bytes_exceeded', + }), + ]), + ); + const reviewerPackets = manifest.workPackets.filter( + (packet) => packet.phase === 'reviewer', + ); + expect(reviewerPackets).not.toHaveLength(0); + for (const packet of reviewerPackets) { + expect(packet.assignedScope.files).toEqual(files); + } + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- max_prompt_bytes_per_reviewer: 96000'); + expect(promptBlock).toContain('- prompt_byte_limit_exceeded: yes'); + expect(promptBlock).toContain('- token_budget_decisions: summary_first_full_scope'); + expect(promptBlock).toContain('Do not remove files from assigned_scope'); + }); + + it('keeps summary-first disabled when split guardrails fit the prompt-byte budget', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra([], { + 
reviewer_file_split_threshold: 4, + max_same_role_instances: 2, + }), + ); + const files = Array.from( + { length: 5 }, + (_, index) => `src/crates/core/src/agentic/small_${index}.rs`, + ); + const target = classifyReviewTargetFromFiles(files, 'workspace_diff'); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + tokenBudgetMode: 'thorough', + concurrencyPolicy: { + maxParallelInstances: 8, + }, + changeStats: { + fileCount: files.length, + totalLinesChanged: 25, + lineCountSource: 'diff_stat', + }, + }); + + expect(manifest.tokenBudget).toMatchObject({ + maxFilesPerReviewer: 4, + maxPromptBytesPerReviewer: 192_000, + promptByteLimitExceeded: false, + largeDiffSummaryFirst: false, + }); + expect(manifest.workPackets.filter((packet) => packet.phase === 'reviewer')) + .toEqual( + expect.arrayContaining([ + expect.objectContaining({ + assignedScope: expect.objectContaining({ + groupCount: 2, + }), + }), + ]), + ); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- prompt_byte_limit_exceeded: no'); + expect(promptBlock).toContain('- token_budget_decisions: none'); + }); + + it('predicts manifest timeouts from resolved target size', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + const target = classifyReviewTargetFromFiles( + Array.from( + { length: 25 }, + (_, index) => `src/web-ui/src/components/ReviewPanel${index}.tsx`, + ), + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { target }); + + expect(manifest.changeStats).toMatchObject({ + fileCount: 25, + lineCountSource: 'unknown', + }); + expect(manifest.executionPolicy).toMatchObject({ + reviewerTimeoutSeconds: 675, + judgeTimeoutSeconds: 1350, + }); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- target_file_count: 25'); + expect(promptBlock).toContain('- target_line_count: unknown'); + 
expect(promptBlock).toContain('- reviewer_timeout_seconds: 675'); + expect(promptBlock).toContain('- judge_timeout_seconds: 1350'); + }); + + it('includes diff line stats in predictive manifest timeouts', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + const target = classifyReviewTargetFromFiles( + Array.from( + { length: 25 }, + (_, index) => `src/web-ui/src/components/ReviewPanel${index}.tsx`, + ), + 'workspace_diff', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + changeStats: { + fileCount: 25, + totalLinesChanged: 800, + lineCountSource: 'diff_stat', + }, + }); + + expect(manifest.changeStats).toMatchObject({ + fileCount: 25, + totalLinesChanged: 800, + lineCountSource: 'diff_stat', + }); + expect(manifest.executionPolicy).toMatchObject({ + reviewerTimeoutSeconds: 915, + judgeTimeoutSeconds: 1830, + }); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- target_line_count: 800'); + expect(promptBlock).toContain('- target_line_count_source: diff_stat'); + expect(promptBlock).toContain('- reviewer_timeout_seconds: 915'); + expect(promptBlock).toContain('- judge_timeout_seconds: 1830'); + }); + + it('adds an advisory risk-based strategy recommendation to the manifest and prompt', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + const target = classifyReviewTargetFromFiles( + [ + 'src/crates/core/src/service/auth/token_store.rs', + 'src/apps/desktop/src/api/agentic_api.rs', + ...Array.from( + { length: 18 }, + (_, index) => `src/web-ui/src/components/ReviewPanel${index}.tsx`, + ), + ], + 'workspace_diff', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + changeStats: { + fileCount: 20, + totalLinesChanged: 1400, + lineCountSource: 'diff_stat', + }, + }); + + expect(manifest.strategyLevel).toBe('normal'); + 
expect(manifest.strategyRecommendation).toMatchObject({ + strategyLevel: 'deep', + factors: { + fileCount: 20, + totalLinesChanged: 1400, + securityFileCount: 1, + }, + }); + expect(manifest.strategyRecommendation?.rationale).toContain('Large/high-risk change'); + expect(manifest.strategyDecision).toMatchObject({ + authority: 'mismatch_warning', + teamDefaultStrategy: 'normal', + finalStrategy: 'normal', + mismatch: true, + mismatchSeverity: 'medium', + frontendRecommendation: { + strategyLevel: 'deep', + }, + backendRecommendation: { + strategyLevel: 'deep', + factors: { + fileCount: 20, + totalLinesChanged: 1400, + filesInSecurityPaths: 1, + maxCyclomaticComplexityDelta: 0, + maxCyclomaticComplexityDeltaSource: 'not_measured', + }, + }, + }); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- recommended_strategy: deep'); + expect(promptBlock).toContain('- frontend_recommended_strategy: deep'); + expect(promptBlock).toContain('- backend_recommended_strategy: deep'); + expect(promptBlock).toContain('- strategy_authority: mismatch_warning'); + expect(promptBlock).toContain('- strategy_mismatch: yes'); + expect(promptBlock).toContain('- max_cyclomatic_complexity_delta_source: not_measured'); + expect(promptBlock).toContain('- strategy_recommendation_rationale: Large/high-risk change'); + expect(promptBlock).toContain('Risk recommendation is advisory'); + }); + + it('records explicit strategy override as final strategy metadata without expanding reviewer roster', () => { + const team = resolveDefaultReviewTeam( + [ + ...coreSubagents(), + subagent('ExtraEnabled', true, 'user', 'fast', true, true), + ], + storedConfigWithExtra(['ExtraEnabled']), + ); + const target = classifyReviewTargetFromFiles( + [ + ...Array.from( + { length: 24 }, + (_, index) => `src/crates/core/src/review/module_${index}.rs`, + ), + ], + 'workspace_diff', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target, + 
strategyOverride: 'quick', + changeStats: { + fileCount: 24, + totalLinesChanged: 1800, + lineCountSource: 'diff_stat', + }, + }); + + expect(manifest.strategyDecision).toMatchObject({ + authority: 'mismatch_warning', + teamDefaultStrategy: 'normal', + userOverride: 'quick', + finalStrategy: 'quick', + mismatch: true, + mismatchSeverity: 'high', + backendRecommendation: { + strategyLevel: 'deep', + }, + }); + expect(manifest.coreReviewers).toHaveLength(4); + expect(manifest.enabledExtraReviewers).toHaveLength(1); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- final_strategy: quick'); + expect(promptBlock).toContain('- strategy_user_override: quick'); + expect(promptBlock).toContain('- strategy_mismatch_severity: high'); + }); + + it('keeps unknown targets at a conservative normal recommendation', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra(), + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + target: createUnknownReviewTargetClassification('manual_prompt'), + }); + + expect(manifest.strategyRecommendation).toMatchObject({ + strategyLevel: 'normal', + score: 0, + }); + expect(manifest.strategyRecommendation?.rationale).toContain('unresolved target'); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- recommended_strategy: normal'); + }); + + it('preserves explicit zero timeout policy when predicting manifest timeouts', () => { + const team = resolveDefaultReviewTeam( + coreSubagents(), + storedConfigWithExtra([], { + reviewer_timeout_seconds: 0, + judge_timeout_seconds: 0, + }), + ); + const target = classifyReviewTargetFromFiles( + ['src/web-ui/src/components/ReviewPanel.tsx'], + 'session_files', + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { target }); + + expect(manifest.executionPolicy).toMatchObject({ + reviewerTimeoutSeconds: 0, + judgeTimeoutSeconds: 0, + }); + }); + + 
it('marks excess extra reviewers as budget-limited in economy mode', () => { + const team = resolveDefaultReviewTeam( + [ + ...coreSubagents(), + subagent('ExtraOne', true, 'user', 'fast', true, true), + subagent('ExtraTwo', true, 'user', 'fast', true, true), + ], + storedConfigWithExtra(['ExtraOne', 'ExtraTwo']), + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + tokenBudgetMode: 'economy', + }); + + expect(manifest.enabledExtraReviewers).toEqual([]); + expect(manifest.skippedReviewers).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + subagentId: 'ExtraOne', + reason: 'budget_limited', + }), + expect.objectContaining({ + subagentId: 'ExtraTwo', + reason: 'budget_limited', + }), + ]), + ); + expect(manifest.tokenBudget).toMatchObject({ + mode: 'economy', + maxExtraReviewers: 0, + skippedReviewerIds: ['ExtraOne', 'ExtraTwo'], + }); + }); + it('applies per-member strategy overrides in the launch manifest and prompt', () => { const team = resolveDefaultReviewTeam( [ @@ -309,7 +1619,7 @@ describe('reviewTeamService', () => { ); const manifest = buildEffectiveReviewTeamManifest(team, { - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, }); expect(manifest.strategyLevel).toBe('quick'); @@ -369,6 +1679,55 @@ describe('reviewTeamService', () => { expect(promptBlock).toContain('Token/time impact: approximately 1.8-2.5x token usage and 1.5-2.5x runtime.'); }); + it('applies a project strategy override to the launch manifest without changing member overrides', () => { + const team = resolveDefaultReviewTeam( + [ + ...coreSubagents(), + subagent('ExtraEnabled', true, 'user', 'fast', true, true), + ], + storedConfigWithExtra(['ExtraEnabled'], { + strategy_level: 'normal', + member_strategy_overrides: { + ReviewSecurity: 'quick', + }, + }), + ); + + const manifest = buildEffectiveReviewTeamManifest(team, { + workspacePath: WORKSPACE_PATH, + strategyOverride: 'deep', + }); + + expect(manifest.strategyLevel).toBe('deep'); 
+ expect(manifest.coreReviewers).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + subagentId: 'ReviewBusinessLogic', + strategyLevel: 'deep', + strategySource: 'team', + defaultModelSlot: 'primary', + }), + expect.objectContaining({ + subagentId: 'ReviewSecurity', + strategyLevel: 'quick', + strategySource: 'member', + defaultModelSlot: 'fast', + }), + ]), + ); + expect(manifest.enabledExtraReviewers[0]).toMatchObject({ + subagentId: 'ExtraEnabled', + strategyLevel: 'deep', + strategySource: 'team', + defaultModelSlot: 'primary', + }); + + const promptBlock = buildReviewTeamPromptBlock(team, manifest); + expect(promptBlock).toContain('- team_strategy: deep'); + expect(promptBlock).toContain('subagent_type: ReviewSecurity'); + expect(promptBlock).toContain('strategy: quick'); + }); + it('falls back removed concrete reviewer models to the strategy default model slot', () => { const team = resolveDefaultReviewTeam( [ @@ -416,18 +1775,21 @@ describe('reviewTeamService', () => { const promptBlock = buildReviewTeamPromptBlock( team, buildEffectiveReviewTeamManifest(team, { - workspacePath: 'D:/workspace/project-a', + workspacePath: WORKSPACE_PATH, }), ); expect(promptBlock).toContain('Run manifest:'); + expect(promptBlock).toContain('target_resolution: unknown'); expect(promptBlock).toContain('- team_strategy: normal'); - expect(promptBlock).toContain('- workspace_path: D:/workspace/project-a'); + expect(promptBlock).toContain(`- workspace_path: ${WORKSPACE_PATH}`); expect(promptBlock).toContain('quality_gate_reviewer: ReviewJudge'); expect(promptBlock).toContain('enabled_extra_reviewers: ExtraEnabled'); expect(promptBlock).toContain('skipped_reviewers:'); expect(promptBlock).toContain('- ExtraDisabled: disabled'); expect(promptBlock).not.toContain('subagent_type: ExtraDisabled'); + expect(promptBlock).toContain('Run only reviewers listed in core_reviewers and enabled_extra_reviewers.'); + expect(promptBlock).not.toContain('run it in parallel with the 
locked reviewers whenever the change contains frontend files'); }); it('tells DeepReview to wait for user approval before running ReviewFixer', () => { diff --git a/src/web-ui/src/shared/services/reviewTeamService.ts b/src/web-ui/src/shared/services/reviewTeamService.ts index 1c588808f..48f793615 100644 --- a/src/web-ui/src/shared/services/reviewTeamService.ts +++ b/src/web-ui/src/shared/services/reviewTeamService.ts @@ -1,12 +1,25 @@ import { configAPI } from '@/infrastructure/api/service-api/ConfigAPI'; +import { agentAPI } from '@/infrastructure/api/service-api/AgentAPI'; import { SubagentAPI, type SubagentInfo, type SubagentSource, } from '@/infrastructure/api/service-api/SubagentAPI'; +import { + classifyReviewTargetFromFiles, + createUnknownReviewTargetClassification, + shouldRunReviewerForTarget, + type ReviewDomainTag, + type ReviewTargetClassification, +} from './reviewTargetClassifier'; +import { evaluateReviewSubagentToolReadiness } from './reviewSubagentCapabilities'; export const DEFAULT_REVIEW_TEAM_ID = 'default-review-team'; export const DEFAULT_REVIEW_TEAM_CONFIG_PATH = 'ai.review_teams.default'; +export const DEFAULT_REVIEW_TEAM_RATE_LIMIT_STATUS_CONFIG_PATH = + 'ai.review_teams.rate_limit_status'; +export const DEFAULT_REVIEW_TEAM_PROJECT_STRATEGY_OVERRIDES_CONFIG_PATH = + 'ai.review_teams.project_strategy_overrides'; export const DEFAULT_REVIEW_TEAM_MODEL = 'fast'; export const DEFAULT_REVIEW_TEAM_STRATEGY_LEVEL = 'normal' as const; export const DEFAULT_REVIEW_MEMBER_STRATEGY_LEVEL = 'inherit' as const; @@ -15,7 +28,37 @@ export const DEFAULT_REVIEW_TEAM_EXECUTION_POLICY = { judgeTimeoutSeconds: 600, reviewerFileSplitThreshold: 20, maxSameRoleInstances: 3, + maxRetriesPerRole: 1, +} as const; +export const DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY = { + maxParallelInstances: 4, + staggerSeconds: 0, + maxQueueWaitSeconds: 60, + batchExtrasSeparately: true, + allowProviderCapacityQueue: true, + allowBoundedAutoRetry: false, + 
autoRetryElapsedGuardSeconds: 180, } as const; +const MAX_PREDICTIVE_TIMEOUT_SECONDS = 3600; +const MAX_PARALLEL_REVIEWER_INSTANCES = 16; +const MAX_QUEUE_WAIT_SECONDS = 600; +const MAX_AUTO_RETRY_ELAPSED_GUARD_SECONDS = 900; +const PREDICTIVE_TIMEOUT_PER_FILE_SECONDS = 15; +const PREDICTIVE_TIMEOUT_PER_100_LINES_SECONDS = 30; +const PREDICTIVE_TIMEOUT_BASE_SECONDS: Record = { + quick: 180, + normal: 300, + deep: 600, +}; +const TOKEN_BUDGET_PROMPT_BYTE_LIMIT_BY_MODE: Record = { + economy: 64_000, + balanced: 96_000, + thorough: 192_000, +}; +const PROMPT_BYTE_ESTIMATE_BASE_BYTES = 12_000; +const PROMPT_BYTE_ESTIMATE_PER_FILE_BYTES = 1_800; +const PROMPT_BYTE_ESTIMATE_PER_CHANGED_LINE_BYTES = 120; +const PROMPT_BYTE_ESTIMATE_UNKNOWN_LINES_PER_FILE = 80; export type ReviewStrategyLevel = 'quick' | 'normal' | 'deep'; export type ReviewMemberStrategyLevel = ReviewStrategyLevel | 'inherit'; @@ -26,13 +69,7 @@ export interface ReviewStrategyCommonRules { reviewerPromptRules: string[]; } -export type ReviewRoleDirectiveKey = - | 'ReviewBusinessLogic' - | 'ReviewPerformance' - | 'ReviewSecurity' - | 'ReviewArchitecture' - | 'ReviewFrontend' - | 'ReviewJudge'; +export type ReviewRoleDirectiveKey = string; export interface ReviewStrategyProfile { level: ReviewStrategyLevel; @@ -44,7 +81,7 @@ export interface ReviewStrategyProfile { promptDirective: string; /** Per-role strategy directives. When a role key is present, its directive * overrides `promptDirective` for that reviewer or the judge. 
*/ - roleDirectives: Partial>; + roleDirectives: Record; } export const REVIEW_STRATEGY_LEVELS: ReviewStrategyLevel[] = [ @@ -151,13 +188,7 @@ export function getReviewStrategyProfile( return REVIEW_STRATEGY_PROFILES[strategyLevel]; } -export type ReviewTeamCoreRoleKey = - | 'businessLogic' - | 'performance' - | 'security' - | 'architecture' - | 'frontend' - | 'judge'; +export type ReviewTeamCoreRoleKey = string; export interface ReviewTeamCoreRoleDefinition { key: ReviewTeamCoreRoleKey; @@ -171,6 +202,20 @@ export interface ReviewTeamCoreRoleDefinition { conditional?: boolean; } +export interface ReviewTeamDefinition { + id: string; + name: string; + description: string; + warning: string; + defaultModel: string; + defaultStrategyLevel: ReviewStrategyLevel; + defaultExecutionPolicy: ReviewTeamExecutionPolicy; + coreRoles: ReviewTeamCoreRoleDefinition[]; + strategyProfiles: Record; + disallowedExtraSubagentIds: string[]; + hiddenAgentIds: string[]; +} + export interface ReviewTeamStoredConfig { extra_subagent_ids: string[]; strategy_level: ReviewStrategyLevel; @@ -179,6 +224,12 @@ export interface ReviewTeamStoredConfig { judge_timeout_seconds: number; reviewer_file_split_threshold: number; max_same_role_instances: number; + max_retries_per_role: number; + max_parallel_reviewers: number; + max_queue_wait_seconds: number; + allow_provider_capacity_queue: boolean; + allow_bounded_auto_retry: boolean; + auto_retry_elapsed_guard_seconds: number; } export interface ReviewTeamExecutionPolicy { @@ -186,6 +237,199 @@ export interface ReviewTeamExecutionPolicy { judgeTimeoutSeconds: number; reviewerFileSplitThreshold: number; maxSameRoleInstances: number; + maxRetriesPerRole: number; +} + +export interface ReviewTeamConcurrencyPolicy { + maxParallelInstances: number; + staggerSeconds: number; + maxQueueWaitSeconds: number; + batchExtrasSeparately: boolean; + allowProviderCapacityQueue: boolean; + allowBoundedAutoRetry: boolean; + autoRetryElapsedGuardSeconds: number; +} + 
+export interface ReviewTeamRateLimitStatus { + remaining: number; +} + +export type ReviewTeamManifestMemberReason = + | 'disabled' + | 'unavailable' + | 'not_applicable' + | 'budget_limited' + | 'invalid_tooling'; + +export type ReviewTokenBudgetMode = 'economy' | 'balanced' | 'thorough'; +export type ReviewPromptByteEstimateSource = 'manifest_heuristic'; +export type ReviewTeamTokenBudgetDecisionKind = + | 'summary_first_full_scope' + | 'skip_extra_reviewers'; +export type ReviewTeamTokenBudgetDecisionReason = + | 'prompt_bytes_exceeded' + | 'extra_reviewers_skipped'; + +export interface ReviewTeamTokenBudgetDecision { + kind: ReviewTeamTokenBudgetDecisionKind; + reason: ReviewTeamTokenBudgetDecisionReason; + detail: string; + affectedReviewerIds?: string[]; +} + +export interface ReviewTeamTokenBudgetPlan { + mode: ReviewTokenBudgetMode; + estimatedReviewerCalls: number; + maxReviewerCalls: number; + maxExtraReviewers: number; + maxFilesPerReviewer?: number; + maxPromptBytesPerReviewer?: number; + estimatedPromptBytesPerReviewer?: number; + promptByteEstimateSource?: ReviewPromptByteEstimateSource; + promptByteLimitExceeded?: boolean; + largeDiffSummaryFirst: boolean; + decisions?: ReviewTeamTokenBudgetDecision[]; + skippedReviewerIds: string[]; + warnings: string[]; +} + +export interface ReviewTeamChangeStats { + fileCount: number; + totalLinesChanged?: number; + lineCountSource: 'unknown' | 'diff_stat' | 'estimated'; +} + +export interface ReviewTeamRiskFactors { + fileCount: number; + totalLinesChanged?: number; + lineCountSource: ReviewTeamChangeStats['lineCountSource']; + securityFileCount: number; + workspaceAreaCount: number; + contractSurfaceChanged: boolean; +} + +export interface ReviewTeamStrategyRecommendation { + strategyLevel: ReviewStrategyLevel; + score: number; + rationale: string; + factors: ReviewTeamRiskFactors; +} + +export type ReviewTeamStrategyAuthority = 'mismatch_warning'; +export type ReviewTeamStrategyMismatchSeverity = 'none' | 
'low' | 'medium' | 'high'; + +export interface ReviewTeamBackendRiskFactors { + fileCount: number; + totalLinesChanged: number; + lineCountSource: ReviewTeamChangeStats['lineCountSource']; + filesInSecurityPaths: number; + crossCrateChanges: number; + maxCyclomaticComplexityDelta: number; + maxCyclomaticComplexityDeltaSource: 'not_measured'; +} + +export interface ReviewTeamBackendStrategyRecommendation { + strategyLevel: ReviewStrategyLevel; + score: number; + rationale: string; + factors: ReviewTeamBackendRiskFactors; +} + +export interface ReviewTeamStrategyDecision { + authority: ReviewTeamStrategyAuthority; + teamDefaultStrategy: ReviewStrategyLevel; + userOverride?: ReviewStrategyLevel; + finalStrategy: ReviewStrategyLevel; + frontendRecommendation: ReviewTeamStrategyRecommendation; + backendRecommendation: ReviewTeamBackendStrategyRecommendation; + mismatch: boolean; + mismatchSeverity: ReviewTeamStrategyMismatchSeverity; + rationale: string; +} + +export interface ReviewTeamPreReviewSummaryArea { + key: string; + fileCount: number; + sampleFiles: string[]; +} + +export interface ReviewTeamPreReviewSummary { + source: 'target_manifest'; + summary: string; + fileCount: number; + excludedFileCount: number; + lineCount?: number; + lineCountSource: ReviewTeamChangeStats['lineCountSource']; + targetTags: ReviewDomainTag[]; + workspaceAreas: ReviewTeamPreReviewSummaryArea[]; + warnings: ReviewTargetClassification['warnings'][number]['code'][]; +} + +export type ReviewTeamSharedContextTool = 'GetFileDiff' | 'Read'; + +export interface ReviewTeamSharedContextCacheEntry { + cacheKey: string; + path: string; + workspaceArea: string; + recommendedTools: ReviewTeamSharedContextTool[]; + consumerPacketIds: string[]; +} + +export interface ReviewTeamSharedContextCachePlan { + source: 'work_packets'; + strategy: 'reuse_readonly_file_context_by_cache_key'; + entries: ReviewTeamSharedContextCacheEntry[]; + omittedEntryCount: number; +} + +export type 
ReviewTeamIncrementalReviewCacheInvalidation = + | 'target_file_set_changed' + | 'target_line_count_changed' + | 'target_tag_changed' + | 'target_warning_changed' + | 'reviewer_roster_changed' + | 'strategy_changed'; + +export interface ReviewTeamIncrementalReviewCachePlan { + source: 'target_manifest'; + strategy: 'reuse_completed_packets_when_fingerprint_matches'; + cacheKey: string; + fingerprint: string; + filePaths: string[]; + workspaceAreas: string[]; + targetTags: ReviewDomainTag[]; + reviewerPacketIds: string[]; + lineCount?: number; + lineCountSource: ReviewTeamChangeStats['lineCountSource']; + invalidatesOn: ReviewTeamIncrementalReviewCacheInvalidation[]; +} + +export interface ReviewTeamWorkPacketScope { + kind: 'review_target'; + targetSource: ReviewTargetClassification['source']; + targetResolution: ReviewTargetClassification['resolution']; + targetTags: ReviewDomainTag[]; + fileCount: number; + files: string[]; + excludedFileCount: number; + groupIndex?: number; + groupCount?: number; +} + +export interface ReviewTeamWorkPacket { + packetId: string; + phase: 'reviewer' | 'judge'; + launchBatch: number; + subagentId: string; + displayName: string; + roleName: string; + assignedScope: ReviewTeamWorkPacketScope; + allowedTools: string[]; + timeoutSeconds: number; + requiredOutputFields: string[]; + strategyLevel: ReviewStrategyLevel; + strategyDirective: string; + model: string; } export interface ReviewTeamMember { @@ -209,6 +453,10 @@ export interface ReviewTeamMember { source: 'core' | 'extra'; subagentSource: SubagentSource; accentColor: string; + allowedTools: string[]; + defaultModelSlot?: ReviewStrategyProfile['defaultModelSlot']; + strategyDirective?: string; + skipReason?: ReviewTeamManifestMemberReason; } export interface ReviewTeam { @@ -219,6 +467,8 @@ export interface ReviewTeam { strategyLevel: ReviewStrategyLevel; memberStrategyOverrides: Record; executionPolicy: ReviewTeamExecutionPolicy; + concurrencyPolicy: ReviewTeamConcurrencyPolicy; 
+ definition: ReviewTeamDefinition; members: ReviewTeamMember[]; coreMembers: ReviewTeamMember[]; extraMembers: ReviewTeamMember[]; @@ -238,19 +488,39 @@ export interface ReviewTeamManifestMember { locked: boolean; source: ReviewTeamMember['source']; subagentSource: ReviewTeamMember['subagentSource']; - reason?: 'disabled' | 'unavailable' | 'non_applicable'; + reason?: ReviewTeamManifestMemberReason; } export interface ReviewTeamRunManifest { reviewMode: 'deep'; workspacePath?: string; policySource: 'default-review-team-config'; + target: ReviewTargetClassification; strategyLevel: ReviewStrategyLevel; + strategyRecommendation?: ReviewTeamStrategyRecommendation; + strategyDecision: ReviewTeamStrategyDecision; executionPolicy: ReviewTeamExecutionPolicy; + concurrencyPolicy: ReviewTeamConcurrencyPolicy; + changeStats?: ReviewTeamChangeStats; + preReviewSummary: ReviewTeamPreReviewSummary; + sharedContextCache: ReviewTeamSharedContextCachePlan; + incrementalReviewCache: ReviewTeamIncrementalReviewCachePlan; + tokenBudget: ReviewTeamTokenBudgetPlan; coreReviewers: ReviewTeamManifestMember[]; qualityGateReviewer?: ReviewTeamManifestMember; enabledExtraReviewers: ReviewTeamManifestMember[]; skippedReviewers: ReviewTeamManifestMember[]; + workPackets?: ReviewTeamWorkPacket[]; +} + +export function getActiveReviewTeamManifestMembers( + manifest: ReviewTeamRunManifest, +): ReviewTeamManifestMember[] { + return [ + ...manifest.coreReviewers, + ...manifest.enabledExtraReviewers, + ...(manifest.qualityGateReviewer ? 
[manifest.qualityGateReviewer] : []), + ]; } const EXTRA_MEMBER_DEFAULTS = { @@ -265,6 +535,32 @@ const EXTRA_MEMBER_DEFAULTS = { accentColor: '#64748b', }; +const REVIEW_WORK_PACKET_ALLOWED_TOOLS = [ + 'GetFileDiff', + 'Read', + 'Grep', + 'Glob', + 'LS', + 'Git', +] as const; + +const REVIEWER_WORK_PACKET_REQUIRED_OUTPUT_FIELDS = [ + 'packet_id', + 'status', + 'verdict', + 'findings', + 'reviewer_summary', +] as const; + +const JUDGE_WORK_PACKET_REQUIRED_OUTPUT_FIELDS = [ + 'packet_id', + 'status', + 'decision_summary', + 'validated_findings', + 'rejected_or_downgraded_notes', + 'coverage_notes', +] as const; + export const DEFAULT_REVIEW_TEAM_CORE_ROLES: ReviewTeamCoreRoleDefinition[] = [ { key: 'businessLogic', @@ -363,6 +659,166 @@ const DISALLOWED_REVIEW_TEAM_MEMBER_IDS = new Set([ 'ReviewFixer', ]); +export const FALLBACK_REVIEW_TEAM_DEFINITION: ReviewTeamDefinition = { + id: DEFAULT_REVIEW_TEAM_ID, + name: 'Code Review Team', + description: + 'A multi-reviewer team for deep code review with mandatory logic, performance, security, architecture, conditional frontend, and quality-gate roles.', + warning: + 'Deep review may take longer and usually consumes more tokens than a standard review.', + defaultModel: DEFAULT_REVIEW_TEAM_MODEL, + defaultStrategyLevel: DEFAULT_REVIEW_TEAM_STRATEGY_LEVEL, + defaultExecutionPolicy: { + ...DEFAULT_REVIEW_TEAM_EXECUTION_POLICY, + }, + coreRoles: DEFAULT_REVIEW_TEAM_CORE_ROLES, + strategyProfiles: REVIEW_STRATEGY_PROFILES, + disallowedExtraSubagentIds: [...DISALLOWED_REVIEW_TEAM_MEMBER_IDS], + hiddenAgentIds: [ + 'DeepReview', + ...DEFAULT_REVIEW_TEAM_CORE_ROLES.map((role) => role.subagentId), + ], +}; + +function isReviewTeamCoreRoleDefinition(value: unknown): value is ReviewTeamCoreRoleDefinition { + if (!value || typeof value !== 'object') return false; + const role = value as Partial; + return ( + typeof role.key === 'string' && + typeof role.subagentId === 'string' && + typeof role.funName === 'string' && + typeof 
role.roleName === 'string' && + typeof role.description === 'string' && + Array.isArray(role.responsibilities) && + role.responsibilities.every((item) => typeof item === 'string') && + typeof role.accentColor === 'string' + ); +} + +function isReviewStrategyProfile(value: unknown): value is ReviewStrategyProfile { + if (!value || typeof value !== 'object') return false; + const profile = value as Partial; + return ( + isReviewStrategyLevel(profile.level) && + typeof profile.label === 'string' && + typeof profile.summary === 'string' && + typeof profile.tokenImpact === 'string' && + typeof profile.runtimeImpact === 'string' && + (profile.defaultModelSlot === 'fast' || profile.defaultModelSlot === 'primary') && + typeof profile.promptDirective === 'string' && + Boolean(profile.roleDirectives) && + typeof profile.roleDirectives === 'object' + ); +} + +function nonEmptyStringOrFallback(value: unknown, fallback: string): string { + if (typeof value !== 'string') { + return fallback; + } + + return value.trim() || fallback; +} + +function normalizeReviewTeamDefinition(raw: unknown): ReviewTeamDefinition { + if (!raw || typeof raw !== 'object') { + return FALLBACK_REVIEW_TEAM_DEFINITION; + } + + const source = raw as Partial; + const coreRoles = Array.isArray(source.coreRoles) + ? source.coreRoles.filter(isReviewTeamCoreRoleDefinition) + : []; + const strategyProfiles = REVIEW_STRATEGY_LEVELS.reduce< + Partial> + >((profiles, level) => { + const profile = source.strategyProfiles?.[level]; + profiles[level] = isReviewStrategyProfile(profile) + ? profile + : FALLBACK_REVIEW_TEAM_DEFINITION.strategyProfiles[level]; + return profiles; + }, {}) as Record; + const disallowedExtraSubagentIds = Array.isArray(source.disallowedExtraSubagentIds) + ? dedupeIds(source.disallowedExtraSubagentIds.filter((id): id is string => typeof id === 'string')) + : []; + const hiddenAgentIds = Array.isArray(source.hiddenAgentIds) + ? 
dedupeIds(source.hiddenAgentIds.filter((id): id is string => typeof id === 'string')) + : []; + + return { + id: nonEmptyStringOrFallback(source.id, FALLBACK_REVIEW_TEAM_DEFINITION.id), + name: nonEmptyStringOrFallback(source.name, FALLBACK_REVIEW_TEAM_DEFINITION.name), + description: nonEmptyStringOrFallback( + source.description, + FALLBACK_REVIEW_TEAM_DEFINITION.description, + ), + warning: nonEmptyStringOrFallback( + source.warning, + FALLBACK_REVIEW_TEAM_DEFINITION.warning, + ), + defaultModel: nonEmptyStringOrFallback( + source.defaultModel, + FALLBACK_REVIEW_TEAM_DEFINITION.defaultModel, + ), + defaultStrategyLevel: isReviewStrategyLevel(source.defaultStrategyLevel) + ? source.defaultStrategyLevel + : FALLBACK_REVIEW_TEAM_DEFINITION.defaultStrategyLevel, + defaultExecutionPolicy: source.defaultExecutionPolicy + ? { + reviewerTimeoutSeconds: clampInteger( + source.defaultExecutionPolicy.reviewerTimeoutSeconds, + 0, + 3600, + FALLBACK_REVIEW_TEAM_DEFINITION.defaultExecutionPolicy.reviewerTimeoutSeconds, + ), + judgeTimeoutSeconds: clampInteger( + source.defaultExecutionPolicy.judgeTimeoutSeconds, + 0, + 3600, + FALLBACK_REVIEW_TEAM_DEFINITION.defaultExecutionPolicy.judgeTimeoutSeconds, + ), + reviewerFileSplitThreshold: clampInteger( + source.defaultExecutionPolicy.reviewerFileSplitThreshold, + 0, + 9999, + FALLBACK_REVIEW_TEAM_DEFINITION.defaultExecutionPolicy.reviewerFileSplitThreshold, + ), + maxSameRoleInstances: clampInteger( + source.defaultExecutionPolicy.maxSameRoleInstances, + 1, + 8, + FALLBACK_REVIEW_TEAM_DEFINITION.defaultExecutionPolicy.maxSameRoleInstances, + ), + maxRetriesPerRole: clampInteger( + source.defaultExecutionPolicy.maxRetriesPerRole, + 0, + 3, + FALLBACK_REVIEW_TEAM_DEFINITION.defaultExecutionPolicy.maxRetriesPerRole, + ), + } + : FALLBACK_REVIEW_TEAM_DEFINITION.defaultExecutionPolicy, + coreRoles: coreRoles.length > 0 ? 
coreRoles : FALLBACK_REVIEW_TEAM_DEFINITION.coreRoles, + strategyProfiles, + disallowedExtraSubagentIds: + disallowedExtraSubagentIds.length > 0 + ? disallowedExtraSubagentIds + : FALLBACK_REVIEW_TEAM_DEFINITION.disallowedExtraSubagentIds, + hiddenAgentIds: + hiddenAgentIds.length > 0 + ? hiddenAgentIds + : FALLBACK_REVIEW_TEAM_DEFINITION.hiddenAgentIds, + }; +} + +export async function loadDefaultReviewTeamDefinition(): Promise { + try { + return normalizeReviewTeamDefinition( + await agentAPI.getDefaultReviewTeamDefinition(), + ); + } catch { + return FALLBACK_REVIEW_TEAM_DEFINITION; + } +} + function dedupeIds(ids: string[]): string[] { return Array.from( new Set( @@ -411,6 +867,42 @@ function normalizeMemberStrategyOverrides( }, {}); } +function normalizeProjectStrategyOverrideKey(workspacePath?: string): string | undefined { + const normalized = workspacePath?.trim().replace(/\\/g, '/'); + if (!normalized) { + return undefined; + } + if (normalized === '/' || /^[a-zA-Z]:\/$/.test(normalized)) { + return normalized.toLowerCase(); + } + return normalized.replace(/\/+$/, '').toLowerCase(); +} + +function normalizeProjectStrategyOverrideStore( + raw: unknown, +): Record { + if (!raw || typeof raw !== 'object' || Array.isArray(raw)) { + return {}; + } + + return Object.entries(raw as Record).reduce< + Record + >((result, [workspacePath, value]) => { + const key = normalizeProjectStrategyOverrideKey(workspacePath); + if (!key) { + return result; + } + if (isReviewStrategyLevel(value)) { + result[key] = value; + } else { + console.warn( + `[ReviewTeamService] Ignoring invalid project strategy override for '${key}': expected one of ${REVIEW_STRATEGY_LEVELS.join(', ')}, got '${value}'`, + ); + } + return result; + }, {}); +} + function clampInteger( value: unknown, min: number, @@ -425,6 +917,136 @@ function clampInteger( return Math.min(max, Math.max(min, Math.floor(numeric))); } +function normalizeConcurrencyPolicy( + raw?: Partial, +): ReviewTeamConcurrencyPolicy { 
+ return { + maxParallelInstances: clampInteger( + raw?.maxParallelInstances, + 1, + MAX_PARALLEL_REVIEWER_INSTANCES, + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.maxParallelInstances, + ), + staggerSeconds: clampInteger( + raw?.staggerSeconds, + 0, + 60, + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.staggerSeconds, + ), + maxQueueWaitSeconds: clampInteger( + raw?.maxQueueWaitSeconds, + 0, + MAX_QUEUE_WAIT_SECONDS, + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.maxQueueWaitSeconds, + ), + batchExtrasSeparately: + typeof raw?.batchExtrasSeparately === 'boolean' + ? raw.batchExtrasSeparately + : DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.batchExtrasSeparately, + allowProviderCapacityQueue: + typeof raw?.allowProviderCapacityQueue === 'boolean' + ? raw.allowProviderCapacityQueue + : DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.allowProviderCapacityQueue, + allowBoundedAutoRetry: + typeof raw?.allowBoundedAutoRetry === 'boolean' + ? raw.allowBoundedAutoRetry + : DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.allowBoundedAutoRetry, + autoRetryElapsedGuardSeconds: clampInteger( + raw?.autoRetryElapsedGuardSeconds, + 30, + MAX_AUTO_RETRY_ELAPSED_GUARD_SECONDS, + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.autoRetryElapsedGuardSeconds, + ), + }; +} + +function normalizeStoredConcurrencyPolicy( + raw: unknown, +): Pick< + ReviewTeamStoredConfig, + | 'max_parallel_reviewers' + | 'max_queue_wait_seconds' + | 'allow_provider_capacity_queue' + | 'allow_bounded_auto_retry' + | 'auto_retry_elapsed_guard_seconds' +> { + const config = raw as Partial | undefined; + + return { + max_parallel_reviewers: clampInteger( + config?.max_parallel_reviewers, + 1, + MAX_PARALLEL_REVIEWER_INSTANCES, + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.maxParallelInstances, + ), + max_queue_wait_seconds: clampInteger( + config?.max_queue_wait_seconds, + 0, + MAX_QUEUE_WAIT_SECONDS, + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.maxQueueWaitSeconds, + ), + allow_provider_capacity_queue: + typeof config?.allow_provider_capacity_queue === 'boolean' + ? 
config.allow_provider_capacity_queue + : DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.allowProviderCapacityQueue, + allow_bounded_auto_retry: + typeof config?.allow_bounded_auto_retry === 'boolean' + ? config.allow_bounded_auto_retry + : DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.allowBoundedAutoRetry, + auto_retry_elapsed_guard_seconds: clampInteger( + config?.auto_retry_elapsed_guard_seconds, + 30, + MAX_AUTO_RETRY_ELAPSED_GUARD_SECONDS, + DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.autoRetryElapsedGuardSeconds, + ), + }; +} + +function applyRateLimitToConcurrencyPolicy( + policy: ReviewTeamConcurrencyPolicy, + rateLimitStatus?: ReviewTeamRateLimitStatus | null, +): ReviewTeamConcurrencyPolicy { + const remaining = Math.floor(Number(rateLimitStatus?.remaining)); + if (!Number.isFinite(remaining)) { + return policy; + } + + if (remaining > policy.maxParallelInstances * 2) { + return policy; + } + + if (remaining > policy.maxParallelInstances) { + return { + ...policy, + staggerSeconds: Math.max(policy.staggerSeconds, 5), + }; + } + + return { + ...policy, + maxParallelInstances: Math.max( + 1, + Math.min(policy.maxParallelInstances, Math.max(2, remaining)), + ), + staggerSeconds: Math.max(policy.staggerSeconds, 10), + }; +} + +function normalizeRateLimitStatus(raw: unknown): ReviewTeamRateLimitStatus | null { + if (!raw || typeof raw !== 'object' || Array.isArray(raw)) { + return null; + } + + const remaining = Math.floor(Number((raw as { remaining?: unknown }).remaining)); + if (!Number.isFinite(remaining)) { + return null; + } + + return { + remaining: Math.max(0, remaining), + }; +} + function normalizeExecutionPolicy( raw: unknown, ): Pick< @@ -433,6 +1055,7 @@ function normalizeExecutionPolicy( | 'judge_timeout_seconds' | 'reviewer_file_split_threshold' | 'max_same_role_instances' + | 'max_retries_per_role' > { const config = raw as Partial | undefined; @@ -461,6 +1084,12 @@ function normalizeExecutionPolicy( 8, DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.maxSameRoleInstances, ), 
+ max_retries_per_role: clampInteger( + config?.max_retries_per_role, + 0, + 3, + DEFAULT_REVIEW_TEAM_EXECUTION_POLICY.maxRetriesPerRole, + ), }; } @@ -472,15 +1101,31 @@ function executionPolicyFromStoredConfig( judgeTimeoutSeconds: config.judge_timeout_seconds, reviewerFileSplitThreshold: config.reviewer_file_split_threshold, maxSameRoleInstances: config.max_same_role_instances, + maxRetriesPerRole: config.max_retries_per_role, }; } +function concurrencyPolicyFromStoredConfig( + config: ReviewTeamStoredConfig, +): ReviewTeamConcurrencyPolicy { + return normalizeConcurrencyPolicy({ + maxParallelInstances: config.max_parallel_reviewers, + staggerSeconds: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.staggerSeconds, + maxQueueWaitSeconds: config.max_queue_wait_seconds, + batchExtrasSeparately: DEFAULT_REVIEW_TEAM_CONCURRENCY_POLICY.batchExtrasSeparately, + allowProviderCapacityQueue: config.allow_provider_capacity_queue, + allowBoundedAutoRetry: config.allow_bounded_auto_retry, + autoRetryElapsedGuardSeconds: config.auto_retry_elapsed_guard_seconds, + }); +} + function normalizeStoredConfig(raw: unknown): ReviewTeamStoredConfig { const extraIds = Array.isArray((raw as { extra_subagent_ids?: unknown })?.extra_subagent_ids) ? 
(raw as { extra_subagent_ids: unknown[] }).extra_subagent_ids .filter((value): value is string => typeof value === 'string') : []; const executionPolicy = normalizeExecutionPolicy(raw); + const concurrencyPolicy = normalizeStoredConcurrencyPolicy(raw); const config = raw as Partial | undefined; return { @@ -490,6 +1135,7 @@ function normalizeStoredConfig(raw: unknown): ReviewTeamStoredConfig { config?.member_strategy_overrides, ), ...executionPolicy, + ...concurrencyPolicy, }; } @@ -530,9 +1176,75 @@ export async function saveDefaultReviewTeamConfig( judge_timeout_seconds: normalizedConfig.judge_timeout_seconds, reviewer_file_split_threshold: normalizedConfig.reviewer_file_split_threshold, max_same_role_instances: normalizedConfig.max_same_role_instances, + max_retries_per_role: normalizedConfig.max_retries_per_role, + max_parallel_reviewers: normalizedConfig.max_parallel_reviewers, + max_queue_wait_seconds: normalizedConfig.max_queue_wait_seconds, + allow_provider_capacity_queue: normalizedConfig.allow_provider_capacity_queue, + allow_bounded_auto_retry: normalizedConfig.allow_bounded_auto_retry, + auto_retry_elapsed_guard_seconds: normalizedConfig.auto_retry_elapsed_guard_seconds, }); } +export async function loadReviewTeamRateLimitStatus(): Promise { + try { + const raw = await configAPI.getConfig( + DEFAULT_REVIEW_TEAM_RATE_LIMIT_STATUS_CONFIG_PATH, + { skipRetryOnNotFound: true }, + ); + return normalizeRateLimitStatus(raw); + } catch (error) { + console.warn('[ReviewTeamService] Failed to load review team rate limit status', error); + return null; + } +} + +export async function loadReviewTeamProjectStrategyOverride( + workspacePath?: string, +): Promise { + const key = normalizeProjectStrategyOverrideKey(workspacePath); + if (!key) { + return undefined; + } + + try { + const raw = await configAPI.getConfig( + DEFAULT_REVIEW_TEAM_PROJECT_STRATEGY_OVERRIDES_CONFIG_PATH, + { skipRetryOnNotFound: true }, + ); + return 
normalizeProjectStrategyOverrideStore(raw)[key]; + } catch (error) { + console.warn('[ReviewTeamService] Failed to load project review strategy override', error); + return undefined; + } +} + +export async function saveReviewTeamProjectStrategyOverride( + workspacePath: string | undefined, + strategyLevel?: ReviewStrategyLevel, +): Promise { + const key = normalizeProjectStrategyOverrideKey(workspacePath); + if (!key) { + return; + } + + const raw = await configAPI.getConfig( + DEFAULT_REVIEW_TEAM_PROJECT_STRATEGY_OVERRIDES_CONFIG_PATH, + { skipRetryOnNotFound: true }, + ).catch(() => undefined); + const nextOverrides = normalizeProjectStrategyOverrideStore(raw); + + if (strategyLevel) { + nextOverrides[key] = normalizeTeamStrategyLevel(strategyLevel); + } else { + delete nextOverrides[key]; + } + + await configAPI.setConfig( + DEFAULT_REVIEW_TEAM_PROJECT_STRATEGY_OVERRIDES_CONFIG_PATH, + nextOverrides, + ); +} + export async function addDefaultReviewTeamMember(subagentId: string): Promise { const current = await loadDefaultReviewTeamConfig(); await saveDefaultReviewTeamConfig({ @@ -559,7 +1271,41 @@ export async function saveDefaultReviewTeamExecutionPolicy( judge_timeout_seconds: policy.judgeTimeoutSeconds, reviewer_file_split_threshold: policy.reviewerFileSplitThreshold, max_same_role_instances: policy.maxSameRoleInstances, + max_retries_per_role: policy.maxRetriesPerRole, + }); +} + +export async function saveDefaultReviewTeamConcurrencyPolicy( + policy: ReviewTeamConcurrencyPolicy, +): Promise { + const current = await loadDefaultReviewTeamConfig(); + const normalizedPolicy = normalizeConcurrencyPolicy(policy); + await saveDefaultReviewTeamConfig({ + ...current, + max_parallel_reviewers: normalizedPolicy.maxParallelInstances, + max_queue_wait_seconds: normalizedPolicy.maxQueueWaitSeconds, + allow_provider_capacity_queue: normalizedPolicy.allowProviderCapacityQueue, + allow_bounded_auto_retry: normalizedPolicy.allowBoundedAutoRetry, + 
auto_retry_elapsed_guard_seconds: normalizedPolicy.autoRetryElapsedGuardSeconds, + }); +} + +export async function lowerDefaultReviewTeamMaxParallelReviewers(): Promise { + const current = await loadDefaultReviewTeamConfig(); + const currentPolicy = concurrencyPolicyFromStoredConfig(current); + const nextPolicy = { + ...currentPolicy, + maxParallelInstances: Math.max(1, currentPolicy.maxParallelInstances - 1), + }; + await saveDefaultReviewTeamConfig({ + ...current, + max_parallel_reviewers: nextPolicy.maxParallelInstances, + max_queue_wait_seconds: nextPolicy.maxQueueWaitSeconds, + allow_provider_capacity_queue: nextPolicy.allowProviderCapacityQueue, + allow_bounded_auto_retry: nextPolicy.allowBoundedAutoRetry, + auto_retry_elapsed_guard_seconds: nextPolicy.autoRetryElapsedGuardSeconds, }); + return nextPolicy; } export async function saveDefaultReviewTeamStrategyLevel( @@ -597,6 +1343,7 @@ export async function saveDefaultReviewTeamMemberStrategyOverride( export interface ResolveDefaultReviewTeamOptions { availableModelIds?: string[]; + definition?: ReviewTeamDefinition; } function extractAvailableModelIds(rawModels: unknown): string[] | undefined { @@ -646,13 +1393,14 @@ function resolveMemberModel( configuredModel: string | undefined, strategyLevel: ReviewStrategyLevel, availableModelIds?: Set, + strategyProfiles: Record = REVIEW_STRATEGY_PROFILES, ): { model: string; configuredModel: string; modelFallbackReason?: ReviewModelFallbackReason; } { const normalizedConfiguredModel = configuredModel?.trim() || ''; - const defaultModelSlot = getReviewStrategyProfile(strategyLevel).defaultModelSlot; + const defaultModelSlot = strategyProfiles[strategyLevel].defaultModelSlot; if ( !normalizedConfiguredModel || @@ -684,13 +1432,16 @@ function buildCoreMember( info: SubagentInfo | undefined, storedConfig: ReviewTeamStoredConfig, availableModelIds?: Set, + strategyProfiles: Record = REVIEW_STRATEGY_PROFILES, ): ReviewTeamMember { const strategy = 
resolveMemberStrategy(storedConfig, definition.subagentId); const model = resolveMemberModel( info?.model || DEFAULT_REVIEW_TEAM_MODEL, strategy.strategyLevel, availableModelIds, + strategyProfiles, ); + const strategyProfile = strategyProfiles[strategy.strategyLevel]; return { id: `core:${definition.subagentId}`, @@ -713,6 +1464,11 @@ function buildCoreMember( source: 'core', subagentSource: info?.subagentSource ?? 'builtin', accentColor: definition.accentColor, + allowedTools: [...REVIEW_WORK_PACKET_ALLOWED_TOOLS], + defaultModelSlot: strategyProfile.defaultModelSlot, + strategyDirective: + strategyProfile.roleDirectives[definition.subagentId] || + strategyProfile.promptDirective, }; } @@ -720,13 +1476,21 @@ function buildExtraMember( info: SubagentInfo, storedConfig: ReviewTeamStoredConfig, availableModelIds?: Set, + options: { + available?: boolean; + skipReason?: ReviewTeamManifestMemberReason; + strategyProfiles?: Record; + } = {}, ): ReviewTeamMember { const strategy = resolveMemberStrategy(storedConfig, info.id); + const strategyProfiles = options.strategyProfiles ?? REVIEW_STRATEGY_PROFILES; const model = resolveMemberModel( info.model || DEFAULT_REVIEW_TEAM_MODEL, strategy.strategyLevel, availableModelIds, + strategyProfiles, ); + const strategyProfile = strategyProfiles[strategy.strategyLevel]; return { id: `extra:${info.id}`, @@ -742,11 +1506,59 @@ function buildExtraMember( : {}), ...strategy, enabled: info.enabled, - available: true, + available: options.available ?? true, locked: false, source: 'extra', subagentSource: info.subagentSource ?? 'builtin', accentColor: EXTRA_MEMBER_DEFAULTS.accentColor, + allowedTools: + info.defaultTools && info.defaultTools.length > 0 + ? [...info.defaultTools] + : [...REVIEW_WORK_PACKET_ALLOWED_TOOLS], + defaultModelSlot: strategyProfile.defaultModelSlot, + strategyDirective: strategyProfile.promptDirective, + ...(options.skipReason ? 
{ skipReason: options.skipReason } : {}), + }; +} + +function buildUnavailableExtraMember( + subagentId: string, + storedConfig: ReviewTeamStoredConfig, + availableModelIds?: Set, + strategyProfiles: Record = REVIEW_STRATEGY_PROFILES, +): ReviewTeamMember { + const strategy = resolveMemberStrategy(storedConfig, subagentId); + const model = resolveMemberModel( + DEFAULT_REVIEW_TEAM_MODEL, + strategy.strategyLevel, + availableModelIds, + strategyProfiles, + ); + const strategyProfile = strategyProfiles[strategy.strategyLevel]; + + return { + id: `extra:${subagentId}`, + subagentId, + displayName: subagentId, + roleName: EXTRA_MEMBER_DEFAULTS.roleName, + description: EXTRA_MEMBER_DEFAULTS.description, + responsibilities: EXTRA_MEMBER_DEFAULTS.responsibilities, + model: model.model, + configuredModel: model.configuredModel, + ...(model.modelFallbackReason + ? { modelFallbackReason: model.modelFallbackReason } + : {}), + ...strategy, + enabled: true, + available: false, + locked: false, + source: 'extra', + subagentSource: 'user', + accentColor: EXTRA_MEMBER_DEFAULTS.accentColor, + allowedTools: [], + defaultModelSlot: strategyProfile.defaultModelSlot, + strategyDirective: strategyProfile.promptDirective, + skipReason: 'unavailable', }; } @@ -791,10 +1603,23 @@ export function canAddSubagentToReviewTeam(subagentId: string): boolean { return !DISALLOWED_REVIEW_TEAM_MEMBER_IDS.has(subagentId); } -export function canUseSubagentAsReviewTeamMember( +function hasReviewTeamExtraMemberShape( subagent: Pick, ): boolean { - return subagent.isReview && subagent.isReadonly && canAddSubagentToReviewTeam(subagent.id); + return ( + subagent.isReview && + subagent.isReadonly && + canAddSubagentToReviewTeam(subagent.id) + ); +} + +export function canUseSubagentAsReviewTeamMember( + subagent: Pick, +): boolean { + return ( + hasReviewTeamExtraMemberShape(subagent) && + evaluateReviewSubagentToolReadiness(subagent.defaultTools ?? 
[]).readiness !== 'invalid' + ); } export function resolveDefaultReviewTeam( @@ -802,34 +1627,67 @@ export function resolveDefaultReviewTeam( storedConfig: ReviewTeamStoredConfig, options: ResolveDefaultReviewTeamOptions = {}, ): ReviewTeam { + const definition = options.definition ?? FALLBACK_REVIEW_TEAM_DEFINITION; const byId = new Map(subagents.map((subagent) => [subagent.id, subagent])); const availableModelIds = options.availableModelIds ? new Set(options.availableModelIds) : undefined; - const coreMembers = DEFAULT_REVIEW_TEAM_CORE_ROLES.map((definition) => + const coreMembers = definition.coreRoles.map((roleDefinition) => buildCoreMember( - definition, - byId.get(definition.subagentId), + roleDefinition, + byId.get(roleDefinition.subagentId), storedConfig, availableModelIds, + definition.strategyProfiles, ), ); + const disallowedExtraSubagentIds = new Set(definition.disallowedExtraSubagentIds); const extraMembers = storedConfig.extra_subagent_ids - .map((subagentId) => byId.get(subagentId)) - .filter((subagent): subagent is SubagentInfo => Boolean(subagent)) - .filter(canUseSubagentAsReviewTeamMember) - .map((subagent) => buildExtraMember(subagent, storedConfig, availableModelIds)); - + .filter((subagentId) => !disallowedExtraSubagentIds.has(subagentId)) + .map((subagentId) => { + const subagent = byId.get(subagentId); + if (!subagent) { + return buildUnavailableExtraMember( + subagentId, + storedConfig, + availableModelIds, + definition.strategyProfiles, + ); + } + if (!hasReviewTeamExtraMemberShape(subagent)) { + return buildExtraMember(subagent, storedConfig, availableModelIds, { + available: false, + skipReason: 'invalid_tooling', + strategyProfiles: definition.strategyProfiles, + }); + } + const toolingReadiness = evaluateReviewSubagentToolReadiness( + subagent.defaultTools ?? [], + ); + return buildExtraMember( + subagent, + storedConfig, + availableModelIds, + toolingReadiness.readiness === 'invalid' + ? 
{ + available: false, + skipReason: 'invalid_tooling', + strategyProfiles: definition.strategyProfiles, + } + : { strategyProfiles: definition.strategyProfiles }, + ); + }); + return { - id: DEFAULT_REVIEW_TEAM_ID, - name: 'Code Review Team', - description: - 'A multi-reviewer team for deep code review with mandatory logic, performance, security, architecture, conditional frontend, and quality-gate roles.', - warning: - 'Deep review may take longer and usually consumes more tokens than a standard review.', + id: definition.id, + name: definition.name, + description: definition.description, + warning: definition.warning, strategyLevel: storedConfig.strategy_level, memberStrategyOverrides: storedConfig.member_strategy_overrides, executionPolicy: executionPolicyFromStoredConfig(storedConfig), + concurrencyPolicy: concurrencyPolicyFromStoredConfig(storedConfig), + definition, members: [...coreMembers, ...extraMembers], coreMembers, extraMembers, @@ -839,24 +1697,33 @@ export function resolveDefaultReviewTeam( export async function loadDefaultReviewTeam( workspacePath?: string, ): Promise { - const [storedConfig, subagents, rawModels] = await Promise.all([ + const [definition, storedConfig, subagents, rawModels] = await Promise.all([ + loadDefaultReviewTeamDefinition(), loadDefaultReviewTeamConfig(), SubagentAPI.listSubagents({ workspacePath }), configAPI.getConfig('ai.models').catch(() => undefined), ]); return resolveDefaultReviewTeam(subagents, storedConfig, { + definition, availableModelIds: extractAvailableModelIds(rawModels), }); } interface ReviewTeamLaunchOptions { + target?: ReviewTargetClassification; reviewTargetFilePaths?: string[]; } interface ReviewTeamManifestOptions { workspacePath?: string; policySource?: ReviewTeamRunManifest['policySource']; + target?: ReviewTargetClassification; + changeStats?: Partial; + tokenBudgetMode?: ReviewTokenBudgetMode; + concurrencyPolicy?: Partial; + rateLimitStatus?: ReviewTeamRateLimitStatus | null; + strategyOverride?: 
ReviewStrategyLevel; reviewTargetFilePaths?: string[]; } @@ -864,33 +1731,32 @@ function hasExplicitReviewTarget(filePaths?: string[]): boolean { return Boolean(filePaths?.some((filePath) => filePath.trim().length > 0)); } -function isFrontendReviewTarget(filePath: string): boolean { - const normalizedPath = filePath.replace(/\\/g, '/').toLowerCase(); - return ( - normalizedPath.startsWith('src/web-ui/') || - normalizedPath.includes('/src/web-ui/') || - normalizedPath.includes('/locales/') || - normalizedPath.startsWith('locales/') || - /\.(tsx|jsx|scss|css)$/.test(normalizedPath) - ); +function resolveReviewTargetForOptions( + target: ReviewTargetClassification | undefined, + reviewTargetFilePaths: string[] | undefined, + fallbackSource: Parameters[0], +): ReviewTargetClassification { + if (target) { + return target; + } + if (hasExplicitReviewTarget(reviewTargetFilePaths)) { + return classifyReviewTargetFromFiles(reviewTargetFilePaths ?? [], 'session_files'); + } + return createUnknownReviewTargetClassification(fallbackSource); } -function isConditionalCoreMemberApplicable( +function isCoreMemberApplicableForLaunch( member: ReviewTeamMember, - reviewTargetFilePaths?: string[], + options: ReviewTeamLaunchOptions, ): boolean { - if (!member.conditional) { - return true; - } - if (!hasExplicitReviewTarget(reviewTargetFilePaths)) { - return true; - } - if (member.definitionKey === 'frontend') { - // The frontend reviewer is optional. Only include it for explicit targets - // that match the same frontend/i18n signal described in the DeepReview prompt. - return (reviewTargetFilePaths ?? 
[]).some(isFrontendReviewTarget); - } - return true; + return shouldRunCoreReviewerForTarget( + member, + resolveReviewTargetForOptions( + options.target, + options.reviewTargetFilePaths, + 'unknown', + ), + ); } export async function prepareDefaultReviewTeamForLaunch( @@ -901,7 +1767,7 @@ export async function prepareDefaultReviewTeamForLaunch( const missingCoreMembers = team.coreMembers.filter( (member) => !member.available && - isConditionalCoreMemberApplicable(member, options.reviewTargetFilePaths), + isCoreMemberApplicableForLaunch(member, options), ); if (missingCoreMembers.length > 0) { @@ -916,7 +1782,7 @@ export async function prepareDefaultReviewTeamForLaunch( (member) => member.available && !member.enabled && - isConditionalCoreMemberApplicable(member, options.reviewTargetFilePaths), + isCoreMemberApplicableForLaunch(member, options), ); if (coreMembersToEnable.length > 0) { @@ -960,10 +1826,11 @@ function toManifestMember( model: member.model || DEFAULT_REVIEW_TEAM_MODEL, configuredModel: member.configuredModel || member.model || DEFAULT_REVIEW_TEAM_MODEL, modelFallbackReason: member.modelFallbackReason, - defaultModelSlot: strategyProfile.defaultModelSlot, + defaultModelSlot: member.defaultModelSlot ?? 
strategyProfile.defaultModelSlot, strategyLevel: member.strategyLevel, strategySource: member.strategySource, - strategyDirective: roleDirective || strategyProfile.promptDirective, + strategyDirective: + member.strategyDirective || roleDirective || strategyProfile.promptDirective, locked: member.locked, source: member.source, subagentSource: member.subagentSource, @@ -971,38 +1838,1124 @@ function toManifestMember( }; } +function resolveManifestMemberModelForStrategy( + member: ReviewTeamMember, + strategyLevel: ReviewStrategyLevel, +): { + model: string; + configuredModel: string; + modelFallbackReason?: ReviewModelFallbackReason; +} { + if (member.modelFallbackReason === 'model_removed') { + return { + model: getReviewStrategyProfile(strategyLevel).defaultModelSlot, + configuredModel: member.configuredModel, + modelFallbackReason: member.modelFallbackReason, + }; + } + + return resolveMemberModel( + member.configuredModel || member.model || DEFAULT_REVIEW_TEAM_MODEL, + strategyLevel, + ); +} + +function applyTeamStrategyOverrideToMember( + member: ReviewTeamMember, + strategyLevel: ReviewStrategyLevel, +): ReviewTeamMember { + if (member.strategySource === 'member' || member.strategyLevel === strategyLevel) { + return member; + } + + const strategyProfile = getReviewStrategyProfile(strategyLevel); + const model = resolveManifestMemberModelForStrategy(member, strategyLevel); + return { + ...member, + model: model.model, + configuredModel: model.configuredModel, + modelFallbackReason: model.modelFallbackReason, + strategyOverride: DEFAULT_REVIEW_MEMBER_STRATEGY_LEVEL, + strategyLevel, + strategySource: 'team', + defaultModelSlot: strategyProfile.defaultModelSlot, + strategyDirective: + strategyProfile.roleDirectives[member.subagentId as ReviewRoleDirectiveKey] || + strategyProfile.promptDirective, + }; +} + +function shouldRunCoreReviewerForTarget( + member: ReviewTeamMember, + target: ReviewTargetClassification, +): boolean { + return 
shouldRunReviewerForTarget(member.subagentId, target); +} + +function resolveMaxExtraReviewers( + mode: ReviewTokenBudgetMode, + eligibleExtraReviewerCount: number, +): number { + if (mode === 'economy') { + return 0; + } + return eligibleExtraReviewerCount; +} + +function resolveChangeStats( + target: ReviewTargetClassification, + stats?: Partial, +): ReviewTeamChangeStats { + const fileCount = Math.max( + 0, + Math.floor( + stats?.fileCount ?? + target.files.filter((file) => !file.excluded).length, + ), + ); + const totalLinesChanged = + typeof stats?.totalLinesChanged === 'number' && + Number.isFinite(stats.totalLinesChanged) + ? Math.max(0, Math.floor(stats.totalLinesChanged)) + : undefined; + + return { + fileCount, + ...(totalLinesChanged !== undefined ? { totalLinesChanged } : {}), + lineCountSource: + totalLinesChanged !== undefined + ? stats?.lineCountSource ?? 'diff_stat' + : 'unknown', + }; +} + +const SECURITY_SENSITIVE_PATH_PATTERN = + /(^|[/._-])(auth|oauth|crypto|security|permission|permissions|secret|secrets|token|tokens|credential|credentials)([/._-]|$)/; + +function isSecuritySensitiveReviewPath(normalizedPath: string): boolean { + return SECURITY_SENSITIVE_PATH_PATTERN.test(normalizedPath.toLowerCase()); +} + +function workspaceAreaForReviewPath(normalizedPath: string): string { + const crateMatch = normalizedPath.match(/^src\/crates\/([^/]+)/); + if (crateMatch) { + return `crate:${crateMatch[1]}`; + } + + const appMatch = normalizedPath.match(/^src\/apps\/([^/]+)/); + if (appMatch) { + return `app:${appMatch[1]}`; + } + + if (normalizedPath.startsWith('src/web-ui/')) { + return 'web-ui'; + } + + if (normalizedPath.startsWith('BitFun-Installer/')) { + return 'installer'; + } + + const [root] = normalizedPath.split('/'); + return root || 'unknown'; +} + +function pluralize(count: number, singular: string): string { + return `${count} ${singular}${count === 1 ? 
'' : 's'}`; +} + +const PRE_REVIEW_SUMMARY_SAMPLE_FILE_LIMIT = 3; +const PRE_REVIEW_SUMMARY_AREA_LIMIT = 8; + +function buildPreReviewSummary( + target: ReviewTargetClassification, + changeStats: ReviewTeamChangeStats, +): ReviewTeamPreReviewSummary { + const includedFiles = target.files + .filter((file) => !file.excluded) + .map((file) => file.normalizedPath); + const excludedFileCount = target.files.length - includedFiles.length; + const allWorkspaceAreas = groupFilesByWorkspaceArea(includedFiles) + .sort((a, b) => b.files.length - a.files.length || a.index - b.index); + const workspaceAreas = allWorkspaceAreas + .slice(0, PRE_REVIEW_SUMMARY_AREA_LIMIT) + .map((area) => ({ + key: area.key, + fileCount: area.files.length, + sampleFiles: area.files.slice(0, PRE_REVIEW_SUMMARY_SAMPLE_FILE_LIMIT), + })); + const lineCount = changeStats.totalLinesChanged; + const lineCountLabel = + lineCount === undefined + ? 'unknown changed lines' + : `${lineCount} changed lines`; + const areaLabel = workspaceAreas.length > 0 + ? workspaceAreas.map((area) => `${area.key} (${area.fileCount})`).join(', ') + : 'no resolved workspace area'; + const targetTags = [...target.tags]; + const tagLabel = targetTags.filter((tag) => tag !== 'unknown').join(', ') || 'unknown'; + const omittedAreaCount = Math.max( + 0, + allWorkspaceAreas.length - workspaceAreas.length, + ); + const summaryParts = [ + `${pluralize(changeStats.fileCount, 'file')}, ${lineCountLabel} across ${pluralize(allWorkspaceAreas.length, 'workspace area')}: ${areaLabel}`, + `tags: ${tagLabel}`, + omittedAreaCount > 0 ? `${pluralize(omittedAreaCount, 'workspace area')} omitted from summary` : undefined, + ].filter(Boolean); + + return { + source: 'target_manifest', + summary: summaryParts.join('; '), + fileCount: changeStats.fileCount, + excludedFileCount, + ...(lineCount !== undefined ? 
{ lineCount } : {}), + lineCountSource: changeStats.lineCountSource, + targetTags, + workspaceAreas, + warnings: target.warnings.map((warning) => warning.code), + }; +} + +export function recommendReviewStrategyForTarget( + target: ReviewTargetClassification, + changeStats: ReviewTeamChangeStats, +): ReviewTeamStrategyRecommendation { + const includedFiles = target.files.filter((file) => !file.excluded); + const securityFileCount = includedFiles.filter((file) => + isSecuritySensitiveReviewPath(file.normalizedPath), + ).length; + const workspaceAreaCount = new Set( + includedFiles.map((file) => workspaceAreaForReviewPath(file.normalizedPath)), + ).size; + const contractSurfaceChanged = target.tags.includes('frontend_contract') || + target.tags.includes('desktop_contract') || + target.tags.includes('web_server_contract') || + target.tags.includes('api_layer') || + target.tags.includes('transport'); + const totalLinesChanged = changeStats.totalLinesChanged; + const factors: ReviewTeamRiskFactors = { + fileCount: changeStats.fileCount, + ...(totalLinesChanged !== undefined ? { totalLinesChanged } : {}), + lineCountSource: changeStats.lineCountSource, + securityFileCount, + workspaceAreaCount, + contractSurfaceChanged, + }; + + if (target.resolution === 'unknown' || changeStats.fileCount === 0) { + return { + strategyLevel: 'normal', + score: 0, + rationale: 'unresolved target; keep a conservative normal review recommendation.', + factors, + }; + } + + const lineScore = + totalLinesChanged === undefined + ? 0 + : Math.floor(totalLinesChanged / 100); + const crossAreaScore = Math.max(0, workspaceAreaCount - 1) * 2; + const score = + changeStats.fileCount + + lineScore + + securityFileCount * 3 + + crossAreaScore + + (contractSurfaceChanged ? 2 : 0); + const strategyLevel: ReviewStrategyLevel = + score <= 5 + ? 'quick' + : score <= 20 + ? 'normal' + : 'deep'; + const sizeLabel = totalLinesChanged === undefined + ? 
`${changeStats.fileCount} files, unknown lines` + : `${changeStats.fileCount} files, ${totalLinesChanged} lines`; + const riskDetails = [ + pluralize(securityFileCount, 'security-sensitive file'), + pluralize(workspaceAreaCount, 'workspace area'), + contractSurfaceChanged ? 'contract surface changed' : undefined, + ].filter(Boolean).join(', '); + const rationale = + strategyLevel === 'quick' + ? `Small change (${sizeLabel}). Quick scan sufficient.` + : strategyLevel === 'normal' + ? `Medium change (${sizeLabel}; ${riskDetails}). Standard review recommended.` + : `Large/high-risk change (${sizeLabel}; ${riskDetails}). Deep review recommended.`; + + return { + strategyLevel, + score, + rationale, + factors, + }; +} + +const REVIEW_STRATEGY_RANK: Record = { + quick: 0, + normal: 1, + deep: 2, +}; + +function crossCrateChangeCountForReviewTarget( + target: ReviewTargetClassification, +): number { + const crateNames = new Set( + target.files + .filter((file) => !file.excluded) + .map((file) => /^src\/crates\/([^/]+)/.exec(file.normalizedPath)?.[1]) + .filter((crateName): crateName is string => Boolean(crateName)), + ); + + return Math.max(0, crateNames.size - 1); +} + +function buildBackendCompatibleRiskFactors( + target: ReviewTargetClassification, + changeStats: ReviewTeamChangeStats, +): ReviewTeamBackendRiskFactors { + const includedFiles = target.files.filter((file) => !file.excluded); + + return { + fileCount: changeStats.fileCount, + totalLinesChanged: changeStats.totalLinesChanged ?? 
0, + lineCountSource: changeStats.lineCountSource, + filesInSecurityPaths: includedFiles.filter((file) => + isSecuritySensitiveReviewPath(file.normalizedPath), + ).length, + crossCrateChanges: crossCrateChangeCountForReviewTarget(target), + maxCyclomaticComplexityDelta: 0, + maxCyclomaticComplexityDeltaSource: 'not_measured', + }; +} + +function recommendBackendCompatibleStrategyForTarget( + target: ReviewTargetClassification, + changeStats: ReviewTeamChangeStats, +): ReviewTeamBackendStrategyRecommendation { + const factors = buildBackendCompatibleRiskFactors(target, changeStats); + const score = + factors.fileCount + + Math.floor(factors.totalLinesChanged / 100) + + factors.filesInSecurityPaths * 3 + + factors.crossCrateChanges * 2; + const strategyLevel: ReviewStrategyLevel = + score <= 5 + ? 'quick' + : score <= 20 + ? 'normal' + : 'deep'; + const rationale = + strategyLevel === 'quick' + ? `Backend-compatible policy sees a small change (${factors.fileCount} files, ${factors.totalLinesChanged} lines).` + : strategyLevel === 'normal' + ? 
`Backend-compatible policy sees a medium change (${factors.fileCount} files, ${factors.totalLinesChanged} lines).` + : `Backend-compatible policy sees a large/high-risk change (${factors.fileCount} files, ${factors.totalLinesChanged} lines, ${factors.filesInSecurityPaths} security files).`; + + return { + strategyLevel, + score, + rationale, + factors, + }; +} + +function resolveStrategyMismatchSeverity(params: { + finalStrategy: ReviewStrategyLevel; + frontendRecommendation: ReviewStrategyLevel; + backendRecommendation: ReviewStrategyLevel; +}): ReviewTeamStrategyMismatchSeverity { + const finalRank = REVIEW_STRATEGY_RANK[params.finalStrategy]; + const recommendedRank = Math.max( + REVIEW_STRATEGY_RANK[params.frontendRecommendation], + REVIEW_STRATEGY_RANK[params.backendRecommendation], + ); + const distance = Math.abs(finalRank - recommendedRank); + + if (distance === 0) { + return 'none'; + } + if (distance >= 2) { + return 'high'; + } + return finalRank < recommendedRank ? 'medium' : 'low'; +} + +function buildReviewStrategyDecision(params: { + teamDefaultStrategy: ReviewStrategyLevel; + finalStrategy: ReviewStrategyLevel; + userOverride?: ReviewStrategyLevel; + frontendRecommendation: ReviewTeamStrategyRecommendation; + backendRecommendation: ReviewTeamBackendStrategyRecommendation; +}): ReviewTeamStrategyDecision { + const mismatch = + params.finalStrategy !== params.frontendRecommendation.strategyLevel || + params.finalStrategy !== params.backendRecommendation.strategyLevel; + const mismatchSeverity = resolveStrategyMismatchSeverity({ + finalStrategy: params.finalStrategy, + frontendRecommendation: params.frontendRecommendation.strategyLevel, + backendRecommendation: params.backendRecommendation.strategyLevel, + }); + const recommendationSummary = [ + `frontend=${params.frontendRecommendation.strategyLevel}`, + `backend=${params.backendRecommendation.strategyLevel}`, + ].join(', '); + + return { + authority: 'mismatch_warning', + teamDefaultStrategy: 
params.teamDefaultStrategy, + ...(params.userOverride ? { userOverride: params.userOverride } : {}), + finalStrategy: params.finalStrategy, + frontendRecommendation: params.frontendRecommendation, + backendRecommendation: params.backendRecommendation, + mismatch, + mismatchSeverity, + rationale: mismatch + ? `Final strategy ${params.finalStrategy} differs from advisory recommendations (${recommendationSummary}); keep this as non-blocking launch/report metadata.` + : `Final strategy ${params.finalStrategy} matches advisory recommendations (${recommendationSummary}).`, + }; +} + +function buildWorkPacketScopeFromFiles( + target: ReviewTargetClassification, + files: string[], + group?: { index: number; count: number }, +): ReviewTeamWorkPacketScope { + return { + kind: 'review_target', + targetSource: target.source, + targetResolution: target.resolution, + targetTags: [...target.tags], + fileCount: files.length, + files, + excludedFileCount: + target.files.length - target.files.filter((file) => !file.excluded).length, + ...(group ? { groupIndex: group.index, groupCount: group.count } : {}), + }; +} + +function buildWorkPacket(params: { + member: ReviewTeamMember; + phase: ReviewTeamWorkPacket['phase']; + launchBatch: number; + scope: ReviewTeamWorkPacketScope; + timeoutSeconds: number; +}): ReviewTeamWorkPacket { + const manifestMember = toManifestMember(params.member); + const packetGroupSuffix = + params.phase === 'reviewer' && + params.scope.groupIndex !== undefined && + params.scope.groupCount !== undefined + ? 
`:group-${params.scope.groupIndex}-of-${params.scope.groupCount}` + : ''; + + return { + packetId: `${params.phase}:${manifestMember.subagentId}${packetGroupSuffix}`, + phase: params.phase, + launchBatch: params.launchBatch, + subagentId: manifestMember.subagentId, + displayName: manifestMember.displayName, + roleName: manifestMember.roleName, + assignedScope: params.scope, + allowedTools: [...params.member.allowedTools], + timeoutSeconds: params.timeoutSeconds, + requiredOutputFields: + params.phase === 'judge' + ? [...JUDGE_WORK_PACKET_REQUIRED_OUTPUT_FIELDS] + : [...REVIEWER_WORK_PACKET_REQUIRED_OUTPUT_FIELDS], + strategyLevel: manifestMember.strategyLevel, + strategyDirective: manifestMember.strategyDirective, + model: manifestMember.model || DEFAULT_REVIEW_TEAM_MODEL, + }; +} + +function splitFilesIntoGroups(files: string[], groupCount: number): string[][] { + if (groupCount <= 1) { + return [files]; + } + + const groups: string[][] = []; + let cursor = 0; + for (let index = 0; index < groupCount; index += 1) { + const remainingFiles = files.length - cursor; + const remainingGroups = groupCount - index; + const groupSize = Math.ceil(remainingFiles / remainingGroups); + groups.push(files.slice(cursor, cursor + groupSize)); + cursor += groupSize; + } + return groups; +} + +interface WorkspaceAreaFileBucket { + key: string; + index: number; + files: string[]; +} + +function groupFilesByWorkspaceArea(files: string[]): WorkspaceAreaFileBucket[] { + const buckets: WorkspaceAreaFileBucket[] = []; + const bucketByKey = new Map(); + + for (const file of files) { + const key = workspaceAreaForReviewPath(file); + let bucket = bucketByKey.get(key); + if (!bucket) { + bucket = { + key, + index: buckets.length, + files: [], + }; + buckets.push(bucket); + bucketByKey.set(key, bucket); + } + bucket.files.push(file); + } + + return buckets; +} + +function splitFilesIntoModuleAwareGroups( + files: string[], + groupCount: number, +): string[][] { + if (groupCount <= 1) { + 
return [files]; + } + + const buckets = groupFilesByWorkspaceArea(files); + if (buckets.length <= 1) { + return splitFilesIntoGroups(files, groupCount); + } + + if (buckets.length >= groupCount) { + const groups = Array.from({ length: groupCount }, () => [] as string[]); + const sortedBuckets = [...buckets].sort( + (a, b) => b.files.length - a.files.length || a.index - b.index, + ); + + for (const bucket of sortedBuckets) { + let targetIndex = 0; + for (let index = 1; index < groups.length; index += 1) { + if (groups[index].length < groups[targetIndex].length) { + targetIndex = index; + } + } + groups[targetIndex].push(...bucket.files); + } + + return groups.filter((group) => group.length > 0); + } + + const chunkCounts = buckets.map(() => 1); + let remainingChunks = groupCount - buckets.length; + while (remainingChunks > 0) { + let targetBucketIndex = -1; + let largestAverageChunkSize = 0; + + for (let index = 0; index < buckets.length; index += 1) { + if (chunkCounts[index] >= buckets[index].files.length) { + continue; + } + const averageChunkSize = buckets[index].files.length / chunkCounts[index]; + if (averageChunkSize > largestAverageChunkSize) { + largestAverageChunkSize = averageChunkSize; + targetBucketIndex = index; + } + } + + if (targetBucketIndex === -1) { + break; + } + + chunkCounts[targetBucketIndex] += 1; + remainingChunks -= 1; + } + + return buckets.flatMap((bucket, index) => + splitFilesIntoGroups(bucket.files, chunkCounts[index]), + ); +} + +function effectiveMaxSameRoleInstances(params: { + executionPolicy: ReviewTeamExecutionPolicy; + concurrencyPolicy: ReviewTeamConcurrencyPolicy; + reviewerMemberCount: number; +}): number { + const reviewerMemberCount = Math.max(1, params.reviewerMemberCount); + const maxPerRole = Math.floor( + params.concurrencyPolicy.maxParallelInstances / reviewerMemberCount, + ); + + return Math.max( + 1, + Math.min(params.executionPolicy.maxSameRoleInstances, Math.max(1, maxPerRole)), + ); +} + +function 
resolveReviewerPacketScopes( + target: ReviewTargetClassification, + executionPolicy: ReviewTeamExecutionPolicy, + concurrencyPolicy: ReviewTeamConcurrencyPolicy, + reviewerMemberCount: number, +): ReviewTeamWorkPacketScope[] { + const includedFiles = target.files + .filter((file) => !file.excluded) + .map((file) => file.normalizedPath); + const shouldSplit = + executionPolicy.reviewerFileSplitThreshold > 0 && + executionPolicy.maxSameRoleInstances > 1 && + includedFiles.length > executionPolicy.reviewerFileSplitThreshold; + + if (!shouldSplit) { + return [buildWorkPacketScopeFromFiles(target, includedFiles)]; + } + + const maxSameRoleInstances = effectiveMaxSameRoleInstances({ + executionPolicy, + concurrencyPolicy, + reviewerMemberCount, + }); + const groupCount = Math.min( + maxSameRoleInstances, + Math.ceil(includedFiles.length / executionPolicy.reviewerFileSplitThreshold), + ); + if (groupCount <= 1) { + return [buildWorkPacketScopeFromFiles(target, includedFiles)]; + } + + const fileGroups = splitFilesIntoModuleAwareGroups(includedFiles, groupCount); + return fileGroups.map((files, index) => + buildWorkPacketScopeFromFiles(target, files, { + index: index + 1, + count: fileGroups.length, + }), + ); +} + +function buildWorkPackets(params: { + reviewerMembers: ReviewTeamMember[]; + judgeMember?: ReviewTeamMember; + target: ReviewTargetClassification; + executionPolicy: ReviewTeamExecutionPolicy; + concurrencyPolicy: ReviewTeamConcurrencyPolicy; +}): ReviewTeamWorkPacket[] { + const reviewerScopes = resolveReviewerPacketScopes( + params.target, + params.executionPolicy, + params.concurrencyPolicy, + params.reviewerMembers.length, + ); + const fullScope = buildWorkPacketScopeFromFiles( + params.target, + params.target.files + .filter((file) => !file.excluded) + .map((file) => file.normalizedPath), + ); + const reviewerSeeds = params.reviewerMembers.flatMap((member) => + reviewerScopes.map((scope) => ({ member, scope })), + ); + const orderedReviewerSeeds = 
params.concurrencyPolicy.batchExtrasSeparately + ? [ + ...reviewerSeeds.filter((seed) => seed.member.source === 'core'), + ...reviewerSeeds.filter((seed) => seed.member.source === 'extra'), + ] + : reviewerSeeds; + const reviewerPackets = orderedReviewerSeeds.map((seed, index) => + buildWorkPacket({ + member: seed.member, + phase: 'reviewer', + launchBatch: + Math.floor(index / params.concurrencyPolicy.maxParallelInstances) + 1, + scope: seed.scope, + timeoutSeconds: params.executionPolicy.reviewerTimeoutSeconds, + }), + ); + const finalReviewerBatch = reviewerPackets.reduce( + (maxBatch, packet) => Math.max(maxBatch, packet.launchBatch), + 0, + ); + const judgePacket = params.judgeMember + ? [ + buildWorkPacket({ + member: params.judgeMember, + phase: 'judge', + launchBatch: finalReviewerBatch + 1, + scope: fullScope, + timeoutSeconds: params.executionPolicy.judgeTimeoutSeconds, + }), + ] + : []; + + return [...reviewerPackets, ...judgePacket]; +} + +const SHARED_CONTEXT_CACHE_ENTRY_LIMIT = 80; +const SHARED_CONTEXT_CACHE_RECOMMENDED_TOOLS: ReviewTeamSharedContextTool[] = [ + 'GetFileDiff', + 'Read', +]; + +function buildSharedContextCachePlan( + workPackets: ReviewTeamWorkPacket[] = [], +): ReviewTeamSharedContextCachePlan { + const fileContextByPath = new Map< + string, + { + path: string; + workspaceArea: string; + consumerPacketIds: string[]; + firstSeenIndex: number; + } + >(); + let nextSeenIndex = 0; + + for (const packet of workPackets) { + if (packet.phase !== 'reviewer') { + continue; + } + + for (const path of packet.assignedScope.files) { + let entry = fileContextByPath.get(path); + if (!entry) { + entry = { + path, + workspaceArea: workspaceAreaForReviewPath(path), + consumerPacketIds: [], + firstSeenIndex: nextSeenIndex, + }; + nextSeenIndex += 1; + fileContextByPath.set(path, entry); + } + if (!entry.consumerPacketIds.includes(packet.packetId)) { + entry.consumerPacketIds.push(packet.packetId); + } + } + } + + const repeatedFileContexts = 
Array.from(fileContextByPath.values()) + .filter((entry) => entry.consumerPacketIds.length > 1) + .sort((a, b) => a.firstSeenIndex - b.firstSeenIndex); + const entries = repeatedFileContexts + .slice(0, SHARED_CONTEXT_CACHE_ENTRY_LIMIT) + .map((entry, index) => ({ + cacheKey: `shared-context:${index + 1}`, + path: entry.path, + workspaceArea: entry.workspaceArea, + recommendedTools: [...SHARED_CONTEXT_CACHE_RECOMMENDED_TOOLS], + consumerPacketIds: entry.consumerPacketIds, + })); + + return { + source: 'work_packets', + strategy: 'reuse_readonly_file_context_by_cache_key', + entries, + omittedEntryCount: Math.max( + 0, + repeatedFileContexts.length - SHARED_CONTEXT_CACHE_ENTRY_LIMIT, + ), + }; +} + +const INCREMENTAL_REVIEW_CACHE_INVALIDATIONS: ReviewTeamIncrementalReviewCacheInvalidation[] = [ + 'target_file_set_changed', + 'target_line_count_changed', + 'target_tag_changed', + 'target_warning_changed', + 'reviewer_roster_changed', + 'strategy_changed', +]; + +function stableFingerprint(input: unknown): string { + const serialized = JSON.stringify(input); + let hash = 0x811c9dc5; + for (let index = 0; index < serialized.length; index += 1) { + hash ^= serialized.charCodeAt(index); + hash = Math.imul(hash, 0x01000193); + } + return (hash >>> 0).toString(16).padStart(8, '0'); +} + +function buildIncrementalReviewCachePlan(params: { + target: ReviewTargetClassification; + changeStats: ReviewTeamChangeStats; + strategyLevel: ReviewStrategyLevel; + workPackets: ReviewTeamWorkPacket[]; +}): ReviewTeamIncrementalReviewCachePlan { + const filePaths = params.target.files + .filter((file) => !file.excluded) + .map((file) => file.normalizedPath) + .sort((a, b) => a.localeCompare(b)); + const workspaceAreas = Array.from( + new Set(filePaths.map((file) => workspaceAreaForReviewPath(file))), + ).sort((a, b) => a.localeCompare(b)); + const targetTags = [...params.target.tags].sort((a, b) => a.localeCompare(b)); + const targetWarnings = params.target.warnings + .map((warning) => 
warning.code) + .sort((a, b) => a.localeCompare(b)); + const reviewerPacketIds = params.workPackets + .filter((packet) => packet.phase === 'reviewer') + .map((packet) => packet.packetId) + .sort((a, b) => a.localeCompare(b)); + const fingerprint = stableFingerprint({ + source: params.target.source, + resolution: params.target.resolution, + filePaths, + workspaceAreas, + targetTags, + targetWarnings, + lineCount: params.changeStats.totalLinesChanged ?? null, + lineCountSource: params.changeStats.lineCountSource, + reviewerPacketIds, + strategyLevel: params.strategyLevel, + }); + + return { + source: 'target_manifest', + strategy: 'reuse_completed_packets_when_fingerprint_matches', + cacheKey: `incremental-review:${fingerprint}`, + fingerprint, + filePaths, + workspaceAreas, + targetTags, + reviewerPacketIds, + ...(params.changeStats.totalLinesChanged !== undefined + ? { lineCount: params.changeStats.totalLinesChanged } + : {}), + lineCountSource: params.changeStats.lineCountSource, + invalidatesOn: [...INCREMENTAL_REVIEW_CACHE_INVALIDATIONS], + }; +} + +function predictTimeoutSeconds(params: { + role: 'reviewer' | 'judge'; + strategyLevel: ReviewStrategyLevel; + changeStats: ReviewTeamChangeStats; + reviewerCount: number; +}): number { + const totalLinesChanged = params.changeStats.totalLinesChanged ?? 0; + const base = PREDICTIVE_TIMEOUT_BASE_SECONDS[params.strategyLevel]; + const raw = + base + + params.changeStats.fileCount * PREDICTIVE_TIMEOUT_PER_FILE_SECONDS + + Math.floor(totalLinesChanged / 100) * + PREDICTIVE_TIMEOUT_PER_100_LINES_SECONDS; + const reviewerCount = Math.max(1, params.reviewerCount); + const multiplier = + params.role === 'judge' + ? 
1 + Math.floor((reviewerCount - 1) / 3) + : 1; + + return Math.min(raw * multiplier, MAX_PREDICTIVE_TIMEOUT_SECONDS); +} + +function buildEffectiveExecutionPolicy(params: { + basePolicy: ReviewTeamExecutionPolicy; + strategyLevel: ReviewStrategyLevel; + target: ReviewTargetClassification; + changeStats: ReviewTeamChangeStats; + reviewerCount: number; +}): ReviewTeamExecutionPolicy { + if ( + params.target.resolution === 'unknown' && + params.changeStats.fileCount === 0 && + params.changeStats.totalLinesChanged === undefined + ) { + return params.basePolicy; + } + + const reviewerTimeoutSeconds = predictTimeoutSeconds({ + role: 'reviewer', + strategyLevel: params.strategyLevel, + changeStats: params.changeStats, + reviewerCount: params.reviewerCount, + }); + const judgeTimeoutSeconds = predictTimeoutSeconds({ + role: 'judge', + strategyLevel: params.strategyLevel, + changeStats: params.changeStats, + reviewerCount: params.reviewerCount, + }); + + return { + ...params.basePolicy, + reviewerTimeoutSeconds: + params.basePolicy.reviewerTimeoutSeconds === 0 + ? 0 + : Math.max( + params.basePolicy.reviewerTimeoutSeconds, + reviewerTimeoutSeconds, + ), + judgeTimeoutSeconds: + params.basePolicy.judgeTimeoutSeconds === 0 + ? 
0 + : Math.max( + params.basePolicy.judgeTimeoutSeconds, + judgeTimeoutSeconds, + ), + }; +} + +function estimateChangedLinesForScope(params: { + scope: ReviewTeamWorkPacketScope; + changeStats: ReviewTeamChangeStats; + totalIncludedFileCount: number; +}): number { + if (params.changeStats.totalLinesChanged === undefined) { + return params.scope.fileCount * PROMPT_BYTE_ESTIMATE_UNKNOWN_LINES_PER_FILE; + } + + if (params.totalIncludedFileCount <= 0) { + return params.changeStats.totalLinesChanged; + } + + return Math.ceil( + params.changeStats.totalLinesChanged * + (params.scope.fileCount / params.totalIncludedFileCount), + ); +} + +function estimateReviewerPromptBytes(params: { + packet: ReviewTeamWorkPacket; + changeStats: ReviewTeamChangeStats; + totalIncludedFileCount: number; +}): number { + const pathBytes = params.packet.assignedScope.files.reduce( + (total, filePath) => total + filePath.length + 1, + 0, + ); + const estimatedChangedLines = estimateChangedLinesForScope({ + scope: params.packet.assignedScope, + changeStats: params.changeStats, + totalIncludedFileCount: params.totalIncludedFileCount, + }); + + return Math.ceil( + PROMPT_BYTE_ESTIMATE_BASE_BYTES + + pathBytes + + params.packet.assignedScope.fileCount * PROMPT_BYTE_ESTIMATE_PER_FILE_BYTES + + estimatedChangedLines * PROMPT_BYTE_ESTIMATE_PER_CHANGED_LINE_BYTES, + ); +} + +function estimateMaxReviewerPromptBytes(params: { + workPackets: ReviewTeamWorkPacket[]; + target: ReviewTargetClassification; + changeStats: ReviewTeamChangeStats; +}): number { + const reviewerPackets = params.workPackets.filter( + (packet) => packet.phase === 'reviewer', + ); + const totalIncludedFileCount = params.target.files.filter( + (file) => !file.excluded, + ).length; + + if (reviewerPackets.length === 0) { + return PROMPT_BYTE_ESTIMATE_BASE_BYTES; + } + + return Math.max( + ...reviewerPackets.map((packet) => + estimateReviewerPromptBytes({ + packet, + changeStats: params.changeStats, + totalIncludedFileCount, + }), + 
), + ); +} + +function buildTokenBudgetPlan(params: { + mode: ReviewTokenBudgetMode; + activeReviewerCalls: number; + eligibleExtraReviewerCount: number; + maxExtraReviewers: number; + skippedReviewerIds: string[]; + target: ReviewTargetClassification; + changeStats: ReviewTeamChangeStats; + executionPolicy: ReviewTeamExecutionPolicy; + workPackets: ReviewTeamWorkPacket[]; +}): ReviewTeamTokenBudgetPlan { + const includedFileCount = params.target.files.filter( + (file) => !file.excluded, + ).length; + const fileSplitGuardrailActive = + params.executionPolicy.reviewerFileSplitThreshold > 0 && + includedFileCount > params.executionPolicy.reviewerFileSplitThreshold; + const maxPromptBytesPerReviewer = + TOKEN_BUDGET_PROMPT_BYTE_LIMIT_BY_MODE[params.mode]; + const estimatedPromptBytesPerReviewer = estimateMaxReviewerPromptBytes({ + workPackets: params.workPackets, + target: params.target, + changeStats: params.changeStats, + }); + const promptByteLimitExceeded = + estimatedPromptBytesPerReviewer > maxPromptBytesPerReviewer; + const largeDiffSummaryFirst = promptByteLimitExceeded; + const decisions: ReviewTeamTokenBudgetDecision[] = []; + const warnings: string[] = []; + + if (promptByteLimitExceeded) { + decisions.push({ + kind: 'summary_first_full_scope', + reason: 'prompt_bytes_exceeded', + detail: + `Estimated reviewer prompt ${estimatedPromptBytesPerReviewer} bytes exceeds ${maxPromptBytesPerReviewer} bytes for ${params.mode} budget; use summary-first while keeping every assigned_scope file visible.`, + }); + warnings.push( + 'Estimated reviewer prompt exceeds the selected token budget; use summary-first without hiding assigned files.', + ); + } + + if (params.skippedReviewerIds.length > 0) { + decisions.push({ + kind: 'skip_extra_reviewers', + reason: 'extra_reviewers_skipped', + detail: + 'Some extra reviewers were skipped by the selected token budget mode.', + affectedReviewerIds: [...params.skippedReviewerIds], + }); + warnings.push( + 'Some extra reviewers 
were skipped by the selected token budget mode.', + ); + } + + return { + mode: params.mode, + estimatedReviewerCalls: params.activeReviewerCalls, + maxReviewerCalls: + params.activeReviewerCalls + + Math.max(0, params.eligibleExtraReviewerCount - params.maxExtraReviewers), + maxExtraReviewers: params.maxExtraReviewers, + ...(fileSplitGuardrailActive + ? { maxFilesPerReviewer: params.executionPolicy.reviewerFileSplitThreshold } + : {}), + maxPromptBytesPerReviewer, + estimatedPromptBytesPerReviewer, + promptByteEstimateSource: 'manifest_heuristic', + promptByteLimitExceeded, + largeDiffSummaryFirst, + decisions, + skippedReviewerIds: params.skippedReviewerIds, + warnings, + }; +} + export function buildEffectiveReviewTeamManifest( team: ReviewTeam, options: ReviewTeamManifestOptions = {}, ): ReviewTeamRunManifest { - const availableCoreMembers = team.coreMembers.filter((member) => member.available); - const unavailableCoreMembers = team.coreMembers.filter((member) => !member.available); - const inapplicableCoreMembers = availableCoreMembers.filter( - (member) => !isConditionalCoreMemberApplicable(member, options.reviewTargetFilePaths), + const target = resolveReviewTargetForOptions( + options.target, + options.reviewTargetFilePaths, + 'unknown', + ); + const tokenBudgetMode = options.tokenBudgetMode ?? 'balanced'; + const changeStats = resolveChangeStats(target, options.changeStats); + const baseConcurrencyPolicy = normalizeConcurrencyPolicy(team.concurrencyPolicy); + const concurrencyPolicy = applyRateLimitToConcurrencyPolicy( + normalizeConcurrencyPolicy({ + ...baseConcurrencyPolicy, + ...options.concurrencyPolicy, + }), + options.rateLimitStatus, + ); + const strategyLevel = options.strategyOverride ?? 
team.strategyLevel; + const strategyRecommendation = recommendReviewStrategyForTarget(target, changeStats); + const backendStrategyRecommendation = recommendBackendCompatibleStrategyForTarget( + target, + changeStats, + ); + const strategyDecision = buildReviewStrategyDecision({ + teamDefaultStrategy: team.strategyLevel, + finalStrategy: strategyLevel, + ...(options.strategyOverride ? { userOverride: options.strategyOverride } : {}), + frontendRecommendation: strategyRecommendation, + backendRecommendation: backendStrategyRecommendation, + }); + const preReviewSummary = buildPreReviewSummary(target, changeStats); + const coreMembers = team.coreMembers.map((member) => + applyTeamStrategyOverrideToMember(member, strategyLevel), + ); + const extraMembers = team.extraMembers.map((member) => + applyTeamStrategyOverrideToMember(member, strategyLevel), ); - const applicableCoreMembers = availableCoreMembers.filter( - (member) => isConditionalCoreMemberApplicable(member, options.reviewTargetFilePaths), + const availableCoreMembers = coreMembers.filter((member) => member.available); + const unavailableCoreMembers = coreMembers.filter((member) => !member.available); + const notApplicableCoreMembers = availableCoreMembers.filter( + (member) => + member.definitionKey !== 'judge' && + !shouldRunCoreReviewerForTarget(member, target), ); - const coreReviewers = applicableCoreMembers + const coreReviewerMembers = availableCoreMembers .filter((member) => member.definitionKey !== 'judge') - .map((member) => toManifestMember(member)); - const qualityGateReviewer = applicableCoreMembers.find( + .filter((member) => shouldRunCoreReviewerForTarget(member, target)); + const coreReviewers = coreReviewerMembers.map((member) => toManifestMember(member)); + const qualityGateReviewerMember = availableCoreMembers.find( (member) => member.definitionKey === 'judge', ); - const enabledExtraReviewers = team.extraMembers - .filter((member) => member.available && member.enabled) + const 
qualityGateReviewer = qualityGateReviewerMember + ? toManifestMember(qualityGateReviewerMember) + : undefined; + const eligibleExtraMembers = extraMembers + .filter((member) => member.available && member.enabled); + const maxExtraReviewers = resolveMaxExtraReviewers( + tokenBudgetMode, + eligibleExtraMembers.length, + ); + const enabledExtraMembers = eligibleExtraMembers.slice(0, maxExtraReviewers); + const budgetLimitedExtraMembers = eligibleExtraMembers.slice(maxExtraReviewers); + const enabledExtraReviewers = enabledExtraMembers .map((member) => toManifestMember(member)); + const reviewerCount = coreReviewers.length + enabledExtraReviewers.length; + const executionPolicy = buildEffectiveExecutionPolicy({ + basePolicy: team.executionPolicy, + strategyLevel, + target, + changeStats, + reviewerCount, + }); + const workPackets = buildWorkPackets({ + reviewerMembers: [...coreReviewerMembers, ...enabledExtraMembers], + judgeMember: qualityGateReviewerMember, + target, + executionPolicy, + concurrencyPolicy, + }); + const sharedContextCache = buildSharedContextCachePlan(workPackets); + const incrementalReviewCache = buildIncrementalReviewCachePlan({ + target, + changeStats, + strategyLevel, + workPackets, + }); + const tokenBudget = buildTokenBudgetPlan({ + mode: tokenBudgetMode, + activeReviewerCalls: workPackets.length, + eligibleExtraReviewerCount: eligibleExtraMembers.length, + maxExtraReviewers, + skippedReviewerIds: budgetLimitedExtraMembers.map((member) => member.subagentId), + target, + changeStats, + executionPolicy, + workPackets, + }); const skippedReviewers = [ - ...team.extraMembers + ...extraMembers .filter((member) => !member.available || !member.enabled) .map((member) => - toManifestMember(member, member.available ? 'disabled' : 'unavailable'), + toManifestMember( + member, + member.skipReason ?? (member.available ? 
'disabled' : 'unavailable'), + ), ), + ...budgetLimitedExtraMembers.map((member) => + toManifestMember(member, 'budget_limited'), + ), ...unavailableCoreMembers.map((member) => toManifestMember(member, 'unavailable'), ), - ...inapplicableCoreMembers.map((member) => - toManifestMember(member, 'non_applicable'), + ...notApplicableCoreMembers.map((member) => + toManifestMember(member, 'not_applicable'), ), ]; @@ -1010,14 +2963,22 @@ export function buildEffectiveReviewTeamManifest( reviewMode: 'deep', ...(options.workspacePath ? { workspacePath: options.workspacePath } : {}), policySource: options.policySource ?? 'default-review-team-config', - strategyLevel: team.strategyLevel, - executionPolicy: team.executionPolicy, + target, + strategyLevel, + strategyRecommendation, + strategyDecision, + executionPolicy, + concurrencyPolicy, + changeStats, + preReviewSummary, + sharedContextCache, + incrementalReviewCache, + tokenBudget, coreReviewers, - ...(qualityGateReviewer - ? { qualityGateReviewer: toManifestMember(qualityGateReviewer) } - : {}), + ...(qualityGateReviewer ? 
{ qualityGateReviewer } : {}), enabledExtraReviewers, skippedReviewers, + workPackets, }; } @@ -1025,8 +2986,11 @@ function formatResponsibilities(items: string[]): string { return items.map((item) => ` - ${item}`).join('\n'); } -function formatStrategyImpact(strategyLevel: ReviewStrategyLevel): string { - const definition = getReviewStrategyProfile(strategyLevel); +function formatStrategyImpact( + strategyLevel: ReviewStrategyLevel, + strategyProfiles: Record = REVIEW_STRATEGY_PROFILES, +): string { + const definition = strategyProfiles[strategyLevel]; return `Token/time impact: approximately ${definition.tokenImpact} token usage and ${definition.runtimeImpact} runtime.`; } @@ -1047,6 +3011,116 @@ function formatManifestList( .join(', '); } +function workPacketToPromptPayload(packet: ReviewTeamWorkPacket) { + return { + packet_id: packet.packetId, + phase: packet.phase, + launch_batch: packet.launchBatch, + subagent_type: packet.subagentId, + display_name: packet.displayName, + role: packet.roleName, + assigned_scope: { + kind: packet.assignedScope.kind, + target_source: packet.assignedScope.targetSource, + target_resolution: packet.assignedScope.targetResolution, + target_tags: packet.assignedScope.targetTags, + file_count: packet.assignedScope.fileCount, + files: packet.assignedScope.files, + excluded_file_count: packet.assignedScope.excludedFileCount, + ...(packet.assignedScope.groupIndex !== undefined + ? { group_index: packet.assignedScope.groupIndex } + : {}), + ...(packet.assignedScope.groupCount !== undefined + ? 
{ group_count: packet.assignedScope.groupCount } + : {}), + }, + allowed_tools: packet.allowedTools, + timeout_seconds: packet.timeoutSeconds, + required_output_fields: packet.requiredOutputFields, + strategy: packet.strategyLevel, + model_id: packet.model, + prompt_directive: packet.strategyDirective, + }; +} + +function formatWorkPacketBlock(workPackets: ReviewTeamWorkPacket[] = []): string { + if (workPackets.length === 0) { + return '- none'; + } + + return [ + '```json', + JSON.stringify(workPackets.map(workPacketToPromptPayload), null, 2), + '```', + ].join('\n'); +} + +function formatPreReviewSummaryBlock(summary: ReviewTeamPreReviewSummary): string { + return [ + 'Pre-generated diff summary:', + '```json', + JSON.stringify(summary, null, 2), + '```', + ].join('\n'); +} + +function sharedContextCacheToPromptPayload(plan: ReviewTeamSharedContextCachePlan) { + return { + source: plan.source, + strategy: plan.strategy, + omitted_entry_count: plan.omittedEntryCount, + entries: plan.entries.map((entry) => ({ + cache_key: entry.cacheKey, + path: entry.path, + workspace_area: entry.workspaceArea, + recommended_tools: entry.recommendedTools, + consumer_packet_ids: entry.consumerPacketIds, + })), + }; +} + +function formatSharedContextCacheBlock(plan: ReviewTeamSharedContextCachePlan): string { + return [ + 'Shared context cache plan:', + '```json', + JSON.stringify(sharedContextCacheToPromptPayload(plan), null, 2), + '```', + ].join('\n'); +} + +function incrementalReviewCacheToPromptPayload(plan: ReviewTeamIncrementalReviewCachePlan) { + return { + source: plan.source, + strategy: plan.strategy, + cache_key: plan.cacheKey, + fingerprint: plan.fingerprint, + file_paths: plan.filePaths, + workspace_areas: plan.workspaceAreas, + target_tags: plan.targetTags, + reviewer_packet_ids: plan.reviewerPacketIds, + ...(plan.lineCount !== undefined ? 
{ line_count: plan.lineCount } : {}), + line_count_source: plan.lineCountSource, + invalidates_on: plan.invalidatesOn, + }; +} + +function formatIncrementalReviewCacheBlock(plan: ReviewTeamIncrementalReviewCachePlan): string { + return [ + 'Incremental review cache plan:', + '```json', + JSON.stringify(incrementalReviewCacheToPromptPayload(plan), null, 2), + '```', + ].join('\n'); +} + +function formatTokenBudgetDecisionKinds( + decisions: ReviewTeamTokenBudgetDecision[] = [], +): string { + return decisions.length > 0 + ? decisions.map((decision) => decision.kind).join(', ') + : 'none'; +} + export function buildReviewTeamPromptBlock( team: ReviewTeam, manifest = buildEffectiveReviewTeamManifest(team), @@ -1093,17 +3167,62 @@ export function buildReviewTeamPromptBlock( }) .join('\n'); const executionPolicy = [ - `- reviewer_timeout_seconds: ${team.executionPolicy.reviewerTimeoutSeconds}`, - `- judge_timeout_seconds: ${team.executionPolicy.judgeTimeoutSeconds}`, - `- reviewer_file_split_threshold: ${team.executionPolicy.reviewerFileSplitThreshold}`, - `- max_same_role_instances: ${team.executionPolicy.maxSameRoleInstances}`, + `- reviewer_timeout_seconds: ${manifest.executionPolicy.reviewerTimeoutSeconds}`, + `- judge_timeout_seconds: ${manifest.executionPolicy.judgeTimeoutSeconds}`, + `- reviewer_file_split_threshold: ${manifest.executionPolicy.reviewerFileSplitThreshold}`, + `- max_same_role_instances: ${manifest.executionPolicy.maxSameRoleInstances}`, + `- max_retries_per_role: ${manifest.executionPolicy.maxRetriesPerRole}`, ].join('\n'); + const concurrencyPolicy = [ + `- max_parallel_instances: ${manifest.concurrencyPolicy.maxParallelInstances}`, + `- stagger_seconds: ${manifest.concurrencyPolicy.staggerSeconds}`, + `- max_queue_wait_seconds: ${manifest.concurrencyPolicy.maxQueueWaitSeconds}`, + `- batch_extras_separately: ${manifest.concurrencyPolicy.batchExtrasSeparately ? 
'yes' : 'no'}`, + `- allow_provider_capacity_queue: ${manifest.concurrencyPolicy.allowProviderCapacityQueue ? 'yes' : 'no'}`, + `- allow_bounded_auto_retry: ${manifest.concurrencyPolicy.allowBoundedAutoRetry ? 'yes' : 'no'}`, + `- auto_retry_elapsed_guard_seconds: ${manifest.concurrencyPolicy.autoRetryElapsedGuardSeconds}`, + ].join('\n'); + const targetLineCount = + manifest.changeStats?.totalLinesChanged !== undefined + ? `${manifest.changeStats.totalLinesChanged}` + : 'unknown'; const manifestBlock = [ 'Run manifest:', `- review_mode: ${manifest.reviewMode}`, `- team_strategy: ${manifest.strategyLevel}`, + `- strategy_authority: ${manifest.strategyDecision.authority}`, + `- final_strategy: ${manifest.strategyDecision.finalStrategy}`, + `- frontend_recommended_strategy: ${manifest.strategyDecision.frontendRecommendation.strategyLevel}`, + `- backend_recommended_strategy: ${manifest.strategyDecision.backendRecommendation.strategyLevel}`, + `- strategy_user_override: ${manifest.strategyDecision.userOverride ?? 'none'}`, + `- strategy_mismatch: ${manifest.strategyDecision.mismatch ? 'yes' : 'no'}`, + `- strategy_mismatch_severity: ${manifest.strategyDecision.mismatchSeverity}`, + `- max_cyclomatic_complexity_delta: ${manifest.strategyDecision.backendRecommendation.factors.maxCyclomaticComplexityDelta}`, + `- max_cyclomatic_complexity_delta_source: ${manifest.strategyDecision.backendRecommendation.factors.maxCyclomaticComplexityDeltaSource}`, + ...(manifest.strategyRecommendation + ? 
[ + `- recommended_strategy: ${manifest.strategyRecommendation.strategyLevel}`, + `- strategy_recommendation_score: ${manifest.strategyRecommendation.score}`, + `- strategy_recommendation_rationale: ${manifest.strategyRecommendation.rationale}`, + ] + : []), `- workspace_path: ${manifest.workspacePath || 'inherited from current session'}`, `- policy_source: ${manifest.policySource}`, + `- target_source: ${manifest.target.source}`, + `- target_resolution: ${manifest.target.resolution}`, + `- target_tags: ${manifest.target.tags.join(', ') || 'none'}`, + `- target_warnings: ${manifest.target.warnings.map((warning) => warning.code).join(', ') || 'none'}`, + `- target_file_count: ${manifest.changeStats?.fileCount ?? manifest.target.files.length}`, + `- target_line_count: ${targetLineCount}`, + `- target_line_count_source: ${manifest.changeStats?.lineCountSource ?? 'unknown'}`, + `- token_budget_mode: ${manifest.tokenBudget.mode}`, + `- estimated_reviewer_calls: ${manifest.tokenBudget.estimatedReviewerCalls}`, + `- max_prompt_bytes_per_reviewer: ${manifest.tokenBudget.maxPromptBytesPerReviewer ?? 'none'}`, + `- estimated_prompt_bytes_per_reviewer: ${manifest.tokenBudget.estimatedPromptBytesPerReviewer ?? 'unknown'}`, + `- prompt_byte_estimate_source: ${manifest.tokenBudget.promptByteEstimateSource ?? 'none'}`, + `- prompt_byte_limit_exceeded: ${manifest.tokenBudget.promptByteLimitExceeded ? 
'yes' : 'no'}`, + `- token_budget_decisions: ${formatTokenBudgetDecisionKinds(manifest.tokenBudget.decisions)}`, + `- budget_limited_reviewers: ${manifest.tokenBudget.skippedReviewerIds.join(', ') || 'none'}`, `- core_reviewers: ${formatManifestList(manifest.coreReviewers, 'none')}`, `- quality_gate_reviewer: ${manifest.qualityGateReviewer?.subagentId || 'none'}`, `- enabled_extra_reviewers: ${formatManifestList(manifest.enabledExtraReviewers, 'none')}`, @@ -1114,15 +3233,16 @@ export function buildReviewTeamPromptBlock( ) : [' - none']), ].join('\n'); + const strategyProfiles = team.definition?.strategyProfiles ?? REVIEW_STRATEGY_PROFILES; const strategyRules = REVIEW_STRATEGY_LEVELS.map((level) => { - const definition = getReviewStrategyProfile(level); + const definition = strategyProfiles[level]; const roleEntries = Object.entries(definition.roleDirectives) as [ReviewRoleDirectiveKey, string][]; const roleLines = roleEntries.map( ([role, directive]) => ` - ${role}: ${directive}`, ); return [ `- ${level}: ${definition.summary}`, - ` - ${formatStrategyImpact(level)}`, + ` - ${formatStrategyImpact(level, strategyProfiles)}`, ` - Default model slot: ${definition.defaultModelSlot}`, ` - Prompt directive (fallback): ${definition.promptDirective}`, ` - Role-specific directives:`, @@ -1135,25 +3255,57 @@ export function buildReviewTeamPromptBlock( return [ manifestBlock, + formatPreReviewSummaryBlock(manifest.preReviewSummary), + formatSharedContextCacheBlock(manifest.sharedContextCache), + formatIncrementalReviewCacheBlock(manifest.incrementalReviewCache), + 'Review work packets:', + formatWorkPacketBlock(manifest.workPackets), + 'Work packet rules:', + '- Each reviewer Task prompt must include the matching work packet verbatim.', + '- Include the packet_id in each Task description, for example "Security review [packet reviewer:ReviewSecurity:group-1-of-3]".', + '- Each reviewer and judge response must echo packet_id and set status to completed, partial_timeout, 
timed_out, cancelled_by_user, failed, or skipped.', + '- If the reviewer reports packet_id itself, mark reviewers[].packet_status_source as reported in the final submit_code_review payload.', + '- If the reviewer omits packet_id but the Task was launched from a packet, infer the packet_id from the Task description or work packet and mark packet_status_source as inferred.', + '- If packet_id cannot be reported or inferred, mark packet_status_source as missing and explain the confidence impact in coverage_notes.', + '- If a reviewer response is missing packet_id or status, the judge must treat that reviewer output as lower confidence instead of discarding the whole review.', + '- Use the pre-generated diff summary for initial orientation and token discipline, but verify claims against assigned files or diffs before reporting findings.', + '- When prompt_byte_limit_exceeded is yes, use the pre-generated diff summary before detailed reads. Do not remove files from assigned_scope or hide unreviewed files; if a file cannot be covered, report it in coverage_notes and reliability_signals.', + '- Use shared_context_cache entries to reuse read-only GetFileDiff/Read context by cache_key across reviewer packets. Do not duplicate full-file reads when a reusable cached diff or file summary already covers the same path.', + '- Use incremental_review_cache only when the target fingerprint matches a prior run; preserve completed reviewer outputs by packet_id and rerun only missing, failed, timed-out, or stale packets. 
If any invalidates_on condition changed, ignore the cache and explain the fresh review boundary.', + '- The assigned_scope is the default scope for that packet; only widen it when a critical cross-file dependency requires it and note the reason in coverage_notes.', 'Configured code review team:', members || '- No team members available.', 'Execution policy:', executionPolicy, + 'Concurrency policy:', + concurrencyPolicy, 'Team execution rules:', - '- Always run the four locked core reviewer roles first: ReviewBusinessLogic, ReviewPerformance, ReviewSecurity, and ReviewArchitecture.', + '- Run only reviewers listed in core_reviewers and enabled_extra_reviewers.', + '- Do not launch skipped_reviewers.', + '- If a skipped reviewer has reason not_applicable, mention it in coverage notes without treating it as reduced confidence.', + '- If a skipped reviewer has reason budget_limited, mention the budget mode and the coverage tradeoff.', + '- If a skipped reviewer has reason invalid_tooling, report it as a configuration issue and do not reduce confidence in the reviewers that did run.', + '- If target_resolution is unknown, conditional reviewers may be activated conservatively; report that as coverage context.', + `- Run the active core reviewer roles first: ${formatManifestList(manifest.coreReviewers, 'none')}.`, + '- Launch reviewer Tasks by launch_batch. Do not launch a later reviewer batch until every reviewer Task in the earlier batch has completed, failed, timed out, or returned partial_timeout.', + '- Never launch more reviewer Tasks in one batch than max_parallel_instances. 
If stagger_seconds is greater than 0, wait that many seconds before starting the next launch_batch.', '- Run ReviewJudge only after the reviewer batch finishes, as the quality-gate pass.', - '- If the Frontend Reviewer is enabled, run it in parallel with the locked reviewers whenever the change contains frontend files (src/web-ui/, .tsx, .scss, .css, locales/).', '- If other extra reviewers are configured and enabled, run them in parallel with the locked reviewers whenever possible.', '- When a configured member entry provides model_id, pass model_id with that value to the matching Task call.', '- If reviewer_timeout_seconds is greater than 0, pass timeout_seconds with that value to every reviewer Task call.', '- If judge_timeout_seconds is greater than 0, pass timeout_seconds with that value to the ReviewJudge Task call.', - '- If reviewer_file_split_threshold is greater than 0 and the target file count exceeds it, split files across multiple same-role reviewer instances (up to max_same_role_instances per role). Launch all split instances in the same parallel message.', - '- When file splitting is active, each same-role instance must only review its assigned file group. Label instances in the Task description (e.g. "Security review [group 1/3]").', + '- If a reviewer Task returns status partial_timeout, treat its output as partial evidence: preserve it in reviewers[].partial_output, mark the reviewer status partial_timeout, and mention the confidence impact in coverage_notes.', + '- If a reviewer fails or times out without useful partial output, retry that same reviewer at most max_retries_per_role times: reduce its scope, downgrade strategy by one level when possible, use a shorter timeout, and set retry to true on the retry Task call.', + '- In the final submit_code_review payload, populate reliability_signals for context_pressure, compression_preserved, partial_reviewer, and user_decision when those conditions apply. 
Use severity info/warning/action, count when useful, and source runtime/manifest/report/inferred.', + '- If reviewer_file_split_threshold is greater than 0 and the target file count exceeds it, split files across multiple same-role reviewer instances only up to the concurrency-capped max_same_role_instances for this run.', + '- Prefer module/workspace-area coherent file groups when splitting reviewer work; avoid mixing unrelated workspace areas in the same packet when the group budget allows it.', + '- When file splitting is active, each same-role instance must only review its assigned file group. Label instances in the Task description with both group and packet_id (e.g. "Security review [group 1/3] [packet reviewer:ReviewSecurity:group-1-of-3]").', '- Do not run ReviewFixer during the review pass.', '- Wait for explicit user approval before starting any remediation.', '- The Review Quality Inspector acts as a third-party arbiter: it primarily examines reviewer reports for logical consistency and evidence quality, and only uses code inspection tools for targeted spot-checks when a specific claim needs verification.', 'Review strategy rules:', - `- Team strategy: ${team.strategyLevel}. ${formatStrategyImpact(team.strategyLevel)}`, + `- Team strategy: ${manifest.strategyLevel}. ${formatStrategyImpact(manifest.strategyLevel, strategyProfiles)}`, + '- Risk recommendation is advisory; follow team_strategy, member strategy fields, and work-packet strategy for this run unless the user explicitly changes strategy.', commonStrategyRules, 'Review strategy profiles:', strategyRules, diff --git a/src/web-ui/src/shared/types/session-history.ts b/src/web-ui/src/shared/types/session-history.ts index 1db855337..ad7dcc24b 100644 --- a/src/web-ui/src/shared/types/session-history.ts +++ b/src/web-ui/src/shared/types/session-history.ts @@ -4,6 +4,8 @@ * Used by session lists and persistence metadata in the frontend. 
*/ +import type { ReviewTeamRunManifest } from '@/shared/services/reviewTeamService'; + export type SessionKind = 'normal' | 'btw' | 'review' | 'deep_review'; export type PersistedSessionKind = 'standard' | 'subagent'; export type SessionTitleSource = 'text' | 'i18n'; @@ -63,6 +65,11 @@ export interface SessionMetadata { * Allows restoring the review action bar across app restarts. */ reviewActionState?: ReviewActionPersistedState; + /** + * The per-run Deep Review reviewer manifest used to launch this session. + * Continuation and later backend gates use this as the source of truth. + */ + deepReviewRunManifest?: ReviewTeamRunManifest; } export interface ReviewActionPersistedState {