From 5c786f5c6f3233290b356e0fa94965e8f82562c2 Mon Sep 17 00:00:00 2001
From: Hu Qiantao
Date: Tue, 2 Jun 2026 00:09:46 +0800
Subject: [PATCH] test(client): add plan mode toggle byte-stability invariant test

Add test plan_mode_toggle_preserves_catalog_byte_stability that verifies
three invariants critical for DeepSeek's KV prefix cache:

1. Building the tool catalog twice for the same mode produces identical
   JSON bytes. This catches any non-determinism in catalog construction
   (e.g., HashMap iteration order, timestamp-dependent logic).

2. Non-deferred tools common to Plan and Agent modes appear in the same
   order. Plan mode excludes execution tools, but the tools that are
   present in both modes must have stable byte positions so that
   toggling between modes doesn't shift byte offsets of shared tools.

3. Activating a deferred tool mid-session appends to the tail without
   reordering the catalog head. This is the existing invariant
   from #263, now covered by a dedicated byte-level assertion.

Also add a doc comment to build_model_tool_catalog documenting the
catalog-head stability invariant.
---
 crates/tui/src/core/engine/tests.rs        | 143 +++++++++++++++++++++
 crates/tui/src/core/engine/tool_catalog.rs |   9 ++
 2 files changed, 152 insertions(+)

diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index 783f31283..96451f891 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -1051,6 +1051,149 @@ fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() {
     );
 }
 
+/// Plan mode toggle must not change the byte representation of the tool
+/// catalog head. DeepSeek's KV prefix cache includes the tools array in
+/// the immutable prefix; if toggling between Plan and Agent mode changes
+/// the tool bytes, every mode switch forces a full re-prefill.
+///
+/// This test verifies three invariants:
+/// 1. Building the catalog twice for the same mode produces identical bytes.
+/// 2. Non-deferred tools present in both the Plan and Agent catalogs appear
+///    in the same relative order.
+/// 3. The head of the catalog (non-deferred tools) preserves its order
+///    when deferred tools are activated mid-session.
+#[test]
+fn plan_mode_toggle_preserves_catalog_byte_stability() {
+    let always_load = HashSet::new();
+
+    // Build catalog for Plan mode twice — must be byte-identical.
+    let plan_native = vec![
+        api_tool("read_file"),
+        api_tool("list_dir"),
+        api_tool("write_file"),
+        api_tool("edit_file"),
+        api_tool("exec_shell"),
+    ];
+    let plan_mcp = vec![api_tool("mcp_search"), api_tool("mcp_write")];
+
+    let catalog_a = build_model_tool_catalog(
+        plan_native.clone(),
+        plan_mcp.clone(),
+        AppMode::Plan,
+        &always_load,
+    );
+    let catalog_b = build_model_tool_catalog(
+        plan_native.clone(),
+        plan_mcp.clone(),
+        AppMode::Plan,
+        &always_load,
+    );
+
+    let json_a = serde_json::to_string(&catalog_a).unwrap();
+    let json_b = serde_json::to_string(&catalog_b).unwrap();
+    assert_eq!(
+        json_a, json_b,
+        "building the catalog twice for Plan mode must produce identical bytes"
+    );
+
+    // Build catalog for Agent mode twice — must be byte-identical.
+    let agent_catalog_a = build_model_tool_catalog(
+        plan_native.clone(),
+        plan_mcp.clone(),
+        AppMode::Agent,
+        &always_load,
+    );
+    let agent_catalog_b = build_model_tool_catalog(
+        plan_native.clone(),
+        plan_mcp.clone(),
+        AppMode::Agent,
+        &always_load,
+    );
+
+    let agent_json_a = serde_json::to_string(&agent_catalog_a).unwrap();
+    let agent_json_b = serde_json::to_string(&agent_catalog_b).unwrap();
+    assert_eq!(
+        agent_json_a, agent_json_b,
+        "building the catalog twice for Agent mode must produce identical bytes"
+    );
+
+    // Verify that the non-deferred tools that are common to both modes
+    // appear in the same relative order. Plan mode excludes execution tools,
+    // which may sit anywhere in the list, so compare the two name sequences
+    // restricted to their intersection instead of a positional prefix.
+    let plan_names: Vec<&str> = catalog_a
+        .iter()
+        .filter(|t| !t.defer_loading.unwrap_or(false))
+        .map(|t| t.name.as_str())
+        .collect();
+    let agent_names: Vec<&str> = agent_catalog_a
+        .iter()
+        .filter(|t| !t.defer_loading.unwrap_or(false))
+        .map(|t| t.name.as_str())
+        .collect();
+
+    let plan_name_set: HashSet<&str> = plan_names.iter().copied().collect();
+    let agent_name_set: HashSet<&str> = agent_names.iter().copied().collect();
+    let plan_common: Vec<&str> = plan_names
+        .iter()
+        .copied()
+        .filter(|name| agent_name_set.contains(name))
+        .collect();
+    let agent_common: Vec<&str> = agent_names
+        .iter()
+        .copied()
+        .filter(|name| plan_name_set.contains(name))
+        .collect();
+    assert_eq!(
+        plan_common, agent_common,
+        "non-deferred tools common to Plan and Agent must appear in the same order"
+    );
+
+    // Verify that activating a deferred tool mid-session appends to the
+    // tail without reordering the head.
+    let mut tools_with_deferred = plan_native.clone();
+    tools_with_deferred.push({
+        let mut t = api_tool("deferred_search");
+        t.defer_loading = Some(true);
+        t
+    });
+    let catalog_with_deferred = build_model_tool_catalog(
+        tools_with_deferred,
+        plan_mcp.clone(),
+        AppMode::Agent,
+        &always_load,
+    );
+
+    // Activate the deferred tool.
+    let mut active: HashSet<String> = catalog_with_deferred
+        .iter()
+        .filter(|t| !t.defer_loading.unwrap_or(false))
+        .map(|t| t.name.clone())
+        .collect();
+    active.insert("deferred_search".to_string());
+
+    let listed = active_tools_for_step(&catalog_with_deferred, &active, false);
+    let listed_names: Vec<&str> = listed.iter().map(|t| t.name.as_str()).collect();
+
+    // The head (non-deferred tools) must still be in their original order.
+    let head_names: Vec<&str> = catalog_with_deferred
+        .iter()
+        .filter(|t| !t.defer_loading.unwrap_or(false))
+        .map(|t| t.name.as_str())
+        .collect();
+    assert!(
+        listed_names.starts_with(&head_names),
+        "activating a deferred tool must not reorder the catalog head: \
+         expected {head_names:?} as prefix, got {listed_names:?}"
+    );
+    // The deferred tool must be at the tail.
+    assert_eq!(
+        listed_names.last(),
+        Some(&"deferred_search"),
+        "deferred tool must be appended at the tail"
+    );
+}
+
 #[test]
 fn parent_turn_registry_includes_recall_archive_for_investigative_modes() {
     let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs
index 517896326..21f8c333a 100644
--- a/crates/tui/src/core/engine/tool_catalog.rs
+++ b/crates/tui/src/core/engine/tool_catalog.rs
@@ -107,6 +107,15 @@ pub(super) fn apply_mcp_tool_deferral(catalog: &mut [Tool], mode: AppMode) {
     }
 }
 
+/// Build the model tool catalog from native and MCP tool lists.
+///
+/// **Catalog-head stability invariant.** The head of the catalog (all
+/// non-deferred tools) must remain byte-identical across mode toggles
+/// (Plan ↔ Agent ↔ YOLO) for tools that are common to the modes involved.
+/// Deferred tool activations append to the tail and never reorder the
+/// head. This invariant is critical for DeepSeek's KV prefix cache:
+/// the tools array is part of the immutable prefix, and any byte-level
+/// change in the head forces a full re-prefill on the next turn.
 pub(super) fn build_model_tool_catalog(
     mut native_tools: Vec<Tool>,
     mut mcp_tools: Vec<Tool>,