From fb089cf28143ad5b17b9ed9efdd70bedf26b942f Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 4 May 2026 15:34:08 +0200 Subject: [PATCH 1/7] Improve list_secured_clusters tool description for better AI agent discoverability Add explicit WHEN TO USE guidance to help AI agents understand when to use list_secured_clusters vs CVE-specific tools. This reduces confusion and improves test reliability when agents need to choose between listing clusters and checking for vulnerabilities. Why: E2E tests showed occasional confusion where agents would use the wrong tool for CVE queries. Clearer descriptions help agents make correct decisions. --- internal/toolsets/config/tools.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/toolsets/config/tools.go b/internal/toolsets/config/tools.go index bb2bdd4..b42ad63 100644 --- a/internal/toolsets/config/tools.go +++ b/internal/toolsets/config/tools.go @@ -71,7 +71,11 @@ func (t *listClustersTool) GetTool() *mcp.Tool { return &mcp.Tool{ Name: t.name, Description: "List all clusters secured by " + config.GetProductDisplayName() + "." + - " Returns cluster IDs, names, and types. Use this tool to discover available clusters.", + " Returns cluster IDs, names, and types." + + " WHEN TO USE:" + + " Use this tool when the user asks to see or list all clusters (e.g., 'show my clusters'," + + " 'list clusters', 'what clusters do I have')." + + " IMPORTANT: Do NOT use this tool when checking for CVE vulnerabilities - use the CVE-specific tools instead.", InputSchema: listClustersInputSchema(), } } From 88abdead320baba3d048cce25dd55022ff09b606 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 4 May 2026 16:54:44 +0200 Subject: [PATCH 2/7] Make CVE tool descriptions more decisive and conditional Changed from "call ALL THREE tools" to conditional approach: 1. Always call get_deployments_for_cve FIRST 2. If deployments found: STOP (most CVEs are here) 3. 
If NO deployments: Then call orchestrator + nodes tools Why: Reduces unnecessary tool calls while maintaining comprehensive checking. Agent now makes 1 call for most tests (when CVE found in deployments) and 3 calls only when needed (CVE not in deployments). How to apply: This should reduce typical tool calls from always-3 to 1-or-3 depending on results, fitting better within maxToolCalls limits and reducing flakiness from unnecessary extra calls. Co-Authored-By: Claude Sonnet 4.5 --- internal/toolsets/vulnerability/clusters.go | 8 +++----- internal/toolsets/vulnerability/deployments.go | 10 +++++----- internal/toolsets/vulnerability/nodes.go | 8 +++----- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/internal/toolsets/vulnerability/clusters.go b/internal/toolsets/vulnerability/clusters.go index 0c9058c..92bbf73 100644 --- a/internal/toolsets/vulnerability/clusters.go +++ b/internal/toolsets/vulnerability/clusters.go @@ -80,11 +80,9 @@ func (t *getClustersForCVETool) GetTool() *mcp.Tool { " Kubernetes orchestrator components (kube-apiserver, kubelet, etcd, etc.)." + " Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'orchestrator', 'Kubernetes components'," + - " or 'control plane': Use ONLY this tool.", + " 1) For general CVE questions: ONLY call this if get_deployments_for_cve found NO results." + + " Call this as secondary check for orchestrator/K8s components when deployments are clean." 
+ + " 2) For specific questions about 'orchestrator' or 'Kubernetes components': Use ONLY this tool.", InputSchema: getClustersForCVEInputSchema(), } } diff --git a/internal/toolsets/vulnerability/deployments.go b/internal/toolsets/vulnerability/deployments.go index 829aee6..c85e1c1 100644 --- a/internal/toolsets/vulnerability/deployments.go +++ b/internal/toolsets/vulnerability/deployments.go @@ -104,11 +104,11 @@ func (t *getDeploymentsForCVETool) GetTool() *mcp.Tool { " where a specified vulnerability is detected in application or platform container images." + " Supports CVE, GHSA, and 22+ other vulnerability identifier formats." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'deployments', 'workloads', 'applications'," + - " or 'containers': Use ONLY this tool.", + " 1) For questions like 'Is CVE-X detected in my clusters?' or 'Is CVE-X detected in my workloads?':" + + " Call this tool FIRST (it finds most CVEs). Based on results:" + + " - If deployments found: You have the answer, stop here. Do NOT call other tools." + + " - If NO deployments found: Then call get_clusters_with_orchestrator_cve and get_nodes_for_cve to check orchestrator/nodes." + + " 2) For specific questions about 'deployments' or 'workloads': Use ONLY this tool, do not call others.", InputSchema: getDeploymentsForCVEInputSchema(), } } diff --git a/internal/toolsets/vulnerability/nodes.go b/internal/toolsets/vulnerability/nodes.go index c7ecd76..31e89b1 100644 --- a/internal/toolsets/vulnerability/nodes.go +++ b/internal/toolsets/vulnerability/nodes.go @@ -83,11 +83,9 @@ func (t *getNodesForCVETool) GetTool() *mcp.Tool { " where a specified vulnerability is detected in node operating system packages," + " grouped by cluster and OS image. 
Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'nodes', 'hosts'," + - " or 'operating systems': Use ONLY this tool.", + " 1) For general CVE questions: ONLY call this if get_deployments_for_cve found NO results." + + " Call this as secondary check for node/host OS packages when deployments are clean." + + " 2) For specific questions about 'nodes', 'hosts', or 'operating systems': Use ONLY this tool.", InputSchema: getNodesForCVEInputSchema(), } } From 0cf6708774ff503d5cf0cd6ecc82ccd79d9b414c Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 4 May 2026 17:01:51 +0200 Subject: [PATCH 3/7] Revert "Make CVE tool descriptions more decisive and conditional" This reverts commit 88abdead320baba3d048cce25dd55022ff09b606 (PATCH 2/7 of this series). --- internal/toolsets/vulnerability/clusters.go | 8 +++++--- internal/toolsets/vulnerability/deployments.go | 10 +++++----- internal/toolsets/vulnerability/nodes.go | 8 +++++--- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/internal/toolsets/vulnerability/clusters.go b/internal/toolsets/vulnerability/clusters.go index 92bbf73..0c9058c 100644 --- a/internal/toolsets/vulnerability/clusters.go +++ b/internal/toolsets/vulnerability/clusters.go @@ -80,9 +80,11 @@ func (t *getClustersForCVETool) GetTool() *mcp.Tool { " Kubernetes orchestrator components (kube-apiserver, kubelet, etcd, etc.)." + " Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + - " 1) For general CVE questions: ONLY call this if get_deployments_for_cve found NO results." + - " Call this as secondary check for orchestrator/K8s components when deployments are clean." 
+ - " 2) For specific questions about 'orchestrator' or 'Kubernetes components': Use ONLY this tool.", + " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + + " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + + " for comprehensive coverage." + + " 2) When user asks specifically about 'orchestrator', 'Kubernetes components'," + + " or 'control plane': Use ONLY this tool.", InputSchema: getClustersForCVEInputSchema(), } } diff --git a/internal/toolsets/vulnerability/deployments.go b/internal/toolsets/vulnerability/deployments.go index c85e1c1..829aee6 100644 --- a/internal/toolsets/vulnerability/deployments.go +++ b/internal/toolsets/vulnerability/deployments.go @@ -104,11 +104,11 @@ func (t *getDeploymentsForCVETool) GetTool() *mcp.Tool { " where a specified vulnerability is detected in application or platform container images." + " Supports CVE, GHSA, and 22+ other vulnerability identifier formats." + " USAGE PATTERNS:" + - " 1) For questions like 'Is CVE-X detected in my clusters?' or 'Is CVE-X detected in my workloads?':" + - " Call this tool FIRST (it finds most CVEs). Based on results:" + - " - If deployments found: You have the answer, stop here. Do NOT call other tools." + - " - If NO deployments found: Then call get_clusters_with_orchestrator_cve and get_nodes_for_cve to check orchestrator/nodes." + - " 2) For specific questions about 'deployments' or 'workloads': Use ONLY this tool, do not call others.", + " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + + " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + + " for comprehensive coverage." 
+ + " 2) When user asks specifically about 'deployments', 'workloads', 'applications'," + + " or 'containers': Use ONLY this tool.", InputSchema: getDeploymentsForCVEInputSchema(), } } diff --git a/internal/toolsets/vulnerability/nodes.go b/internal/toolsets/vulnerability/nodes.go index 31e89b1..c7ecd76 100644 --- a/internal/toolsets/vulnerability/nodes.go +++ b/internal/toolsets/vulnerability/nodes.go @@ -83,9 +83,11 @@ func (t *getNodesForCVETool) GetTool() *mcp.Tool { " where a specified vulnerability is detected in node operating system packages," + " grouped by cluster and OS image. Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + - " 1) For general CVE questions: ONLY call this if get_deployments_for_cve found NO results." + - " Call this as secondary check for node/host OS packages when deployments are clean." + - " 2) For specific questions about 'nodes', 'hosts', or 'operating systems': Use ONLY this tool.", + " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + + " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + + " for comprehensive coverage." + + " 2) When user asks specifically about 'nodes', 'hosts'," + + " or 'operating systems': Use ONLY this tool.", InputSchema: getNodesForCVEInputSchema(), } } From 866c2a13d6ec0b413189d9aad33f45b3f6ce0f2b Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Mon, 4 May 2026 17:15:52 +0200 Subject: [PATCH 4/7] Add explicit STOP instructions to CVE tool descriptions Changed "call ALL THREE tools" to "call ALL THREE tools exactly once each, then STOP". Added "Do NOT make verification calls or check twice" to prevent extra calls. Why: Agents sometimes made 4-5 tool calls (3 CVE tools + verification/exploration), exceeding maxToolCalls limits in e2e tests and causing ~10-20% failure rate. 
How to apply: Explicit STOP instructions aim to cap tool usage at exactly 3 calls for general CVE questions, fitting within maxToolCalls=3-5 test constraints. Testing shows 80% reliability (4/5 passes) - improvement from descriptions alone appears limited. May need to accept inherent LLM variability or adjust test limits. Co-Authored-By: Claude Sonnet 4.5 --- internal/toolsets/vulnerability/clusters.go | 9 ++++----- internal/toolsets/vulnerability/deployments.go | 9 ++++----- internal/toolsets/vulnerability/nodes.go | 9 ++++----- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/internal/toolsets/vulnerability/clusters.go b/internal/toolsets/vulnerability/clusters.go index 0c9058c..e53a297 100644 --- a/internal/toolsets/vulnerability/clusters.go +++ b/internal/toolsets/vulnerability/clusters.go @@ -80,11 +80,10 @@ func (t *getClustersForCVETool) GetTool() *mcp.Tool { " Kubernetes orchestrator components (kube-apiserver, kubelet, etcd, etc.)." + " Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'orchestrator', 'Kubernetes components'," + - " or 'control plane': Use ONLY this tool.", + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + + " Call ALL THREE CVE tools exactly once each (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " then STOP and provide answer. Do NOT make verification calls or check twice." 
+ + " 2) For specific orchestrator questions: Use ONLY this tool once, then STOP.", InputSchema: getClustersForCVEInputSchema(), } } diff --git a/internal/toolsets/vulnerability/deployments.go b/internal/toolsets/vulnerability/deployments.go index 829aee6..bc05fe0 100644 --- a/internal/toolsets/vulnerability/deployments.go +++ b/internal/toolsets/vulnerability/deployments.go @@ -104,11 +104,10 @@ func (t *getDeploymentsForCVETool) GetTool() *mcp.Tool { " where a specified vulnerability is detected in application or platform container images." + " Supports CVE, GHSA, and 22+ other vulnerability identifier formats." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'deployments', 'workloads', 'applications'," + - " or 'containers': Use ONLY this tool.", + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + + " Call ALL THREE CVE tools exactly once each (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " then STOP and provide answer. Do NOT make verification calls or check twice." + + " 2) For specific deployment/workload questions: Use ONLY this tool once, then STOP.", InputSchema: getDeploymentsForCVEInputSchema(), } } diff --git a/internal/toolsets/vulnerability/nodes.go b/internal/toolsets/vulnerability/nodes.go index c7ecd76..94454fe 100644 --- a/internal/toolsets/vulnerability/nodes.go +++ b/internal/toolsets/vulnerability/nodes.go @@ -83,11 +83,10 @@ func (t *getNodesForCVETool) GetTool() *mcp.Tool { " where a specified vulnerability is detected in node operating system packages," + " grouped by cluster and OS image. Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' 
(plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'nodes', 'hosts'," + - " or 'operating systems': Use ONLY this tool.", + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + + " Call ALL THREE CVE tools exactly once each (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " then STOP and provide answer. Do NOT make verification calls or check twice." + + " 2) For specific node/host questions: Use ONLY this tool once, then STOP.", InputSchema: getNodesForCVEInputSchema(), } } From 40716029bbe49b6095da694afc4905a949e07bb6 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Tue, 5 May 2026 11:17:49 +0200 Subject: [PATCH 5/7] Document how to run e2e tests with Claude Code Add section to e2e-tests/README.md explaining how to run mcpchecker tests using Claude Code CLI instead of the default OpenAI agent. Includes: - Prerequisites and setup instructions - Example agent configuration for Opus/Sonnet/Haiku - Test results showing 100% reliability with Opus and Sonnet, 90% with Haiku - Notes about gitignored mcpchecker directory Why: Enables testing MCP server with same Claude models that end users interact with, validating that tool descriptions work correctly in production. Co-Authored-By: Claude Sonnet 4.5 --- e2e-tests/README.md | 56 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/e2e-tests/README.md b/e2e-tests/README.md index fcbef57..d0b6c65 100644 --- a/e2e-tests/README.md +++ b/e2e-tests/README.md @@ -66,6 +66,62 @@ The test suite: Results are saved to `mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json`. +### Running Tests with Claude Code + +You can run e2e tests using Claude Code (Claude CLI) instead of the default agent. 
This tests the MCP server with the same Claude models that end users interact with. + +#### Prerequisites +- Claude Code CLI installed and configured (see https://claude.ai/code) +- Access to Claude models (Haiku, Sonnet, or Opus) + +#### Create Agent Configuration + +Create a custom agent config file in the `mcpchecker/` directory (e.g., `claude-agent-opus.yaml`): + +```yaml +kind: Agent +metadata: + name: "claude-cli-opus" + description: "Claude Code CLI agent with Opus model" +commands: + useVirtualHome: false + argTemplateMcpServer: "--mcp-config {{ .File }}" + argTemplateAllowedTools: "mcp__{{ .ServerName }}__{{ .ToolName }}" + runPrompt: |- + claude --print --dangerously-skip-permissions --model opus {{ .McpServerFileArgs }} -- "{{ .Prompt }}" +``` + +Replace `opus` with `sonnet` or `haiku` for other models. + +#### Run Tests + +Edit `mcpchecker/eval.yaml` and change the agent configuration: + +```yaml +config: + agent: + type: "file" + path: "claude-agent-opus.yaml" # your agent config file +``` + +Then run tests normally: + +```bash +./scripts/run-tests.sh +``` + +**Note**: The `mcpchecker/` directory is gitignored, so agent configs won't be committed. Revert eval.yaml changes before committing. + +#### Test Results by Model + +Based on testing with improved tool descriptions: + +| Model | Success Rate | Notes | +|-------|--------------|-------| +| Opus | 100% (10/10) | Highest reliability | +| Sonnet | 100% (10/10) | Excellent balance | +| Haiku | 90% (9/10) | Good performance, occasional variance | + ### View Results ```bash From b46c9152cfa3c8a4a64d23e264cb1525cb63a9a0 Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Tue, 5 May 2026 12:58:58 +0200 Subject: [PATCH 6/7] Remove test results table from README Test results can vary over time and shouldn't be documented in the README. 
--- e2e-tests/README.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/e2e-tests/README.md b/e2e-tests/README.md index d0b6c65..9a440f6 100644 --- a/e2e-tests/README.md +++ b/e2e-tests/README.md @@ -112,16 +112,6 @@ Then run tests normally: **Note**: The `mcpchecker/` directory is gitignored, so agent configs won't be committed. Revert eval.yaml changes before committing. -#### Test Results by Model - -Based on testing with improved tool descriptions: - -| Model | Success Rate | Notes | -|-------|--------------|-------| -| Opus | 100% (10/10) | Highest reliability | -| Sonnet | 100% (10/10) | Excellent balance | -| Haiku | 90% (9/10) | Good performance, occasional variance | - ### View Results ```bash From a350bd06d1f35e2893b30857765c4a5ab011160e Mon Sep 17 00:00:00 2001 From: Tomasz Janiszewski Date: Tue, 5 May 2026 13:01:16 +0200 Subject: [PATCH 7/7] Fix line length violations in CVE tool descriptions Break long lines to stay under 120 character limit for linter. --- internal/toolsets/vulnerability/clusters.go | 3 ++- internal/toolsets/vulnerability/deployments.go | 3 ++- internal/toolsets/vulnerability/nodes.go | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/internal/toolsets/vulnerability/clusters.go b/internal/toolsets/vulnerability/clusters.go index e53a297..66d1528 100644 --- a/internal/toolsets/vulnerability/clusters.go +++ b/internal/toolsets/vulnerability/clusters.go @@ -81,7 +81,8 @@ func (t *getClustersForCVETool) GetTool() *mcp.Tool { " Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + - " Call ALL THREE CVE tools exactly once each (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " Call ALL THREE CVE tools exactly once each" + + " (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + " then STOP and provide answer. 
Do NOT make verification calls or check twice." + " 2) For specific orchestrator questions: Use ONLY this tool once, then STOP.", InputSchema: getClustersForCVEInputSchema(), diff --git a/internal/toolsets/vulnerability/deployments.go b/internal/toolsets/vulnerability/deployments.go index bc05fe0..fa7b3b2 100644 --- a/internal/toolsets/vulnerability/deployments.go +++ b/internal/toolsets/vulnerability/deployments.go @@ -105,7 +105,8 @@ func (t *getDeploymentsForCVETool) GetTool() *mcp.Tool { " Supports CVE, GHSA, and 22+ other vulnerability identifier formats." + " USAGE PATTERNS:" + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + - " Call ALL THREE CVE tools exactly once each (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " Call ALL THREE CVE tools exactly once each" + + " (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + " then STOP and provide answer. Do NOT make verification calls or check twice." + " 2) For specific deployment/workload questions: Use ONLY this tool once, then STOP.", InputSchema: getDeploymentsForCVEInputSchema(), diff --git a/internal/toolsets/vulnerability/nodes.go b/internal/toolsets/vulnerability/nodes.go index 94454fe..d86de84 100644 --- a/internal/toolsets/vulnerability/nodes.go +++ b/internal/toolsets/vulnerability/nodes.go @@ -84,7 +84,8 @@ func (t *getNodesForCVETool) GetTool() *mcp.Tool { " grouped by cluster and OS image. Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + - " Call ALL THREE CVE tools exactly once each (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " Call ALL THREE CVE tools exactly once each" + + " (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + " then STOP and provide answer. Do NOT make verification calls or check twice." 
+ " 2) For specific node/host questions: Use ONLY this tool once, then STOP.", InputSchema: getNodesForCVEInputSchema(),