diff --git a/e2e-tests/README.md b/e2e-tests/README.md index fcbef57..9a440f6 100644 --- a/e2e-tests/README.md +++ b/e2e-tests/README.md @@ -66,6 +66,52 @@ The test suite: Results are saved to `mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json`. +### Running Tests with Claude Code + +You can run e2e tests using Claude Code (Claude CLI) instead of the default agent. This tests the MCP server with the same Claude models that end users interact with. + +#### Prerequisites +- Claude Code CLI installed and configured (see https://claude.ai/code) +- Access to Claude models (Haiku, Sonnet, or Opus) + +#### Create Agent Configuration + +Create a custom agent config file in the `mcpchecker/` directory (e.g., `claude-agent-opus.yaml`): + +```yaml +kind: Agent +metadata: + name: "claude-cli-opus" + description: "Claude Code CLI agent with Opus model" +commands: + useVirtualHome: false + argTemplateMcpServer: "--mcp-config {{ .File }}" + argTemplateAllowedTools: "mcp__{{ .ServerName }}__{{ .ToolName }}" + runPrompt: |- + claude --print --dangerously-skip-permissions --model opus {{ .McpServerFileArgs }} -- "{{ .Prompt }}" +``` + +Replace `opus` with `sonnet` or `haiku` for other models. + +#### Run Tests + +Edit `mcpchecker/eval.yaml` and change the agent configuration: + +```yaml +config: + agent: + type: "file" + path: "claude-agent-opus.yaml" # your agent config file +``` + +Then run tests normally: + +```bash +./scripts/run-tests.sh +``` + +**Note**: The `mcpchecker/` directory is gitignored, so agent configs won't be committed. Revert `eval.yaml` changes before committing. + ### View Results ```bash diff --git a/internal/toolsets/config/tools.go b/internal/toolsets/config/tools.go index bb2bdd4..b42ad63 100644 --- a/internal/toolsets/config/tools.go +++ b/internal/toolsets/config/tools.go @@ -71,7 +71,11 @@ func (t *listClustersTool) GetTool() *mcp.Tool { return &mcp.Tool{ Name: t.name, Description: "List all clusters secured by " + config.GetProductDisplayName() + "." 
+ - " Returns cluster IDs, names, and types. Use this tool to discover available clusters.", + " Returns cluster IDs, names, and types." + + " WHEN TO USE:" + + " Use this tool when the user asks to see or list all clusters (e.g., 'show my clusters'," + + " 'list clusters', 'what clusters do I have')." + + " IMPORTANT: Do NOT use this tool when checking for CVE vulnerabilities - use the CVE-specific tools instead.", InputSchema: listClustersInputSchema(), } } diff --git a/internal/toolsets/vulnerability/clusters.go b/internal/toolsets/vulnerability/clusters.go index 0c9058c..66d1528 100644 --- a/internal/toolsets/vulnerability/clusters.go +++ b/internal/toolsets/vulnerability/clusters.go @@ -80,11 +80,11 @@ func (t *getClustersForCVETool) GetTool() *mcp.Tool { " Kubernetes orchestrator components (kube-apiserver, kubelet, etcd, etc.)." + " Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'orchestrator', 'Kubernetes components'," + - " or 'control plane': Use ONLY this tool.", + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + + " Call ALL THREE CVE tools exactly once each" + + " (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " then STOP and provide answer. Do NOT make verification calls or check twice." 
+ + " 2) For specific orchestrator questions: Use ONLY this tool once, then STOP.", InputSchema: getClustersForCVEInputSchema(), } } diff --git a/internal/toolsets/vulnerability/deployments.go b/internal/toolsets/vulnerability/deployments.go index 829aee6..fa7b3b2 100644 --- a/internal/toolsets/vulnerability/deployments.go +++ b/internal/toolsets/vulnerability/deployments.go @@ -104,11 +104,11 @@ func (t *getDeploymentsForCVETool) GetTool() *mcp.Tool { " where a specified vulnerability is detected in application or platform container images." + " Supports CVE, GHSA, and 22+ other vulnerability identifier formats." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' (plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'deployments', 'workloads', 'applications'," + - " or 'containers': Use ONLY this tool.", + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + + " Call ALL THREE CVE tools exactly once each" + + " (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " then STOP and provide answer. Do NOT make verification calls or check twice." + + " 2) For specific deployment/workload questions: Use ONLY this tool once, then STOP.", InputSchema: getDeploymentsForCVEInputSchema(), } } diff --git a/internal/toolsets/vulnerability/nodes.go b/internal/toolsets/vulnerability/nodes.go index c7ecd76..d86de84 100644 --- a/internal/toolsets/vulnerability/nodes.go +++ b/internal/toolsets/vulnerability/nodes.go @@ -83,11 +83,11 @@ func (t *getNodesForCVETool) GetTool() *mcp.Tool { " where a specified vulnerability is detected in node operating system packages," + " grouped by cluster and OS image. Supports CVE, RHSA, RHEA, RHBA identifiers." + " USAGE PATTERNS:" + - " 1) When user asks 'Is CVE-X detected in my clusters?' 
(plural, general question):" + - " Call ALL THREE CVE tools (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)" + - " for comprehensive coverage." + - " 2) When user asks specifically about 'nodes', 'hosts'," + - " or 'operating systems': Use ONLY this tool.", + " 1) For general CVE questions ('Is CVE-X detected in my clusters?'):" + + " Call ALL THREE CVE tools exactly once each" + + " (get_clusters_with_orchestrator_cve, get_deployments_for_cve, get_nodes_for_cve)," + + " then STOP and provide answer. Do NOT make verification calls or check twice." + + " 2) For specific node/host questions: Use ONLY this tool once, then STOP.", InputSchema: getNodesForCVEInputSchema(), } }