Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,21 @@ jobs:
} >> "$GITHUB_OUTPUT"
fi

# Verify that the mcpchecker JSON output format is compatible with our
# doc-generation script. This catches breaking changes in mcpchecker's
# output schema before they reach the weekly model-evaluation workflow.
- name: Validate eval script compatibility
if: always() && steps.parse_results.outcome == 'success'
run: |
RESULTS_FILE="e2e-tests/mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json"
if [ -f "$RESULTS_FILE" ]; then
./scripts/update-model-evaluation.sh \
--model-id "smoke-test" \
--results "$RESULTS_FILE"
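# The smoke-test run above modifies docs/model-evaluation.md; discard that
# change so this compatibility check leaves the working tree unmodified.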
git checkout docs/model-evaluation.md
fi

- name: Upload test artifacts
if: always()
id: upload_artifacts
Expand Down
4 changes: 2 additions & 2 deletions e2e-tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ Results are saved to `mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json`.

```bash
# Summary
jq '.[] | {taskName, taskPassed}' mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json
jq '.results[] | {taskName, taskPassed}' mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json

# Tool calls
jq '[.[] | .callHistory.ToolCalls[]? | {name: .request.Params.name, arguments: .request.Params.arguments}]' mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json
jq '[.results[] | .callHistory.ToolCalls[]? | {name: .request.Params.name, arguments: .request.Params.arguments}]' mcpchecker/mcpchecker-stackrox-mcp-e2e-out.json
```

## Test Cases
Expand Down
10 changes: 5 additions & 5 deletions scripts/update-model-evaluation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ END_MARKER="<!-- model:${MODEL_ID} end -->"
# Generate the markdown block
generate_block() {
local total passed
total=$(jq 'length' "${RESULTS_FILE}")
passed=$(jq '[.[] | select(.taskPassed == true)] | length' "${RESULTS_FILE}")
total=$(jq '.results | length' "${RESULTS_FILE}")
passed=$(jq '[.results[] | select(.taskPassed == true)] | length' "${RESULTS_FILE}")
local pct=$((100 * passed / total))

echo "${START_MARKER}"
Expand All @@ -93,7 +93,7 @@ generate_block() {

# Generate table rows
jq -r '
to_entries[] |
.results | to_entries[] |
.key as $i |
.value |
($i + 1) as $num |
Expand Down Expand Up @@ -123,8 +123,8 @@ generate_block() {

# Token totals
local input_tokens output_tokens
input_tokens=$(jq '[.[].tokenEstimate.inputTokens] | add' "${RESULTS_FILE}")
output_tokens=$(jq '[.[].tokenEstimate.outputTokens] | add' "${RESULTS_FILE}")
input_tokens=$(jq '[.results[].tokenEstimate.inputTokens] | add' "${RESULTS_FILE}")
output_tokens=$(jq '[.results[].tokenEstimate.outputTokens] | add' "${RESULTS_FILE}")
echo "**Total input tokens**: ${input_tokens} | **Total output tokens**: ${output_tokens}"
echo ""
echo "${END_MARKER}"
Expand Down
Loading