# AgentEval CI workflow with quality gates.
# Fails the build if metrics regress beyond thresholds.
#
# Flow: install agenteval -> run the suite -> compare against baseline.json with
# gates -> fail the job if gate-results.json reports a failure -> post (or
# update) a PR comment with the gate results.

name: AgentEval Gates
on:
  pull_request:
    branches: [main]

# Declaring any permission sets every unlisted scope to `none`, so the read
# access actions/checkout needs must be granted explicitly alongside the
# write access used to comment on the pull request.
permissions:
  contents: read
  pull-requests: write

jobs:
  eval:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install agenteval
        run: pip install agentevalkit

      - name: Run evaluation suite
        run: agenteval run --suite suite.yaml --agent my_agent:run --format json -o results.json

      # Writes gate-results.json, which the two steps below consume.
      # NOTE(review): whether each gate value is a minimum or a maximum is
      # decided by agenteval, not by this file - confirm against its docs.
      - name: Compare with gates
        run: |
          agenteval compare \
            --baseline baseline.json \
            --current results.json \
            --gate pass_rate:0.95 \
            --gate avg_score:0.8 \
            --gate max_latency_ms:5000 \
            --format json -o gate-results.json

      # Fails the job when gate-results.json does not report gates_passed,
      # printing one line per entry in its `failures` list.
      - name: Check gate status
        run: |
          # Quoted heredoc: the shell performs no expansion inside the script,
          # so the Python source needs no escaped quotes.
          python3 - <<'PY'
          import json, sys

          with open('gate-results.json') as fh:
              data = json.load(fh)

          if not data.get('gates_passed', False):
              for g in data.get('failures', []):
                  print(f"GATE FAILED: {g['gate']} — got {g['actual']}, required {g['threshold']}")
              sys.exit(1)
          print('All quality gates passed.')
          PY

      # Runs even when an earlier step failed so the PR always gets a status.
      - name: Post PR comment
        if: always() && github.event_name == 'pull_request'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Passed through the environment rather than interpolated into the script.
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          # gate-results.json is missing when a step before the comparison
          # failed; still post a comment instead of crashing here.
          if [ -f gate-results.json ]; then
            STATUS=$(python3 -c "import json; d=json.load(open('gate-results.json')); print('passed' if d.get('gates_passed') else 'FAILED')")
            DETAILS=$(python3 -m json.tool gate-results.json)
          else
            STATUS="FAILED"
            DETAILS="gate-results.json was not produced; an earlier step failed before the gate comparison completed."
          fi

          # Build the Markdown body in a file; the code fence keeps the JSON
          # formatting intact when GitHub renders the comment.
          BODY_FILE="${RUNNER_TEMP}/agenteval-gate-comment.md"
          {
            printf '## AgentEval Gate Results: %s\n\n' "${STATUS}"
            printf '```\n%s\n```\n' "${DETAILS}"
          } > "${BODY_FILE}"

          # Update the previous comment when one exists, otherwise create one.
          gh pr comment "${PR_NUMBER}" --body-file "${BODY_FILE}" --edit-last || \
            gh pr comment "${PR_NUMBER}" --body-file "${BODY_FILE}"
0 commit comments