agentkitai
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 3 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 10 additions & 0 deletions b/‎.gitignore‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎examples/docker/Dockerfile‎
Lines changed: 11 additions & 0 deletions b/‎examples/docker/Dockerfile‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎examples/docker/README.md‎
Lines changed: 30 additions & 0 deletions b/‎examples/docker/README.md‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎examples/docker/docker-compose.yml‎
Lines changed: 27 additions & 0 deletions b/‎examples/docker/docker-compose.yml‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎examples/docker/suite.yaml‎
Lines changed: 15 additions & 0 deletions b/‎examples/docker/suite.yaml‎
Lines changed: 15 additions & 0 deletions
diff --git a/‎examples/github-actions/README.md‎
Lines changed: 26 additions & 0 deletions b/‎examples/github-actions/README.md‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎examples/github-actions/basic.yml‎
Lines changed: 32 additions & 0 deletions b/‎examples/github-actions/basic.yml‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎examples/github-actions/with-comparison.yml‎
Lines changed: 48 additions & 0 deletions b/‎examples/github-actions/with-comparison.yml‎
Lines changed: 48 additions & 0 deletions
diff --git a/‎examples/github-actions/with-gates.yml‎
Lines changed: 62 additions & 0 deletions b/‎examples/github-actions/with-gates.yml‎
Lines changed: 62 additions & 0 deletions
@@ -22,3 +22,6 @@ jobs:
         run: pip install pip-audit && pip-audit --desc
         continue-on-error: true
       - run: pytest
+      - name: Type check  # installs mypy plus PyYAML stubs, then checks src/agenteval/
+        run: pip install mypy types-PyYAML && mypy src/agenteval/ --ignore-missing-imports
+        continue-on-error: true  # advisory only: a failing type check does not fail the job
@@ -5,3 +5,13 @@ __pycache__/
 dist/
 build/
 *.pyc
+
+# Aperant data directory
+.auto-claude/
+
+# Claude Code / BMAD
+.claude/
+_bmad/
+
+# SQLite databases
+*.db
@@ -0,0 +1,11 @@
+FROM python:3.12-slim
+
+WORKDIR /work
+
+# Install agenteval with the distributed extra (optional Redis support); quoted so sh cannot glob the brackets
+RUN pip install --no-cache-dir "agentevalkit[distributed]"
+
+# Bake in the example suite; bind-mount over /work/suite.yaml with -v to replace it at runtime
+COPY suite.yaml .
+
+CMD ["agenteval", "run", "--suite", "suite.yaml"]
@@ -0,0 +1,30 @@
+# Running AgentEval in Docker
+
+This example shows how to run agenteval in a Docker container, with an
+optional Redis service for distributed mode.
+
+## Quick start
+
+```bash
+# Build the image
+docker build -t agenteval .
+
+# Run a suite
+docker run --rm -v "$(pwd)/suite.yaml:/work/suite.yaml" agenteval \
+  agenteval run --suite suite.yaml --agent my_agent:run
+```
+
+## With Docker Compose (distributed mode)
+
+```bash
+docker compose up
+```
+
+This starts a Redis instance, an agenteval worker, and the `agenteval`
+container, which runs `suite.yaml`. You can then submit jobs from that container.
+
+## Customisation
+
+- Mount your agent code into `/work` to make it importable.
+- Set `OPENAI_API_KEY` via environment variable or `.env` file.
+- Add extra pip packages in the Dockerfile as needed.
@@ -0,0 +1,27 @@
+services:
+  redis:
+    image: redis:7-alpine
+    ports:
+      - "127.0.0.1:6379:6379"  # host access via loopback only; no Redis password is set here
+
+  agenteval:  # runs the example suite from the mounted directory
+    build: .
+    depends_on:  # start order only; does not wait for Redis to accept connections
+      - redis
+    environment:  # "redis" in the URL is the service name defined above
+      - AGENTEVAL_REDIS_URL=redis://redis:6379/0
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+    volumes:
+      - .:/work  # bind mount replaces the image's /work, including the baked-in suite.yaml
+    command: ["agenteval", "run", "--suite", "suite.yaml"]
+
+  worker:  # agenteval worker attached to the same Redis service
+    build: .
+    depends_on:
+      - redis
+    environment:  # OPENAI_API_KEY is forwarded from the host; empty when unset
+      - AGENTEVAL_REDIS_URL=redis://redis:6379/0
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+    volumes:
+      - .:/work
+    command: ["agenteval", "worker", "--redis-url", "redis://redis:6379/0"]
@@ -0,0 +1,15 @@
+name: docker-example-tests
+agent: my_agent:run  # NOTE(review): looks like a module:callable reference — confirm
+
+cases:
+  - name: basic-response
+    grader: contains
+    input: 'Hello, agent!'
+    expected:
+      contains: 'Hello'
+
+  - name: factual-check
+    grader: contains
+    input: 'What is 2 + 2?'
+    expected:
+      contains: '4'  # quoted so the expected value stays a string
@@ -0,0 +1,26 @@
+# GitHub Actions CI Templates for AgentEval
+
+Workflow templates to copy into your repository for running agenteval in CI.
+
+## Templates
+
+### basic.yml
+
+Minimal workflow: installs agenteval, runs a test suite, and fails on
+non-zero exit code.
+
+### with-comparison.yml
+
+Runs a suite, compares results with a stored baseline, and posts a
+summary comment on the pull request.
+
+### with-gates.yml
+
+Runs a suite with quality gates. Fails the build if any metric
+regresses beyond the configured threshold.
+
+## Usage
+
+Copy the desired `.yml` file into your repository's `.github/workflows/`
+directory and adjust the suite path, agent reference, and any thresholds
+to match your project.
@@ -0,0 +1,32 @@
+# Basic agenteval CI workflow.
+# Runs a test suite and fails if any case fails.
+
+name: AgentEval Basic
+on:  # runs on pushes to main and on pull requests targeting main
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  eval:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"  # quoted so YAML keeps the version a string
+
+      - name: Install agenteval
+        run: pip install agentevalkit
+
+      - name: Run evaluation suite
+        run: agenteval run --suite suite.yaml --agent my_agent:run
+
+      - name: Upload results
+        if: always()  # upload even when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-results
+          path: agenteval.db  # NOTE(review): presumably the results database written by the run step — confirm
@@ -0,0 +1,48 @@
+# AgentEval CI workflow with baseline comparison and PR comment.
+
+name: AgentEval Compare
+on:
+  pull_request:
+    branches: [main]
+
+# Listing any scope sets every unlisted one to "none"; contents: read keeps checkout working.
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  eval:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install agenteval
+        run: pip install agentevalkit
+
+      - name: Run evaluation suite
+        run: agenteval run --suite suite.yaml --agent my_agent:run --format json -o results.json
+
+      - name: Compare with baseline
+        run: agenteval compare --baseline baseline.json --current results.json --format markdown -o comparison.md
+
+      - name: Post PR comment  # edits this workflow's last comment, or creates one if none exists
+        if: github.event_name == 'pull_request'  # guard in case other triggers are added later
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.number }}
+        run: |
+          gh pr comment "$PR_NUMBER" --body-file comparison.md --edit-last ||
+            gh pr comment "$PR_NUMBER" --body-file comparison.md
+
+      - name: Upload artifacts
+        if: always()  # keep whatever was produced even when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-artifacts
+          path: |
+            results.json
+            comparison.md
@@ -0,0 +1,62 @@
+# AgentEval CI workflow with quality gates.
+# Fails the build if metrics regress beyond thresholds.
+
+name: AgentEval Gates
+on:
+  pull_request:
+    branches: [main]
+
+# Listing any scope sets every unlisted one to "none"; contents: read keeps checkout working.
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  eval:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install agenteval
+        run: pip install agentevalkit
+
+      - name: Run evaluation suite
+        run: agenteval run --suite suite.yaml --agent my_agent:run --format json -o results.json
+
+      - name: Compare with gates
+        run: |
+          agenteval compare \
+            --baseline baseline.json \
+            --current results.json \
+            --gate pass_rate:0.95 \
+            --gate avg_score:0.8 \
+            --gate max_latency_ms:5000 \
+            --format json -o gate-results.json
+
+      - name: Check gate status  # exits 1 unless gate-results.json reports gates_passed as truthy
+        run: |
+          python3 - <<'PY'
+          import json, sys
+          data = json.load(open('gate-results.json'))
+          if not data.get('gates_passed', False):
+              for g in data.get('failures', []):
+                  print(f"GATE FAILED: {g['gate']} — got {g['actual']}, required {g['threshold']}")
+              sys.exit(1)
+          print('All quality gates passed.')
+          PY
+
+      - name: Post PR comment  # runs after failures too, but only once gate-results.json exists
+        if: always() && github.event_name == 'pull_request' && hashFiles('gate-results.json') != ''
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.number }}
+        run: |
+          STATUS=$(python3 -c "import json; d=json.load(open('gate-results.json')); print('passed' if d.get('gates_passed') else 'FAILED')")
+          BODY=$(printf '## AgentEval Gate Results: %s\n\n```json\n%s\n```' \
+            "$STATUS" "$(python3 -m json.tool gate-results.json)")
+          gh pr comment "$PR_NUMBER" --body "$BODY" --edit-last ||
+            gh pr comment "$PR_NUMBER" --body "$BODY"