Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def eval_uppercase(case: Case):
```

```plain
$ uv run pytest
$ uv run pyeval

============================== test session starts ==============================
platform darwin -- Python 3.13.1, pytest-9.0.2, pluggy-1.6.0
Expand All @@ -64,3 +64,21 @@ tests/evals/eval_example.py ●●
```shell
uv add --dev pytest-pyeval
```

## Running evals

`pytest-pyeval` keeps evals separate from your regular test suite. Evals are
excluded from `pytest` by default, since they are typically slower, hit live
APIs, and run on a different cadence to unit tests.

| Command | What runs |
|---|---|
| `pytest` | Regular tests only (`test_*.py`) |
| `pytest --evals` | Eval tests only (`eval_*.py`) |
| `pyeval` | Shorthand for `pytest --evals` |

```shell
pyeval                              # discover and run all evals in the project
pyeval tests/evals/                 # run evals under a specific path
pyeval tests/evals/eval_example.py  # run a single eval file
```
8 changes: 6 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ logfire = ["logfire"]
Repository = "https://github.com/alxwrd/pytest-pyeval"
Releases = "https://github.com/alxwrd/pytest-pyeval/releases"

[project.scripts]
pyeval = "pyeval.cli:main"

[project.entry-points."pytest11"]
pytest_pyeval = "pyeval.plugin"

Expand All @@ -61,7 +64,8 @@ format = ["_format", "_sort"]
lint = "ruff check"
check = "ty check"
test = "pytest"
all = ["format", "lint", "check", "test"]
evals = "pyeval"
all = ["format", "lint", "check", "test", "evals"]
_check-format = "ruff format --check"
_check-sort = "ruff check --select I"
ci = ["_check-format", "_check-sort", "lint", "check", "test"]
ci = ["_check-format", "_check-sort", "lint", "check", "test", "evals"]
9 changes: 9 additions & 0 deletions src/pyeval/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

import sys

import pytest


def main() -> None:
    """Run pytest with ``--evals`` prepended to the user's arguments.

    Everything after the program name in ``sys.argv`` is forwarded to
    ``pytest.main`` unchanged, placed after the ``--evals`` flag. The value
    returned by ``pytest.main`` is handed to ``sys.exit``, so this function
    does not return normally.
    """
    forwarded_args = sys.argv[1:]
    exit_status = pytest.main(["--evals", *forwarded_args])
    sys.exit(exit_status)
23 changes: 23 additions & 0 deletions src/pyeval/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,33 @@ def _score_symbol(score: float) -> tuple[str, str]:
)


def pytest_addoption(parser) -> None:
    """Register the ``--evals`` command-line flag.

    The flag is a ``store_true`` switch whose default is ``False``.

    Args:
        parser: Object exposing ``addoption``; the option is registered on it.
    """
    help_text = "Run only eval tests (@dataset-decorated functions in eval_*.py files)."
    parser.addoption("--evals", action="store_true", default=False, help=help_text)


def pytest_configure(config) -> None:
    """Add the ``eval_*.py`` pattern to the ``python_files`` ini value.

    Args:
        config: Object exposing ``addinivalue_line``; called once with the
            ini name ``python_files`` and the eval file pattern.
    """
    eval_file_pattern = "eval_*.py"
    config.addinivalue_line("python_files", eval_file_pattern)


def pytest_ignore_collect(collection_path, config) -> bool | None:
if not collection_path.is_file() or collection_path.suffix != ".py":
return None

is_eval_file = collection_path.name.startswith("eval_")
run_evals = config.getoption("--evals", default=False)

if run_evals and not is_eval_file:
return True
if not run_evals and is_eval_file:
return True
return None


def pytest_report_teststatus(
report: pytest.TestReport, config: pytest.Config
) -> tuple[str, str, str] | None:
Expand Down
Loading