diff --git a/README.md b/README.md index f0c9eb8..f555be7 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ def eval_uppercase(case: Case): ``` ```plain -$ uv run pytest +$ uv run pyeval ============================== test session starts ============================== platform darwin -- Python 3.13.1, pytest-9.0.2, pluggy-1.6.0 @@ -64,3 +64,21 @@ tests/evals/eval_example.py ●● ```shell uv add --dev pytest-pyeval ``` + +## Running evals + +`pytest-pyeval` keeps evals separate from your regular test suite. Evals are +excluded from `pytest` by default, since they are typically slower, hit live +APIs, and run on a different cadence to unit tests. + +| Command | What runs | +|---|---| +| `pytest` | Regular tests only (`test_*.py`) | +| `pytest --evals` | Eval tests only (`eval_*.py`) | +| `pyeval` | Shorthand for `pytest --evals` | + +```shell +pyeval # discover and run all evals in the project +pyeval evals/ # run evals under a specific path +pyeval evals/eval_foo.py # run a single eval file +``` diff --git a/pyproject.toml b/pyproject.toml index d4c43bd..de5e824 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,9 @@ logfire = ["logfire"] Repository = "https://github.com/alxwrd/pytest-pyeval" Releases = "https://github.com/alxwrd/pytest-pyeval/releases" +[project.scripts] +pyeval = "pyeval.cli:main" + [project.entry-points."pytest11"] pytest_pyeval = "pyeval.plugin" @@ -61,7 +64,8 @@ format = ["_format", "_sort"] lint = "ruff check" check = "ty check" test = "pytest" -all = ["format", "lint", "check", "test"] +evals = "pyeval" +all = ["format", "lint", "check", "test", "evals"] _check-format = "ruff format --check" _check-sort = "ruff check --select I" -ci = ["_check-format", "_check-sort", "lint", "check", "test"] +ci = ["_check-format", "_check-sort", "lint", "check", "test", "evals"] diff --git a/src/pyeval/cli.py b/src/pyeval/cli.py new file mode 100644 index 0000000..b35a3ae --- /dev/null +++ b/src/pyeval/cli.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +import sys + +import pytest + + +def main() -> None: + sys.exit(pytest.main(["--evals"] + sys.argv[1:])) diff --git a/src/pyeval/plugin.py b/src/pyeval/plugin.py index 784f138..ac0399a 100644 --- a/src/pyeval/plugin.py +++ b/src/pyeval/plugin.py @@ -39,10 +39,33 @@ def _score_symbol(score: float) -> tuple[str, str]: ) +def pytest_addoption(parser) -> None: + parser.addoption( + "--evals", + action="store_true", + default=False, + help="Run only eval tests (@dataset-decorated functions in eval_*.py files).", + ) + + def pytest_configure(config) -> None: config.addinivalue_line("python_files", "eval_*.py") +def pytest_ignore_collect(collection_path, config) -> bool | None: + if not collection_path.is_file() or collection_path.suffix != ".py": + return None + + is_eval_file = collection_path.name.startswith("eval_") + run_evals = config.getoption("--evals", default=False) + + if run_evals and not is_eval_file: + return True + if not run_evals and is_eval_file: + return True + return None + + def pytest_report_teststatus( report: pytest.TestReport, config: pytest.Config ) -> tuple[str, str, str] | None: