alxwrd · alxwrd · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026 · Apr 13, 2026
diff --git a/README.md b/README.md
@@ -46,7 +46,7 @@ def eval_uppercase(case: Case):
 ```
 
 ```plain
-$ uv run pytest
+$ uv run pyeval
 
 ============================== test session starts ==============================
 platform darwin -- Python 3.13.1, pytest-9.0.2, pluggy-1.6.0
@@ -64,3 +64,21 @@ tests/evals/eval_example.py ●●
 ```shell
 uv add --dev pytest-pyeval
 ```
+
+## Running evals
+
+`pytest-pyeval` keeps evals separate from your regular test suite. Evals are
+excluded from a plain `pytest` run by default, since they are typically slower,
+hit live APIs, and run on a different cadence from unit tests.
+
+| Command | What runs |
+|---|---|
+| `pytest` | Regular tests only (`test_*.py`) |
+| `pytest --evals` | Eval tests only (`eval_*.py`) |
+| `pyeval` | Shorthand for `pytest --evals` |
+
+```shell
+pyeval                     # discover and run all evals in the project
+pyeval evals/              # run evals under a specific path
+pyeval evals/eval_foo.py   # run a single eval file
+```
diff --git a/pyproject.toml b/pyproject.toml
@@ -37,6 +37,9 @@ logfire = ["logfire"]
 Repository = "https://github.com/alxwrd/pytest-pyeval"
 Releases = "https://github.com/alxwrd/pytest-pyeval/releases"
 
+[project.scripts]
+pyeval = "pyeval.cli:main"
+
 [project.entry-points."pytest11"]
 pytest_pyeval = "pyeval.plugin"
 
@@ -61,7 +64,8 @@ format = ["_format", "_sort"]
 lint = "ruff check"
 check = "ty check"
 test = "pytest"
-all = ["format", "lint", "check", "test"]
+evals = "pyeval"
+all = ["format", "lint", "check", "test", "evals"]
 _check-format = "ruff format --check"
 _check-sort = "ruff check --select I"
-ci = ["_check-format", "_check-sort", "lint", "check", "test"]
+ci = ["_check-format", "_check-sort", "lint", "check", "test", "evals"]
diff --git a/src/pyeval/cli.py b/src/pyeval/cli.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+import sys
+
+import pytest
+
+
+def main() -> None:
+    """Console entry point for ``pyeval``: run pytest with ``--evals`` forced on.
+
+    Arguments given on the command line are passed through after ``--evals``,
+    and the process exits with whatever ``pytest.main`` returns.
+    """
+    forwarded = sys.argv[1:]
+    exit_code = pytest.main(["--evals", *forwarded])
+    sys.exit(exit_code)
diff --git a/src/pyeval/plugin.py b/src/pyeval/plugin.py
@@ -39,10 +39,33 @@ def _score_symbol(score: float) -> tuple[str, str]:
     )
 
 
+def pytest_addoption(parser) -> None:
+    """Register the ``--evals`` command-line flag.
+
+    The flag is a boolean switch that is off unless given explicitly.
+    """
+    help_text = (
+        "Run only eval tests (@dataset-decorated functions in eval_*.py files)."
+    )
+    parser.addoption("--evals", action="store_true", default=False, help=help_text)
+
+
 def pytest_configure(config) -> None:
+    """Append ``eval_*.py`` to the ``python_files`` ini value."""
     config.addinivalue_line("python_files", "eval_*.py")
 
 
+def pytest_ignore_collect(collection_path, config) -> bool | None:
+    """Keep eval files and regular Python files out of each other's runs.
+
+    Returns ``True`` (ignore the path) when a ``.py`` file's kind does not
+    match the run mode: an ``eval_``-prefixed file without ``--evals``, or any
+    other ``.py`` file with ``--evals``. Returns ``None`` in every other case
+    (non-files, non-``.py`` files, and files that match the mode).
+    """
+    # Only paths that are files with a ``.py`` suffix are classified.
+    if not (collection_path.is_file() and collection_path.suffix == ".py"):
+        return None
+
+    evals_requested = bool(config.getoption("--evals", default=False))
+    named_like_eval = collection_path.name.startswith("eval_")
+
+    # A mismatch between the requested mode and the file kind means "ignore".
+    return True if evals_requested != named_like_eval else None
+
+
 def pytest_report_teststatus(
     report: pytest.TestReport, config: pytest.Config
 ) -> tuple[str, str, str] | None: