Skip to content

Commit 12d6b73

Browse files
authored
ep local-test (#327)
* local test command * mount for ep logs * update * try to force linux/amd64 * revert * set home * try * store in utc * tests * fix bug * test fix
1 parent c99037e commit 12d6b73

File tree

5 files changed

+473
-4
lines changed

5 files changed

+473
-4
lines changed

eval_protocol/cli.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,37 @@ def parse_args(args=None):
427427
rft_parser.add_argument("--dry-run", action="store_true", help="Print planned REST calls without sending")
428428
rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
429429

430+
# Local test command
431+
local_test_parser = subparsers.add_parser(
432+
"local-test",
433+
help="Select an evaluation test and run it locally. If a Dockerfile exists, build and run via Docker; otherwise run on host.",
434+
)
435+
local_test_parser.add_argument(
436+
"--entry",
437+
help="Entrypoint to run (path::function or path). If not provided, a selector will be shown (unless --yes).",
438+
)
439+
local_test_parser.add_argument(
440+
"--ignore-docker",
441+
action="store_true",
442+
help="Ignore Dockerfile even if present; run pytest on host",
443+
)
444+
local_test_parser.add_argument(
445+
"--yes",
446+
"-y",
447+
action="store_true",
448+
help="Non-interactive: if multiple tests exist and no --entry, fails with guidance",
449+
)
450+
local_test_parser.add_argument(
451+
"--docker-build-extra",
452+
default="",
453+
help="Extra flags to pass to 'docker build' (quoted string, e.g. \"--no-cache --pull --progress=plain\")",
454+
)
455+
local_test_parser.add_argument(
456+
"--docker-run-extra",
457+
default="",
458+
help="Extra flags to pass to 'docker run' (quoted string, e.g. \"--env-file .env --memory=8g\")",
459+
)
460+
430461
# Run command (for Hydra-based evaluations)
431462
# This subparser intentionally defines no arguments itself.
432463
# All arguments after 'run' will be passed to Hydra by parse_known_args.
@@ -559,6 +590,10 @@ def _extract_flag_value(argv_list, flag_name):
559590
return create_rft_command(args)
560591
print("Error: missing subcommand for 'create'. Try: eval-protocol create rft")
561592
return 1
593+
elif args.command == "local-test":
594+
from .cli_commands.local_test import local_test_command
595+
596+
return local_test_command(args)
562597
elif args.command == "run":
563598
# For the 'run' command, Hydra takes over argument parsing.
564599

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
import argparse
2+
import os
3+
import subprocess
4+
import sys
5+
import shlex
6+
from typing import List
7+
8+
from .upload import _discover_tests, _prompt_select
9+
10+
11+
def _find_dockerfiles(root: str) -> List[str]:
    """Return the paths of all files named exactly ``Dockerfile`` under ``root``.

    Directories listed in ``skip_dirs`` and any directory whose name starts
    with a dot are pruned and never descended into (``root`` itself is always
    scanned). Results are returned in a deterministic order: directories are
    visited top-down in sorted name order.

    Args:
        root: Directory to search recursively.

    Returns:
        Paths built by joining each visited directory with ``"Dockerfile"``.
    """
    skip_dirs = {".venv", "venv", "node_modules", "dist", "build", "__pycache__", ".git", "vendor"}
    dockerfiles: List[str] = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk skips these subtrees; sort so the result
        # (and any error listing built from it) does not depend on the
        # filesystem's directory enumeration order.
        dirnames[:] = sorted(d for d in dirnames if d not in skip_dirs and not d.startswith("."))
        if "Dockerfile" in filenames:
            dockerfiles.append(os.path.join(dirpath, "Dockerfile"))
    return dockerfiles
20+
21+
22+
def _run_pytest_host(pytest_target: str) -> int:
    """Run pytest for ``pytest_target`` with the current interpreter.

    Invokes ``<sys.executable> -m pytest <pytest_target> -vs`` as a child
    process and returns that process's exit code.
    """
    print(f"Running locally: pytest {pytest_target} -vs")
    command = [sys.executable, "-m", "pytest", pytest_target, "-vs"]
    completed = subprocess.run(command)
    return completed.returncode
26+
27+
28+
def _build_docker_image(dockerfile_path: str, image_tag: str, build_extras: List[str] | None = None) -> bool:
29+
context_dir = os.path.dirname(dockerfile_path)
30+
print(f"Building Docker image '{image_tag}' from {dockerfile_path} ...")
31+
try:
32+
base_cmd = ["docker", "build"]
33+
if build_extras:
34+
base_cmd += build_extras
35+
base_cmd += ["-t", image_tag, "-f", dockerfile_path, context_dir]
36+
proc = subprocess.run(base_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
37+
print(proc.stdout)
38+
return proc.returncode == 0
39+
except FileNotFoundError:
40+
print("Error: docker not found in PATH. Install Docker or use --ignore-docker.")
41+
return False
42+
43+
44+
def _run_pytest_in_docker(
45+
project_root: str, image_tag: str, pytest_target: str, run_extras: List[str] | None = None
46+
) -> int:
47+
workdir = "/workspace"
48+
# Host HOME logs directory to map into container
49+
host_home = os.path.expanduser("~")
50+
host_logs_dir = os.path.join(host_home, ".eval_protocol")
51+
try:
52+
os.makedirs(host_logs_dir, exist_ok=True)
53+
except Exception:
54+
pass
55+
# Mount read-only is safer; but tests may write artifacts. Use read-write.
56+
cmd = [
57+
"docker",
58+
"run",
59+
"--rm",
60+
"-v",
61+
f"{project_root}:{workdir}",
62+
"-v",
63+
f"{host_logs_dir}:/container_home/.eval_protocol",
64+
"-e",
65+
"HOME=/container_home",
66+
"-e",
67+
"EVAL_PROTOCOL_DIR=/container_home/.eval_protocol",
68+
"-w",
69+
workdir,
70+
]
71+
# Try to match host user to avoid permission problems on mounted volume
72+
try:
73+
uid = os.getuid() # type: ignore[attr-defined]
74+
gid = os.getgid() # type: ignore[attr-defined]
75+
cmd += ["--user", f"{uid}:{gid}"]
76+
except Exception:
77+
pass
78+
if run_extras:
79+
cmd += run_extras
80+
cmd += [image_tag, "pytest", pytest_target, "-vs"]
81+
print("Running in Docker:", " ".join(cmd))
82+
try:
83+
proc = subprocess.run(cmd)
84+
return proc.returncode
85+
except FileNotFoundError:
86+
print("Error: docker not found in PATH. Install Docker or use --ignore-docker.")
87+
return 1
88+
89+
90+
def _to_project_relative(path: str, project_root: str) -> str:
    """Return ``path`` relative to ``project_root``; fall back to the absolute path."""
    abs_path = path if os.path.isabs(path) else os.path.abspath(os.path.join(project_root, path))
    try:
        return os.path.relpath(abs_path, project_root)
    except ValueError:
        # os.path.relpath raises ValueError when no relative path exists
        # (e.g. the two paths are on different drives on Windows).
        return abs_path


def _resolve_entry_target(entry: str, project_root: str) -> str:
    """Convert an ``--entry`` value (``path`` or ``path::function``) to a pytest target."""
    file_part, sep, func_part = entry.partition("::")
    rel = _to_project_relative(file_part, project_root)
    return f"{rel}::{func_part}" if sep else rel


def _select_test_target(project_root: str, non_interactive: bool) -> str | None:
    """Discover tests, let the user pick exactly one, and return its pytest target.

    Prints the reason and returns None when nothing can be selected.
    """
    tests = _discover_tests(project_root)
    if not tests:
        print("No evaluation tests found.\nHint: Ensure @evaluation_test is applied.")
        return None
    selected = _prompt_select(tests, non_interactive=non_interactive)
    if not selected:
        print("No tests selected.")
        return None
    if len(selected) != 1:
        print("Error: Please select exactly one evaluation test for 'local-test'.")
        return None
    # NOTE(review): only the file path of the chosen test is used, so pytest
    # runs everything in that file - confirm whether narrowing to the selected
    # test function is intended.
    return _to_project_relative(selected[0].file_path, project_root)


def _split_extra_flags(raw: str, flag_name: str) -> List[str] | None:
    """Split a quoted flag string with shlex; print an error and return None if malformed."""
    if not raw:
        return []
    try:
        return shlex.split(raw)
    except ValueError as exc:
        # shlex.split raises ValueError on e.g. unbalanced quotes.
        print(f"Error: could not parse {flag_name} value {raw!r}: {exc}")
        return None


def local_test_command(args: argparse.Namespace) -> int:
    """Run one evaluation test locally, in Docker when a Dockerfile is present.

    The pytest target comes from ``args.entry`` when given, otherwise from the
    test selector. With ``args.ignore_docker`` set, or when no Dockerfile is
    found under the current working directory, pytest runs on the host. With
    exactly one Dockerfile the image ``ep-evaluator:local`` is built and pytest
    runs inside it. More than one Dockerfile is an error.

    Args:
        args: Parsed CLI namespace. Reads ``entry``, ``yes``, ``ignore_docker``,
            ``docker_build_extra`` and ``docker_run_extra``; each is optional.

    Returns:
        The pytest / ``docker run`` exit code, or 1 on any setup failure.
    """
    project_root = os.getcwd()

    # Selection and pytest target resolution
    entry = getattr(args, "entry", None)
    if entry:
        pytest_target = _resolve_entry_target(entry, project_root)
    else:
        non_interactive = bool(getattr(args, "yes", False))
        pytest_target = _select_test_target(project_root, non_interactive) or ""
        if not pytest_target:
            return 1

    if not pytest_target:
        print("Error: Failed to resolve a pytest target to run.")
        return 1

    if bool(getattr(args, "ignore_docker", False)):
        return _run_pytest_host(pytest_target)

    dockerfiles = _find_dockerfiles(project_root)
    if len(dockerfiles) > 1:
        print("Error: Multiple Dockerfiles found. Only one Dockerfile is allowed for local-test.")
        for df in dockerfiles:
            print(f"  - {df}")
        print("Hint: use --ignore-docker to bypass Docker.")
        return 1
    if not dockerfiles:
        # No Dockerfile: run on host
        return _run_pytest_host(pytest_target)

    # Parse the extra flags before building so a malformed value fails fast.
    build_extras = _split_extra_flags(getattr(args, "docker_build_extra", "") or "", "--docker-build-extra")
    run_extras = _split_extra_flags(getattr(args, "docker_run_extra", "") or "", "--docker-run-extra")
    if build_extras is None or run_extras is None:
        return 1

    image_tag = "ep-evaluator:local"
    if not _build_docker_image(dockerfiles[0], image_tag, build_extras=build_extras):
        print("Docker build failed. See logs above.")
        return 1
    # _run_pytest_in_docker creates the host ~/.eval_protocol directory itself,
    # so it is not created a second time here.
    return _run_pytest_in_docker(project_root, image_tag, pytest_target, run_extras=run_extras)

eval_protocol/cli_commands/upload.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ def _prompt_select_interactive(tests: list[DiscoveredTest]) -> list[DiscoveredTe
437437
# Check if only one test - auto-select it
438438
if len(tests) == 1:
439439
print(f"\nFound 1 test: {_format_test_choice(tests[0], 1)}")
440-
confirm = questionary.confirm("Upload this test?", default=True, style=custom_style).ask()
440+
confirm = questionary.confirm("Select this test?", default=True, style=custom_style).ask()
441441
if confirm:
442442
return tests
443443
else:
@@ -500,7 +500,7 @@ def _prompt_select_fallback(tests: list[DiscoveredTest]) -> list[DiscoveredTest]
500500

501501
print("=" * 80)
502502
try:
503-
choice = input("Enter the number to upload: ").strip()
503+
choice = input("Enter the number to select: ").strip()
504504
except KeyboardInterrupt:
505505
print("\n\nUpload cancelled.")
506506
return []

eval_protocol/models.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
import logging
33
import importlib
4-
from datetime import datetime
4+
from datetime import datetime, timezone
55
from enum import Enum
66
from typing import Any, ClassVar, Dict, List, Literal, Optional, TypedDict, Union
77

@@ -825,7 +825,10 @@ class EvaluationRow(BaseModel):
825825
description="Metadata about the execution of the evaluation.",
826826
)
827827

828-
created_at: datetime = Field(default_factory=datetime.now, description="The timestamp when the row was created.")
828+
created_at: datetime = Field(
829+
default_factory=lambda: datetime.now(timezone.utc),
830+
description="The timestamp when the row was created (UTC).",
831+
)
829832

830833
eval_metadata: Optional[EvalMetadata] = Field(
831834
default=None, description="Metadata about the evaluation that was run."

0 commit comments

Comments
 (0)