diff --git a/eval_protocol/cli.py b/eval_protocol/cli.py
index 30ac1ad5..7fe78232 100644
--- a/eval_protocol/cli.py
+++ b/eval_protocol/cli.py
@@ -427,6 +427,37 @@ def parse_args(args=None):
     rft_parser.add_argument("--dry-run", action="store_true", help="Print planned REST calls without sending")
     rft_parser.add_argument("--force", action="store_true", help="Overwrite existing evaluator with the same ID")
 
+    # Local test command
+    local_test_parser = subparsers.add_parser(
+        "local-test",
+        help="Select an evaluation test and run it locally. If a Dockerfile exists, build and run via Docker; otherwise run on host.",
+    )
+    local_test_parser.add_argument(
+        "--entry",
+        help="Entrypoint to run (path::function or path). If not provided, a selector will be shown (unless --yes).",
+    )
+    local_test_parser.add_argument(
+        "--ignore-docker",
+        action="store_true",
+        help="Ignore Dockerfile even if present; run pytest on host",
+    )
+    local_test_parser.add_argument(
+        "--yes",
+        "-y",
+        action="store_true",
+        help="Non-interactive: if multiple tests exist and no --entry, fails with guidance",
+    )
+    local_test_parser.add_argument(
+        "--docker-build-extra",
+        default="",
+        help="Extra flags to pass to 'docker build' (quoted string, e.g. \"--no-cache --pull --progress=plain\")",
+    )
+    local_test_parser.add_argument(
+        "--docker-run-extra",
+        default="",
+        help="Extra flags to pass to 'docker run' (quoted string, e.g. \"--env-file .env --memory=8g\")",
+    )
+
     # Run command (for Hydra-based evaluations)
     # This subparser intentionally defines no arguments itself.
     # All arguments after 'run' will be passed to Hydra by parse_known_args.
@@ -559,6 +590,10 @@ def _extract_flag_value(argv_list, flag_name):
             return create_rft_command(args)
         print("Error: missing subcommand for 'create'. Try: eval-protocol create rft")
         return 1
+    elif args.command == "local-test":
+        from .cli_commands.local_test import local_test_command
+
+        return local_test_command(args)
     elif args.command == "run":
         # For the 'run' command, Hydra takes over argument parsing.
 
diff --git a/eval_protocol/cli_commands/local_test.py b/eval_protocol/cli_commands/local_test.py
new file mode 100644
index 00000000..49d34190
--- /dev/null
+++ b/eval_protocol/cli_commands/local_test.py
@@ -0,0 +1,175 @@
+import argparse
+import os
+import subprocess
+import sys
+import shlex
+from typing import List
+
+from .upload import _discover_tests, _prompt_select
+
+
+def _find_dockerfiles(root: str) -> List[str]:
+    """Return the sorted paths of files named ``Dockerfile`` under ``root``, skipping hidden/vendored dirs."""
+    skip_dirs = {".venv", "venv", "node_modules", "dist", "build", "__pycache__", ".git", "vendor"}
+    dockerfiles: List[str] = []
+    for dirpath, dirnames, filenames in os.walk(root):
+        # Prune in place so os.walk does not descend into skipped or dot-prefixed directories.
+        dirnames[:] = [d for d in dirnames if d not in skip_dirs and not d.startswith(".")]
+        dockerfiles += [os.path.join(dirpath, name) for name in filenames if name == "Dockerfile"]
+    return sorted(dockerfiles)  # os.walk order is filesystem-dependent; sort for stable error listings
+
+
+def _run_pytest_host(pytest_target: str) -> int:
+    """Run ``pytest <pytest_target> -vs`` under the current interpreter and return pytest's exit code."""
+    print(f"Running locally: pytest {pytest_target} -vs", flush=True)  # keep banner ahead of child output
+    return subprocess.run([sys.executable, "-m", "pytest", pytest_target, "-vs"]).returncode
+
+
+def _build_docker_image(dockerfile_path: str, image_tag: str, build_extras: List[str] | None = None) -> bool:
+    """Run ``docker build`` for ``dockerfile_path``, tagging the image ``image_tag``; return True on success.
+
+    The Dockerfile's directory is the build context and ``build_extras`` are inserted before ``-t``. Returns
+    False when the build exits non-zero or the ``docker`` executable is not on PATH.
+    """
+    context_dir = os.path.dirname(dockerfile_path) or "."  # a bare "Dockerfile" has an empty dirname
+    print(f"Building Docker image '{image_tag}' from {dockerfile_path} ...", flush=True)
+    cmd = ["docker", "build", *(build_extras or []), "-t", image_tag, "-f", dockerfile_path, context_dir]
+    try:
+        return subprocess.run(cmd).returncode == 0  # output is inherited so build progress streams live
+    except FileNotFoundError:
+        print("Error: docker not found in PATH. Install Docker or use --ignore-docker.")
+        return False
+
+
+def _run_pytest_in_docker(
+    project_root: str, image_tag: str, pytest_target: str, run_extras: List[str] | None = None
+) -> int:
+    """Run ``pytest <pytest_target> -vs`` in a container of ``image_tag`` and return its exit code.
+
+    ``project_root`` is bind-mounted read-write at /workspace (tests may write artifacts), which is also the
+    working directory. The host's ~/.eval_protocol is mounted at /container_home/.eval_protocol, with HOME
+    and EVAL_PROTOCOL_DIR set to match. ``run_extras`` are extra ``docker run`` flags placed after the
+    built-in ones and before the image name.
+
+    Returns 1 when the ``docker`` executable is not on PATH.
+    """
+    workdir = "/workspace"
+    host_logs_dir = os.path.join(os.path.expanduser("~"), ".eval_protocol")
+    try:
+        os.makedirs(host_logs_dir, exist_ok=True)
+    except OSError as exc:
+        # Best effort: keep going, but report why the logs mount may not behave as expected.
+        print(f"Warning: could not create {host_logs_dir}: {exc}")
+
+    cmd = ["docker", "run", "--rm"]
+    cmd += ["-v", f"{project_root}:{workdir}", "-v", f"{host_logs_dir}:/container_home/.eval_protocol"]
+    cmd += ["-e", "HOME=/container_home", "-e", "EVAL_PROTOCOL_DIR=/container_home/.eval_protocol", "-w", workdir]
+    # Match the host user to avoid permission problems on the mounted volumes. os.getuid/os.getgid do not
+    # exist on Windows, where the image's default user is kept.
+    if hasattr(os, "getuid") and hasattr(os, "getgid"):
+        uid = os.getuid()  # type: ignore[attr-defined]
+        gid = os.getgid()  # type: ignore[attr-defined]
+        cmd += ["--user", f"{uid}:{gid}"]
+    cmd += run_extras or []
+
+    # The container paths used above are POSIX-style, so translate host separators in the file part only;
+    # the "::" node id is passed through untouched.
+    file_part, sep, node_id = pytest_target.partition("::")
+    cmd += [image_tag, "pytest", file_part.replace(os.sep, "/") + sep + node_id, "-vs"]
+
+    # shlex.join quotes arguments containing spaces so the echoed command can be copy-pasted; flush so the
+    # banner precedes the container's output when stdout is not a TTY.
+    print("Running in Docker:", shlex.join(cmd), flush=True)
+    try:
+        return subprocess.run(cmd).returncode
+    except FileNotFoundError:
+        print("Error: docker not found in PATH. Install Docker or use --ignore-docker.")
+        return 1
+
+
+def _project_relative(path: str, project_root: str) -> str:
+    """Return ``path`` relative to ``project_root``, or ``path`` unchanged when no relative form exists."""
+    try:
+        return os.path.relpath(path, project_root)
+    except ValueError:
+        # os.path.relpath raises ValueError on Windows when the two paths are on different drives.
+        return path
+
+
+def _entry_to_pytest_target(entry: str, project_root: str) -> str:
+    """Turn ``path`` or ``path::function`` into a pytest target whose file part is project-relative."""
+    file_part, sep, node_id = entry.partition("::")
+    if os.path.isabs(file_part):
+        file_path = file_part
+    else:
+        file_path = os.path.abspath(os.path.join(project_root, file_part))
+    # ``sep`` and ``node_id`` are both empty when the entry has no "::", leaving just the path.
+    return _project_relative(file_path, project_root) + sep + node_id
+
+
+def local_test_command(args: argparse.Namespace) -> int:
+    """Run one evaluation test locally: in Docker when the project has a Dockerfile, otherwise on the host.
+
+    Args:
+        args: Parsed CLI namespace. Reads ``entry``, ``yes``, ``ignore_docker``, ``docker_build_extra`` and
+            ``docker_run_extra``; each one is optional and falls back to its unset/empty default.
+
+    Returns:
+        The pytest exit code, or 1 when no single test is selected, the target cannot be resolved, the
+        extra Docker flags cannot be parsed, several Dockerfiles are found, or the Docker build fails.
+    """
+    project_root = os.getcwd()
+
+    # Resolve the pytest target from --entry, or from discovery plus the selector.
+    entry = getattr(args, "entry", None)
+    if entry:
+        pytest_target = _entry_to_pytest_target(entry, project_root)
+    else:
+        tests = _discover_tests(project_root)
+        if not tests:
+            print("No evaluation tests found.\nHint: Ensure @evaluation_test is applied.")
+            return 1
+        selected = _prompt_select(tests, non_interactive=bool(getattr(args, "yes", False)))
+        if not selected:
+            print("No tests selected.")
+            return 1
+        if len(selected) != 1:
+            print("Error: Please select exactly one evaluation test for 'local-test'.")
+            return 1
+        # NOTE(review): only the file is targeted, so every test in the chosen file runs — confirm intended.
+        pytest_target = _project_relative(os.path.abspath(selected[0].file_path), project_root)
+
+    # Check once, before any Docker work, so an unusable target never costs an image build.
+    if not pytest_target:
+        print("Error: Failed to resolve a pytest target to run.")
+        return 1
+
+    try:
+        build_extras = shlex.split(getattr(args, "docker_build_extra", "") or "")
+        run_extras = shlex.split(getattr(args, "docker_run_extra", "") or "")
+    except ValueError as exc:
+        # shlex raises ValueError on unbalanced quotes; report it instead of crashing with a traceback.
+        print(f"Error: could not parse extra Docker flags: {exc}")
+        return 1
+
+    if getattr(args, "ignore_docker", False):
+        return _run_pytest_host(pytest_target)
+
+    dockerfiles = _find_dockerfiles(project_root)
+    if not dockerfiles:
+        # No Dockerfile: run on host
+        return _run_pytest_host(pytest_target)
+    if len(dockerfiles) > 1:
+        print("Error: Multiple Dockerfiles found. Only one Dockerfile is allowed for local-test.")
+        for df in dockerfiles:
+            print(f" - {df}")
+        print("Hint: use --ignore-docker to bypass Docker.")
+        return 1
+
+    # The host's ~/.eval_protocol directory is created by _run_pytest_in_docker right before the mount,
+    # so it is not created again here.
+    image_tag = "ep-evaluator:local"
+    if not _build_docker_image(dockerfiles[0], image_tag, build_extras=build_extras):
+        print("Docker build failed. See logs above.")
+        return 1
+    return _run_pytest_in_docker(project_root, image_tag, pytest_target, run_extras=run_extras)
diff --git a/eval_protocol/cli_commands/upload.py b/eval_protocol/cli_commands/upload.py
index 51283b23..8c6e7baf 100644
--- a/eval_protocol/cli_commands/upload.py
+++ b/eval_protocol/cli_commands/upload.py
@@ -437,7 +437,7 @@ def _prompt_select_interactive(tests: list[DiscoveredTest]) -> list[DiscoveredTe
         # Check if only one test - auto-select it
         if len(tests) == 1:
             print(f"\nFound 1 test: {_format_test_choice(tests[0], 1)}")
-            confirm = questionary.confirm("Upload this test?", default=True, style=custom_style).ask()
+            confirm = questionary.confirm("Select this test?", default=True, style=custom_style).ask()
             if confirm:
                 return tests
             else:
@@ -500,7 +500,7 @@ def _prompt_select_fallback(tests: list[DiscoveredTest]) -> list[DiscoveredTest]
 
     print("=" * 80)
     try:
-        choice = input("Enter the number to upload: ").strip()
+        choice = input("Enter the number to select: ").strip()
     except KeyboardInterrupt:
         print("\n\nUpload cancelled.")
         return []
diff --git a/eval_protocol/models.py b/eval_protocol/models.py
index 6ec94210..67d287ba 100644
--- a/eval_protocol/models.py
+++ b/eval_protocol/models.py
@@ -1,7 +1,7 @@
 import os
 import logging
 import importlib
-from datetime import datetime
+from datetime import datetime, timezone
 from enum import Enum
 from typing import Any, ClassVar, Dict, List, Literal, Optional, TypedDict, Union
 
@@ -825,7 +825,10 @@ class EvaluationRow(BaseModel):
         description="Metadata about the execution of the evaluation.",
     )
 
-    created_at: datetime = Field(default_factory=datetime.now, description="The timestamp when the row was created.")
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        description="The timestamp when the row was created (UTC).",
+    )
 
     eval_metadata: Optional[EvalMetadata] = Field(
         default=None, description="Metadata about the evaluation that was run."
diff --git a/tests/test_cli_local_test.py b/tests/test_cli_local_test.py
new file mode 100644
index 00000000..6ab0b14e
--- /dev/null
+++ b/tests/test_cli_local_test.py
@@ -0,0 +1,256 @@
+import os
+from types import SimpleNamespace
+
+import pytest
+
+
+def test_local_test_runs_host_pytest_with_entry(tmp_path, monkeypatch):
+    """An absolute ``--entry`` file path reaches the host runner as a project-relative target.
+
+    No Dockerfile is reported, so the command is expected to run pytest on the host.
+    """
+    project = tmp_path / "proj"
+    test_file = project / "metric" / "test_one.py"
+    test_file.parent.mkdir(parents=True)  # creates the project directory as well
+    test_file.write_text("def test_dummy():\n    assert True\n", encoding="utf-8")
+    monkeypatch.chdir(project)
+
+    from eval_protocol.cli_commands import local_test as lt
+
+    seen_targets = []
+
+    def _record_host_run(target: str) -> int:
+        seen_targets.append(target)
+        return 0
+
+    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: [])
+    monkeypatch.setattr(lt, "_run_pytest_host", _record_host_run)
+
+    args = SimpleNamespace(entry=str(test_file), ignore_docker=False, yes=True)
+    exit_code = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
+
+    assert exit_code == 0
+    # The most recent host run must have received the path relative to the project root.
+    expected_target = os.path.relpath(str(test_file), str(project))
+    assert seen_targets[-1:] == [expected_target]
+
+
+def test_local_test_ignores_docker_when_flag_set(tmp_path, monkeypatch):
+    """With ``ignore_docker`` set, pytest runs on the host even though a Dockerfile is reported."""
+    project = tmp_path / "proj"
+    test_file = project / "metric" / "test_two.py"
+    test_file.parent.mkdir(parents=True)  # creates the project directory as well
+    test_file.write_text("def test_dummy():\n    assert True\n", encoding="utf-8")
+    monkeypatch.chdir(project)
+
+    from eval_protocol.cli_commands import local_test as lt
+
+    host_calls = []
+
+    def _record_host_run(target: str) -> int:
+        host_calls.append(target)
+        return 0
+
+    # A Dockerfile is reported on purpose: the flag has to win over it.
+    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: [str(project / "Dockerfile")])
+    monkeypatch.setattr(lt, "_run_pytest_host", _record_host_run)
+
+    args = SimpleNamespace(entry=str(test_file), ignore_docker=True, yes=True)
+    exit_code = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
+
+    assert exit_code == 0
+    # The host runner must have been invoked.
+    assert len(host_calls) > 0
+
+
+def test_local_test_errors_on_multiple_dockerfiles(tmp_path, monkeypatch):
+    """More than one discovered Dockerfile makes the command return 1."""
+    project = tmp_path / "proj"
+    test_file = project / "metric" / "test_three.py"
+    test_file.parent.mkdir(parents=True)  # creates the project directory as well
+    test_file.write_text("def test_dummy():\n    assert True\n", encoding="utf-8")
+    monkeypatch.chdir(project)
+
+    from eval_protocol.cli_commands import local_test as lt
+
+    # Report two Dockerfiles; the command is expected to refuse to pick one.
+    found = [str(project / "Dockerfile"), str(project / "another" / "Dockerfile")]
+    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: found)
+
+    args = SimpleNamespace(entry=str(test_file), ignore_docker=False, yes=True)
+    exit_code = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
+
+    assert exit_code == 1
+
+
+def test_local_test_builds_and_runs_in_docker(tmp_path, monkeypatch):
+    """With one Dockerfile, the image is built and the project-relative target is run in Docker."""
+    project = tmp_path / "proj"
+    test_file = project / "metric" / "test_four.py"
+    test_file.parent.mkdir(parents=True)  # creates the project directory as well
+    test_file.write_text("def test_dummy():\n    assert True\n", encoding="utf-8")
+    monkeypatch.chdir(project)
+    # The Docker code path may create ~/.eval_protocol; redirect the home directory (HOME on POSIX,
+    # USERPROFILE on Windows) so the real one is never touched.
+    monkeypatch.setenv("HOME", str(tmp_path))
+    monkeypatch.setenv("USERPROFILE", str(tmp_path))
+
+    from eval_protocol.cli_commands import local_test as lt
+
+    captured = {}
+
+    def _fake_run_docker(root: str, image_tag: str, pytest_target: str, run_extras=None) -> int:
+        captured.update(target=pytest_target, image=image_tag)
+        return 0
+
+    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: [str(project / "Dockerfile")])
+    monkeypatch.setattr(lt, "_build_docker_image", lambda dockerfile, tag, build_extras=None: True)
+    monkeypatch.setattr(lt, "_run_pytest_in_docker", _fake_run_docker)
+
+    args = SimpleNamespace(entry=str(test_file), ignore_docker=False, yes=True)
+    rc = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
+    assert rc == 0
+    assert captured == {"target": os.path.relpath(str(test_file), str(project)), "image": "ep-evaluator:local"}
+
+
+def test_local_test_selector_single_test(tmp_path, monkeypatch):
+    """Without ``--entry``, the single discovered test is selected and run on the host."""
+    project = tmp_path / "proj"
+    test_file = project / "metric" / "test_sel.py"
+    test_file.parent.mkdir(parents=True)  # creates the project directory as well
+    test_file.write_text("def test_dummy():\n    assert True\n", encoding="utf-8")
+    monkeypatch.chdir(project)
+
+    from eval_protocol.cli_commands import local_test as lt
+
+    host_calls = []
+
+    def _record_host_run(target: str) -> int:
+        host_calls.append(target)
+        return 0
+
+    # No entry is given, so stub discovery and the selector to hand back exactly one test.
+    discovered = SimpleNamespace(qualname="metric.test_sel", file_path=str(test_file))
+    monkeypatch.setattr(lt, "_discover_tests", lambda root: [discovered])
+    monkeypatch.setattr(lt, "_prompt_select", lambda tests, non_interactive=False: tests[:1])
+    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: [])
+    monkeypatch.setattr(lt, "_run_pytest_host", _record_host_run)
+
+    args = SimpleNamespace(entry=None, ignore_docker=False, yes=True)
+    exit_code = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
+
+    assert exit_code == 0
+    # The host runner must have been invoked.
+    assert len(host_calls) > 0
+
+
+def test_local_test_passes_docker_build_extra(tmp_path, monkeypatch):
+    """``docker_build_extra`` is split shell-style and handed to the image build as a list of flags."""
+    project = tmp_path / "proj"
+    test_file = project / "metric" / "test_build_extra.py"
+    test_file.parent.mkdir(parents=True)  # creates the project directory as well
+    test_file.write_text("def test_dummy():\n    assert True\n", encoding="utf-8")
+    monkeypatch.chdir(project)
+    # The Docker code path may create ~/.eval_protocol; redirect the home directory (HOME on POSIX,
+    # USERPROFILE on Windows) so the real one is never touched.
+    monkeypatch.setenv("HOME", str(tmp_path))
+    monkeypatch.setenv("USERPROFILE", str(tmp_path))
+
+    from eval_protocol.cli_commands import local_test as lt
+
+    captured = {"extras": None}
+
+    def _fake_build(dockerfile, tag, build_extras=None):
+        captured["extras"] = build_extras
+        return True
+
+    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: [str(project / "Dockerfile")])
+    monkeypatch.setattr(lt, "_build_docker_image", _fake_build)
+    monkeypatch.setattr(lt, "_run_pytest_in_docker", lambda root, image_tag, pytest_target, run_extras=None: 0)
+
+    # Extras string with multiple flags and equals-arg
+    args = SimpleNamespace(
+        entry=str(test_file),
+        ignore_docker=False,
+        yes=True,
+        docker_build_extra="--no-cache --pull --progress=plain --build-arg KEY=VAL",
+        docker_run_extra="",
+    )
+
+    rc = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
+    assert rc == 0
+    # Expect split list preserving tokens order
+    assert captured["extras"] == ["--no-cache", "--pull", "--progress=plain", "--build-arg", "KEY=VAL"]
+
+
+def test_local_test_passes_docker_run_extra(tmp_path, monkeypatch):
+    """``docker_run_extra`` is split shell-style and handed to the container run as a list of flags."""
+    project = tmp_path / "proj"
+    test_file = project / "metric" / "test_run_extra.py"
+    test_file.parent.mkdir(parents=True)  # creates the project directory as well
+    test_file.write_text("def test_dummy():\n    assert True\n", encoding="utf-8")
+    monkeypatch.chdir(project)
+    # The Docker code path may create ~/.eval_protocol; redirect the home directory (HOME on POSIX,
+    # USERPROFILE on Windows) so the real one is never touched.
+    monkeypatch.setenv("HOME", str(tmp_path))
+    monkeypatch.setenv("USERPROFILE", str(tmp_path))
+
+    from eval_protocol.cli_commands import local_test as lt
+
+    captured = {"extras": None}
+
+    def _fake_run_docker(root: str, image_tag: str, pytest_target: str, run_extras=None) -> int:
+        captured["extras"] = run_extras
+        return 0
+
+    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: [str(project / "Dockerfile")])
+    monkeypatch.setattr(lt, "_build_docker_image", lambda dockerfile, tag, build_extras=None: True)
+    monkeypatch.setattr(lt, "_run_pytest_in_docker", _fake_run_docker)
+
+    run_extra = "--env-file .env --memory=8g --cpus=2 --add-host=host.docker.internal:host-gateway"
+    args = SimpleNamespace(
+        entry=str(test_file), ignore_docker=False, yes=True, docker_build_extra="", docker_run_extra=run_extra
+    )
+    rc = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
+    assert rc == 0
+    # The extras string is split shell-style, preserving token order.
+    assert captured["extras"] == [
+        "--env-file",
+        ".env",
+        "--memory=8g",
+        "--cpus=2",
+        "--add-host=host.docker.internal:host-gateway",
+    ]
+
+
+def test_local_test_normalizes_entry_with_selector(tmp_path, monkeypatch):
+    """An absolute ``path::function`` entry is normalized before it reaches the host runner.
+
+    The file part becomes project-relative and the ``::test_dummy`` selector is preserved.
+    """
+    project = tmp_path / "proj"
+    test_file = project / "metric" / "test_sel_abs.py"
+    test_file.parent.mkdir(parents=True)  # creates the project directory as well
+    test_file.write_text("def test_dummy():\n    assert True\n", encoding="utf-8")
+    monkeypatch.chdir(project)
+
+    from eval_protocol.cli_commands import local_test as lt
+
+    seen_targets = []
+
+    def _record_host_run(target: str) -> int:
+        seen_targets.append(target)
+        return 0
+
+    # No Dockerfile is reported, so the target goes to the host runner.
+    monkeypatch.setattr(lt, "_find_dockerfiles", lambda root: [])
+    monkeypatch.setattr(lt, "_run_pytest_host", _record_host_run)
+
+    abs_entry = f"{test_file}::test_dummy"
+    args = SimpleNamespace(entry=abs_entry, ignore_docker=False, yes=True)
+    exit_code = lt.local_test_command(args)  # pyright: ignore[reportArgumentType]
+
+    assert exit_code == 0
+    # Only the file part is rewritten relative to the project; the selector is kept verbatim.
+    rel = os.path.relpath(str(test_file), str(project))
+    assert seen_targets[-1:] == [f"{rel}::test_dummy"]