eval-protocol · dphuang2 · Sep 20, 2025 · Sep 19, 2025 · Sep 19, 2025 · Sep 19, 2025
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -10,5 +10,8 @@
   "editor.formatOnSave": true,
   "[python]": {
     "editor.defaultFormatter": "charliermarsh.ruff"
+  },
+  "[typescript]": {
+    "editor.defaultFormatter": "esbenp.prettier-vscode"
   }
 }
diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
@@ -59,6 +59,7 @@
     parse_ep_passed_threshold,
     rollout_processor_with_retry,
 )
+from eval_protocol.utils.show_results_url import show_results_url
 
 from ..common_utils import load_jsonl
 
@@ -555,6 +556,9 @@ async def execute_run_with_progress(run_idx: int, config):
                             experiment_duration_seconds,
                         )
 
+                    # Show URL for viewing results (after all postprocessing is complete)
+                    show_results_url(invocation_id)
+
                 except AssertionError:
                     _log_eval_error(
                         Status.eval_finished(),

diff --git a/eval_protocol/utils/check_server_status.py b/eval_protocol/utils/check_server_status.py
@@ -0,0 +1,77 @@
+"""
+Utility functions for checking server status and generating UI URLs.
+"""
+
+import socket
+import urllib.parse
+from typing import List, Dict, Any
+
+
+def is_server_running(host: str = "localhost", port: int = 8000) -> bool:
+    """
+    Report whether something accepts TCP connections at host:port.
+
+    Args:
+            host: The host to probe (default: "localhost")
+            port: The port to probe (default: 8000)
+
+    Returns:
+            True if the connection attempt succeeds, False otherwise
+            (including when the probe itself raises)
+    """
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
+            probe.settimeout(1)  # one-second cap on the connection attempt
+            return probe.connect_ex((host, port)) == 0
+    except Exception:
+        return False
+
+
+def generate_invocation_filter_url(invocation_id: str, base_url: str = "http://localhost:8000") -> str:
+    """
+    Generate a URL for viewing results filtered by invocation_id.
+
+    Args:
+            invocation_id: The invocation ID to filter results by
+            base_url: The base URL for the UI (default: "http://localhost:8000")
+
+    Returns:
+            base_url followed by "/pivot?filterConfig=" and the URL-encoded JSON filter
+    """
+    import json  # function-scope import keeps this change self-contained
+
+    filter_config = [
+        {
+            "logic": "AND",
+            "filters": [
+                {
+                    "field": "$.execution_metadata.invocation_id",
+                    "operator": "equals",
+                    "value": invocation_id,
+                    "type": "text",
+                }
+            ],
+        }
+    ]
+
+    # json.dumps escapes quotes and backslashes that str()+replace turned into invalid JSON.
+    encoded_filter = urllib.parse.quote(json.dumps(filter_config, ensure_ascii=False))
+    return f"{base_url}/pivot?filterConfig={encoded_filter}"
+
+
+def show_results_url(invocation_id: str) -> None:
+    """
+    Print the filtered results URL for an invocation, with a start-up hint if needed.
+
+    The URL is the same in both cases; when is_server_running() is False the
+    message tells the user to start the local UI with "ep logs" first.
+
+    Args:
+            invocation_id: The invocation ID to filter results by
+    """
+    server_up = is_server_running()
+    url = generate_invocation_filter_url(invocation_id)
+    if server_up:
+        print(f"View your evaluation results: {url}")
+    else:
+        print(f"Start the local UI with 'ep logs', then visit: {url}")
diff --git a/eval_protocol/utils/show_results_url.py b/eval_protocol/utils/show_results_url.py
@@ -0,0 +1,82 @@
+"""
+Utility functions for showing evaluation results URLs and checking server status.
+"""
+
+import socket
+import urllib.parse
+
+
+def is_server_running(host: str = "localhost", port: int = 8000) -> bool:
+    """
+    Probe host:port with a short TCP connection attempt.
+
+    Args:
+            host: The host to check (default: "localhost")
+            port: The port to check (default: 8000)
+
+    Returns:
+            True if the connection succeeds, False otherwise
+    """
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
+            probe.settimeout(1)
+            # connect_ex reports failure as a nonzero errno instead of raising
+            return probe.connect_ex((host, port)) == 0
+    except Exception:
+        return False
+
+
+def generate_invocation_filter_url(invocation_id: str, base_url: str = "http://localhost:8000") -> str:
+    """
+    Generate a URL for viewing results filtered by invocation_id.
+
+    Args:
+            invocation_id: The invocation ID to filter results by
+            base_url: The page URL the query string is appended to
+
+    Returns:
+            base_url followed by "?filterConfig=" and the URL-encoded JSON filter
+    """
+    import json  # function-scope import keeps this change self-contained
+
+    filter_config = [
+        {
+            "logic": "AND",
+            "filters": [
+                {
+                    "field": "$.execution_metadata.invocation_id",
+                    "operator": "==",
+                    "value": invocation_id,
+                    "type": "text",
+                }
+            ],
+        }
+    ]
+
+    # json.dumps escapes quotes and backslashes that str()+replace turned into invalid JSON.
+    encoded_filter = urllib.parse.quote(json.dumps(filter_config, ensure_ascii=False))
+    return f"{base_url}?filterConfig={encoded_filter}"
+
+
+def show_results_url(invocation_id: str) -> None:
+    """
+    Print links to the local UI pages showing results for one invocation.
+
+    Two links are printed in either case: the /pivot page ("Aggregate scores")
+    and the /table page ("Trajectories"), both on http://localhost:8000. Only
+    the heading differs: when is_server_running() is False it asks the user
+    to start the UI with "ep logs" first.
+
+    Args:
+            invocation_id: The invocation ID to filter results by
+    """
+    server_up = is_server_running()
+    pivot_url = generate_invocation_filter_url(invocation_id, "http://localhost:8000/pivot")
+    table_url = generate_invocation_filter_url(invocation_id, "http://localhost:8000/table")
+    if server_up:
+        heading = "View your evaluation results:"
+    else:
+        heading = "Start the local UI with 'ep logs', then visit:"
+    print(heading)
+    print(f"  📊 Aggregate scores: {pivot_url}")
+    print(f"  📋 Trajectories: {table_url}")
diff --git a/pytest.ini b/pytest.ini
@@ -5,8 +5,6 @@ asyncio_mode = auto
 asyncio_default_fixture_loop_scope = function
 testpaths = tests ./eval_protocol/quickstart
 python_files = test_*.py llm_judge_*.py
-plugins =
-    eval_protocol.pytest.plugin
 python_classes = Test*
 python_functions = test_*
 # Configure stdout/stderr capture for debugging