eval-protocol
diff --git a/‎.vscode/settings.json‎
Lines changed: 3 additions & 0 deletions b/‎.vscode/settings.json‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎eval_protocol/pytest/evaluation_test.py‎
Lines changed: 4 additions & 0 deletions b/‎eval_protocol/pytest/evaluation_test.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎eval_protocol/utils/check_server_status.py‎
Lines changed: 77 additions & 0 deletions b/‎eval_protocol/utils/check_server_status.py‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎eval_protocol/utils/show_results_url.py‎
Lines changed: 82 additions & 0 deletions b/‎eval_protocol/utils/show_results_url.py‎
Lines changed: 82 additions & 0 deletions
diff --git a/‎pytest.ini‎
Lines changed: 0 additions & 2 deletions b/‎pytest.ini‎
Lines changed: 0 additions & 2 deletions
@@ -10,5 +10,8 @@
   "editor.formatOnSave": true,
   "[python]": {
     "editor.defaultFormatter": "charliermarsh.ruff"
+  },
+  "[typescript]": {
+    "editor.defaultFormatter": "esbenp.prettier-vscode"
   }
 }
@@ -59,6 +59,7 @@
     parse_ep_passed_threshold,
     rollout_processor_with_retry,
 )
+from eval_protocol.utils.show_results_url import show_results_url
 
 from ..common_utils import load_jsonl
 
@@ -555,6 +556,9 @@ async def execute_run_with_progress(run_idx: int, config):
                             experiment_duration_seconds,
                         )
 
+                    # Show URL for viewing results (after all postprocessing is complete)
+                    show_results_url(invocation_id)
+
                 except AssertionError:
                     _log_eval_error(
                         Status.eval_finished(),
 
@@ -0,0 +1,77 @@
+"""
+Utility functions for checking server status and generating UI URLs.
+"""
+
+import socket
+import urllib.parse
+from typing import List, Dict, Any
+
+
+def is_server_running(host: str = "localhost", port: int = 8000) -> bool:
+    """
+    Report whether something accepts TCP connections at ``host:port``.
+
+    A single IPv4 connection attempt is made with a one-second timeout.
+
+    Args:
+        host: Hostname or address to probe (default: "localhost")
+        port: TCP port to probe (default: 8000)
+
+    Returns:
+        True when the connection attempt succeeds; False when it fails or
+        any exception is raised while probing
+    """
+    target = (host, port)
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
+            probe.settimeout(1)
+            # connect_ex reports the outcome as an error code; 0 means connected.
+            status = probe.connect_ex(target)
+    except Exception:
+        # Best-effort probe: any failure is treated as "not running".
+        return False
+    return status == 0
+
+
+def generate_invocation_filter_url(invocation_id: str, base_url: str = "http://localhost:8000") -> str:
+    """
+    Generate a URL for viewing results filtered by invocation_id.
+
+    The filter configuration is serialized as JSON, percent-encoded, and passed
+    in the ``filterConfig`` query parameter of the ``/pivot`` page under base_url.
+
+    Args:
+        invocation_id: The invocation ID to filter results by
+        base_url: The base URL for the UI (default: "http://localhost:8000")
+
+    Returns:
+        The ``/pivot`` URL with the percent-encoded filter configuration
+    """
+    # Imported locally so the function carries its own dependency.
+    import json
+
+    filter_config = [
+        {
+            "logic": "AND",
+            "filters": [
+                {
+                    "field": "$.execution_metadata.invocation_id",
+                    "operator": "equals",
+                    "value": invocation_id,
+                    "type": "text",
+                }
+            ],
+        }
+    ]
+
+    # Serialize with json.dumps instead of str() plus quote swapping: the repr-based
+    # approach yields invalid JSON when invocation_id contains quotes or backslashes.
+    filter_config_json = json.dumps(filter_config)
+    encoded_filter = urllib.parse.quote(filter_config_json)
+
+    return f"{base_url}/pivot?filterConfig={encoded_filter}"
+
+
+def show_results_url(invocation_id: str) -> None:
+    """
+    Print where the results for one invocation can be viewed.
+
+    The same filtered URL is printed in both cases; only the lead-in differs.
+    When the local server is reachable the URL is presented directly, otherwise
+    the message first tells the user to start the local UI with "ep logs".
+
+    Args:
+        invocation_id: The invocation ID to filter results by
+    """
+    results_link = generate_invocation_filter_url(invocation_id)
+    if is_server_running():
+        message = f"View your evaluation results: {results_link}"
+    else:
+        message = f"Start the local UI with 'ep logs', then visit: {results_link}"
+    print(message)
@@ -0,0 +1,82 @@
+"""
+Utility functions for showing evaluation results URLs and checking server status.
+"""
+
+import socket
+import urllib.parse
+
+
+def is_server_running(host: str = "localhost", port: int = 8000) -> bool:
+    """
+    Probe ``host:port`` to see whether a TCP server is accepting connections.
+
+    One IPv4 connection attempt is made with a one-second timeout.
+
+    Args:
+        host: Hostname or address to probe (default: "localhost")
+        port: TCP port to probe (default: 8000)
+
+    Returns:
+        True if the connection attempt succeeds, False if it fails or any
+        exception is raised during the probe
+    """
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as conn:
+            conn.settimeout(1)
+            # A return code of 0 from connect_ex means the connection was made.
+            error_code = conn.connect_ex((host, port))
+    except Exception:
+        # Best-effort check: treat every failure as "no server".
+        reachable = False
+    else:
+        reachable = error_code == 0
+    return reachable
+
+
+def generate_invocation_filter_url(invocation_id: str, base_url: str = "http://localhost:8000") -> str:
+    """
+    Generate a URL for viewing results filtered by invocation_id.
+
+    The filter configuration is serialized as JSON, percent-encoded, and appended
+    to base_url as the ``filterConfig`` query parameter.
+
+    Args:
+        invocation_id: The invocation ID to filter results by
+        base_url: The page URL to attach the filter to (default: "http://localhost:8000")
+
+    Returns:
+        base_url followed by the percent-encoded filter configuration
+    """
+    # Imported locally so the function carries its own dependency.
+    import json
+
+    filter_config = [
+        {
+            "logic": "AND",
+            "filters": [
+                {
+                    "field": "$.execution_metadata.invocation_id",
+                    "operator": "==",
+                    "value": invocation_id,
+                    "type": "text",
+                }
+            ],
+        }
+    ]
+
+    # Serialize with json.dumps instead of str() plus quote swapping: the repr-based
+    # approach yields invalid JSON when invocation_id contains quotes or backslashes.
+    filter_config_json = json.dumps(filter_config)
+    encoded_filter = urllib.parse.quote(filter_config_json)
+
+    return f"{base_url}?filterConfig={encoded_filter}"
+
+
+def show_results_url(invocation_id: str) -> None:
+    """
+    Print the URLs where the results for one invocation can be viewed.
+
+    Two filtered links are always printed: the pivot page (aggregate scores) and
+    the table page (trajectories). Only the heading differs: when the local
+    server is not reachable, it tells the user to start the UI with "ep logs".
+
+    Args:
+        invocation_id: The invocation ID to filter results by
+    """
+    pivot_link = generate_invocation_filter_url(invocation_id, "http://localhost:8000/pivot")
+    table_link = generate_invocation_filter_url(invocation_id, "http://localhost:8000/table")
+
+    if is_server_running():
+        heading = "View your evaluation results:"
+    else:
+        heading = "Start the local UI with 'ep logs', then visit:"
+
+    print(heading)
+    print(f"  📊 Aggregate scores: {pivot_link}")
+    print(f"  📋 Trajectories: {table_link}")
@@ -5,8 +5,6 @@ asyncio_mode = auto
 asyncio_default_fixture_loop_scope = function
 testpaths = tests ./eval_protocol/quickstart
 python_files = test_*.py llm_judge_*.py
-plugins =
-    eval_protocol.pytest.plugin
 python_classes = Test*
 python_functions = test_*
 # Configure stdout/stderr capture for debugging
Original file line number	Diff line number	Diff line change
`@@ -10,5 +10,8 @@`
`10`	`10`	`"editor.formatOnSave": true,`
`11`	`11`	`"[python]": {`
`12`	`12`	`"editor.defaultFormatter": "charliermarsh.ruff"`
	`13`	`+ },`
	`14`	`+ "[typescript]": {`
	`15`	`+ "editor.defaultFormatter": "esbenp.prettier-vscode"`
`13`	`16`	`}`
`14`	`17`	`}`
Original file line number	Diff line number	Diff line change
`@@ -59,6 +59,7 @@`
`59`	`59`	`parse_ep_passed_threshold,`
`60`	`60`	`rollout_processor_with_retry,`
`61`	`61`	`)`
	`62`	`+from eval_protocol.utils.show_results_url import show_results_url`
`62`	`63`
`63`	`64`	`from ..common_utils import load_jsonl`
`64`	`65`
`@@ -555,6 +556,9 @@ async def execute_run_with_progress(run_idx: int, config):`
`555`	`556`	`experiment_duration_seconds,`
`556`	`557`	`)`
`557`	`558`
	`559`	`+ # Show URL for viewing results (after all postprocessing is complete)`
	`560`	`+ show_results_url(invocation_id)`
	`561`	`+`
`558`	`562`	`except AssertionError:`
`559`	`563`	`_log_eval_error(`
`560`	`564`	`Status.eval_finished(),`