Skip to content

Commit f1cbc97

Browse files
author
Dylan Huang
committed
Add show_results_url utility and integrate into evaluation test
- Imported show_results_url from utils to display the results URL after postprocessing in evaluation_test. - Updated __init__.py to include show_results_url and related functions in the module exports.
1 parent 096bb91 commit f1cbc97

File tree

5 files changed

+412
-1
lines changed

5 files changed

+412
-1
lines changed

eval_protocol/pytest/evaluation_test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
parse_ep_passed_threshold,
6060
rollout_processor_with_retry,
6161
)
62+
from eval_protocol.utils.show_results_url import show_results_url
6263

6364
from ..common_utils import load_jsonl
6465

@@ -555,6 +556,9 @@ async def execute_run_with_progress(run_idx: int, config):
555556
experiment_duration_seconds,
556557
)
557558

559+
# Show URL for viewing results (after all postprocessing is complete)
560+
show_results_url(invocation_id)
561+
558562
except AssertionError:
559563
_log_eval_error(
560564
Status.eval_finished(),

eval_protocol/utils/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@
99

1010
# Export LogsServer for easier access
1111
from .logs_server import LogsServer
12+
from .show_results_url import show_results_url, is_server_running, generate_invocation_filter_url
1213

13-
__all__ = ["LogsServer"]
14+
__all__ = ["LogsServer", "show_results_url", "is_server_running", "generate_invocation_filter_url"]
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""
2+
Utility functions for checking server status and generating UI URLs.
3+
"""
4+
5+
import socket
6+
import urllib.parse
7+
from typing import List, Dict, Any
8+
9+
10+
def is_server_running(host: str = "localhost", port: int = 8000) -> bool:
    """
    Probe a TCP endpoint and report whether something accepts connections.

    Args:
        host: Host name or address to probe (default: "localhost")
        port: TCP port to probe (default: 8000)

    Returns:
        True when an IPv4 TCP connection attempt succeeds, False when it is
        refused, times out after one second, or raises any exception.
    """
    endpoint = (host, port)
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            # Keep the probe short so callers are not stalled by a dead host.
            probe.settimeout(1)
            # connect_ex reports failure as an errno value instead of raising.
            return probe.connect_ex(endpoint) == 0
    except Exception:
        # Best-effort check: any failure is treated as "not running".
        return False
28+
29+
30+
def generate_invocation_filter_url(invocation_id: str, base_url: str = "http://localhost:8000") -> str:
    """
    Generate a URL for viewing results filtered by invocation_id.

    The filter configuration is serialized as JSON, percent-encoded, and
    passed in the ``filterConfig`` query parameter of the ``/pivot`` page.

    Args:
        invocation_id: The invocation ID to filter results by
        base_url: The base URL for the UI (default: "http://localhost:8000").
            A trailing "/" is ignored so the path never contains "//pivot".

    Returns:
        URL whose ``filterConfig`` query parameter holds the encoded filter
    """
    import json

    filter_config = [
        {
            "logic": "AND",
            "filters": [
                {
                    "field": "$.execution_metadata.invocation_id",
                    "operator": "equals",
                    "value": invocation_id,
                    "type": "text",
                }
            ],
        }
    ]

    # Serialize with json.dumps rather than rewriting repr() quotes: the repr
    # approach emits invalid JSON when the ID contains ' or " characters.
    # ensure_ascii=False keeps non-ASCII IDs as UTF-8 before percent-encoding.
    filter_config_json = json.dumps(filter_config, ensure_ascii=False)
    encoded_filter = urllib.parse.quote(filter_config_json)

    return f"{base_url.rstrip('/')}/pivot?filterConfig={encoded_filter}"
60+
61+
62+
def show_results_url(invocation_id: str) -> None:
    """
    Print a link to the evaluation results for one invocation.

    The link is the same in both cases; only the lead-in text differs. When
    is_server_running() reports a server, the link is presented as ready to
    open. Otherwise the message first tells the user to start the local UI
    with "ep logs".

    Args:
        invocation_id: The invocation ID to filter results by
    """
    server_up = is_server_running()
    results_link = generate_invocation_filter_url(invocation_id)
    message = (
        f"View your evaluation results: {results_link}"
        if server_up
        else f"Start the local UI with 'ep logs', then visit: {results_link}"
    )
    print(message)
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
"""
2+
Utility functions for showing evaluation results URLs and checking server status.
3+
"""
4+
5+
import socket
6+
import urllib.parse
7+
from typing import List, Dict, Any
8+
9+
10+
def is_server_running(host: str = "localhost", port: int = 8000) -> bool:
    """
    Probe a TCP endpoint and report whether something accepts connections.

    Args:
        host: Host name or address to probe (default: "localhost")
        port: TCP port to probe (default: 8000)

    Returns:
        True when an IPv4 TCP connection attempt succeeds, False when it is
        refused, times out after one second, or raises any exception.
    """
    endpoint = (host, port)
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            # Keep the probe short so callers are not stalled by a dead host.
            probe.settimeout(1)
            # connect_ex reports failure as an errno value instead of raising.
            return probe.connect_ex(endpoint) == 0
    except Exception:
        # Best-effort check: any failure is treated as "not running".
        return False
28+
29+
30+
def generate_invocation_filter_url(invocation_id: str, base_url: str = "http://localhost:8000") -> str:
    """
    Generate a URL for viewing results filtered by invocation_id.

    The filter configuration is serialized as JSON, percent-encoded, and
    appended to ``base_url`` as the ``filterConfig`` query parameter.

    Args:
        invocation_id: The invocation ID to filter results by
        base_url: The page URL to attach the filter to
            (default: "http://localhost:8000")

    Returns:
        URL whose ``filterConfig`` query parameter holds the encoded filter
    """
    import json

    filter_config = [
        {
            "logic": "AND",
            "filters": [
                {
                    "field": "$.execution_metadata.invocation_id",
                    "operator": "==",
                    "value": invocation_id,
                    "type": "text",
                }
            ],
        }
    ]

    # Serialize with json.dumps rather than rewriting repr() quotes: the repr
    # approach emits invalid JSON when the ID contains ' or " characters.
    # ensure_ascii=False keeps non-ASCII IDs as UTF-8 before percent-encoding.
    filter_config_json = json.dumps(filter_config, ensure_ascii=False)
    encoded_filter = urllib.parse.quote(filter_config_json)

    return f"{base_url}?filterConfig={encoded_filter}"
60+
61+
62+
def show_results_url(invocation_id: str) -> None:
    """
    Print links to the evaluation results for one invocation.

    Two links are always printed: the pivot page (aggregate scores) and the
    table page (trajectories), both filtered by invocation_id. Only the
    heading differs. When is_server_running() reports a server, the links are
    presented as ready to open. Otherwise the heading first tells the user to
    start the local UI with "ep logs".

    Args:
        invocation_id: The invocation ID to filter results by
    """
    heading = (
        "View your evaluation results:"
        if is_server_running()
        else "Start the local UI with 'ep logs', then visit:"
    )
    aggregate_link = generate_invocation_filter_url(invocation_id, "http://localhost:8000/pivot")
    trajectory_link = generate_invocation_filter_url(invocation_id, "http://localhost:8000/table")

    print(heading)
    print(f" 📊 Aggregate scores: {aggregate_link}")
    print(f" 📋 Trajectories: {trajectory_link}")

0 commit comments

Comments
 (0)