Skip to content

Commit 111cbc4

Browse files
author
Dylan Huang
authored
Link to local UI (#199)
* more typing * Add TypeScript formatter to VSCode settings * Refactor GlobalState to use GlobalConfig and improve pagination and sorting management - Introduced GlobalConfig interface to encapsulate pivot, filter, pagination, and sort configurations. - Updated GlobalState to utilize computed properties for pagination and sorting. - Refactored methods to directly manipulate pagination and sort configurations. - Enhanced FilterSelector and TableContainer components to use new FilterLogic and SortDirection types. - Improved type definitions in configs and filter utilities for better clarity and maintainability. * tests work * fix pivot.test.ts * tighten the types up * updates query params / preserves state across tabs * pnp build * Refactor GlobalState to consolidate configuration management under GlobalConfig - Replaced individual configuration properties (pivot, filter, pagination, sort) with a unified GlobalConfig object. - Updated methods to load and save configurations from/to localStorage as a single entity. - Adjusted computed properties to access individual configurations from GlobalConfig. - Enhanced state management for pagination and sorting updates to reflect changes in the new structure. * query params are applied when provided * vite build * remove thing that was causing warnings * Add show_results_url utility and integrate into evaluation test - Imported show_results_url from utils to display the results URL after postprocessing in evaluation_test. - Updated __init__.py to include show_results_url and related functions in the module exports. * try removing module export
1 parent 36e88b1 commit 111cbc4

24 files changed

+2086
-518
lines changed

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,8 @@
1010
"editor.formatOnSave": true,
1111
"[python]": {
1212
"editor.defaultFormatter": "charliermarsh.ruff"
13+
},
14+
"[typescript]": {
15+
"editor.defaultFormatter": "esbenp.prettier-vscode"
1316
}
1417
}

eval_protocol/pytest/evaluation_test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
parse_ep_passed_threshold,
6060
rollout_processor_with_retry,
6161
)
62+
from eval_protocol.utils.show_results_url import show_results_url
6263

6364
from ..common_utils import load_jsonl
6465

@@ -555,6 +556,9 @@ async def execute_run_with_progress(run_idx: int, config):
555556
experiment_duration_seconds,
556557
)
557558

559+
# Show URL for viewing results (after all postprocessing is complete)
560+
show_results_url(invocation_id)
561+
558562
except AssertionError:
559563
_log_eval_error(
560564
Status.eval_finished(),
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
"""
2+
Utility functions for checking server status and generating UI URLs.
3+
"""
4+
5+
import socket
6+
import urllib.parse
7+
from typing import List, Dict, Any
8+
9+
10+
def is_server_running(host: str = "localhost", port: int = 8000) -> bool:
    """
    Check whether something accepts TCP connections on the given host and port.

    The probe is best-effort: any failure to resolve or connect is reported
    as "not running" rather than raised, so callers can use it purely to pick
    which message to show.

    Args:
        host: The host to check (default: "localhost")
        port: The port to check (default: 8000)

    Returns:
        True if a TCP connection could be established, False otherwise
    """
    # Reject anything that is not a usable TCP port up front so an invalid
    # value yields False instead of reaching the resolver.
    if not isinstance(port, int) or not 0 < port <= 65535:
        return False

    try:
        # create_connection resolves the host for both IPv4 and IPv6 and tries
        # each address in turn, so a server reachable only via ::1 is detected
        # too. The 1 second timeout applies to each attempt.
        with socket.create_connection((host, port), timeout=1):
            return True
    except (OSError, ValueError, OverflowError):
        # OSError covers refused connections, timeouts and resolution errors;
        # ValueError/OverflowError cover malformed host or address values.
        return False
28+
29+
30+
def generate_invocation_filter_url(invocation_id: str, base_url: str = "http://localhost:8000") -> str:
    """
    Generate a URL for viewing results filtered by invocation_id.

    The filter is a single AND group with one "equals" condition on
    ``$.execution_metadata.invocation_id``. It is serialized as JSON,
    percent-encoded, and passed in the ``filterConfig`` query parameter of
    the ``/pivot`` page under ``base_url``.

    Args:
        invocation_id: The invocation ID to filter results by
        base_url: The base URL for the UI (default: "http://localhost:8000")

    Returns:
        URL whose ``filterConfig`` query parameter holds the encoded filter
    """
    # Imported locally so this function does not depend on a module-level import.
    import json

    filter_config = [
        {
            "logic": "AND",
            "filters": [
                {
                    "field": "$.execution_metadata.invocation_id",
                    "operator": "equals",
                    "value": invocation_id,
                    "type": "text",
                }
            ],
        }
    ]

    # Serialize with a real JSON encoder. Rewriting the Python repr by swapping
    # quote characters breaks as soon as the ID contains a quote or backslash.
    # ensure_ascii=False keeps non-ASCII IDs as UTF-8 for percent-encoding.
    filter_config_json = json.dumps(filter_config, ensure_ascii=False)
    encoded_filter = urllib.parse.quote(filter_config_json)

    return f"{base_url}/pivot?filterConfig={encoded_filter}"
60+
61+
62+
def show_results_url(invocation_id: str) -> None:
    """
    Print where the evaluation results for one invocation can be viewed.

    The printed URL is the same in both cases; only the lead-in text differs.
    When the local server answers, the URL is presented as ready to open.
    Otherwise the message first tells the user to start the local UI with
    "ep logs".

    Args:
        invocation_id: The invocation ID to filter results by
    """
    server_up = is_server_running()
    results_url = generate_invocation_filter_url(invocation_id)

    if not server_up:
        print(f"Start the local UI with 'ep logs', then visit: {results_url}")
        return

    print(f"View your evaluation results: {results_url}")
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""
2+
Utility functions for showing evaluation results URLs and checking server status.
3+
"""
4+
5+
import socket
6+
import urllib.parse
7+
8+
9+
def is_server_running(host: str = "localhost", port: int = 8000) -> bool:
    """
    Check whether something accepts TCP connections on the given host and port.

    The probe is best-effort: any failure to resolve or connect is reported
    as "not running" rather than raised, so callers can use it purely to pick
    which message to show.

    Args:
        host: The host to check (default: "localhost")
        port: The port to check (default: 8000)

    Returns:
        True if a TCP connection could be established, False otherwise
    """
    # Reject anything that is not a usable TCP port up front so an invalid
    # value yields False instead of reaching the resolver.
    if not isinstance(port, int) or not 0 < port <= 65535:
        return False

    try:
        # create_connection resolves the host for both IPv4 and IPv6 and tries
        # each address in turn, so a server reachable only via ::1 is detected
        # too. The 1 second timeout applies to each attempt.
        with socket.create_connection((host, port), timeout=1):
            return True
    except (OSError, ValueError, OverflowError):
        # OSError covers refused connections, timeouts and resolution errors;
        # ValueError/OverflowError cover malformed host or address values.
        return False
27+
28+
29+
def generate_invocation_filter_url(invocation_id: str, base_url: str = "http://localhost:8000") -> str:
    """
    Generate a URL for viewing results filtered by invocation_id.

    The filter is a single AND group with one "==" condition on
    ``$.execution_metadata.invocation_id``. It is serialized as JSON,
    percent-encoded, and appended to ``base_url`` as the ``filterConfig``
    query parameter. ``base_url`` is used as given, so it should already
    include the page path if one is wanted.

    Args:
        invocation_id: The invocation ID to filter results by
        base_url: The base URL for the UI (default: "http://localhost:8000")

    Returns:
        URL whose ``filterConfig`` query parameter holds the encoded filter
    """
    # Imported locally so this function does not depend on a module-level import.
    import json

    filter_config = [
        {
            "logic": "AND",
            "filters": [
                {
                    "field": "$.execution_metadata.invocation_id",
                    "operator": "==",
                    "value": invocation_id,
                    "type": "text",
                }
            ],
        }
    ]

    # Serialize with a real JSON encoder. Rewriting the Python repr by swapping
    # quote characters breaks as soon as the ID contains a quote or backslash.
    # ensure_ascii=False keeps non-ASCII IDs as UTF-8 for percent-encoding.
    filter_config_json = json.dumps(filter_config, ensure_ascii=False)
    encoded_filter = urllib.parse.quote(filter_config_json)

    return f"{base_url}?filterConfig={encoded_filter}"
59+
60+
61+
def show_results_url(invocation_id: str) -> None:
    """
    Show URLs for viewing evaluation results filtered by invocation_id.

    Two links are always printed: the pivot page (aggregate scores) and the
    table page (trajectories), both on http://localhost:8000. Only the
    heading differs. If the server is running, the links are presented as
    ready to open. If not, the heading tells the user to run "ep logs" to
    start the local UI first.

    Args:
        invocation_id: The invocation ID to filter results by
    """
    server_up = is_server_running()

    # The links do not depend on server state, so build them once.
    pivot_url = generate_invocation_filter_url(invocation_id, "http://localhost:8000/pivot")
    table_url = generate_invocation_filter_url(invocation_id, "http://localhost:8000/table")

    if server_up:
        print("View your evaluation results:")
    else:
        print("Start the local UI with 'ep logs', then visit:")

    print(f" 📊 Aggregate scores: {pivot_url}")
    print(f" 📋 Trajectories: {table_url}")

pytest.ini

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ asyncio_mode = auto
55
asyncio_default_fixture_loop_scope = function
66
testpaths = tests ./eval_protocol/quickstart
77
python_files = test_*.py llm_judge_*.py
8-
plugins =
9-
eval_protocol.pytest.plugin
108
python_classes = Test*
119
python_functions = test_*
1210
# Configure stdout/stderr capture for debugging

0 commit comments

Comments
(0)