Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 2 additions & 14 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,10 @@
}
},
{
"name": "Python: Debug Module",
"name": "Python: Debug Logs Server",
"type": "python",
"request": "launch",
"module": "eval_protocol",
"console": "integratedTerminal",
"justMyCode": false,
"env": {
"PYTHONPATH": "${workspaceFolder}"
}
},
{
"name": "Python: Debug Logs Server (Uvicorn)",
"type": "python",
"request": "launch",
"module": "uvicorn",
"args": ["eval_protocol.utils.logs_server:app", "--reload"],
"module": "eval_protocol.utils.logs_server",
"console": "integratedTerminal",
"justMyCode": false,
"env": {
Expand Down
23 changes: 22 additions & 1 deletion eval_protocol/pytest/evaluation_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@
run_tasks_with_eval_progress,
run_tasks_with_run_progress,
)
from eval_protocol.utils.show_results_url import store_local_ui_results_url
from eval_protocol.utils.show_results_url import store_local_ui_results_url, generate_invocation_filter_url
from eval_protocol.utils.browser_utils import is_logs_server_running, open_browser_tab

from ..common_utils import load_jsonl

Expand All @@ -80,6 +81,7 @@ def evaluation_test(
rollout_processor_kwargs: RolloutProcessorInputParam | None = None,
aggregation_method: AggregationMethod = "mean",
passed_threshold: EvaluationThreshold | float | EvaluationThresholdDict | None = None,
disable_browser_open: bool = False,
num_runs: int = 1,
filtered_row_ids: Sequence[str] | None = None,
max_dataset_rows: int | None = None,
Expand Down Expand Up @@ -246,10 +248,29 @@ def create_wrapper_with_signature() -> Callable[[], None]:
else:
invocation_id = generate_id()

# Track whether we've opened browser for this invocation
browser_opened_for_invocation = False

async def wrapper_body(**kwargs: Unpack[ParameterizedTestKwargs]) -> None:
nonlocal browser_opened_for_invocation

# Store URL for viewing results (after all postprocessing is complete)
store_local_ui_results_url(invocation_id)

# Auto-open browser if server is running and not disabled (only once per invocation)
if (
not browser_opened_for_invocation
and not disable_browser_open
and os.environ.get("EP_DISABLE_AUTO_BROWSER") is None
):
is_running, port = is_logs_server_running()
if is_running:
# Generate URL for table view with invocation filter
base_url = f"http://localhost:{port}" if port else "http://localhost:8000"
table_url = generate_invocation_filter_url(invocation_id, f"{base_url}/table")
open_browser_tab(table_url)
browser_opened_for_invocation = True

eval_metadata = None

all_results: list[list[EvaluationRow]] = [[] for _ in range(num_runs)]
Expand Down
114 changes: 114 additions & 0 deletions eval_protocol/utils/browser_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""
Browser utilities for auto-opening evaluation results in the local UI.
"""

import json
import os
import threading
import time
import webbrowser
from pathlib import Path
from typing import Tuple, Optional

try:
import psutil

PSUTIL_AVAILABLE = True
except ImportError:
PSUTIL_AVAILABLE = False


def _get_pid_file_path() -> Path:
    """Return the location of the logs server PID file inside the eval protocol directory."""
    # Resolved at call time, so replacing the attribute on
    # eval_protocol.directory_utils is picked up by the next call.
    from eval_protocol.directory_utils import find_eval_protocol_dir

    eval_protocol_dir = Path(find_eval_protocol_dir())
    return eval_protocol_dir / "logs_server.pid"


def write_pid_file(pid: int, port: int) -> None:
    """
    Record the logs server's PID and port as JSON so other processes can find it.

    Failures are reported on stdout and otherwise ignored; this function does not raise.

    Args:
        pid: The process ID of the logs server
        port: The port the server is running on
    """
    try:
        pid_file = _get_pid_file_path()
        payload = json.dumps({"pid": pid, "port": port})

        with open(pid_file, "w") as f:
            f.write(payload)

        # Use print instead of logger to avoid circular imports
        print(f"Wrote PID file: {pid_file} with PID {pid} and port {port}")
    except Exception as e:
        # Best effort: a missing PID file only means the server cannot be discovered.
        print(f"Warning: Failed to write PID file: {e}")


def is_logs_server_running() -> Tuple[bool, Optional[int]]:
    """
    Check if the logs server is running by reading the PID file and verifying the process.

    A missing, unreadable, malformed, or stale PID file is reported as "not running"
    instead of raising, so callers can use this as a cheap best-effort probe.

    Returns:
        Tuple of (is_running, port) where:
        - is_running: True if the PID recorded in the PID file belongs to a live process,
          False otherwise (including when psutil is not installed)
        - port: The port recorded in the PID file, or None if not running
    """
    if not PSUTIL_AVAILABLE:
        return False, None

    pid_file = _get_pid_file_path()
    try:
        with open(pid_file, "r") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: the file is missing or cannot be read.
        # ValueError: the content is not valid JSON (json.JSONDecodeError is a
        # ValueError subclass) or is not decodable text.
        return False, None

    # Valid JSON is not necessarily an object; anything else has no "pid" entry.
    if not isinstance(data, dict):
        return False, None

    pid = data.get("pid")
    port = data.get("port")

    # psutil.Process() raises TypeError for non-integers and ValueError for negative
    # PIDs, so reject those up front (bool is excluded because it is an int subclass).
    if isinstance(pid, bool) or not isinstance(pid, int) or pid < 0:
        return False, None

    try:
        if not psutil.Process(pid).is_running():
            return False, None
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        return False, None

    # A live PID is treated as sufficient evidence. Listening sockets are not
    # inspected: Process.net_connections() only exists on psutil >= 6.0, and the
    # server writes its PID file before it starts serving, so the port may not be
    # bound yet when this runs.
    return True, port


def open_browser_tab(url: str, delay: float = 0.5) -> None:
    """
    Open a URL in a new browser tab from a background daemon thread.

    The call returns immediately; the thread sleeps for ``delay`` seconds and then
    asks the ``webbrowser`` module to open the tab.

    Args:
        url: The URL to open
        delay: Delay in seconds before opening browser (default: 0.5)
    """

    def _open():
        # Give the server time to start
        time.sleep(delay)
        webbrowser.open_new_tab(url)

    # Daemon thread: a pending open never keeps the interpreter alive at exit.
    threading.Thread(target=_open, daemon=True).start()
10 changes: 9 additions & 1 deletion eval_protocol/utils/logs_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import time
from datetime import datetime
from contextlib import asynccontextmanager
from pathlib import Path
from queue import Queue
from typing import TYPE_CHECKING, Any, Dict, List, Optional

Expand All @@ -23,6 +24,7 @@
from eval_protocol.log_utils.elasticsearch_client import ElasticsearchClient
from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
from eval_protocol.utils.logs_models import LogEntry, LogsResponse
from eval_protocol.utils.browser_utils import write_pid_file

if TYPE_CHECKING:
from eval_protocol.models import EvaluationRow
Expand Down Expand Up @@ -378,7 +380,7 @@ def __init__(
event_bus.subscribe(self._handle_event)
logger.debug("[LOGS_SERVER_INIT] Successfully subscribed to event bus")

logger.info(f"[LOGS_SERVER_INIT] LogsServer initialized on {host}:{port}")
logger.info(f"[LOGS_SERVER_INIT] LogsServer initialized on {self.host}:{self.port}")

def _setup_websocket_routes(self):
"""Set up WebSocket routes for real-time communication."""
Expand Down Expand Up @@ -541,6 +543,12 @@ async def run_async(self):
)

server = uvicorn.Server(config)

# Write PID file after server is configured but before serving
logger.debug(f"[LOGS_SERVER_RUN_ASYNC] Writing PID file for port {self.port}")
write_pid_file(os.getpid(), self.port)
logger.debug(f"[LOGS_SERVER_RUN_ASYNC] Successfully wrote PID file for port {self.port}")

await server.serve()

except KeyboardInterrupt:
Expand Down
141 changes: 141 additions & 0 deletions tests/test_show_results_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
from unittest.mock import patch, MagicMock
import pytest

try:
import psutil

PSUTIL_AVAILABLE = True
except ImportError:
PSUTIL_AVAILABLE = False

from eval_protocol.utils.show_results_url import (
is_server_running,
generate_invocation_filter_url,
Expand Down Expand Up @@ -193,3 +200,137 @@ def test_full_workflow_stores_urls(self, mock_store):
assert "table" in call_args[2]
assert "integration-test" in call_args[1]
assert "integration-test" in call_args[2]


class TestBrowserUtilities:
    """Test browser utility functions."""

    def test_get_pid_file_path(self):
        """Test PID file path generation."""
        from eval_protocol.utils.browser_utils import _get_pid_file_path
        from eval_protocol.directory_utils import find_eval_protocol_dir
        from pathlib import Path

        pid_file = _get_pid_file_path()
        expected = Path(find_eval_protocol_dir()) / "logs_server.pid"
        assert pid_file == expected

    def test_is_logs_server_running_no_pid_file(self, tmp_path, monkeypatch):
        """Test server detection when PID file doesn't exist."""
        from eval_protocol.utils.browser_utils import is_logs_server_running

        # Mock the PID file path to a non-existent file
        monkeypatch.setattr(
            "eval_protocol.utils.browser_utils._get_pid_file_path", lambda: tmp_path / "nonexistent.pid"
        )

        is_running, port = is_logs_server_running()
        assert not is_running
        assert port is None

    def test_is_logs_server_running_invalid_pid_file(self, tmp_path, monkeypatch):
        """Test server detection with invalid PID file content."""
        from eval_protocol.utils.browser_utils import is_logs_server_running

        # Create invalid PID file
        pid_file = tmp_path / "invalid.pid"
        pid_file.write_text("invalid json")
        monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)

        is_running, port = is_logs_server_running()
        assert not is_running
        assert port is None

    def test_is_logs_server_running_missing_pid_key(self, tmp_path, monkeypatch):
        """Test server detection with PID file missing required keys."""
        from eval_protocol.utils.browser_utils import is_logs_server_running
        import json

        # Create PID file with missing pid key
        pid_file = tmp_path / "missing_pid.pid"
        pid_file.write_text(json.dumps({"port": 8000}))
        monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)

        is_running, port = is_logs_server_running()
        assert not is_running
        assert port is None

    @pytest.mark.skipif(not PSUTIL_AVAILABLE, reason="psutil not available")
    def test_is_logs_server_running_nonexistent_process(self, tmp_path, monkeypatch):
        """Test server detection with PID file pointing to non-existent process."""
        from eval_protocol.utils.browser_utils import is_logs_server_running
        import json

        # Create PID file with non-existent PID
        pid_file = tmp_path / "nonexistent_process.pid"
        pid_file.write_text(json.dumps({"pid": 999999, "port": 8000}))
        monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)

        is_running, port = is_logs_server_running()
        assert not is_running
        assert port is None

    @pytest.mark.skipif(not PSUTIL_AVAILABLE, reason="psutil not available")
    def test_is_logs_server_running_current_process(self, tmp_path, monkeypatch):
        """Test server detection with PID file pointing to current process."""
        from eval_protocol.utils.browser_utils import is_logs_server_running
        import json
        import os

        # Create PID file with current process PID
        pid_file = tmp_path / "current_process.pid"
        pid_file.write_text(json.dumps({"pid": os.getpid(), "port": 8000}))
        monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)

        is_running, port = is_logs_server_running()
        assert is_running
        assert port == 8000

    def test_open_browser_tab(self, monkeypatch):
        """Test browser tab opening."""
        import threading

        from eval_protocol.utils.browser_utils import open_browser_tab

        opened_urls = []
        opened = threading.Event()

        def mock_open_new_tab(url):
            opened_urls.append(url)
            opened.set()

        monkeypatch.setattr("webbrowser.open_new_tab", mock_open_new_tab)

        # Test with delay
        open_browser_tab("http://example.com", delay=0.01)

        # Wait for the background thread to report in instead of sleeping a fixed
        # interval: a fixed sleep races with thread scheduling on a loaded machine.
        assert opened.wait(timeout=5), "browser tab was not opened in time"

        assert opened_urls == ["http://example.com"]


class TestLogsServerPidFile:
    """Test logs server PID file functionality."""

    def test_write_pid_file(self, tmp_path, monkeypatch):
        """Test PID file writing."""
        from eval_protocol.utils.browser_utils import write_pid_file
        import json

        # Point the eval protocol directory lookup at the temporary directory
        monkeypatch.setattr("eval_protocol.directory_utils.find_eval_protocol_dir", lambda: str(tmp_path))

        write_pid_file(12345, 8000)

        # The file must land under the mocked directory with the fixed name
        written = tmp_path / "logs_server.pid"
        assert written.exists()

        # Both keys must be present and carry the values that were passed in
        with open(written, "r") as f:
            payload = json.load(f)
        for key in ("pid", "port"):
            assert key in payload
        assert payload["port"] == 8000
        assert payload["pid"] == 12345
Loading