eval-protocol · dphuang2 · Oct 9, 2025 · Oct 9, 2025
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -25,22 +25,10 @@
       }
     },
     {
-      "name": "Python: Debug Module",
+      "name": "Python: Debug Logs Server",
       "type": "python",
       "request": "launch",
-      "module": "eval_protocol",
-      "console": "integratedTerminal",
-      "justMyCode": false,
-      "env": {
-        "PYTHONPATH": "${workspaceFolder}"
-      }
-    },
-    {
-      "name": "Python: Debug Logs Server (Uvicorn)",
-      "type": "python",
-      "request": "launch",
-      "module": "uvicorn",
-      "args": ["eval_protocol.utils.logs_server:app", "--reload"],
+      "module": "eval_protocol.utils.logs_server",
       "console": "integratedTerminal",
       "justMyCode": false,
       "env": {

diff --git a/eval_protocol/pytest/evaluation_test.py b/eval_protocol/pytest/evaluation_test.py
@@ -62,7 +62,8 @@
     run_tasks_with_eval_progress,
     run_tasks_with_run_progress,
 )
-from eval_protocol.utils.show_results_url import store_local_ui_results_url
+from eval_protocol.utils.show_results_url import store_local_ui_results_url, generate_invocation_filter_url
+from eval_protocol.utils.browser_utils import is_logs_server_running, open_browser_tab
 
 from ..common_utils import load_jsonl
 
@@ -80,6 +81,7 @@ def evaluation_test(
     rollout_processor_kwargs: RolloutProcessorInputParam | None = None,
     aggregation_method: AggregationMethod = "mean",
     passed_threshold: EvaluationThreshold | float | EvaluationThresholdDict | None = None,
+    disable_browser_open: bool = False,
     num_runs: int = 1,
     filtered_row_ids: Sequence[str] | None = None,
     max_dataset_rows: int | None = None,
@@ -246,10 +248,29 @@ def create_wrapper_with_signature() -> Callable[[], None]:
             else:
                 invocation_id = generate_id()
 
+            # Track whether we've opened browser for this invocation
+            browser_opened_for_invocation = False
+
             async def wrapper_body(**kwargs: Unpack[ParameterizedTestKwargs]) -> None:
+                nonlocal browser_opened_for_invocation
+
                 # Store URL for viewing results (after all postprocessing is complete)
                 store_local_ui_results_url(invocation_id)
 
+                # Auto-open browser if server is running and not disabled (only once per invocation)
+                if (
+                    not browser_opened_for_invocation
+                    and not disable_browser_open
+                    and os.environ.get("EP_DISABLE_AUTO_BROWSER") is None
+                ):
+                    is_running, port = is_logs_server_running()
+                    if is_running:
+                        # Generate URL for table view with invocation filter
+                        base_url = f"http://localhost:{port}" if port else "http://localhost:8000"
+                        table_url = generate_invocation_filter_url(invocation_id, f"{base_url}/table")
+                        open_browser_tab(table_url)
+                        browser_opened_for_invocation = True
+
                 eval_metadata = None
 
                 all_results: list[list[EvaluationRow]] = [[] for _ in range(num_runs)]

diff --git a/eval_protocol/utils/browser_utils.py b/eval_protocol/utils/browser_utils.py
@@ -0,0 +1,114 @@
+"""
+Browser utilities for auto-opening evaluation results in the local UI.
+"""
+
+import json
+import os
+import threading
+import time
+import webbrowser
+from pathlib import Path
+from typing import Tuple, Optional
+
+try:
+    import psutil
+
+    PSUTIL_AVAILABLE = True
+except ImportError:
+    PSUTIL_AVAILABLE = False
+
+
+def _get_pid_file_path() -> Path:
+    """Return the ``logs_server.pid`` path under the directory given by ``find_eval_protocol_dir``."""
+    from eval_protocol.directory_utils import find_eval_protocol_dir as _find_dir
+
+    return Path(_find_dir(), "logs_server.pid")
+
+
+def write_pid_file(pid: int, port: int) -> None:
+    """
+    Record the logs server's PID and port as JSON in the PID file.
+
+    Other processes read this file to check whether a server is up.
+    Any failure is reported with a printed warning rather than raised.
+
+    Args:
+        pid: The process ID of the logs server
+        port: The port the server is running on
+    """
+    try:
+        target = _get_pid_file_path()
+        payload = json.dumps({"pid": pid, "port": port})
+        target.write_text(payload)
+
+        # Use print instead of logger to avoid circular imports
+        print(f"Wrote PID file: {target} with PID {pid} and port {port}")
+    except Exception as e:
+        print(f"Warning: Failed to write PID file: {e}")
+
+
+def is_logs_server_running() -> Tuple[bool, Optional[int]]:
+    """
+    Check if the logs server is running by reading the PID file and verifying the process.
+
+    The PID file is expected to hold a JSON object with "pid" and "port" keys.
+    A missing, unreadable, or malformed file is treated as "server not running"
+    rather than raised, because this check runs inside the evaluation test
+    wrapper and must never fail a test on its own.
+
+    Only the liveness of the recorded PID is checked; the port is reported as
+    recorded and is not probed. The server writes the PID file before it starts
+    serving, so the socket may not be listening yet when this is called.
+
+    Returns:
+        Tuple of (is_running, port) where:
+        - is_running: True if a process with the recorded PID is alive, False otherwise
+        - port: The recorded port (None if absent or not an integer), or None if not running
+    """
+    if not PSUTIL_AVAILABLE:
+        return False, None
+
+    try:
+        with open(_get_pid_file_path(), "r") as f:
+            data = json.load(f)
+    except (OSError, ValueError):
+        # OSError: file missing or unreadable; ValueError: invalid JSON or bad encoding.
+        return False, None
+
+    # Valid JSON is not necessarily an object (e.g. a bare list or number).
+    if not isinstance(data, dict):
+        return False, None
+
+    pid = data.get("pid")
+    port = data.get("port")
+
+    # psutil.Process raises TypeError/ValueError for non-integer or negative PIDs; 0 is never our server.
+    if not isinstance(pid, int) or isinstance(pid, bool) or pid <= 0:
+        return False, None
+
+    try:
+        if not psutil.Process(pid).is_running():
+            return False, None
+    except psutil.Error:
+        # Covers NoSuchProcess, ZombieProcess and AccessDenied.
+        return False, None
+
+    return True, port if isinstance(port, int) else None
+
+
+def open_browser_tab(url: str, delay: float = 0.5) -> None:
+    """
+    Schedule ``url`` to be opened in a new browser tab without blocking the caller.
+
+    The work happens on a daemon thread, so a pending open is dropped if the
+    interpreter exits before ``delay`` has elapsed.
+
+    Args:
+        url: The URL to open
+        delay: Delay in seconds before opening browser (default: 0.5)
+    """
+    def _sleep_then_open() -> None:
+        time.sleep(delay)  # Give the server time to start
+        webbrowser.open_new_tab(url)
+
+    threading.Thread(target=_sleep_then_open, daemon=True).start()
diff --git a/eval_protocol/utils/logs_server.py b/eval_protocol/utils/logs_server.py
@@ -6,6 +6,7 @@
 import time
 from datetime import datetime
 from contextlib import asynccontextmanager
+from pathlib import Path
 from queue import Queue
 from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
@@ -23,6 +24,7 @@
 from eval_protocol.log_utils.elasticsearch_client import ElasticsearchClient
 from eval_protocol.types.remote_rollout_processor import ElasticsearchConfig
 from eval_protocol.utils.logs_models import LogEntry, LogsResponse
+from eval_protocol.utils.browser_utils import write_pid_file
 
 if TYPE_CHECKING:
     from eval_protocol.models import EvaluationRow
@@ -378,7 +380,7 @@ def __init__(
         event_bus.subscribe(self._handle_event)
         logger.debug("[LOGS_SERVER_INIT] Successfully subscribed to event bus")
 
-        logger.info(f"[LOGS_SERVER_INIT] LogsServer initialized on {host}:{port}")
+        logger.info(f"[LOGS_SERVER_INIT] LogsServer initialized on {self.host}:{self.port}")
 
     def _setup_websocket_routes(self):
         """Set up WebSocket routes for real-time communication."""
@@ -541,6 +543,12 @@ async def run_async(self):
             )
 
             server = uvicorn.Server(config)
+
+            # Write PID file after server is configured but before serving
+            logger.debug(f"[LOGS_SERVER_RUN_ASYNC] Writing PID file for port {self.port}")
+            write_pid_file(os.getpid(), self.port)
+            logger.debug(f"[LOGS_SERVER_RUN_ASYNC] Successfully wrote PID file for port {self.port}")
+
             await server.serve()
 
         except KeyboardInterrupt:

diff --git a/tests/test_show_results_url.py b/tests/test_show_results_url.py
@@ -6,6 +6,13 @@
 from unittest.mock import patch, MagicMock
 import pytest
 
+try:
+    import psutil
+
+    PSUTIL_AVAILABLE = True
+except ImportError:
+    PSUTIL_AVAILABLE = False
+
 from eval_protocol.utils.show_results_url import (
     is_server_running,
     generate_invocation_filter_url,
@@ -193,3 +200,137 @@ def test_full_workflow_stores_urls(self, mock_store):
         assert "table" in call_args[2]
         assert "integration-test" in call_args[1]
         assert "integration-test" in call_args[2]
+
+
+class TestBrowserUtilities:
+    """Test browser utility functions."""
+
+    def test_get_pid_file_path(self):
+        """Test PID file path generation."""
+        from eval_protocol.utils.browser_utils import _get_pid_file_path
+        from eval_protocol.directory_utils import find_eval_protocol_dir
+        from pathlib import Path
+
+        pid_file = _get_pid_file_path()
+        expected = Path(find_eval_protocol_dir()) / "logs_server.pid"
+        assert pid_file == expected
+
+    def test_is_logs_server_running_no_pid_file(self, tmp_path, monkeypatch):
+        """Test server detection when PID file doesn't exist."""
+        from eval_protocol.utils.browser_utils import is_logs_server_running
+
+        # Mock the PID file path to a non-existent file
+        monkeypatch.setattr(
+            "eval_protocol.utils.browser_utils._get_pid_file_path", lambda: tmp_path / "nonexistent.pid"
+        )
+
+        is_running, port = is_logs_server_running()
+        assert not is_running
+        assert port is None
+
+    def test_is_logs_server_running_invalid_pid_file(self, tmp_path, monkeypatch):
+        """Test server detection with invalid PID file content."""
+        from eval_protocol.utils.browser_utils import is_logs_server_running
+
+        # Create invalid PID file
+        pid_file = tmp_path / "invalid.pid"
+        pid_file.write_text("invalid json")
+        monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)
+
+        is_running, port = is_logs_server_running()
+        assert not is_running
+        assert port is None
+
+    def test_is_logs_server_running_missing_pid_key(self, tmp_path, monkeypatch):
+        """Test server detection with PID file missing required keys."""
+        from eval_protocol.utils.browser_utils import is_logs_server_running
+        import json
+
+        # Create PID file with missing pid key
+        pid_file = tmp_path / "missing_pid.pid"
+        pid_file.write_text(json.dumps({"port": 8000}))
+        monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)
+
+        is_running, port = is_logs_server_running()
+        assert not is_running
+        assert port is None
+
+    @pytest.mark.skipif(not PSUTIL_AVAILABLE, reason="psutil not available")
+    def test_is_logs_server_running_nonexistent_process(self, tmp_path, monkeypatch):
+        """Test server detection with PID file pointing to non-existent process."""
+        from eval_protocol.utils.browser_utils import is_logs_server_running
+        import json
+
+        # Create PID file with non-existent PID
+        pid_file = tmp_path / "nonexistent_process.pid"
+        pid_file.write_text(json.dumps({"pid": 999999, "port": 8000}))
+        monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)
+
+        is_running, port = is_logs_server_running()
+        assert not is_running
+        assert port is None
+
+    @pytest.mark.skipif(not PSUTIL_AVAILABLE, reason="psutil not available")
+    def test_is_logs_server_running_current_process(self, tmp_path, monkeypatch):
+        """Test server detection with PID file pointing to current process."""
+        from eval_protocol.utils.browser_utils import is_logs_server_running
+        import json
+        import os
+
+        # Create PID file with current process PID
+        pid_file = tmp_path / "current_process.pid"
+        pid_file.write_text(json.dumps({"pid": os.getpid(), "port": 8000}))
+        monkeypatch.setattr("eval_protocol.utils.browser_utils._get_pid_file_path", lambda: pid_file)
+
+        is_running, port = is_logs_server_running()
+        assert is_running
+        assert port == 8000
+
+    def test_open_browser_tab(self, monkeypatch):
+        """Test browser tab opening."""
+        from eval_protocol.utils.browser_utils import open_browser_tab
+
+        opened_urls = []
+
+        def mock_open_new_tab(url):
+            opened_urls.append(url)
+
+        monkeypatch.setattr("webbrowser.open_new_tab", mock_open_new_tab)
+
+        # Test with delay
+        open_browser_tab("http://example.com", delay=0.01)
+
+        # Poll with a deadline; a single short sleep can lose the race with the helper thread
+        import time
+
+        deadline = time.monotonic() + 2.0
+        while not opened_urls and time.monotonic() < deadline:
+            time.sleep(0.005)
+        assert opened_urls == ["http://example.com"]
+
+
+class TestLogsServerPidFile:
+    """Test logs server PID file functionality."""
+
+    def test_write_pid_file(self, tmp_path, monkeypatch):
+        """write_pid_file stores the given PID and port as JSON in logs_server.pid."""
+        import json
+
+        from eval_protocol.utils.browser_utils import write_pid_file
+
+        # Redirect the PID file into tmp_path by replacing the directory lookup
+        monkeypatch.setattr("eval_protocol.directory_utils.find_eval_protocol_dir", lambda: str(tmp_path))
+
+        # Test writing PID file
+        write_pid_file(12345, 8000)
+
+        # The file must now exist at the expected location
+        written = tmp_path / "logs_server.pid"
+        assert written.exists()
+
+        # Both keys must be present and carry the values passed in
+        contents = json.loads(written.read_text())
+        assert "pid" in contents
+        assert "port" in contents
+        assert contents["port"] == 8000
+        assert contents["pid"] == 12345