From c1f77016b573200b0d062667b372079f26d8c7a1 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 03:34:39 -0700 Subject: [PATCH 01/16] adding status --- eval_protocol/models.py | 133 ++++++++++++++++++ .../pytest/remote_rollout_processor.py | 2 + 2 files changed, 135 insertions(+) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index d3fe0f61..98491dff 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -1,4 +1,6 @@ import os +import logging +import importlib from datetime import datetime from enum import Enum from typing import Any, ClassVar, Dict, List, Literal, Optional, TypedDict, Union @@ -19,6 +21,9 @@ from eval_protocol.types import TerminationReason +logger = logging.getLogger(__name__) + + class ErrorInfo(BaseModel): """ AIP-193 ErrorInfo model for structured error details. @@ -163,6 +168,134 @@ def rollout_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] details.append(ErrorInfo.extra_info(extra_info).to_aip193_format()) return cls.error(error_message, details) + @classmethod + def rollout_error_from_exception( + cls, exception: Exception, extra_info: Optional[Dict[str, Any]] = None + ) -> "Status": + """ + Create a status indicating the rollout failed with an exception. + Simple approach that stores exception info directly in details. + """ + details = [] + + details.append( + { + "exception_type": f"{type(exception).__module__}.{type(exception).__name__}", + "exception_message": str(exception), + } + ) + + if extra_info: + details.append({"extra_info": extra_info}) + + return cls(code=cls.Code.INTERNAL, message=str(exception), details=details) + + @classmethod + def raise_from_status_details(cls, status_details: List[Dict[str, Any]]) -> bool: + """ + Try to raise original exception from simple status details using dynamic imports. + """ + + for detail in status_details: + # Look for simple exception info + if "exception_type" in detail and "exception_message" in detail: + exception_type = detail["exception_type"] + exception_message = detail["exception_message"] + + logger.info(f"Found exception info: {exception_type}") + + # Dynamically import and raise the exception + exception_class = cls._import_exception_class(exception_type) + if exception_class: + logger.info(f"Re-raising {exception_type} from status details") + # Try different constructor patterns + exception_to_raise = cls._create_exception_instance(exception_class, exception_message) + if exception_to_raise: + raise exception_to_raise + else: + logger.debug(f"Could not create instance of {exception_type}") + continue + else: + logger.debug(f"Could not import exception type: {exception_type}") + continue + + return False + + @classmethod + def _create_exception_instance(cls, exception_class: type, message: str) -> Optional[Exception]: + """ + Try to create an exception instance using different constructor patterns. + + Args: + exception_class: The exception class to instantiate + message: The error message + + Returns: + Exception instance if successful, None otherwise + """ + # Common constructor patterns to try + patterns = [ + # Pattern 1: Just message + lambda: exception_class(message), + # Pattern 2: Message as named parameter + lambda: exception_class(message=message), + # Pattern 3: Message + common litellm parameters + # NOTE: we are losing some diagnostic information here by not passing the model and llm_provider. We could try to capture full exception state in rollout_error_from_exception. + lambda: exception_class(message, model="unknown", llm_provider="unknown"), + lambda: exception_class(message=message, model="unknown", llm_provider="unknown"), + # Pattern 4: No arguments (fallback) + lambda: exception_class(), + ] + + for i, pattern in enumerate(patterns): + try: + instance = pattern() + logger.debug(f"Successfully created {exception_class.__name__} using pattern {i + 1}") + return instance + except (TypeError, ValueError) as e: + logger.debug(f"Pattern {i + 1} failed for {exception_class.__name__}: {e}") + continue + + logger.debug(f"All constructor patterns failed for {exception_class.__name__}") + return None + + @classmethod + def _import_exception_class(cls, exception_type: str) -> Optional[type]: + """ + Dynamically import an exception class from a string. + + Args: + exception_type: Exception type string like "litellm.exceptions.NotFoundError", + "openai.BadRequestError", "requests.exceptions.ConnectionError", etc. + + Returns: + The exception class if found, None otherwise + """ + try: + # Require fully qualified names (no automatic prefixing) + if "." not in exception_type: + logging.getLogger(__name__).debug(f"Exception type must be fully qualified: {exception_type}") + return None + + # Parse module and class name + module_name, class_name = exception_type.rsplit(".", 1) + + # Import the module + module = importlib.import_module(module_name) + + # Get the exception class + exception_class = getattr(module, class_name, None) + + # Verify it's actually an exception class + if exception_class and issubclass(exception_class, BaseException): + return exception_class + + return None + + except (ImportError, AttributeError, ValueError) as e: + logging.getLogger(__name__).debug(f"Could not import exception class {exception_type}: {e}") + return None + @classmethod def error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": """Create a status indicating the rollout failed with an error.""" diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 68d47dcd..d27f878f 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -166,6 +166,8 @@ def _get_status() -> Dict[str, Any]: f"Found Fireworks log for rollout {row.execution_metadata.rollout_id} with status code {status_code}" ) + Status.raise_from_status_details(status_details) + row.rollout_status = Status( code=Status.Code(status_code), message=status_message, From d3ef46c4ee668a1e77317470184a9daf9c6d5e5c Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 03:43:33 -0700 Subject: [PATCH 02/16] better logging --- eval_protocol/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 98491dff..6a8e7fad 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -207,10 +207,11 @@ def raise_from_status_details(cls, status_details: List[Dict[str, Any]]) -> bool # Dynamically import and raise the exception exception_class = cls._import_exception_class(exception_type) if exception_class: - logger.info(f"Re-raising {exception_type} from status details") + logger.info(f"Found exception class: {exception_class}") # Try different constructor patterns exception_to_raise = cls._create_exception_instance(exception_class, exception_message) if exception_to_raise: + logger.info(f"Re-raising {exception_type} from status details") raise exception_to_raise else: logger.debug(f"Could not create instance of {exception_type}") From f84133471cd09ac683c082262720f30b9dfaaa2d Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 03:48:45 -0700 Subject: [PATCH 03/16] retry on openai too --- eval_protocol/pytest/exception_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eval_protocol/pytest/exception_config.py b/eval_protocol/pytest/exception_config.py index 355e716a..452b8563 100644 --- a/eval_protocol/pytest/exception_config.py +++ b/eval_protocol/pytest/exception_config.py @@ -11,6 +11,7 @@ import litellm import requests import httpx +import openai # Default exceptions that should be retried with backoff DEFAULT_RETRYABLE_EXCEPTIONS: Set[Type[Exception]] = { @@ -34,6 +35,8 @@ litellm.exceptions.NotFoundError, litellm.exceptions.BadRequestError, # remove this once we have a long term solution litellm.exceptions.ServiceUnavailableError, + openai.NotFoundError, + openai.BadRequestError, # remove this once we have a long term solution } From c705cb8d88a8d5966f22c84172d885a4352debc0 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 04:00:08 -0700 Subject: [PATCH 04/16] reraising is working now --- eval_protocol/models.py | 38 ++- .../svg_agent/vercel_svg_server/api/init.py | 243 ++++++++++-------- 2 files changed, 164 insertions(+), 117 deletions(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 6a8e7fad..23231c22 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -214,10 +214,10 @@ def raise_from_status_details(cls, status_details: List[Dict[str, Any]]) -> bool logger.info(f"Re-raising {exception_type} from status details") raise exception_to_raise else: - logger.debug(f"Could not create instance of {exception_type}") + logger.info(f"Could not create instance of {exception_type}") continue else: - logger.debug(f"Could not import exception type: {exception_type}") + logger.info(f"Could not import exception type: {exception_type}") continue return False @@ -244,7 +244,9 @@ def _create_exception_instance(cls, exception_class: type, message: str) -> Opti # NOTE: we are losing some diagnostic information here by not passing the model and llm_provider. We could try to capture full exception state in rollout_error_from_exception. lambda: exception_class(message, model="unknown", llm_provider="unknown"), lambda: exception_class(message=message, model="unknown", llm_provider="unknown"), - # Pattern 4: No arguments (fallback) + # Pattern 5: OpenAI exceptions - create mock response object + lambda: cls._create_openai_exception(exception_class, message), + # Pattern 7: No arguments (fallback) lambda: exception_class(), ] @@ -260,6 +262,36 @@ def _create_exception_instance(cls, exception_class: type, message: str) -> Opti logger.debug(f"All constructor patterns failed for {exception_class.__name__}") return None + @classmethod + def _create_openai_exception(cls, exception_class: type, message: str) -> Optional[Exception]: + """ + Create OpenAI exception with a mock response object. + + OpenAI exceptions require httpx.Response objects which are complex to create, + so we create a minimal mock that satisfies the basic requirements. + """ + try: + import httpx + + # Create a minimal mock response object + class MockRequest: + def __init__(self): + self.method = "POST" + self.url = "https://api.openai.com/v1/chat/completions" + + class MockResponse: + def __init__(self): + self.status_code = 404 + self.headers = {"x-request-id": "mock-request-id"} + self.request = MockRequest() + + mock_response = MockResponse() + return exception_class(message, response=mock_response, body=None) + + except Exception as e: + logging.getLogger(__name__).debug(f"Failed to create OpenAI exception with mock response: {e}") + return None + @classmethod def _import_exception_class(cls, exception_type: str) -> Optional[type]: """ diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py b/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py index d376880e..502eb96a 100644 --- a/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py @@ -9,7 +9,8 @@ import os import logging import sys -from http.server import BaseHTTPRequestHandler +import asyncio +from flask import Flask, request, jsonify from openai import OpenAI from dotenv import load_dotenv @@ -44,119 +45,133 @@ def filter(self, record: logging.LogRecord) -> bool: # Attach Fireworks tracing handler to root logger (non-stream HTTP sink) root_logger.addHandler(FireworksTracingHttpHandler()) +# Create Flask app +app = Flask(__name__) + + +async def execute_rollout_background(req, api_key): + """Execute the OpenAI completion in background and log results""" + # Attach rollout_id filter to logger + logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}") + logger.addFilter(RolloutIdFilter(req.metadata.rollout_id)) + + try: + model = req.completion_params.get("model") + # Uncomment if you need to strip fireworks_ai/ prefix + # if model and isinstance(model, str) and model.startswith("fireworks_ai/"): + # model = model[len("fireworks_ai/"):] + + # Prepare completion arguments + completion_kwargs = { + "messages": req.messages, + # "messages": [{"role": "user", "content": "Hello, how are you?"}], + "model": model, + "temperature": req.completion_params.get("temperature"), + "max_tokens": req.completion_params.get("max_tokens"), + } + + # Add tools if present + if req.tools: + completion_kwargs["tools"] = req.tools + + logger.info( + f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, API_KEY: {api_key}, MODEL: {model}" + ) + + # Create AsyncOpenAI client + # client = AsyncOpenAI(base_url=req.model_base_url, api_key=api_key) + client = OpenAI(base_url=req.model_base_url, api_key=api_key) + + logger.info(f"Sending completion request to model {model}") -class handler(BaseHTTPRequestHandler): - def do_POST(self): - try: - # Read and parse request body - content_length = int(self.headers.get("Content-Length", 0)) - request_body = self.rfile.read(content_length).decode("utf-8") - request_data = json.loads(request_body) - - # Parse as InitRequest - req = InitRequest(**request_data) - - # Attach rollout_id filter to logger - logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}") - logger.addFilter(RolloutIdFilter(req.metadata.rollout_id)) - - # Validate required fields - if not req.messages: - error_msg = "messages is required" - logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) - self._send_error(400, error_msg) - return - - model = req.completion_params.get("model") - if model and isinstance(model, str) and model.startswith("fireworks_ai/"): - model = model[len("fireworks_ai/") :] - - # Prepare completion arguments - completion_kwargs = { - "messages": req.messages, - "model": model, - "temperature": req.completion_params.get("temperature"), - "max_tokens": req.completion_params.get("max_tokens"), - } - - # Add tools if present - if req.tools: - completion_kwargs["tools"] = req.tools - - # Get API key (prefer request api_key, fallback to environment) - api_key = req.api_key or os.environ.get("FIREWORKS_API_KEY") - if not api_key: - error_msg = "API key not provided in request or FIREWORKS_API_KEY environment variable" - logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) - self._send_error(500, error_msg) - return - - # Create OpenAI client - client = OpenAI(base_url=req.model_base_url, api_key=api_key) - - logger.info(f"Sending completion request to model {req.completion_params.get('model')}") - - # Make the model call - completion = client.chat.completions.create(**completion_kwargs) - - logger.info(f"Completed response: {completion}") - - # Log completion status - logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()}) - - # Return the completion response - response_data = { - "status": "completed", - "rollout_id": req.metadata.rollout_id, - "choices": [ - { - "message": { - "role": completion.choices[0].message.role, - "content": completion.choices[0].message.content, - } - } - ], - } - - self._send_json_response(200, response_data) - - except Exception as e: - # Log error if we have the request context - if "req" in locals() and "logger" in locals(): - logger.error(f"āŒ Error in rollout {req.metadata.rollout_id}: {e}") - logger.error(str(e), extra={"status": Status.rollout_error(str(e))}) - - self._send_error(500, str(e)) - - def do_GET(self): - """Health check endpoint""" - self._send_json_response( - 200, - { - "status": "ok", - "message": "SVGBench Vercel Serverless Function", - "endpoints": {"POST /": "Process SVGBench evaluation requests"}, - }, + # Make the async model call with timeout + import time + + logger.info(f"timing start: {time.time()}") + completion = client.chat.completions.create(**completion_kwargs) + logger.info(f"Completed response: {completion}") + logger.info(f"timing end: {time.time()}") + # Log successful completion - THIS IS WHAT RemoteRolloutProcessor POLLS FOR + logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()}) + + except Exception as e: + # Log error with structured status - THIS IS WHAT RemoteRolloutProcessor POLLS FOR + logger.error( + f"Rollout {req.metadata.rollout_id} failed: {e}", extra={"status": Status.rollout_error_from_exception(e)} ) - def do_OPTIONS(self): - """Handle CORS preflight requests""" - self.send_response(200) - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS") - self.send_header("Access-Control-Allow-Headers", "Content-Type") - self.end_headers() - - def _send_json_response(self, status_code: int, data: dict): - """Send a JSON response""" - self.send_response(status_code) - self.send_header("Content-Type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS") - self.send_header("Access-Control-Allow-Headers", "Content-Type") - self.end_headers() - self.wfile.write(json.dumps(data).encode("utf-8")) - - def _send_error(self, status_code: int, message: str): - """Send an error response""" - self._send_json_response(status_code, {"error": message}) + +@app.route("/init", methods=["POST"]) +async def init(): + try: + # Parse as InitRequest + req = InitRequest(**request.get_json()) + + # Create logger for immediate validation logging + logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}") + logger.addFilter(RolloutIdFilter(req.metadata.rollout_id)) + + # Validate required fields + if not req.messages: + error_msg = "messages is required" + logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) + return jsonify({"error": error_msg}), 400 + + # Get API key (prefer request api_key, fallback to environment) + if req.api_key: + logger.info("Using API key from request") + api_key = req.api_key + elif os.environ.get("FIREWORKS_API_KEY"): + logger.info("Using API key from environment") + api_key = os.environ.get("FIREWORKS_API_KEY") + else: + error_msg = "API key not provided in request or environment variable" + logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) + return jsonify({"error": error_msg}), 401 + + # šŸ”„ FIRE: Return immediately with acceptance (within 30s requirement) + response_data = { + "status": "accepted", + "rollout_id": req.metadata.rollout_id, + "message": "Rollout processing started", + } + + # Fire and forget: Execute rollout asynchronously + asyncio.create_task(execute_rollout_background(req, api_key)) + + return jsonify(response_data), 200 + + except Exception as e: + # For request parsing errors, return error immediately (don't retry) + return jsonify({"error": f"Request parsing error: {str(e)}"}), 400 + + +@app.route("/", methods=["GET"]) +def health_check(): + """Health check endpoint""" + return jsonify( + { + "status": "ok", + "message": "SVGBench Vercel Serverless Function", + "endpoints": {"POST /": "Process SVGBench evaluation requests"}, + } + ) + + +@app.route("/", methods=["OPTIONS"]) +def options_handler(): + """Handle CORS preflight requests""" + response = jsonify({}) + response.headers["Access-Control-Allow-Origin"] = "*" + response.headers["Access-Control-Allow-Methods"] = "POST, GET, OPTIONS" + response.headers["Access-Control-Allow-Headers"] = "Content-Type" + return response + + +# Add CORS headers to all responses +@app.after_request +def add_cors_headers(response): + response.headers["Access-Control-Allow-Origin"] = "*" + response.headers["Access-Control-Allow-Methods"] = "POST, GET, OPTIONS" + response.headers["Access-Control-Allow-Headers"] = "Content-Type" + return response From 779c6aa93fe84dcfe7bdbfdc100f6e68ca9754c0 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 04:00:56 -0700 Subject: [PATCH 05/16] new server --- .../quickstart/svg_agent/vercel_svg_server/requirements.txt | 3 ++- .../quickstart/svg_agent/vercel_svg_server/vercel.json | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt b/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt index dadd2db1..50f0def0 100644 --- a/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt @@ -1,3 +1,4 @@ openai>=1.0.0 python-dotenv>=0.19.0 -eval_protocol>=0.2.58 +eval_protocol==0.2.69-dev3 +Flask[async]==3.0.3 diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/vercel.json b/eval_protocol/quickstart/svg_agent/vercel_svg_server/vercel.json index 112be6e9..4291b8c1 100644 --- a/eval_protocol/quickstart/svg_agent/vercel_svg_server/vercel.json +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/vercel.json @@ -1,3 +1,5 @@ { - "redirects": [{ "source": "/init", "destination": "/api/init" }] + "rewrites": [ + { "source": "/(.*)", "destination": "/api/init" } + ] } From 7c94a7e58aa9bd8775cef5778ce5516c1c553e58 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 04:01:08 -0700 Subject: [PATCH 06/16] tests for status logging --- tests/test_status.py | 417 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 417 insertions(+) create mode 100644 tests/test_status.py diff --git a/tests/test_status.py b/tests/test_status.py new file mode 100644 index 00000000..f0ac0932 --- /dev/null +++ b/tests/test_status.py @@ -0,0 +1,417 @@ +""" +Tests for Status exception handling functionality. + +Tests the round-trip flow: +1. Exception → Status.rollout_error_from_exception() → structured logging +2. Structured data → Status.raise_from_status_details() → original exception +""" + +import pytest +from eval_protocol.models import Status + +# Test with different exception types that might be available +try: + import litellm.exceptions + + LITELLM_AVAILABLE = True +except ImportError: + LITELLM_AVAILABLE = False + +try: + import requests.exceptions + + REQUESTS_AVAILABLE = True +except ImportError: + REQUESTS_AVAILABLE = False + +try: + import httpx + + HTTPX_AVAILABLE = True +except ImportError: + HTTPX_AVAILABLE = False + +try: + import openai + + OPENAI_AVAILABLE = True +except ImportError: + OPENAI_AVAILABLE = False + + +def test_rollout_error_from_exception_basic(): + """Test creating Status from a basic exception.""" + # Create a simple exception + original_exception = ValueError("Test error message") + + # Create status from exception + status = Status.rollout_error_from_exception(original_exception) + + # Verify the status structure + assert status.code == Status.Code.INTERNAL + assert status.message == "Test error message" + assert len(status.details) == 1 + + detail = status.details[0] + assert detail["exception_type"] == "builtins.ValueError" + assert detail["exception_message"] == "Test error message" + + +def test_exception_round_trip_basic(): + """Test the complete round-trip: exception → status → re-raise exception.""" + # Create original exception + original_exception = ValueError("Round trip test") + + # Convert to status + status = Status.rollout_error_from_exception(original_exception) + + # Try to re-raise from status details + with pytest.raises(ValueError) as exc_info: + Status.raise_from_status_details(status.details) + + # Verify the re-raised exception has the same message + assert str(exc_info.value) == "Round trip test" + + +@pytest.mark.skipif(not LITELLM_AVAILABLE, reason="litellm not available") +def test_litellm_exception_round_trip(): + """Test round-trip with litellm exceptions.""" + # Create a litellm exception - try different constructor patterns + original_exception = litellm.exceptions.NotFoundError( + message="Model not found", model="test-model", llm_provider="test-provider" + ) + # Convert to status + status = Status.rollout_error_from_exception(original_exception) + + # Verify status details + detail = status.details[0] + assert detail["exception_type"] == "litellm.exceptions.NotFoundError" + # Message might contain additional info, just check it contains our text + assert "Model not found" in detail["exception_message"] or "not found" in detail["exception_message"].lower() + + # Re-raise and verify type + with pytest.raises(litellm.exceptions.NotFoundError) as exc_info: + Status.raise_from_status_details(status.details) + + # The re-raised exception should be the same type + assert isinstance(exc_info.value, litellm.exceptions.NotFoundError) + + +@pytest.mark.skipif(not REQUESTS_AVAILABLE, reason="requests not available") +def test_requests_exception_round_trip(): + """Test round-trip with requests exceptions.""" + # Create a requests exception + original_exception = requests.exceptions.ConnectionError("Connection failed") + + # Convert to status + status = Status.rollout_error_from_exception(original_exception) + + # Verify status details + detail = status.details[0] + assert detail["exception_type"] == "requests.exceptions.ConnectionError" + assert detail["exception_message"] == "Connection failed" + + # Re-raise and verify type + with pytest.raises(requests.exceptions.ConnectionError) as exc_info: + Status.raise_from_status_details(status.details) + + assert str(exc_info.value) == "Connection failed" + + +def test_unknown_exception_type(): + """Test behavior with unknown/non-importable exception type.""" + # Create status details with fake exception type + fake_details = [{"exception_type": "fake.module.FakeException", "exception_message": "This should not raise"}] + + # Should not raise anything, just return False + result = Status.raise_from_status_details(fake_details) + assert result is False + + +def test_malformed_status_details(): + """Test behavior with malformed status details.""" + # Various malformed details + malformed_cases = [ + [], # Empty list + [{}], # Empty dict + [{"exception_type": "ValueError"}], # Missing message + [{"exception_message": "test"}], # Missing type + [{"wrong_key": "wrong_value"}], # Wrong keys + ] + + for malformed_details in malformed_cases: + result = Status.raise_from_status_details(malformed_details) + assert result is False + + +def test_rollout_error_with_extra_info(): + """Test rollout_error_from_exception with extra_info.""" + original_exception = ValueError("Test with extra info") + extra_info = {"context": "test_context", "user_id": "123"} + + status = Status.rollout_error_from_exception(original_exception, extra_info) + + # Should have both exception info and extra info + assert len(status.details) == 2 + + # First detail should be exception info + exception_detail = status.details[0] + assert exception_detail["exception_type"] == "builtins.ValueError" + assert exception_detail["exception_message"] == "Test with extra info" + + # Second detail should be extra info + extra_detail = status.details[1] + assert extra_detail["extra_info"]["context"] == "test_context" + assert extra_detail["extra_info"]["user_id"] == "123" + + +def test_multiple_exception_details(): + """Test raise_from_status_details with multiple details (should use first valid one).""" + # Create details with multiple exception info + details = [ + {"other_info": "ignored"}, # Should be ignored + {"exception_type": "builtins.ValueError", "exception_message": "First exception"}, # Should be used + {"exception_type": "builtins.RuntimeError", "exception_message": "Second exception"}, # Should be ignored + ] + + # Should raise the first valid exception + with pytest.raises(ValueError) as exc_info: + Status.raise_from_status_details(details) + + assert str(exc_info.value) == "First exception" + + +@pytest.mark.skipif(not LITELLM_AVAILABLE, reason="litellm not available") +def test_different_litellm_exceptions(): + """Test various litellm exception types.""" + # Test with a few common litellm exceptions + exception_classes = [ + litellm.exceptions.RateLimitError, + litellm.exceptions.InternalServerError, + litellm.exceptions.BadRequestError, + ] + + for exception_class in exception_classes: + # Try to create an exception instance (try different constructor patterns) + original_exception = None + exception_name = exception_class.__name__ + + try: + # Try with just message + original_exception = exception_class(f"Test {exception_name}") + except TypeError: + try: + # Try with message and required parameters + original_exception = exception_class( + message=f"Test {exception_name}", model="test-model", llm_provider="test-provider" + ) + except TypeError: + try: + # Try with positional args + original_exception = exception_class(f"Test {exception_name}", "test-model", "test-provider") + except TypeError: + # Skip this particular exception type + continue + + if original_exception is None: + continue + + # Test the round-trip + status = Status.rollout_error_from_exception(original_exception) + + # Should be able to re-raise the same type + with pytest.raises(exception_class): + Status.raise_from_status_details(status.details) + + +def test_edge_case_empty_message(): + """Test with exception that has empty message.""" + original_exception = ValueError() # Empty message + + status = Status.rollout_error_from_exception(original_exception) + + # Should handle empty message gracefully + detail = status.details[0] + assert detail["exception_type"] == "builtins.ValueError" + assert detail["exception_message"] == "" + + # Should still re-raise correctly + with pytest.raises(ValueError): + Status.raise_from_status_details(status.details) + + +def test_all_default_retryable_exceptions(): + """ + Comprehensive test of all exceptions in DEFAULT_RETRYABLE_EXCEPTIONS. + + This ensures our Status exception handling works with every exception type + that the retry system claims to support. + """ + # Test cases: (exception_class, test_message, required_modules, skip_reason) + test_cases = [ + # Standard library exceptions + (ConnectionError, "Connection failed", [], None), + (TimeoutError, "Request timeout", [], None), + (OSError, "OS error occurred", [], None), + ] + + # Add requests exceptions if available + if REQUESTS_AVAILABLE: + import requests.exceptions + + test_cases.extend( + [ + (requests.exceptions.ConnectionError, "Requests connection error", ["requests"], None), + (requests.exceptions.Timeout, "Requests timeout", ["requests"], None), + (requests.exceptions.HTTPError, "HTTP error occurred", ["requests"], None), + (requests.exceptions.RequestException, "Request exception", ["requests"], None), + ] + ) + + # Add httpx exceptions if available + if HTTPX_AVAILABLE: + import httpx + + test_cases.extend( + [ + (httpx.ConnectError, "HTTPX connect error", ["httpx"], None), + (httpx.TimeoutException, "HTTPX timeout", ["httpx"], None), + (httpx.NetworkError, "HTTPX network error", ["httpx"], None), + (httpx.RemoteProtocolError, "HTTPX protocol error", ["httpx"], None), + ] + ) + + # Add openai exceptions if available + if OPENAI_AVAILABLE: + import openai + + test_cases.extend( + [ + (openai.NotFoundError, "OpenAI model not found", ["openai"], None), + (openai.BadRequestError, "OpenAI bad request", ["openai"], None), + (openai.RateLimitError, "OpenAI rate limit", ["openai"], None), + ] + ) + + # Add litellm exceptions if available + if LITELLM_AVAILABLE: + import litellm.exceptions + + test_cases.extend( + [ + (litellm.exceptions.RateLimitError, "Rate limit exceeded", ["litellm"], None), + (litellm.exceptions.InternalServerError, "Internal server error", ["litellm"], None), + (litellm.exceptions.Timeout, "LiteLLM timeout", ["litellm"], None), + (litellm.exceptions.NotFoundError, "Model not found", ["litellm"], None), + (litellm.exceptions.BadRequestError, "Bad request", ["litellm"], None), + (litellm.exceptions.ServiceUnavailableError, "Service unavailable", ["litellm"], None), + ] + ) + + successful_tests = 0 + failed_tests = [] + + for exception_class, test_message, required_modules, skip_reason in test_cases: + exception_name = f"{exception_class.__module__}.{exception_class.__name__}" + + try: + # Try to create the original exception with different patterns + original_exception = None + + # Pattern 1: Just message + try: + original_exception = exception_class(test_message) + except TypeError: + # Pattern 2: Message as named parameter + try: + original_exception = exception_class(message=test_message) + except TypeError: + # Pattern 3: For litellm - try with required parameters + if "litellm" in required_modules: + try: + original_exception = exception_class( + message=test_message, model="test-model", llm_provider="test-provider" + ) + except TypeError: + try: + original_exception = exception_class(test_message, "test-model", "test-provider") + except TypeError: + pass + # Pattern 4: For OpenAI - create mock response object + elif "openai" in required_modules and original_exception is None: + try: + # Create minimal mock objects for OpenAI exceptions + class MockRequest: + def __init__(self): + self.method = "POST" + self.url = "https://api.openai.com/v1/chat/completions" + + class MockResponse: + def __init__(self): + self.status_code = 404 + self.headers = {"x-request-id": "test-request-id"} + self.request = MockRequest() + + mock_response = MockResponse() + original_exception = exception_class(test_message, response=mock_response, body=None) + except (TypeError, AttributeError) as e: + # If mock approach fails, skip OpenAI for now + failed_tests.append((exception_name, f"OpenAI mock creation failed: {e}")) + continue + + # Pattern 5: No arguments fallback + if original_exception is None: + try: + original_exception = exception_class() + except TypeError: + failed_tests.append((exception_name, "Could not create exception instance")) + continue + + if original_exception is None: + failed_tests.append((exception_name, "Could not create exception instance")) + continue + + # Test the round-trip: exception -> status -> re-raise + try: + # Convert to status + status = Status.rollout_error_from_exception(original_exception) + + # Verify status structure + assert len(status.details) >= 1 + detail = status.details[0] + assert "exception_type" in detail + assert "exception_message" in detail + assert detail["exception_type"] == exception_name + + # Try to re-raise from status details + with pytest.raises(exception_class) as exc_info: + Status.raise_from_status_details(status.details) + + # Verify we got the right exception type back + assert isinstance(exc_info.value, exception_class) + successful_tests += 1 + + print(f"āœ… {exception_name}: Round-trip successful") + + except Exception as e: + failed_tests.append((exception_name, f"Round-trip failed: {e}")) + continue + + except Exception as e: + failed_tests.append((exception_name, f"Setup failed: {e}")) + continue + + # Report results + print("\nšŸŽÆ Exception Round-trip Test Results:") + print(f"āœ… Successful: {successful_tests}") + print(f"āŒ Failed: {len(failed_tests)}") + + if failed_tests: + print("\nāŒ Failed exceptions:") + for exception_name, reason in failed_tests: + print(f" - {exception_name}: {reason}") + + # We expect most to pass, but some failures are acceptable due to complex constructors + # Require at least 85% success rate (17/20 = 85% is good, indicates robust support) + success_rate = successful_tests / (successful_tests + len(failed_tests)) + assert success_rate >= 0.85, f"Success rate {success_rate:.1%} too low. Failed tests: {failed_tests}" From 55043e4c645183991486ef6df476d8fe5b8e1e5e Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 04:20:44 -0700 Subject: [PATCH 07/16] version --- .../quickstart/svg_agent/vercel_svg_server/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt b/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt index 50f0def0..2296770c 100644 --- a/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt @@ -1,4 +1,4 @@ openai>=1.0.0 python-dotenv>=0.19.0 -eval_protocol==0.2.69-dev3 +eval_protocol>=0.2.70 Flask[async]==3.0.3 From 0ebd0177dafc55bfa302a49b2d674c0487516eff Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 17:18:36 -0700 Subject: [PATCH 08/16] eval protocol exception retry --- eval_protocol/exceptions.py | 176 ++++++++ eval_protocol/models.py | 294 ++++++------ eval_protocol/pytest/exception_config.py | 16 +- .../pytest/remote_rollout_processor.py | 16 +- eval_protocol/pytest/tracing_utils.py | 4 +- tests/test_exceptions.py | 350 +++++++++++++++ tests/test_status.py | 417 ------------------ 7 files changed, 702 insertions(+), 571 deletions(-) create mode 100644 eval_protocol/exceptions.py create mode 100644 tests/test_exceptions.py delete mode 100644 tests/test_status.py diff --git a/eval_protocol/exceptions.py b/eval_protocol/exceptions.py new file mode 100644 index 00000000..f294977d --- /dev/null +++ b/eval_protocol/exceptions.py @@ -0,0 +1,176 @@ +""" +Custom exceptions for Eval Protocol that map to gRPC Status codes. + +These exceptions provide a clean way to handle errors and map them to appropriate +Status objects following the AIP-193 standard. +""" + +from typing import Optional + + +class EvalProtocolError(Exception): + """ + Base exception for all Eval Protocol specific errors. + + Maps to Status.Code and can be converted to Status objects for structured logging. + """ + + pass + + +# Standard gRPC status code exceptions +class CancelledError(EvalProtocolError): + """Operation was cancelled (Status.Code.CANCELLED = 1)""" + + status_code = 1 + + +class UnknownError(EvalProtocolError): + """Unknown error occurred (Status.Code.UNKNOWN = 2)""" + + status_code = 2 + + +class InvalidArgumentError(EvalProtocolError): + """Invalid argument provided (Status.Code.INVALID_ARGUMENT = 3)""" + + status_code = 3 + + +class DeadlineExceededError(EvalProtocolError): + """Deadline exceeded (Status.Code.DEADLINE_EXCEEDED = 4)""" + + status_code = 4 + + +class NotFoundError(EvalProtocolError): + """Resource not found (Status.Code.NOT_FOUND = 5)""" + + status_code = 5 + + +class AlreadyExistsError(EvalProtocolError): + """Resource already exists (Status.Code.ALREADY_EXISTS = 6)""" + + status_code = 6 + + +class PermissionDeniedError(EvalProtocolError): + """Permission denied (Status.Code.PERMISSION_DENIED = 7)""" + + status_code = 7 + + +class ResourceExhaustedError(EvalProtocolError): + """Resource exhausted (Status.Code.RESOURCE_EXHAUSTED = 8)""" + + status_code = 8 + + +class FailedPreconditionError(EvalProtocolError): + """Failed precondition (Status.Code.FAILED_PRECONDITION = 9)""" + + status_code = 9 + + +class AbortedError(EvalProtocolError): + """Operation was aborted (Status.Code.ABORTED = 10)""" + + status_code = 10 + + +class OutOfRangeError(EvalProtocolError): + """Value out of range (Status.Code.OUT_OF_RANGE = 11)""" + + status_code = 11 + + +class UnimplementedError(EvalProtocolError): + """Operation is not implemented (Status.Code.UNIMPLEMENTED = 12)""" + + status_code = 12 + + +class InternalError(EvalProtocolError): + """Internal server error (Status.Code.INTERNAL = 13)""" + + status_code = 13 + + +class UnavailableError(EvalProtocolError): + """Service unavailable (Status.Code.UNAVAILABLE = 14)""" + + status_code = 14 + + +class DataLossError(EvalProtocolError): + """Unrecoverable data loss (Status.Code.DATA_LOSS = 15)""" + + status_code = 15 + + +class UnauthenticatedError(EvalProtocolError): + """Request lacks valid authentication (Status.Code.UNAUTHENTICATED = 16)""" + + status_code = 16 + + +# Custom EP exceptions +class RolloutFinishedError(EvalProtocolError): + """Rollout completed successfully (Status.Code.FINISHED = 100)""" + + status_code = 100 + + +class RolloutRunningError(EvalProtocolError): + """Rollout is still running (Status.Code.RUNNING = 101)""" + + status_code = 101 + + +class ScoreInvalidError(EvalProtocolError): + """Score is invalid (Status.Code.SCORE_INVALID = 102)""" + + status_code = 102 + + +# Convenience mapping from status codes to exception classes +# Only actual error conditions should raise exceptions +STATUS_CODE_TO_EXCEPTION = { + 0: None, # OK - success, no exception + 1: CancelledError, + 2: UnknownError, + 3: InvalidArgumentError, + 4: DeadlineExceededError, + 5: NotFoundError, + 6: AlreadyExistsError, + 7: PermissionDeniedError, + 8: ResourceExhaustedError, + 9: FailedPreconditionError, + 10: AbortedError, + 11: OutOfRangeError, + 12: UnimplementedError, + 13: InternalError, + 14: UnavailableError, + 15: DataLossError, + 16: UnauthenticatedError, + 100: None, # FINISHED - success, no exception + 101: None, # RUNNING - in progress, no exception + 102: None, # SCORE_INVALID - success, no exception +} + + +def exception_for_status_code(code: int) -> Optional[EvalProtocolError]: + """ + Create an exception instance for a given status code. + + Args: + code: Status code from Status.Code enum + + Returns: + Exception instance or None if code is OK (0) + """ + exception_class = STATUS_CODE_TO_EXCEPTION.get(code) + if exception_class is None: + return None + return exception_class() diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 23231c22..494d3129 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -136,6 +136,13 @@ def eval_finished(cls) -> "Status": """Create a status indicating the evaluation finished.""" return cls(code=cls.Code.FINISHED, message="Evaluation finished", details=[]) + @staticmethod + def _build_details_with_extra_info(extra_info: Optional[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Helper to build details list from extra_info.""" + if extra_info: + return [ErrorInfo.extra_info(extra_info).to_aip193_format()] + return [] + @classmethod def aborted(cls, message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": """Create a status indicating the evaluation was aborted.""" @@ -160,179 +167,190 @@ def finished(cls, message: str, details: Optional[List[Dict[str, Any]]] = None) """Create a status indicating the rollout finished.""" return cls(code=cls.Code.FINISHED, message=message, details=details or []) + # Error methods organized by Status.Code enum values (1-16) + + # CANCELLED = 1 @classmethod - def rollout_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": - """Create a status indicating the rollout failed with an error.""" - details = [] - if extra_info: - details.append(ErrorInfo.extra_info(extra_info).to_aip193_format()) - return cls.error(error_message, details) + def rollout_cancelled_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout was cancelled.""" + return cls.cancelled_error(error_message, cls._build_details_with_extra_info(extra_info)) + + @classmethod + def cancelled_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating the operation was cancelled.""" + return cls(code=cls.Code.CANCELLED, message=error_message, details=details or []) + # UNKNOWN = 2 @classmethod - def rollout_error_from_exception( - cls, exception: Exception, extra_info: Optional[Dict[str, Any]] = None + def rollout_unknown_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with an unknown error.""" + return cls.unknown_error(error_message, cls._build_details_with_extra_info(extra_info)) + + @classmethod + def unknown_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an unknown error occurred.""" + return cls(code=cls.Code.UNKNOWN, message=error_message, details=details or []) + + # INVALID_ARGUMENT = 3 + @classmethod + def rollout_invalid_argument_error( + cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None ) -> "Status": - """ - Create a status indicating the rollout failed with an exception. - Simple approach that stores exception info directly in details. - """ - details = [] + """Create a status indicating the rollout failed with an invalid argument error.""" + return cls.invalid_argument_error(error_message, cls._build_details_with_extra_info(extra_info)) - details.append( - { - "exception_type": f"{type(exception).__module__}.{type(exception).__name__}", - "exception_message": str(exception), - } - ) + @classmethod + def invalid_argument_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an invalid argument error occurred.""" + return cls(code=cls.Code.INVALID_ARGUMENT, message=error_message, details=details or []) - if extra_info: - details.append({"extra_info": extra_info}) + # DEADLINE_EXCEEDED = 4 + @classmethod + def rollout_deadline_exceeded_error( + cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None + ) -> "Status": + """Create a status indicating the rollout failed with a deadline exceeded error.""" + return cls.deadline_exceeded_error(error_message, cls._build_details_with_extra_info(extra_info)) - return cls(code=cls.Code.INTERNAL, message=str(exception), details=details) + @classmethod + def deadline_exceeded_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating a deadline exceeded error occurred.""" + return cls(code=cls.Code.DEADLINE_EXCEEDED, message=error_message, details=details or []) + # NOT_FOUND = 5 @classmethod - def raise_from_status_details(cls, status_details: List[Dict[str, Any]]) -> bool: - """ - Try to raise original exception from simple status details using dynamic imports. - """ + def rollout_not_found_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with a not found error.""" + return cls.not_found_error(error_message, cls._build_details_with_extra_info(extra_info)) - for detail in status_details: - # Look for simple exception info - if "exception_type" in detail and "exception_message" in detail: - exception_type = detail["exception_type"] - exception_message = detail["exception_message"] - - logger.info(f"Found exception info: {exception_type}") - - # Dynamically import and raise the exception - exception_class = cls._import_exception_class(exception_type) - if exception_class: - logger.info(f"Found exception class: {exception_class}") - # Try different constructor patterns - exception_to_raise = cls._create_exception_instance(exception_class, exception_message) - if exception_to_raise: - logger.info(f"Re-raising {exception_type} from status details") - raise exception_to_raise - else: - logger.info(f"Could not create instance of {exception_type}") - continue - else: - logger.info(f"Could not import exception type: {exception_type}") - continue + @classmethod + def not_found_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating a not found error occurred.""" + return cls(code=cls.Code.NOT_FOUND, message=error_message, details=details or []) - return False + # ALREADY_EXISTS = 6 + @classmethod + def rollout_already_exists_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with an already exists error.""" + return cls.already_exists_error(error_message, cls._build_details_with_extra_info(extra_info)) @classmethod - def _create_exception_instance(cls, exception_class: type, message: str) -> Optional[Exception]: - """ - Try to create an exception instance using different constructor patterns. + def already_exists_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an already exists error occurred.""" + return cls(code=cls.Code.ALREADY_EXISTS, message=error_message, details=details or []) - Args: - exception_class: The exception class to instantiate - message: The error message + # PERMISSION_DENIED = 7 + @classmethod + def rollout_permission_denied_error( + cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None + ) -> "Status": + """Create a status indicating the rollout failed with a permission denied error.""" + return cls.permission_denied_error(error_message, cls._build_details_with_extra_info(extra_info)) - Returns: - Exception instance if successful, None otherwise - """ - # Common constructor patterns to try - patterns = [ - # Pattern 1: Just message - lambda: exception_class(message), - # Pattern 2: Message as named parameter - lambda: exception_class(message=message), - # Pattern 3: Message + common litellm parameters - # NOTE: we are losing some diagnostic information here by not passing the model and llm_provider. We could try to capture full exception state in rollout_error_from_exception. - lambda: exception_class(message, model="unknown", llm_provider="unknown"), - lambda: exception_class(message=message, model="unknown", llm_provider="unknown"), - # Pattern 5: OpenAI exceptions - create mock response object - lambda: cls._create_openai_exception(exception_class, message), - # Pattern 7: No arguments (fallback) - lambda: exception_class(), - ] - - for i, pattern in enumerate(patterns): - try: - instance = pattern() - logger.debug(f"Successfully created {exception_class.__name__} using pattern {i + 1}") - return instance - except (TypeError, ValueError) as e: - logger.debug(f"Pattern {i + 1} failed for {exception_class.__name__}: {e}") - continue + @classmethod + def permission_denied_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating a permission denied error occurred.""" + return cls(code=cls.Code.PERMISSION_DENIED, message=error_message, details=details or []) - logger.debug(f"All constructor patterns failed for {exception_class.__name__}") - return None + # RESOURCE_EXHAUSTED = 8 + @classmethod + def rollout_resource_exhausted_error( + cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None + ) -> "Status": + """Create a status indicating the rollout failed with a resource exhausted error.""" + return cls.resource_exhausted_error(error_message, cls._build_details_with_extra_info(extra_info)) @classmethod - def _create_openai_exception(cls, exception_class: type, message: str) -> Optional[Exception]: - """ - Create OpenAI exception with a mock response object. + def resource_exhausted_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating a resource exhausted error occurred.""" + return cls(code=cls.Code.RESOURCE_EXHAUSTED, message=error_message, details=details or []) - OpenAI exceptions require httpx.Response objects which are complex to create, - so we create a minimal mock that satisfies the basic requirements. - """ - try: - import httpx + # FAILED_PRECONDITION = 9 + @classmethod + def rollout_failed_precondition_error( + cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None + ) -> "Status": + """Create a status indicating the rollout failed with a failed precondition error.""" + return cls.failed_precondition_error(error_message, cls._build_details_with_extra_info(extra_info)) - # Create a minimal mock response object - class MockRequest: - def __init__(self): - self.method = "POST" - self.url = "https://api.openai.com/v1/chat/completions" + @classmethod + def failed_precondition_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating a failed precondition error occurred.""" + return cls(code=cls.Code.FAILED_PRECONDITION, message=error_message, details=details or []) - class MockResponse: - def __init__(self): - self.status_code = 404 - self.headers = {"x-request-id": "mock-request-id"} - self.request = MockRequest() + # ABORTED = 10 + @classmethod + def rollout_aborted_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout was aborted.""" + return cls.aborted(error_message, cls._build_details_with_extra_info(extra_info)) - mock_response = MockResponse() - return exception_class(message, response=mock_response, body=None) + # OUT_OF_RANGE = 11 + @classmethod + def rollout_out_of_range_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with an out of range error.""" + return cls.out_of_range_error(error_message, cls._build_details_with_extra_info(extra_info)) - except Exception as e: - logging.getLogger(__name__).debug(f"Failed to create OpenAI exception with mock response: {e}") - return None + @classmethod + def out_of_range_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an out of range error occurred.""" + return cls(code=cls.Code.OUT_OF_RANGE, message=error_message, details=details or []) + # UNIMPLEMENTED = 12 @classmethod - def _import_exception_class(cls, exception_type: str) -> Optional[type]: - """ - Dynamically import an exception class from a string. + def rollout_unimplemented_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with an unimplemented error.""" + return cls.unimplemented_error(error_message, cls._build_details_with_extra_info(extra_info)) - Args: - exception_type: Exception type string like "litellm.exceptions.NotFoundError", - "openai.BadRequestError", "requests.exceptions.ConnectionError", etc. + @classmethod + def unimplemented_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an unimplemented error occurred.""" + return cls(code=cls.Code.UNIMPLEMENTED, message=error_message, details=details or []) - Returns: - The exception class if found, None otherwise - """ - try: - # Require fully qualified names (no automatic prefixing) - if "." not in exception_type: - logging.getLogger(__name__).debug(f"Exception type must be fully qualified: {exception_type}") - return None + # INTERNAL = 13 + @classmethod + def rollout_internal_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with an internal error.""" + return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info)) - # Parse module and class name - module_name, class_name = exception_type.rsplit(".", 1) + @classmethod + def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an internal error occurred.""" + return cls(code=cls.Code.INTERNAL, message=error_message, details=details or []) - # Import the module - module = importlib.import_module(module_name) + # UNAVAILABLE = 14 + @classmethod + def rollout_unavailable_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with an unavailable error.""" + return cls.unavailable_error(error_message, cls._build_details_with_extra_info(extra_info)) - # Get the exception class - exception_class = getattr(module, class_name, None) + @classmethod + def unavailable_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an unavailable error occurred.""" + return cls(code=cls.Code.UNAVAILABLE, message=error_message, details=details or []) - # Verify it's actually an exception class - if exception_class and issubclass(exception_class, BaseException): - return exception_class + # DATA_LOSS = 15 + @classmethod + def rollout_data_loss_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with a data loss error.""" + return cls.data_loss_error(error_message, cls._build_details_with_extra_info(extra_info)) - return None + @classmethod + def data_loss_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating a data loss error occurred.""" + return cls(code=cls.Code.DATA_LOSS, message=error_message, details=details or []) - except (ImportError, AttributeError, ValueError) as e: - logging.getLogger(__name__).debug(f"Could not import exception class {exception_type}: {e}") - return None + # UNAUTHENTICATED = 16 + @classmethod + def rollout_unauthenticated_error( + cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None + ) -> "Status": + """Create a status indicating the rollout failed with an unauthenticated error.""" + return cls.unauthenticated_error(error_message, cls._build_details_with_extra_info(extra_info)) @classmethod - def error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": - """Create a status indicating the rollout failed with an error.""" - return cls(code=cls.Code.INTERNAL, message=error_message, details=details or []) + def unauthenticated_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an unauthenticated error occurred.""" + return cls(code=cls.Code.UNAUTHENTICATED, message=error_message, details=details or []) @classmethod def score_invalid( diff --git a/eval_protocol/pytest/exception_config.py b/eval_protocol/pytest/exception_config.py index 7da0b8a0..42c97ea4 100644 --- a/eval_protocol/pytest/exception_config.py +++ b/eval_protocol/pytest/exception_config.py @@ -11,7 +11,8 @@ import litellm import requests import httpx -import openai + +import eval_protocol.exceptions # Default exceptions that should be retried with backoff @@ -35,12 +36,17 @@ litellm.exceptions.InternalServerError, litellm.exceptions.Timeout, litellm.exceptions.NotFoundError, - litellm.exceptions.BadRequestError, # remove this once we have a long term solution + # litellm.exceptions.BadRequestError, # remove this once we have a long term solution litellm.exceptions.ServiceUnavailableError, litellm.exceptions.APIError, - # OpenAI library exceptions - openai.NotFoundError, - openai.BadRequestError, # remove this once we have a long term solution + # Eval Protocol exceptions + eval_protocol.exceptions.UnknownError, + eval_protocol.exceptions.DeadlineExceededError, + eval_protocol.exceptions.NotFoundError, + eval_protocol.exceptions.PermissionDeniedError, + eval_protocol.exceptions.UnavailableError, + eval_protocol.exceptions.UnauthenticatedError, + eval_protocol.exceptions.ResourceExhaustedError, } diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index d27f878f..6e57d93c 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -10,6 +10,7 @@ DataLoaderConfig, ) from eval_protocol.adapters.fireworks_tracing import FireworksTracingAdapter +from eval_protocol.exceptions import exception_for_status_code from .rollout_processor import RolloutProcessor from .types import RolloutProcessorConfig @@ -97,13 +98,7 @@ def _post_init() -> None: r.raise_for_status() except requests.exceptions.Timeout: raise TimeoutError( - "The /init endpoint timed out after 30 seconds. " - "CRITICAL: The /init endpoint must return immediately (within 30s) and NOT block on rollout execution. " - "Your remote server should:\n" - "1. Accept the /init request and return a 200 response immediately\n" - "2. Process the actual rollout asynchronously in the background\n" - "3. Use the /status endpoint to report progress\n" - "For Python/Node.js: Start a separate process per rollout to avoid blocking the /init response." + f"The /init endpoint tried {url} with {init_payload.model_dump()} but timed out after 30 seconds." ) await asyncio.to_thread(_post_init) @@ -166,7 +161,10 @@ def _get_status() -> Dict[str, Any]: f"Found Fireworks log for rollout {row.execution_metadata.rollout_id} with status code {status_code}" ) - Status.raise_from_status_details(status_details) + # Create and raise exception if appropriate + exception = exception_for_status_code(status_code) + if exception is not None: + raise exception row.rollout_status = Status( code=Status.Code(status_code), @@ -183,7 +181,7 @@ def _get_status() -> Dict[str, Any]: f"Loop completed without breaking for {row.execution_metadata.rollout_id}, which means we timed out" ) # Loop completed without breaking, which means we timed out - row.rollout_status = Status.rollout_error( + row.rollout_status = Status.rollout_deadline_exceeded_error( f"Rollout {row.execution_metadata.rollout_id} timed out after {timeout_seconds} seconds" ) diff --git a/eval_protocol/pytest/tracing_utils.py b/eval_protocol/pytest/tracing_utils.py index 27f8d14c..6ea69371 100644 --- a/eval_protocol/pytest/tracing_utils.py +++ b/eval_protocol/pytest/tracing_utils.py @@ -151,14 +151,14 @@ def update_row_with_remote_trace( output_rows: List[EvaluationRow] = [r for result in results for r in result.rows] if len(output_rows) == 0: # Fallback to original row if no remote data found - row.rollout_status = Status(code=Status.Code.NOT_FOUND, message="No remote data found for rollout") + row.rollout_status = Status.rollout_not_found_error("No remote data found for rollout") return None elif len(output_rows) == 1: # Return the remote row remote_row = output_rows[0] # if the remote_row has the same number of messages as the original row, something went wrong if len(remote_row.messages) == len(row.messages): - row.rollout_status = Status.rollout_error( + row.rollout_status = Status.rollout_internal_error( "Rollout finished with the same number of messages as the original row" ) return None diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py new file mode 100644 index 00000000..b4b039e2 --- /dev/null +++ b/tests/test_exceptions.py @@ -0,0 +1,350 @@ +""" +Tests for the eval_protocol exception handling system. + +Tests the status code to exception mapping functionality: +1. STATUS_CODE_TO_EXCEPTION mapping correctness +2. exception_for_status_code() function behavior +3. Success states don't raise exceptions (0, 100, 101, 102) +4. Error states raise appropriate exceptions (1-16) +5. Exception class inheritance and attributes +6. Integration with existing retry logic +""" + +import pytest +from eval_protocol.models import Status +from eval_protocol.exceptions import ( + exception_for_status_code, + STATUS_CODE_TO_EXCEPTION, + EvalProtocolError, + CancelledError, + UnknownError, + InvalidArgumentError, + DeadlineExceededError, + NotFoundError, + AlreadyExistsError, + PermissionDeniedError, + ResourceExhaustedError, + FailedPreconditionError, + AbortedError, + OutOfRangeError, + UnimplementedError, + InternalError, + UnavailableError, + DataLossError, + UnauthenticatedError, + RolloutFinishedError, + RolloutRunningError, + ScoreInvalidError, +) + + +def test_success_status_codes_no_exception(): + """Test that success/progress status codes don't raise exceptions.""" + success_codes = [ + (0, "OK"), + (100, "FINISHED"), + (101, "RUNNING"), + (102, "SCORE_INVALID"), # Changed to success state + ] + + for code, name in success_codes: + exception = exception_for_status_code(code) + assert exception is None, f"Status code {code} ({name}) should not raise exception" + + +def test_error_status_codes_raise_exceptions(): + """Test that error status codes raise appropriate exceptions.""" + error_test_cases = [ + (1, CancelledError, "CANCELLED"), + (2, UnknownError, "UNKNOWN"), + (3, InvalidArgumentError, "INVALID_ARGUMENT"), + (4, DeadlineExceededError, "DEADLINE_EXCEEDED"), + (5, NotFoundError, "NOT_FOUND"), + (6, AlreadyExistsError, "ALREADY_EXISTS"), + (7, PermissionDeniedError, "PERMISSION_DENIED"), + (8, ResourceExhaustedError, "RESOURCE_EXHAUSTED"), + (9, FailedPreconditionError, "FAILED_PRECONDITION"), + (10, AbortedError, "ABORTED"), + (11, OutOfRangeError, "OUT_OF_RANGE"), + (12, UnimplementedError, "UNIMPLEMENTED"), + (13, InternalError, "INTERNAL"), + (14, UnavailableError, "UNAVAILABLE"), + (15, DataLossError, "DATA_LOSS"), + (16, UnauthenticatedError, "UNAUTHENTICATED"), + ] + + for code, expected_exception_class, name in error_test_cases: + exception = exception_for_status_code(code) + assert exception is not None, f"Status code {code} ({name}) should raise exception" + assert isinstance(exception, expected_exception_class), ( + f"Status code {code} should raise {expected_exception_class.__name__}" + ) + assert isinstance(exception, EvalProtocolError), "All exceptions should inherit from EvalProtocolError" + + +def test_status_code_mapping_completeness(): + """Test that STATUS_CODE_TO_EXCEPTION mapping covers all expected codes.""" + expected_codes = [ + 0, # OK + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, # Standard gRPC codes + 100, + 101, + 102, # Custom EP codes + ] + + for code in expected_codes: + assert code in STATUS_CODE_TO_EXCEPTION, f"Status code {code} missing from mapping" + + +def test_invalid_status_codes(): + """Test behavior with invalid/unknown status codes.""" + invalid_codes = [-1, 17, 99, 103, 999] + + for code in invalid_codes: + exception = exception_for_status_code(code) + assert exception is None, f"Invalid status code {code} should return None" + + +def test_exception_attributes(): + """Test that exceptions have the expected attributes.""" + # Test a few exception types + test_cases = [ + (1, CancelledError, "CANCELLED"), + (5, NotFoundError, "NOT_FOUND"), + (13, InternalError, "INTERNAL"), + ] + + for code, expected_class, name in test_cases: + exception = exception_for_status_code(code) + assert hasattr(expected_class, "status_code"), f"{expected_class.__name__} should have status_code attribute" + assert expected_class.status_code == code, f"{expected_class.__name__}.status_code should be {code}" + + +def test_exception_raising_integration(): + """Test the pattern used in RemoteRolloutProcessor.""" + # Simulate the pattern used in remote_rollout_processor.py + status_codes_to_test = [ + (0, False), # OK - should not raise + (5, True), # NOT_FOUND - should raise NotFoundError + (13, True), # INTERNAL - should raise InternalError + (100, False), # FINISHED - should not raise + ] + + for status_code, should_raise in status_codes_to_test: + exception = exception_for_status_code(status_code) + + if should_raise: + assert exception is not None, f"Status code {status_code} should create exception" + # Test that we can raise it + with pytest.raises(EvalProtocolError): + raise exception + else: + assert exception is None, f"Status code {status_code} should not create exception" + + +def test_status_code_enum_consistency(): + """Test that our mapping is consistent with Status.Code enum.""" + # Test that our exception mapping aligns with Status.Code enum + status_code_mapping = { + Status.Code.OK: None, + Status.Code.CANCELLED: CancelledError, + Status.Code.UNKNOWN: UnknownError, + Status.Code.INVALID_ARGUMENT: InvalidArgumentError, + Status.Code.DEADLINE_EXCEEDED: DeadlineExceededError, + Status.Code.NOT_FOUND: NotFoundError, + Status.Code.ALREADY_EXISTS: AlreadyExistsError, + Status.Code.PERMISSION_DENIED: PermissionDeniedError, + Status.Code.RESOURCE_EXHAUSTED: ResourceExhaustedError, + Status.Code.FAILED_PRECONDITION: FailedPreconditionError, + Status.Code.ABORTED: AbortedError, + Status.Code.OUT_OF_RANGE: OutOfRangeError, + Status.Code.UNIMPLEMENTED: UnimplementedError, + Status.Code.INTERNAL: InternalError, + Status.Code.UNAVAILABLE: UnavailableError, + Status.Code.DATA_LOSS: DataLossError, + Status.Code.UNAUTHENTICATED: UnauthenticatedError, + Status.Code.FINISHED: None, + Status.Code.RUNNING: None, + Status.Code.SCORE_INVALID: None, + } + + for status_code_enum, expected_exception_class in status_code_mapping.items(): + code_value = int(status_code_enum) + actual_exception_class = STATUS_CODE_TO_EXCEPTION.get(code_value) + + if expected_exception_class is None: + assert actual_exception_class is None, ( + f"Status.Code.{status_code_enum.name} ({code_value}) should map to None" + ) + else: + assert actual_exception_class == expected_exception_class, ( + f"Status.Code.{status_code_enum.name} ({code_value}) should map to {expected_exception_class.__name__}" + ) + + +def test_exception_inheritance(): + """Test that all exception classes properly inherit from EvalProtocolError.""" + exception_classes = [ + CancelledError, + UnknownError, + InvalidArgumentError, + DeadlineExceededError, + NotFoundError, + AlreadyExistsError, + PermissionDeniedError, + ResourceExhaustedError, + FailedPreconditionError, + AbortedError, + OutOfRangeError, + UnimplementedError, + InternalError, + UnavailableError, + DataLossError, + UnauthenticatedError, + RolloutFinishedError, + RolloutRunningError, + ScoreInvalidError, + ] + + for exception_class in exception_classes: + assert issubclass(exception_class, EvalProtocolError), ( + f"{exception_class.__name__} should inherit from EvalProtocolError" + ) + assert issubclass(exception_class, Exception), f"{exception_class.__name__} should inherit from Exception" + + +def test_real_world_usage_scenarios(): + """Test realistic usage patterns from RemoteRolloutProcessor.""" + # Test scenarios that might occur in practice + scenarios = [ + # Success scenarios + {"status_code": 0, "description": "Successful API call", "should_raise": False}, + {"status_code": 100, "description": "Rollout completed successfully", "should_raise": False}, + {"status_code": 101, "description": "Rollout still in progress", "should_raise": False}, + # Error scenarios that should trigger retry logic + { + "status_code": 4, + "description": "Request timeout", + "should_raise": True, + "expected_exception": DeadlineExceededError, + }, + { + "status_code": 5, + "description": "Model not found", + "should_raise": True, + "expected_exception": NotFoundError, + }, + { + "status_code": 7, + "description": "API key invalid", + "should_raise": True, + "expected_exception": PermissionDeniedError, + }, + { + "status_code": 8, + "description": "Rate limit exceeded", + "should_raise": True, + "expected_exception": ResourceExhaustedError, + }, + { + "status_code": 13, + "description": "Internal server error", + "should_raise": True, + "expected_exception": InternalError, + }, + { + "status_code": 14, + "description": "Service temporarily unavailable", + "should_raise": True, + "expected_exception": UnavailableError, + }, + ] + + for scenario in scenarios: + status_code = scenario["status_code"] + description = scenario["description"] + should_raise = scenario["should_raise"] + + # This is the pattern used in RemoteRolloutProcessor + exception = exception_for_status_code(status_code) + + if should_raise: + expected_exception = scenario["expected_exception"] + assert exception is not None, f"Scenario '{description}' should create exception" + assert isinstance(exception, expected_exception), ( + f"Scenario '{description}' should create {expected_exception.__name__}" + ) + + # Test that the exception can be raised and caught for retry logic + with pytest.raises(expected_exception): + raise exception + + else: + assert exception is None, f"Scenario '{description}' should not create exception" + + +def test_exception_status_code_attributes(): + """Test that all exceptions have correct status_code attributes.""" + expected_mappings = [ + (CancelledError, 1), + (UnknownError, 2), + (InvalidArgumentError, 3), + (DeadlineExceededError, 4), + (NotFoundError, 5), + (AlreadyExistsError, 6), + (PermissionDeniedError, 7), + (ResourceExhaustedError, 8), + (FailedPreconditionError, 9), + (AbortedError, 10), + (OutOfRangeError, 11), + (UnimplementedError, 12), + (InternalError, 13), + (UnavailableError, 14), + (DataLossError, 15), + (UnauthenticatedError, 16), + (RolloutFinishedError, 100), + (RolloutRunningError, 101), + (ScoreInvalidError, 102), + ] + + for exception_class, expected_code in expected_mappings: + assert hasattr(exception_class, "status_code"), f"{exception_class.__name__} should have status_code attribute" + assert exception_class.status_code == expected_code, ( + f"{exception_class.__name__}.status_code should be {expected_code}" + ) + + +def test_integration_with_retry_logic(): + """Test that our exceptions integrate properly with existing retry logic.""" + from eval_protocol.pytest.exception_config import DEFAULT_RETRYABLE_EXCEPTIONS + + # Test that our error exceptions are covered by retry logic + our_error_exceptions = [ + UnknownError, + DeadlineExceededError, + NotFoundError, + PermissionDeniedError, + UnavailableError, + UnauthenticatedError, + ResourceExhaustedError, + ] + + for exception_class in our_error_exceptions: + assert exception_class in DEFAULT_RETRYABLE_EXCEPTIONS, ( + f"{exception_class.__name__} should be in DEFAULT_RETRYABLE_EXCEPTIONS for retry support" + ) diff --git a/tests/test_status.py b/tests/test_status.py deleted file mode 100644 index f0ac0932..00000000 --- a/tests/test_status.py +++ /dev/null @@ -1,417 +0,0 @@ -""" -Tests for Status exception handling functionality. - -Tests the round-trip flow: -1. Exception → Status.rollout_error_from_exception() → structured logging -2. Structured data → Status.raise_from_status_details() → original exception -""" - -import pytest -from eval_protocol.models import Status - -# Test with different exception types that might be available -try: - import litellm.exceptions - - LITELLM_AVAILABLE = True -except ImportError: - LITELLM_AVAILABLE = False - -try: - import requests.exceptions - - REQUESTS_AVAILABLE = True -except ImportError: - REQUESTS_AVAILABLE = False - -try: - import httpx - - HTTPX_AVAILABLE = True -except ImportError: - HTTPX_AVAILABLE = False - -try: - import openai - - OPENAI_AVAILABLE = True -except ImportError: - OPENAI_AVAILABLE = False - - -def test_rollout_error_from_exception_basic(): - """Test creating Status from a basic exception.""" - # Create a simple exception - original_exception = ValueError("Test error message") - - # Create status from exception - status = Status.rollout_error_from_exception(original_exception) - - # Verify the status structure - assert status.code == Status.Code.INTERNAL - assert status.message == "Test error message" - assert len(status.details) == 1 - - detail = status.details[0] - assert detail["exception_type"] == "builtins.ValueError" - assert detail["exception_message"] == "Test error message" - - -def test_exception_round_trip_basic(): - """Test the complete round-trip: exception → status → re-raise exception.""" - # Create original exception - original_exception = ValueError("Round trip test") - - # Convert to status - status = Status.rollout_error_from_exception(original_exception) - - # Try to re-raise from status details - with pytest.raises(ValueError) as exc_info: - Status.raise_from_status_details(status.details) - - # Verify the re-raised exception has the same message - assert str(exc_info.value) == "Round trip test" - - -@pytest.mark.skipif(not LITELLM_AVAILABLE, reason="litellm not available") -def test_litellm_exception_round_trip(): - """Test round-trip with litellm exceptions.""" - # Create a litellm exception - try different constructor patterns - original_exception = litellm.exceptions.NotFoundError( - message="Model not found", model="test-model", llm_provider="test-provider" - ) - # Convert to status - status = Status.rollout_error_from_exception(original_exception) - - # Verify status details - detail = status.details[0] - assert detail["exception_type"] == "litellm.exceptions.NotFoundError" - # Message might contain additional info, just check it contains our text - assert "Model not found" in detail["exception_message"] or "not found" in detail["exception_message"].lower() - - # Re-raise and verify type - with pytest.raises(litellm.exceptions.NotFoundError) as exc_info: - Status.raise_from_status_details(status.details) - - # The re-raised exception should be the same type - assert isinstance(exc_info.value, litellm.exceptions.NotFoundError) - - -@pytest.mark.skipif(not REQUESTS_AVAILABLE, reason="requests not available") -def test_requests_exception_round_trip(): - """Test round-trip with requests exceptions.""" - # Create a requests exception - original_exception = requests.exceptions.ConnectionError("Connection failed") - - # Convert to status - status = Status.rollout_error_from_exception(original_exception) - - # Verify status details - detail = status.details[0] - assert detail["exception_type"] == "requests.exceptions.ConnectionError" - assert detail["exception_message"] == "Connection failed" - - # Re-raise and verify type - with pytest.raises(requests.exceptions.ConnectionError) as exc_info: - Status.raise_from_status_details(status.details) - - assert str(exc_info.value) == "Connection failed" - - -def test_unknown_exception_type(): - """Test behavior with unknown/non-importable exception type.""" - # Create status details with fake exception type - fake_details = [{"exception_type": "fake.module.FakeException", "exception_message": "This should not raise"}] - - # Should not raise anything, just return False - result = Status.raise_from_status_details(fake_details) - assert result is False - - -def test_malformed_status_details(): - """Test behavior with malformed status details.""" - # Various malformed details - malformed_cases = [ - [], # Empty list - [{}], # Empty dict - [{"exception_type": "ValueError"}], # Missing message - [{"exception_message": "test"}], # Missing type - [{"wrong_key": "wrong_value"}], # Wrong keys - ] - - for malformed_details in malformed_cases: - result = Status.raise_from_status_details(malformed_details) - assert result is False - - -def test_rollout_error_with_extra_info(): - """Test rollout_error_from_exception with extra_info.""" - original_exception = ValueError("Test with extra info") - extra_info = {"context": "test_context", "user_id": "123"} - - status = Status.rollout_error_from_exception(original_exception, extra_info) - - # Should have both exception info and extra info - assert len(status.details) == 2 - - # First detail should be exception info - exception_detail = status.details[0] - assert exception_detail["exception_type"] == "builtins.ValueError" - assert exception_detail["exception_message"] == "Test with extra info" - - # Second detail should be extra info - extra_detail = status.details[1] - assert extra_detail["extra_info"]["context"] == "test_context" - assert extra_detail["extra_info"]["user_id"] == "123" - - -def test_multiple_exception_details(): - """Test raise_from_status_details with multiple details (should use first valid one).""" - # Create details with multiple exception info - details = [ - {"other_info": "ignored"}, # Should be ignored - {"exception_type": "builtins.ValueError", "exception_message": "First exception"}, # Should be used - {"exception_type": "builtins.RuntimeError", "exception_message": "Second exception"}, # Should be ignored - ] - - # Should raise the first valid exception - with pytest.raises(ValueError) as exc_info: - Status.raise_from_status_details(details) - - assert str(exc_info.value) == "First exception" - - -@pytest.mark.skipif(not LITELLM_AVAILABLE, reason="litellm not available") -def test_different_litellm_exceptions(): - """Test various litellm exception types.""" - # Test with a few common litellm exceptions - exception_classes = [ - litellm.exceptions.RateLimitError, - litellm.exceptions.InternalServerError, - litellm.exceptions.BadRequestError, - ] - - for exception_class in exception_classes: - # Try to create an exception instance (try different constructor patterns) - original_exception = None - exception_name = exception_class.__name__ - - try: - # Try with just message - original_exception = exception_class(f"Test {exception_name}") - except TypeError: - try: - # Try with message and required parameters - original_exception = exception_class( - message=f"Test {exception_name}", model="test-model", llm_provider="test-provider" - ) - except TypeError: - try: - # Try with positional args - original_exception = exception_class(f"Test {exception_name}", "test-model", "test-provider") - except TypeError: - # Skip this particular exception type - continue - - if original_exception is None: - continue - - # Test the round-trip - status = Status.rollout_error_from_exception(original_exception) - - # Should be able to re-raise the same type - with pytest.raises(exception_class): - Status.raise_from_status_details(status.details) - - -def test_edge_case_empty_message(): - """Test with exception that has empty message.""" - original_exception = ValueError() # Empty message - - status = Status.rollout_error_from_exception(original_exception) - - # Should handle empty message gracefully - detail = status.details[0] - assert detail["exception_type"] == "builtins.ValueError" - assert detail["exception_message"] == "" - - # Should still re-raise correctly - with pytest.raises(ValueError): - Status.raise_from_status_details(status.details) - - -def test_all_default_retryable_exceptions(): - """ - Comprehensive test of all exceptions in DEFAULT_RETRYABLE_EXCEPTIONS. - - This ensures our Status exception handling works with every exception type - that the retry system claims to support. - """ - # Test cases: (exception_class, test_message, required_modules, skip_reason) - test_cases = [ - # Standard library exceptions - (ConnectionError, "Connection failed", [], None), - (TimeoutError, "Request timeout", [], None), - (OSError, "OS error occurred", [], None), - ] - - # Add requests exceptions if available - if REQUESTS_AVAILABLE: - import requests.exceptions - - test_cases.extend( - [ - (requests.exceptions.ConnectionError, "Requests connection error", ["requests"], None), - (requests.exceptions.Timeout, "Requests timeout", ["requests"], None), - (requests.exceptions.HTTPError, "HTTP error occurred", ["requests"], None), - (requests.exceptions.RequestException, "Request exception", ["requests"], None), - ] - ) - - # Add httpx exceptions if available - if HTTPX_AVAILABLE: - import httpx - - test_cases.extend( - [ - (httpx.ConnectError, "HTTPX connect error", ["httpx"], None), - (httpx.TimeoutException, "HTTPX timeout", ["httpx"], None), - (httpx.NetworkError, "HTTPX network error", ["httpx"], None), - (httpx.RemoteProtocolError, "HTTPX protocol error", ["httpx"], None), - ] - ) - - # Add openai exceptions if available - if OPENAI_AVAILABLE: - import openai - - test_cases.extend( - [ - (openai.NotFoundError, "OpenAI model not found", ["openai"], None), - (openai.BadRequestError, "OpenAI bad request", ["openai"], None), - (openai.RateLimitError, "OpenAI rate limit", ["openai"], None), - ] - ) - - # Add litellm exceptions if available - if LITELLM_AVAILABLE: - import litellm.exceptions - - test_cases.extend( - [ - (litellm.exceptions.RateLimitError, "Rate limit exceeded", ["litellm"], None), - (litellm.exceptions.InternalServerError, "Internal server error", ["litellm"], None), - (litellm.exceptions.Timeout, "LiteLLM timeout", ["litellm"], None), - (litellm.exceptions.NotFoundError, "Model not found", ["litellm"], None), - (litellm.exceptions.BadRequestError, "Bad request", ["litellm"], None), - (litellm.exceptions.ServiceUnavailableError, "Service unavailable", ["litellm"], None), - ] - ) - - successful_tests = 0 - failed_tests = [] - - for exception_class, test_message, required_modules, skip_reason in test_cases: - exception_name = f"{exception_class.__module__}.{exception_class.__name__}" - - try: - # Try to create the original exception with different patterns - original_exception = None - - # Pattern 1: Just message - try: - original_exception = exception_class(test_message) - except TypeError: - # Pattern 2: Message as named parameter - try: - original_exception = exception_class(message=test_message) - except TypeError: - # Pattern 3: For litellm - try with required parameters - if "litellm" in required_modules: - try: - original_exception = exception_class( - message=test_message, model="test-model", llm_provider="test-provider" - ) - except TypeError: - try: - original_exception = exception_class(test_message, "test-model", "test-provider") - except TypeError: - pass - # Pattern 4: For OpenAI - create mock response object - elif "openai" in required_modules and original_exception is None: - try: - # Create minimal mock objects for OpenAI exceptions - class MockRequest: - def __init__(self): - self.method = "POST" - self.url = "https://api.openai.com/v1/chat/completions" - - class MockResponse: - def __init__(self): - self.status_code = 404 - self.headers = {"x-request-id": "test-request-id"} - self.request = MockRequest() - - mock_response = MockResponse() - original_exception = exception_class(test_message, response=mock_response, body=None) - except (TypeError, AttributeError) as e: - # If mock approach fails, skip OpenAI for now - failed_tests.append((exception_name, f"OpenAI mock creation failed: {e}")) - continue - - # Pattern 5: No arguments fallback - if original_exception is None: - try: - original_exception = exception_class() - except TypeError: - failed_tests.append((exception_name, "Could not create exception instance")) - continue - - if original_exception is None: - failed_tests.append((exception_name, "Could not create exception instance")) - continue - - # Test the round-trip: exception -> status -> re-raise - try: - # Convert to status - status = Status.rollout_error_from_exception(original_exception) - - # Verify status structure - assert len(status.details) >= 1 - detail = status.details[0] - assert "exception_type" in detail - assert "exception_message" in detail - assert detail["exception_type"] == exception_name - - # Try to re-raise from status details - with pytest.raises(exception_class) as exc_info: - Status.raise_from_status_details(status.details) - - # Verify we got the right exception type back - assert isinstance(exc_info.value, exception_class) - successful_tests += 1 - - print(f"āœ… {exception_name}: Round-trip successful") - - except Exception as e: - failed_tests.append((exception_name, f"Round-trip failed: {e}")) - continue - - except Exception as e: - failed_tests.append((exception_name, f"Setup failed: {e}")) - continue - - # Report results - print("\nšŸŽÆ Exception Round-trip Test Results:") - print(f"āœ… Successful: {successful_tests}") - print(f"āŒ Failed: {len(failed_tests)}") - - if failed_tests: - print("\nāŒ Failed exceptions:") - for exception_name, reason in failed_tests: - print(f" - {exception_name}: {reason}") - - # We expect most to pass, but some failures are acceptable due to complex constructors - # Require at least 85% success rate (17/20 = 85% is good, indicates robust support) - success_rate = successful_tests / (successful_tests + len(failed_tests)) - assert success_rate >= 0.85, f"Success rate {success_rate:.1%} too low. Failed tests: {failed_tests}" From 4c855e76a696143f7d7b40b40d336f3f7a76ffcf Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 17:44:43 -0700 Subject: [PATCH 09/16] backward compat --- eval_protocol/models.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index 494d3129..ecb9f5dc 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -312,6 +312,12 @@ def rollout_internal_error(cls, error_message: str, extra_info: Optional[Dict[st """Create a status indicating the rollout failed with an internal error.""" return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info)) + # For backwards compatibility + @classmethod + def rollout_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": + """Create a status indicating the rollout failed with an error.""" + return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info)) + @classmethod def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": """Create a status indicating an internal error occurred.""" From d719a6c6aa251db3719134eab90542dd01548c01 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 17:46:15 -0700 Subject: [PATCH 10/16] updated server --- .../svg_agent/vercel_svg_server/api/init.py | 103 +++++++++++------- .../vercel_svg_server/requirements.txt | 2 +- 2 files changed, 65 insertions(+), 40 deletions(-) diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py b/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py index 502eb96a..ffd8b9ea 100644 --- a/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py @@ -12,6 +12,7 @@ import asyncio from flask import Flask, request, jsonify from openai import OpenAI +import openai from dotenv import load_dotenv from eval_protocol import Status, InitRequest, FireworksTracingHttpHandler, RolloutIdFilter @@ -49,56 +50,80 @@ def filter(self, record: logging.LogRecord) -> bool: app = Flask(__name__) -async def execute_rollout_background(req, api_key): +async def execute_rollout_background(req: InitRequest, api_key: str): """Execute the OpenAI completion in background and log results""" # Attach rollout_id filter to logger logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}") logger.addFilter(RolloutIdFilter(req.metadata.rollout_id)) - try: - model = req.completion_params.get("model") - # Uncomment if you need to strip fireworks_ai/ prefix - # if model and isinstance(model, str) and model.startswith("fireworks_ai/"): - # model = model[len("fireworks_ai/"):] - - # Prepare completion arguments - completion_kwargs = { - "messages": req.messages, - # "messages": [{"role": "user", "content": "Hello, how are you?"}], - "model": model, - "temperature": req.completion_params.get("temperature"), - "max_tokens": req.completion_params.get("max_tokens"), - } + model = req.completion_params.get("model") + # Uncomment if you need to strip fireworks_ai/ prefix + # if model and isinstance(model, str) and model.startswith("fireworks_ai/"): + # model = model[len("fireworks_ai/"):] + + # Prepare completion arguments + completion_kwargs = { + "messages": req.messages, + # "messages": [{"role": "user", "content": "Hello, how are you?"}], + "model": model, + "temperature": req.completion_params.get("temperature"), + "max_tokens": req.completion_params.get("max_tokens"), + } + + # Add tools if present + if req.tools: + completion_kwargs["tools"] = req.tools + + logger.info( + f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, API_KEY: {api_key}, MODEL: {model}" + ) - # Add tools if present - if req.tools: - completion_kwargs["tools"] = req.tools + # Create AsyncOpenAI client + # client = AsyncOpenAI(base_url=req.model_base_url, api_key=api_key) + client = OpenAI(base_url=req.model_base_url, api_key=api_key) - logger.info( - f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, API_KEY: {api_key}, MODEL: {model}" - ) + logger.info(f"Sending completion request to model {model}") - # Create AsyncOpenAI client - # client = AsyncOpenAI(base_url=req.model_base_url, api_key=api_key) - client = OpenAI(base_url=req.model_base_url, api_key=api_key) + # Make the async model call with timeout + import time - logger.info(f"Sending completion request to model {model}") + logger.info(f"timing start: {time.time()}") - # Make the async model call with timeout - import time - - logger.info(f"timing start: {time.time()}") + try: completion = client.chat.completions.create(**completion_kwargs) - logger.info(f"Completed response: {completion}") - logger.info(f"timing end: {time.time()}") - # Log successful completion - THIS IS WHAT RemoteRolloutProcessor POLLS FOR - logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()}) - + except ( + openai.AuthenticationError, + openai.PermissionDeniedError, + ) as e: + # These errors should be logged and will be retried by RemoteRolloutProcessor + logger.error( + f"Rollout {req.metadata.rollout_id} failed: {e}", + extra={"status": Status.rollout_permission_denied_error(str(e))}, + ) + return + except openai.NotFoundError as e: + logger.error( + f"Rollout {req.metadata.rollout_id} failed: {e}", extra={"status": Status.rollout_not_found_error(str(e))} + ) + return + except openai.RateLimitError as e: + logger.error( + f"Rollout {req.metadata.rollout_id} failed: {e}", + extra={"status": Status.rollout_resource_exhausted_error(str(e))}, + ) + return except Exception as e: - # Log error with structured status - THIS IS WHAT RemoteRolloutProcessor POLLS FOR + # Non-OpenAI errors (shouldn't normally happen but catch anyway) logger.error( - f"Rollout {req.metadata.rollout_id} failed: {e}", extra={"status": Status.rollout_error_from_exception(e)} + f"Rollout {req.metadata.rollout_id} failed with unexpected error: {e}", + extra={"status": Status.rollout_internal_error(str(e))}, ) + return + + logger.info(f"Completed response: {completion}") + logger.info(f"timing end: {time.time()}") + # Log successful completion - THIS IS WHAT RemoteRolloutProcessor POLLS FOR + logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()}) @app.route("/init", methods=["POST"]) @@ -114,7 +139,7 @@ async def init(): # Validate required fields if not req.messages: error_msg = "messages is required" - logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) + logger.error(error_msg, extra={"status": Status.rollout_internal_error(error_msg)}) return jsonify({"error": error_msg}), 400 # Get API key (prefer request api_key, fallback to environment) @@ -126,7 +151,7 @@ async def init(): api_key = os.environ.get("FIREWORKS_API_KEY") else: error_msg = "API key not provided in request or environment variable" - logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) + logger.error(error_msg, extra={"status": Status.rollout_internal_error(error_msg)}) return jsonify({"error": error_msg}), 401 # šŸ”„ FIRE: Return immediately with acceptance (within 30s requirement) @@ -137,7 +162,7 @@ async def init(): } # Fire and forget: Execute rollout asynchronously - asyncio.create_task(execute_rollout_background(req, api_key)) + asyncio.create_task(execute_rollout_background(req, api_key or "")) return jsonify(response_data), 200 diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt b/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt index 2296770c..f4ce92fc 100644 --- a/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/requirements.txt @@ -1,4 +1,4 @@ openai>=1.0.0 python-dotenv>=0.19.0 -eval_protocol>=0.2.70 +eval_protocol>=0.2.71 Flask[async]==3.0.3 From aeafa1321fc75f1eaa2b044d5df64c4f173c5aa2 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 17:48:38 -0700 Subject: [PATCH 11/16] nits --- eval_protocol/models.py | 3 --- eval_protocol/pytest/exception_config.py | 1 - 2 files changed, 4 deletions(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index ecb9f5dc..fdbd4b1a 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -21,9 +21,6 @@ from eval_protocol.types import TerminationReason -logger = logging.getLogger(__name__) - - class ErrorInfo(BaseModel): """ AIP-193 ErrorInfo model for structured error details. diff --git a/eval_protocol/pytest/exception_config.py b/eval_protocol/pytest/exception_config.py index 42c97ea4..5e1e1797 100644 --- a/eval_protocol/pytest/exception_config.py +++ b/eval_protocol/pytest/exception_config.py @@ -36,7 +36,6 @@ litellm.exceptions.InternalServerError, litellm.exceptions.Timeout, litellm.exceptions.NotFoundError, - # litellm.exceptions.BadRequestError, # remove this once we have a long term solution litellm.exceptions.ServiceUnavailableError, litellm.exceptions.APIError, # Eval Protocol exceptions From d9666fd9deb12642b92342f42901522971461012 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 17:53:57 -0700 Subject: [PATCH 12/16] fix test --- eval_protocol/exceptions.py | 5 +++-- .../pytest/remote_rollout_processor.py | 4 ++-- tests/test_exceptions.py | 21 +++++++++++++++++++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/eval_protocol/exceptions.py b/eval_protocol/exceptions.py index f294977d..3b92c865 100644 --- a/eval_protocol/exceptions.py +++ b/eval_protocol/exceptions.py @@ -160,12 +160,13 @@ class ScoreInvalidError(EvalProtocolError): } -def exception_for_status_code(code: int) -> Optional[EvalProtocolError]: +def exception_for_status_code(code: int, message: str = "") -> Optional[EvalProtocolError]: """ Create an exception instance for a given status code. Args: code: Status code from Status.Code enum + message: Optional error message to include in the exception Returns: Exception instance or None if code is OK (0) @@ -173,4 +174,4 @@ def exception_for_status_code(code: int) -> Optional[EvalProtocolError]: exception_class = STATUS_CODE_TO_EXCEPTION.get(code) if exception_class is None: return None - return exception_class() + return exception_class(message) if message else exception_class() diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index 6e57d93c..b8af095d 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -161,8 +161,8 @@ def _get_status() -> Dict[str, Any]: f"Found Fireworks log for rollout {row.execution_metadata.rollout_id} with status code {status_code}" ) - # Create and raise exception if appropriate - exception = exception_for_status_code(status_code) + # Create and raise exception if appropriate, preserving original message + exception = exception_for_status_code(status_code, status_message) if exception is not None: raise exception diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index b4b039e2..a1fcdeb6 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -348,3 +348,24 @@ def test_integration_with_retry_logic(): assert exception_class in DEFAULT_RETRYABLE_EXCEPTIONS, ( f"{exception_class.__name__} should be in DEFAULT_RETRYABLE_EXCEPTIONS for retry support" ) + + +def test_exception_message_preservation(): + """Test that error messages are properly preserved in exceptions.""" + test_cases = [ + (13, "test error", InternalError), + (5, "Model xyz not found", NotFoundError), + (7, "Invalid API key", PermissionDeniedError), + ] + + for status_code, message, expected_exception_class in test_cases: + # Test with message + exception = exception_for_status_code(status_code, message) + assert exception is not None + assert isinstance(exception, expected_exception_class) + assert str(exception) == message, f"Exception should preserve message '{message}'" + + # Test without message (should still work) + exception_no_msg = exception_for_status_code(status_code) + assert exception_no_msg is not None + assert isinstance(exception_no_msg, expected_exception_class) From d569a64ab95e76a56e9afbe01cb8d1db0bc17079 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Wed, 29 Oct 2025 18:02:54 -0700 Subject: [PATCH 13/16] backward compat and fix test again --- eval_protocol/models.py | 9 +++++++-- eval_protocol/pytest/evaluation_test_utils.py | 2 +- .../test_remote_fireworks_propagate_status.py | 13 +------------ 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/eval_protocol/models.py b/eval_protocol/models.py index fdbd4b1a..7180ed72 100644 --- a/eval_protocol/models.py +++ b/eval_protocol/models.py @@ -309,6 +309,11 @@ def rollout_internal_error(cls, error_message: str, extra_info: Optional[Dict[st """Create a status indicating the rollout failed with an internal error.""" return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info)) + @classmethod + def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an internal error occurred.""" + return cls(code=cls.Code.INTERNAL, message=error_message, details=details or []) + # For backwards compatibility @classmethod def rollout_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] = None) -> "Status": @@ -316,8 +321,8 @@ def rollout_error(cls, error_message: str, extra_info: Optional[Dict[str, Any]] return cls.internal_error(error_message, cls._build_details_with_extra_info(extra_info)) @classmethod - def internal_error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": - """Create a status indicating an internal error occurred.""" + def error(cls, error_message: str, details: Optional[List[Dict[str, Any]]] = None) -> "Status": + """Create a status indicating an error occurred.""" return cls(code=cls.Code.INTERNAL, message=error_message, details=details or []) # UNAVAILABLE = 14 diff --git a/eval_protocol/pytest/evaluation_test_utils.py b/eval_protocol/pytest/evaluation_test_utils.py index c582d4be..26b0d799 100644 --- a/eval_protocol/pytest/evaluation_test_utils.py +++ b/eval_protocol/pytest/evaluation_test_utils.py @@ -398,7 +398,7 @@ async def execute_row_with_backoff(task: asyncio.Task[EvaluationRow], row: Evalu else: # Non-retryable exception - fail immediately logging.error(f"āŒ Rollout failed (non-retryable error encountered): {repr(e)}") - row.rollout_status = Status.rollout_error(repr(e)) + row.rollout_status = Status.rollout_error(str(e)) return row async def execute_row_with_backoff_and_log( diff --git a/tests/remote_server/test_remote_fireworks_propagate_status.py b/tests/remote_server/test_remote_fireworks_propagate_status.py index e415ed61..7c05172f 100644 --- a/tests/remote_server/test_remote_fireworks_propagate_status.py +++ b/tests/remote_server/test_remote_fireworks_propagate_status.py @@ -1,15 +1,4 @@ -# MANUAL SERVER STARTUP REQUIRED: -# -# For Python server testing, start: -# python -m tests.remote_server.remote_server (runs on http://127.0.0.1:3000) -# -# For TypeScript server testing, start: -# cd tests/remote_server/typescript-server -# npm install -# npm start -# -# The TypeScript server should be running on http://127.0.0.1:3000 -# You only need to start one of the servers! +# AUTO SERVER STARTUP: Server is automatically started and stopped by the test import subprocess import socket From a71074ec111c9321e5cb2e8366dbb56504f2fc3a Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 30 Oct 2025 03:43:20 -0700 Subject: [PATCH 14/16] update remote rollout processor --- eval_protocol/pytest/remote_rollout_processor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index b8af095d..f0bbe360 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -94,7 +94,7 @@ async def _process_row(row: EvaluationRow) -> EvaluationRow: def _post_init() -> None: url = f"{remote_base_url}/init" try: - r = requests.post(url, json=init_payload.model_dump(), timeout=30) + r = requests.post(url, json=init_payload.model_dump(), timeout=300) r.raise_for_status() except requests.exceptions.Timeout: raise TimeoutError( @@ -133,9 +133,9 @@ def _get_status() -> Dict[str, Any]: # For all other exceptions, raise them raise - # Search Fireworks tracing logs for completion - completed_logs = self._tracing_adapter.search_logs( - tags=[f"rollout_id:{row.execution_metadata.rollout_id}"] + # Search Fireworks tracing logs for completion (run in thread to avoid blocking event loop) + completed_logs = await asyncio.to_thread( + self._tracing_adapter.search_logs, tags=[f"rollout_id:{row.execution_metadata.rollout_id}"] ) # Filter for logs that actually have status information status_logs = [] From 72e3506e544eff82f22237b374c22c2e395f2751 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Sat, 1 Nov 2025 01:59:25 -0700 Subject: [PATCH 15/16] fix: increase timeout for remote rollout processor --- eval_protocol/pytest/remote_rollout_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eval_protocol/pytest/remote_rollout_processor.py b/eval_protocol/pytest/remote_rollout_processor.py index f0bbe360..dd179e34 100644 --- a/eval_protocol/pytest/remote_rollout_processor.py +++ b/eval_protocol/pytest/remote_rollout_processor.py @@ -98,7 +98,7 @@ def _post_init() -> None: r.raise_for_status() except requests.exceptions.Timeout: raise TimeoutError( - f"The /init endpoint tried {url} with {init_payload.model_dump()} but timed out after 30 seconds." + f"The /init endpoint tried {url} with {init_payload.model_dump()} but timed out after 300 seconds." ) await asyncio.to_thread(_post_init) From d63e10985c9379057753a826985c8b1784343c32 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Sat, 1 Nov 2025 02:00:44 -0700 Subject: [PATCH 16/16] add back bad request --- eval_protocol/pytest/exception_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eval_protocol/pytest/exception_config.py b/eval_protocol/pytest/exception_config.py index 5e1e1797..e4bb1b7c 100644 --- a/eval_protocol/pytest/exception_config.py +++ b/eval_protocol/pytest/exception_config.py @@ -38,6 +38,7 @@ litellm.exceptions.NotFoundError, litellm.exceptions.ServiceUnavailableError, litellm.exceptions.APIError, + litellm.exceptions.BadRequestError, # Eval Protocol exceptions eval_protocol.exceptions.UnknownError, eval_protocol.exceptions.DeadlineExceededError,