Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions eval_protocol/event_bus/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
# Global event bus instance - uses SqliteEventBus for cross-process functionality
from eval_protocol.event_bus.event_bus import EventBus
from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus

event_bus: EventBus = SqliteEventBus()

def _get_default_event_bus():
    """Build the default cross-process event bus (SQLite-backed).

    The backend is imported lazily so that importing this package does not
    eagerly load the SQLite implementation or pay its construction cost.
    """
    # Local import: defer the heavy/side-effectful backend import to first use.
    from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus as _Bus

    return _Bus()


# Lazy proxy: the real event bus is created on first attribute access rather
# than at import time, keeping module import cheap and side-effect free.
class _LazyEventBus(EventBus):
    """Transparent proxy that defers construction of the default EventBus."""

    def __init__(self):
        # Backing bus instance; populated on demand by _get_event_bus().
        self._event_bus: EventBus | None = None

    def _get_event_bus(self) -> EventBus:
        """Create the backing bus on first use and cache it for reuse."""
        bus = self._event_bus
        if bus is None:
            bus = _get_default_event_bus()
            self._event_bus = bus
        return bus

    def __getattr__(self, name):
        # Only invoked for names NOT found on the proxy itself, so the
        # attributes above resolve normally; everything else is forwarded
        # to the lazily-created real bus.
        return getattr(self._get_event_bus(), name)


event_bus: EventBus = _LazyEventBus()
5 changes: 5 additions & 0 deletions eval_protocol/pytest/tracing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import base64
import os
from typing import Any, Callable, Dict, List, Optional

from eval_protocol.adapters.fireworks_tracing import FireworksTracingAdapter
Expand Down Expand Up @@ -126,12 +127,16 @@ def build_init_request(
):
final_model_base_url = build_fireworks_tracing_url(model_base_url, meta, completion_params_base_url)

# Extract API key from environment or completion_params
api_key = os.environ.get("FIREWORKS_API_KEY")

return InitRequest(
completion_params=completion_params_dict,
messages=clean_messages,
tools=row.tools,
metadata=meta,
model_base_url=final_model_base_url,
api_key=api_key,
)


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Fireworks API Key for SVGBench Vercel Function
# Copy this file to .env and add your actual API key

# Your Fireworks API key (get it from https://fireworks.ai)
FIREWORKS_API_KEY=your-fireworks-api-key-here

# Note: The API key can also be passed in the request payload
# by RemoteRolloutProcessor. This .env file serves as a fallback.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.vercel
.env
!.env.example
158 changes: 158 additions & 0 deletions eval_protocol/quickstart/svg_agent/vercel_svg_server/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# SVGBench Vercel Serverless Function

A Vercel serverless function that handles model calls for the SVGBench evaluation pipeline.

## What it does

- Receives SVGBench evaluation requests via POST
- Makes model calls to Fireworks AI
- Returns model responses for local SVG evaluation
- Handles CORS and provides health check endpoint

## Setup

### Option 1: Local Development (Recommended)

1. **Install Vercel CLI:**
```bash
npm install -g vercel
```

2. **Navigate to this directory:**
```bash
cd eval_protocol/quickstart/svg_agent/vercel_svg_server
```

3. **Create .env file with your API key (optional):**
```bash
cp .env.example .env
# Edit .env and add your actual API key
```

**Note:** The API key can be provided either:
- In the request payload (automatically handled by `RemoteRolloutProcessor`)
- In a local `.env` file (fallback option)

4. **Start local development server:**
```bash
vercel dev
```

Your function will be available at `http://localhost:3000`

### Option 2: Production Deployment

1. **Follow steps 1-2 above**

2. **Deploy to Vercel:**
```bash
vercel deploy
```

Follow the prompts to:
- Create a new project (or link existing)
- Set project name (e.g., `svgbench-server`)

3. **Deploy to production:**
```bash
vercel --prod
```

**Note:** The function receives the API key in the request payload (automatically handled by `RemoteRolloutProcessor`), but can also fall back to a local `.env` file if needed.

## Usage

The function provides these endpoints:

### `POST /`
Processes SVGBench evaluation requests.

**Request format:**
```json
{
"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b",
"messages": [
{
"role": "user",
"content": "Your SVG generation prompt here"
}
],
"model_base_url": "https://api.fireworks.ai/inference/v1",
"metadata": {
"rollout_id": "some-unique-id"
},
"api_key": "your-fireworks-api-key",
"completion_params": {
"temperature": 0.8,
"max_tokens": 32768
}
}
```

**Note:** The `api_key` field is automatically populated by `RemoteRolloutProcessor` from your local `FIREWORKS_API_KEY` environment variable.

**Response format:**
```json
{
"status": "completed",
"rollout_id": "some-unique-id",
"choices": [
{
"message": {
"role": "assistant",
"content": "SVG code generated by model"
}
}
]
}
```

### `GET /`
Health check endpoint - returns server status.

## Integration with Tests

### Local Development
For local testing with `vercel dev`:

```python
@evaluation_test(
rollout_processor=RemoteRolloutProcessor(
remote_base_url="http://localhost:3000",
timeout_seconds=300,
),
# ... other params
)
```

### Production Deployment
For testing with deployed function:

```python
@evaluation_test(
rollout_processor=RemoteRolloutProcessor(
remote_base_url="https://vercel-svg-server.vercel.app",
timeout_seconds=300,
),
# ... other params
)
```

**Note:** The `RemoteRolloutProcessor` automatically passes your local `FIREWORKS_API_KEY` environment variable to the Vercel function.

## API Key Configuration

The function uses the following priority for API keys:

1. **Request payload** (`req.api_key`) - Automatically provided by `RemoteRolloutProcessor`
2. **`FIREWORKS_API_KEY` environment variable** - Fallback when no key is in the payload; a local `.env` file (loaded via `python-dotenv`) is simply one way to set this variable during local development

This flexible approach works for both local development and production deployment.

## Architecture

- **Remote**: Model calls (handled by this serverless function)
- **Local**: SVG extraction, rendering, evaluation, scoring (handled by test client)

This keeps the heavy SVG processing local while scaling model calls in the cloud.
138 changes: 138 additions & 0 deletions eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
Vercel serverless function for SVGBench remote evaluation.

This function handles the model call part of the evaluation pipeline.
The SVG evaluation logic remains in the test client.
"""

import json
import os
import logging
from http.server import BaseHTTPRequestHandler
from openai import OpenAI
from dotenv import load_dotenv

from eval_protocol import Status, InitRequest, FireworksTracingHttpHandler, RolloutIdFilter

load_dotenv()

# Attach Fireworks tracing handler to root logger
fireworks_handler = FireworksTracingHttpHandler()
logging.getLogger().addHandler(fireworks_handler)


class handler(BaseHTTPRequestHandler):
    """Vercel serverless HTTP handler that proxies chat-completion calls.

    Endpoints:
        POST /  — parse the body as an ``InitRequest``, call the model through
                  the OpenAI-compatible client, and return the first choice.
        GET /   — health check.
        OPTIONS — CORS preflight response.

    All responses are JSON with permissive CORS headers.
    """

    def do_POST(self):
        """Run one rollout: validate the request, call the model, reply."""
        try:
            # Read and parse the JSON request body.
            content_length = int(self.headers.get("Content-Length", 0))
            request_body = self.rfile.read(content_length).decode("utf-8")
            request_data = json.loads(request_body)

            # Deserialize/validate as an InitRequest (raises on bad payloads,
            # which lands in the generic handler below as a 500).
            req = InitRequest(**request_data)

            # Per-rollout logger; RolloutIdFilter tags records so the
            # Fireworks tracing handler can associate them with this rollout.
            # NOTE(review): loggers are cached process-wide by name, so each
            # distinct rollout_id adds a logger entry for the process lifetime
            # — acceptable for short-lived serverless invocations.
            logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}")
            logger.addFilter(RolloutIdFilter(req.metadata.rollout_id))

            # messages is required for a chat completion.
            if not req.messages:
                error_msg = "messages is required"
                logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
                self._send_error(400, error_msg)
                return

            # completion_params (model, temperature, max_tokens, ...) are
            # passed straight through to the client.
            completion_kwargs = {
                "messages": req.messages,
                **req.completion_params,
            }

            # Tools are optional; only forward them when present.
            if req.tools:
                completion_kwargs["tools"] = req.tools

            # API key: prefer the request payload, fall back to the
            # environment (populated by load_dotenv() at import time).
            api_key = req.api_key or os.environ.get("FIREWORKS_API_KEY")
            if not api_key:
                error_msg = "API key not provided in request or FIREWORKS_API_KEY environment variable"
                logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
                self._send_error(500, error_msg)
                return

            # OpenAI-compatible client pointed at the requested base URL.
            client = OpenAI(base_url=req.model_base_url, api_key=api_key)

            logger.info(f"Sending completion request to model {req.completion_params.get('model')}")

            # Make the (blocking) model call.
            completion = client.chat.completions.create(**completion_kwargs)

            logger.info(f"Completed response: {completion}")

            # Mark the rollout finished for the tracing backend.
            logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()})

            # Only the first choice is forwarded to the caller.
            response_data = {
                "status": "completed",
                "rollout_id": req.metadata.rollout_id,
                "choices": [
                    {
                        "message": {
                            "role": completion.choices[0].message.role,
                            "content": completion.choices[0].message.content,
                        }
                    }
                ],
            }

            self._send_json_response(200, response_data)

        except Exception as e:
            # req/logger exist only if parsing got far enough; log with
            # rollout context when we have it.
            if "req" in locals() and "logger" in locals():
                logger.error(f"❌ Error in rollout {req.metadata.rollout_id}: {e}")
                logger.error(str(e), extra={"status": Status.rollout_error(str(e))})

            self._send_error(500, str(e))

    def do_GET(self):
        """Health check endpoint: report server status and available routes."""
        self._send_json_response(
            200,
            {
                "status": "ok",
                "message": "SVGBench Vercel Serverless Function",
                "endpoints": {"POST /": "Process SVGBench evaluation requests"},
            },
        )

    def do_OPTIONS(self):
        """Handle CORS preflight requests with permissive headers."""
        self.send_response(200)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def _send_json_response(self, status_code: int, data: dict):
        """Serialize ``data`` as JSON and send it with CORS headers."""
        self.send_response(status_code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()
        self.wfile.write(json.dumps(data).encode("utf-8"))

    def _send_error(self, status_code: int, message: str):
        """Send an error response as ``{"error": message}`` JSON."""
        self._send_json_response(status_code, {"error": message})
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
openai>=1.0.0
python-dotenv>=0.19.0
eval_protocol>=0.2.58
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"redirects": [{ "source": "/init", "destination": "/api/init" }]
}
2 changes: 1 addition & 1 deletion eval_protocol/types/remote_rollout_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ class InitRequest(BaseModel):
default_factory=dict,
description="Completion parameters including model and optional model_kwargs, temperature, etc.",
)
elastic_search_config: Optional[ElasticsearchConfig] = None
messages: Optional[List[Message]] = None
tools: Optional[List[Dict[str, Any]]] = None

Expand All @@ -60,6 +59,7 @@ class InitRequest(BaseModel):
"""

metadata: RolloutMetadata
api_key: Optional[str] = None


class StatusResponse(BaseModel):
Expand Down
3 changes: 1 addition & 2 deletions test_event_bus_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
import asyncio
import sys
import json
from eval_protocol.event_bus import SqliteEventBus
from eval_protocol.models import EvaluationRow, InputMetadata
from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus


async def listener_process(db_path: str):
Expand Down
Loading
Loading