Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions eval_protocol/event_bus/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
# Global event bus instance - uses SqliteEventBus for cross-process functionality
from eval_protocol.event_bus.event_bus import EventBus
from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus

event_bus: EventBus = SqliteEventBus()

def _get_default_event_bus():
    """Build the default cross-process event bus (SQLite-backed).

    The backend is imported lazily so that importing this package does not
    eagerly load the SQLite implementation or pay its construction cost.
    """
    # Local import: defer the heavy/side-effectful backend import to first use.
    from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus as _Bus

    return _Bus()


# Lazy proxy: the real event bus is created on first attribute access rather
# than at import time, keeping module import cheap and side-effect free.
class _LazyEventBus(EventBus):
    """Transparent proxy that defers construction of the default EventBus."""

    def __init__(self):
        # Backing bus instance; populated on demand by _get_event_bus().
        self._event_bus: EventBus | None = None

    def _get_event_bus(self) -> EventBus:
        """Create the backing bus on first use and cache it for reuse."""
        bus = self._event_bus
        if bus is None:
            bus = _get_default_event_bus()
            self._event_bus = bus
        return bus

    def __getattr__(self, name):
        # Only invoked for names NOT found on the proxy itself, so the
        # attributes above resolve normally; everything else is forwarded
        # to the lazily-created real bus.
        return getattr(self._get_event_bus(), name)


event_bus: EventBus = _LazyEventBus()
5 changes: 5 additions & 0 deletions eval_protocol/pytest/tracing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import base64
import os
from typing import Any, Callable, Dict, List, Optional

from eval_protocol.adapters.fireworks_tracing import FireworksTracingAdapter
Expand Down Expand Up @@ -126,12 +127,16 @@ def build_init_request(
):
final_model_base_url = build_fireworks_tracing_url(model_base_url, meta, completion_params_base_url)

# Extract API key from environment or completion_params
api_key = os.environ.get("FIREWORKS_API_KEY")

return InitRequest(
completion_params=completion_params_dict,
messages=clean_messages,
tools=row.tools,
metadata=meta,
model_base_url=final_model_base_url,
api_key=api_key,
)


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Fireworks API Key for SVGBench Vercel Function
# Copy this file to .env and add your actual API key

# Your Fireworks API key (get it from https://fireworks.ai)
FIREWORKS_API_KEY=your-fireworks-api-key-here

# Note: The API key can also be passed in the request payload
# by RemoteRolloutProcessor. This .env file serves as a fallback.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.vercel
.env
!.env.example
158 changes: 158 additions & 0 deletions eval_protocol/quickstart/svg_agent/vercel_svg_server/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# SVGBench Vercel Serverless Function

A Vercel serverless function that handles model calls for the SVGBench evaluation pipeline.

## What it does

- Receives SVGBench evaluation requests via POST
- Makes model calls to Fireworks AI
- Returns model responses for local SVG evaluation
- Handles CORS and provides health check endpoint

## Setup

### Option 1: Local Development (Recommended)

1. **Install Vercel CLI:**
```bash
npm install -g vercel
```

2. **Navigate to this directory:**
```bash
cd eval_protocol/quickstart/svg_agent/vercel_svg_server
```

3. **Create .env file with your API key (optional):**
```bash
cp .env.example .env
# Edit .env and add your actual API key
```

**Note:** The API key can be provided either:
- In the request payload (automatically handled by `RemoteRolloutProcessor`)
- In a local `.env` file (fallback option)

4. **Start local development server:**
```bash
vercel dev
```

Your function will be available at `http://localhost:3000`

### Option 2: Production Deployment

1. **Follow steps 1-2 above**

2. **Deploy to Vercel:**
```bash
vercel deploy
```

Follow the prompts to:
- Create a new project (or link existing)
- Set project name (e.g., `svgbench-server`)

3. **Deploy to production:**
```bash
vercel --prod
```

**Note:** The function receives the API key in the request payload (automatically handled by `RemoteRolloutProcessor`), but can also fall back to a local `.env` file if needed.

## Usage

The function provides these endpoints:

### `POST /`
Processes SVGBench evaluation requests.

**Request format:**
```json
{
"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b",
"messages": [
{
"role": "user",
"content": "Your SVG generation prompt here"
}
],
"model_base_url": "https://api.fireworks.ai/inference/v1",
"metadata": {
"rollout_id": "some-unique-id"
},
"api_key": "your-fireworks-api-key",
"completion_params": {
"temperature": 0.8,
"max_tokens": 32768
}
}
```

**Note:** The `api_key` field is automatically populated by `RemoteRolloutProcessor` from your local `FIREWORKS_API_KEY` environment variable.

**Response format:**
```json
{
"status": "completed",
"rollout_id": "some-unique-id",
"choices": [
{
"message": {
"role": "assistant",
"content": "SVG code generated by model"
}
}
]
}
```

### `GET /`
Health check endpoint - returns server status.

## Integration with Tests

### Local Development
For local testing with `vercel dev`:

```python
@evaluation_test(
rollout_processor=RemoteRolloutProcessor(
remote_base_url="http://localhost:3000",
timeout_seconds=300,
),
# ... other params
)
```

### Production Deployment
For testing with deployed function:

```python
@evaluation_test(
rollout_processor=RemoteRolloutProcessor(
remote_base_url="https://vercel-svg-server.vercel.app",
timeout_seconds=300,
),
# ... other params
)
```

**Note:** The `RemoteRolloutProcessor` automatically passes your local `FIREWORKS_API_KEY` environment variable to the Vercel function.

## API Key Configuration

The function uses the following priority for API keys:

1. **Request payload** (`req.api_key`) - Automatically provided by `RemoteRolloutProcessor`
2. **`FIREWORKS_API_KEY` environment variable** - Fallback when no key is in the payload; a local `.env` file (loaded via `python-dotenv`) is simply one way to set this variable during local development

This flexible approach works for both local development and production deployment.

## Architecture

- **Remote**: Model calls (handled by this serverless function)
- **Local**: SVG extraction, rendering, evaluation, scoring (handled by test client)

This keeps the heavy SVG processing local while scaling model calls in the cloud.
138 changes: 138 additions & 0 deletions eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
Vercel serverless function for SVGBench remote evaluation.

This function handles the model call part of the evaluation pipeline.
The SVG evaluation logic remains in the test client.
"""

import json
import os
import logging
from http.server import BaseHTTPRequestHandler
from openai import OpenAI
from dotenv import load_dotenv

from eval_protocol import Status, InitRequest, FireworksTracingHttpHandler, RolloutIdFilter

load_dotenv()

# Attach Fireworks tracing handler to root logger
fireworks_handler = FireworksTracingHttpHandler()
logging.getLogger().addHandler(fireworks_handler)


class handler(BaseHTTPRequestHandler):
    """Vercel serverless HTTP handler that proxies chat-completion calls.

    Endpoints:
        POST /  — parse the body as an ``InitRequest``, call the model through
                  the OpenAI-compatible client, and return the first choice.
        GET /   — health check.
        OPTIONS — CORS preflight response.

    All responses are JSON with permissive CORS headers.
    """

    def do_POST(self):
        """Run one rollout: validate the request, call the model, reply."""
        try:
            # Read and parse the JSON request body.
            content_length = int(self.headers.get("Content-Length", 0))
            request_body = self.rfile.read(content_length).decode("utf-8")
            request_data = json.loads(request_body)

            # Deserialize/validate as an InitRequest (raises on bad payloads,
            # which lands in the generic handler below as a 500).
            req = InitRequest(**request_data)

            # Per-rollout logger; RolloutIdFilter tags records so the
            # Fireworks tracing handler can associate them with this rollout.
            # NOTE(review): loggers are cached process-wide by name, so each
            # distinct rollout_id adds a logger entry for the process lifetime
            # — acceptable for short-lived serverless invocations.
            logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}")
            logger.addFilter(RolloutIdFilter(req.metadata.rollout_id))

            # messages is required for a chat completion.
            if not req.messages:
                error_msg = "messages is required"
                logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
                self._send_error(400, error_msg)
                return

            # completion_params (model, temperature, max_tokens, ...) are
            # passed straight through to the client.
            completion_kwargs = {
                "messages": req.messages,
                **req.completion_params,
            }

            # Tools are optional; only forward them when present.
            if req.tools:
                completion_kwargs["tools"] = req.tools

            # API key: prefer the request payload, fall back to the
            # environment (populated by load_dotenv() at import time).
            api_key = req.api_key or os.environ.get("FIREWORKS_API_KEY")
            if not api_key:
                error_msg = "API key not provided in request or FIREWORKS_API_KEY environment variable"
                logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
                self._send_error(500, error_msg)
                return

            # OpenAI-compatible client pointed at the requested base URL.
            client = OpenAI(base_url=req.model_base_url, api_key=api_key)

            logger.info(f"Sending completion request to model {req.completion_params.get('model')}")

            # Make the (blocking) model call.
            completion = client.chat.completions.create(**completion_kwargs)

            logger.info(f"Completed response: {completion}")

            # Mark the rollout finished for the tracing backend.
            logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()})

            # Only the first choice is forwarded to the caller.
            response_data = {
                "status": "completed",
                "rollout_id": req.metadata.rollout_id,
                "choices": [
                    {
                        "message": {
                            "role": completion.choices[0].message.role,
                            "content": completion.choices[0].message.content,
                        }
                    }
                ],
            }

            self._send_json_response(200, response_data)

        except Exception as e:
            # req/logger exist only if parsing got far enough; log with
            # rollout context when we have it.
            if "req" in locals() and "logger" in locals():
                logger.error(f"❌ Error in rollout {req.metadata.rollout_id}: {e}")
                logger.error(str(e), extra={"status": Status.rollout_error(str(e))})

            self._send_error(500, str(e))

    def do_GET(self):
        """Health check endpoint: report server status and available routes."""
        self._send_json_response(
            200,
            {
                "status": "ok",
                "message": "SVGBench Vercel Serverless Function",
                "endpoints": {"POST /": "Process SVGBench evaluation requests"},
            },
        )

    def do_OPTIONS(self):
        """Handle CORS preflight requests with permissive headers."""
        self.send_response(200)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def _send_json_response(self, status_code: int, data: dict):
        """Serialize ``data`` as JSON and send it with CORS headers."""
        self.send_response(status_code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()
        self.wfile.write(json.dumps(data).encode("utf-8"))

    def _send_error(self, status_code: int, message: str):
        """Send an error response as ``{"error": message}`` JSON."""
        self._send_json_response(status_code, {"error": message})
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
openai>=1.0.0
python-dotenv>=0.19.0
eval_protocol>=0.2.58
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"redirects": [{ "source": "/init", "destination": "/api/init" }]
}
2 changes: 1 addition & 1 deletion eval_protocol/types/remote_rollout_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ class InitRequest(BaseModel):
default_factory=dict,
description="Completion parameters including model and optional model_kwargs, temperature, etc.",
)
elastic_search_config: Optional[ElasticsearchConfig] = None
messages: Optional[List[Message]] = None
tools: Optional[List[Dict[str, Any]]] = None

Expand All @@ -60,6 +59,7 @@ class InitRequest(BaseModel):
"""

metadata: RolloutMetadata
api_key: Optional[str] = None


class StatusResponse(BaseModel):
Expand Down
3 changes: 1 addition & 2 deletions test_event_bus_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
import asyncio
import sys
import json
from eval_protocol.event_bus import SqliteEventBus
from eval_protocol.models import EvaluationRow, InputMetadata
from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus


async def listener_process(db_path: str):
Expand Down
Loading
Loading