# Global event bus instance - uses SqliteEventBus for cross-process functionality

# Attributes stored on the proxy object itself; every other name belongs to the real bus.
_PROXY_ATTRS = frozenset({"_event_bus", "_get_event_bus"})


def _is_proxy_attr(name):
    """Return True for names that must be resolved on the proxy, not the real bus.

    Dunder names stay local so that introspection (``__class__``, ``__dict__``,
    ``__repr__`` ...) does not force the real bus to be created.
    """
    return name in _PROXY_ATTRS or (name.startswith("__") and name.endswith("__"))


def _get_default_event_bus():
    """Create the concrete bus used behind the global ``event_bus`` proxy.

    The import is function-local so that importing this package does not import
    the sqlite-backed implementation until a bus is actually needed.
    """
    from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus

    return SqliteEventBus()


class _LazyEventBus(EventBus):
    """Proxy that builds the real event bus on first attribute access.

    It subclasses ``EventBus`` only so ``isinstance(event_bus, EventBus)`` keeps
    working. Because of that inheritance, delegation cannot rely on
    ``__getattr__``: Python calls ``__getattr__`` only when normal lookup fails,
    so every method that ``EventBus`` itself defines would be found on this class
    and executed with the base-class implementation instead of the override
    provided by the real bus. ``__getattribute__`` is intercepted instead, and
    attribute writes/deletes are forwarded too so reads and writes always hit
    the same object.
    """

    def __init__(self):
        # EventBus.__init__ is intentionally not called: the proxy keeps no bus
        # state of its own, all state lives on the lazily created instance.
        self._event_bus: EventBus | None = None

    def _get_event_bus(self):
        """Return the real bus, creating it on the first call."""
        if self._event_bus is None:
            self._event_bus = _get_default_event_bus()
        return self._event_bus

    def __getattribute__(self, name):
        if _is_proxy_attr(name):
            return object.__getattribute__(self, name)
        real_bus = object.__getattribute__(self, "_get_event_bus")()
        return getattr(real_bus, name)

    def __setattr__(self, name, value):
        if _is_proxy_attr(name):
            object.__setattr__(self, name, value)
        else:
            setattr(self._get_event_bus(), name, value)

    def __delattr__(self, name):
        if _is_proxy_attr(name):
            object.__delattr__(self, name)
        else:
            delattr(self._get_event_bus(), name)


def __getattr__(name):
    """Lazily resolve ``SqliteEventBus`` for code that imports it from this package.

    ``from eval_protocol.event_bus import SqliteEventBus`` used to work through an
    eager import; this module-level hook keeps that import path valid without
    loading the sqlite module at package import time.
    """
    if name == "SqliteEventBus":
        from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus

        return SqliteEventBus
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


event_bus: EventBus = _LazyEventBus()
100644 index 00000000..1f6c65f6 --- /dev/null +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/.env.example @@ -0,0 +1,8 @@ +# Fireworks API Key for SVGBench Vercel Function +# Copy this file to .env and add your actual API key + +# Your Fireworks API key (get it from https://fireworks.ai) +FIREWORKS_API_KEY=your-fireworks-api-key-here + +# Note: The API key can also be passed in the request payload +# by RemoteRolloutProcessor. This .env file serves as a fallback. diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/.gitignore b/eval_protocol/quickstart/svg_agent/vercel_svg_server/.gitignore new file mode 100644 index 00000000..d1e9ab21 --- /dev/null +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/.gitignore @@ -0,0 +1,3 @@ +.vercel +.env +!.env.example diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/README.md b/eval_protocol/quickstart/svg_agent/vercel_svg_server/README.md new file mode 100644 index 00000000..8da30ded --- /dev/null +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/README.md @@ -0,0 +1,158 @@ +# SVGBench Vercel Serverless Function + +A Vercel serverless function that handles model calls for the SVGBench evaluation pipeline. + +## What it does + +- Receives SVGBench evaluation requests via POST +- Makes model calls to Fireworks AI +- Returns model responses for local SVG evaluation +- Handles CORS and provides health check endpoint + +## Setup + +### Option 1: Local Development (Recommended) + +1. **Install Vercel CLI:** + ```bash + npm install -g vercel + ``` + +2. **Navigate to this directory:** + ```bash + cd eval_protocol/quickstart/svg_agent/vercel_svg_server + ``` + +3. **Create .env file with your API key (optional):** + ```bash + cp .env.example .env + # Edit .env and add your actual API key + ``` + + **Note:** The API key can be provided either: + - In the request payload (automatically handled by `RemoteRolloutProcessor`) + - In a local `.env` file (fallback option) + +4. 
**Start local development server:** + ```bash + vercel dev + ``` + + Your function will be available at `http://localhost:3000` + +### Option 2: Production Deployment + +1. **Follow steps 1-2 above** + +2. **Deploy to Vercel:** + ```bash + vercel deploy + ``` + + Follow the prompts to: + - Create a new project (or link existing) + - Set project name (e.g., `svgbench-server`) + +3. **Deploy to production:** + ```bash + vercel --prod + ``` + +**Note:** The function receives the API key in the request payload (automatically handled by `RemoteRolloutProcessor`), but can also fall back to a local `.env` file if needed. + +## Usage + +The function provides these endpoints: + +### `POST /` +Processes SVGBench evaluation requests. + +**Request format:** +```json +{ + "messages": [ + { + "role": "user", + "content": "Your SVG generation prompt here" + } + ], + "model_base_url": "https://api.fireworks.ai/inference/v1", + "metadata": { + "rollout_id": "some-unique-id" + }, + "api_key": "your-fireworks-api-key", + "completion_params": { + "model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b", + "temperature": 0.8, + "max_tokens": 32768 + } +} +``` + +**Note:** The `api_key` field is automatically populated by `RemoteRolloutProcessor` from your local `FIREWORKS_API_KEY` environment variable. + +**Response format:** +```json +{ + "status": "completed", + "rollout_id": "some-unique-id", + "choices": [ + { + "message": { + "role": "assistant", + "content": "SVG code generated by model" + } + } + ] +} +``` + +### `GET /` +Health check endpoint - returns server status. + +## Integration with Tests + +### Local Development +For local testing with `vercel dev`: + +```python +@evaluation_test( + rollout_processor=RemoteRolloutProcessor( + remote_base_url="http://localhost:3000", + timeout_seconds=300, + ), + # ... 
other params +) +``` + +### Production Deployment +For testing with deployed function: + +```python +@evaluation_test( + rollout_processor=RemoteRolloutProcessor( + remote_base_url="https://vercel-svg-server.vercel.app", + timeout_seconds=300, + ), + # ... other params +) +``` + +**Note:** The `RemoteRolloutProcessor` automatically passes your local `FIREWORKS_API_KEY` environment variable to the Vercel function. + +## API Key Configuration + +The function uses the following priority for API keys: + +1. **Request payload** (`req.api_key`) - Automatically provided by `RemoteRolloutProcessor` +2. **Local .env file** (`FIREWORKS_API_KEY`) - Fallback for local development +3. **Environment variable** - Fallback for other deployment methods + +This flexible approach works for both local development and production deployment. + +## Architecture + +- **Remote**: Model calls (handled by this serverless function) +- **Local**: SVG extraction, rendering, evaluation, scoring (handled by test client) + +This keeps the heavy SVG processing local while scaling model calls in the cloud. diff --git a/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py b/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py new file mode 100644 index 00000000..74a4b7ff --- /dev/null +++ b/eval_protocol/quickstart/svg_agent/vercel_svg_server/api/init.py @@ -0,0 +1,138 @@ +""" +Vercel serverless function for SVGBench remote evaluation. + +This function handles the model call part of the evaluation pipeline. +The SVG evaluation logic remains in the test client. 
class handler(BaseHTTPRequestHandler):
    """Vercel serverless entry point that performs the model call for one rollout.

    ``POST`` parses the body as an ``InitRequest``, forwards its messages (plus
    tools and completion parameters) to an OpenAI-compatible endpoint and returns
    the first choice. ``GET`` is a health check and ``OPTIONS`` answers CORS
    preflight requests. SVG evaluation itself stays in the test client.
    """

    # CORS headers attached to every response produced by this function.
    _CORS_HEADERS = (
        ("Access-Control-Allow-Origin", "*"),
        ("Access-Control-Allow-Methods", "POST, GET, OPTIONS"),
        ("Access-Control-Allow-Headers", "Content-Type"),
    )

    def do_POST(self):
        """Run one rollout and reply with the model's first choice.

        Responds 400 when the body cannot be parsed into an ``InitRequest`` or has
        no messages, 500 when no API key is available or the model call fails,
        and 200 with ``status``/``rollout_id``/``choices`` on success.
        """
        logger = None
        try:
            # A body that cannot be decoded or validated is the caller's mistake,
            # so it is reported as 400 rather than as a server failure.
            try:
                req = self._read_init_request()
            except (ValueError, TypeError) as e:
                self._send_error(400, f"Invalid request: {e}")
                return

            # Attach rollout_id filter to logger.
            # NOTE(review): this registers one named logger (and one filter) per
            # rollout id for the lifetime of the process - confirm that is acceptable
            # for long-lived warm instances.
            logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}")
            logger.addFilter(RolloutIdFilter(req.metadata.rollout_id))

            if not req.messages:
                error_msg = "messages is required"
                logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
                self._send_error(400, error_msg)
                return

            # completion_params carries the model name and sampling settings.
            completion_kwargs = {
                "messages": req.messages,
                **req.completion_params,
            }
            if req.tools:
                completion_kwargs["tools"] = req.tools

            # Prefer the key sent with the request, fall back to the environment.
            api_key = req.api_key or os.environ.get("FIREWORKS_API_KEY")
            if not api_key:
                error_msg = "API key not provided in request or FIREWORKS_API_KEY environment variable"
                logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
                self._send_error(500, error_msg)
                return

            client = OpenAI(base_url=req.model_base_url, api_key=api_key)

            logger.info("Sending completion request to model %s", req.completion_params.get("model"))
            completion = client.chat.completions.create(**completion_kwargs)
            logger.info("Completed response: %s", completion)

            logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()})

            first_message = completion.choices[0].message
            response_data = {
                "status": "completed",
                "rollout_id": req.metadata.rollout_id,
                "choices": [
                    {
                        "message": {
                            "role": first_message.role,
                            "content": first_message.content,
                        }
                    }
                ],
            }
            self._send_json_response(200, response_data)

        except Exception as e:
            # Top-level boundary: whatever went wrong, the caller must still get a
            # JSON error. The logger only exists once the request has been parsed.
            if logger is not None:
                logger.error(f"❌ Error in rollout {req.metadata.rollout_id}: {e}")
                logger.error(str(e), extra={"status": Status.rollout_error(str(e))})

            self._send_error(500, str(e))

    def do_GET(self):
        """Health check endpoint"""
        self._send_json_response(
            200,
            {
                "status": "ok",
                "message": "SVGBench Vercel Serverless Function",
                "endpoints": {"POST /": "Process SVGBench evaluation requests"},
            },
        )

    def do_OPTIONS(self):
        """Handle CORS preflight requests"""
        self.send_response(200)
        self._send_cors_headers()
        self.end_headers()

    def _read_init_request(self):
        """Read the request body and parse it into an ``InitRequest``.

        Raises ``ValueError`` (which covers invalid JSON and undecodable bytes) or
        ``TypeError`` (JSON that is not an object) for malformed input.
        """
        content_length = int(self.headers.get("Content-Length", 0))
        request_body = self.rfile.read(content_length).decode("utf-8")
        request_data = json.loads(request_body)
        return InitRequest(**request_data)

    def _send_cors_headers(self):
        """Emit the CORS headers shared by every response."""
        for header_name, header_value in self._CORS_HEADERS:
            self.send_header(header_name, header_value)

    def _send_json_response(self, status_code: int, data: dict):
        """Send a JSON response"""
        # Serialize before touching the socket so a serialization failure cannot
        # leave a half-written response, and so Content-Length can be reported.
        body = json.dumps(data).encode("utf-8")
        self.send_response(status_code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self._send_cors_headers()
        self.end_headers()
        self.wfile.write(body)

    def _send_error(self, status_code: int, message: str):
        """Send an error response"""
        self._send_json_response(status_code, {"error": message})
eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus async def listener_process(db_path: str): diff --git a/tests/remote_server/quickstart.py b/tests/remote_server/quickstart.py index 1d204dd2..7f786b8a 100644 --- a/tests/remote_server/quickstart.py +++ b/tests/remote_server/quickstart.py @@ -1,15 +1,17 @@ -# MANUAL SERVER STARTUP REQUIRED: +# REMOTE SERVER OPTIONS: # -# For Python server testing, start: -# python -m tests.remote_server.remote_server (runs on http://127.0.0.1:3000) +# Option 1: Use Vercel dev server locally (recommended for development) +# cd eval_protocol/quickstart/svg_agent/vercel_svg_server +# vercel dev +# Then change remote_base_url to: "http://localhost:3000" # -# For TypeScript server testing, start: -# cd tests/remote_server/typescript-server -# npm install -# npm start +# Option 2: Use deployed Vercel production server (current configuration) +# No setup needed - uses the deployed serverless function +# Currently using: https://vercel-svg-server.vercel.app # -# The TypeScript server should be running on http://127.0.0.1:3000 -# You only need to start one of the servers! 
+# Option 3: Use local Python server (for testing) +# python -m tests.remote_server.remote_server +# Then change remote_base_url to: "http://127.0.0.1:3000" import os from typing import List @@ -34,14 +36,16 @@ def rows() -> List[EvaluationRow]: generators=[rows], ), rollout_processor=RemoteRolloutProcessor( - remote_base_url="http://127.0.0.1:3000", + # For local Vercel dev: "http://localhost:3000" + # For production Vercel: (current setting) + remote_base_url="https://vercel-svg-server.vercel.app", timeout_seconds=30, ), ) async def test_remote_rollout_and_fetch_fireworks(row: EvaluationRow) -> EvaluationRow: """ - End-to-end test: - - REQUIRES MANUAL SERVER STARTUP: python -m tests.remote_server.remote_server + End-to-end test with Vercel production server: + - Uses deployed Vercel serverless function (no manual startup needed) - trigger remote rollout via RemoteRolloutProcessor (calls init/status) - fetch traces from Langfuse via Fireworks tracing proxy (uses default FireworksTracingAdapter) - FAIL if no traces found or rollout_id missing diff --git a/tests/test_event_bus.py b/tests/test_event_bus.py index 6306a02f..301816da 100644 --- a/tests/test_event_bus.py +++ b/tests/test_event_bus.py @@ -1,7 +1,7 @@ import asyncio import tempfile -from eval_protocol.event_bus import SqliteEventBus +from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus from eval_protocol.event_bus.event_bus import EventBus from eval_protocol.models import EvaluationRow, InputMetadata, Message diff --git a/tests/test_event_bus_helper.py b/tests/test_event_bus_helper.py index 4bd97231..ccedc542 100644 --- a/tests/test_event_bus_helper.py +++ b/tests/test_event_bus_helper.py @@ -4,8 +4,7 @@ import asyncio import sys import json -from eval_protocol.event_bus import SqliteEventBus -from eval_protocol.models import EvaluationRow, InputMetadata +from eval_protocol.event_bus.sqlite_event_bus import SqliteEventBus async def listener_process(db_path: str):