Skip to content

Commit a2bfa7d

Browse files
committed
Vercel example
1 parent 0c02fec commit a2bfa7d

File tree

7 files changed

+277
-12
lines changed

7 files changed

+277
-12
lines changed

eval_protocol/pytest/tracing_utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import base64
6+
import os
67
from typing import Any, Callable, Dict, List, Optional
78

89
from eval_protocol.adapters.fireworks_tracing import FireworksTracingAdapter
@@ -122,12 +123,16 @@ def build_init_request(
122123
):
123124
final_model_base_url = build_fireworks_tracing_url(model_base_url, meta, completion_params_base_url)
124125

126+
# Extract API key from environment or completion_params
127+
api_key = os.environ.get("FIREWORKS_API_KEY")
128+
125129
return InitRequest(
126130
model=model,
127131
messages=clean_messages,
128132
tools=row.tools,
129133
metadata=meta,
130134
model_base_url=final_model_base_url,
135+
api_key=api_key,
131136
)
132137

133138

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.vercel
2+
.env
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# SVGBench Vercel Serverless Function
2+
3+
A Vercel serverless function that handles model calls for the SVGBench evaluation pipeline.
4+
5+
## What it does
6+
7+
- Receives SVGBench evaluation requests via POST
8+
- Makes model calls to Fireworks AI
9+
- Returns model responses for local SVG evaluation
10+
- Handles CORS and provides a health check endpoint
11+
12+
## Setup
13+
14+
1. **Install Vercel CLI:**
15+
```bash
16+
npm install -g vercel
17+
```
18+
19+
2. **Navigate to this directory:**
20+
```bash
21+
cd eval_protocol/quickstart/svg_agent/vercel_svg_server
22+
```
23+
24+
3. **Deploy to Vercel:**
25+
```bash
26+
vercel deploy
27+
```
28+
29+
Follow the prompts to:
30+
- Create a new project (or link existing)
31+
- Set project name (e.g., `svgbench-server`)
32+
33+
4. **Set environment variable:**
34+
```bash
35+
vercel env add FIREWORKS_API_KEY
36+
# Enter your Fireworks API key when prompted
37+
```
38+
39+
5. **Deploy to production:**
40+
```bash
41+
vercel --prod
42+
```
43+
44+
## Usage
45+
46+
The function provides these endpoints:
47+
48+
### `POST /`
49+
Processes SVGBench evaluation requests.
50+
51+
**Request format:**
52+
```json
53+
{
54+
"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b",
55+
"messages": [
56+
{
57+
"role": "user",
58+
"content": "Your SVG generation prompt here"
59+
}
60+
],
61+
"model_base_url": "https://api.fireworks.ai/inference/v1",
62+
"metadata": {
63+
"rollout_id": "some-unique-id"
64+
},
65+
"completion_params": {
66+
"temperature": 0.8,
67+
"max_tokens": 32768
68+
}
69+
}
70+
```
71+
72+
**Response format:**
73+
```json
74+
{
75+
"status": "completed",
76+
"rollout_id": "some-unique-id",
77+
"choices": [
78+
{
79+
"message": {
80+
"role": "assistant",
81+
"content": "SVG code generated by model"
82+
}
83+
}
84+
]
85+
}
86+
```
87+
88+
### `GET /`
89+
Health check endpoint - returns server status.
90+
91+
## Integration with Tests
92+
93+
Update your `test_remote_svgbench.py` to use your deployed URL:
94+
95+
```python
96+
@evaluation_test(
97+
rollout_processor=RemoteRolloutProcessor(
98+
remote_base_url="https://your-deployment-url.vercel.app",
99+
timeout_seconds=300,
100+
),
101+
# ... other params
102+
)
103+
```
104+
105+
## Architecture
106+
107+
- **Remote**: Model calls (handled by this serverless function)
108+
- **Local**: SVG extraction, rendering, evaluation, scoring (handled by test client)
109+
110+
This keeps the heavy SVG processing local while scaling model calls in the cloud.
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""
2+
Vercel serverless function for SVGBench remote evaluation.
3+
4+
This function handles the model call part of the evaluation pipeline.
5+
The SVG evaluation logic remains in the test client.
6+
"""
7+
8+
import json
9+
import os
10+
import logging
11+
from http.server import BaseHTTPRequestHandler
12+
from openai import OpenAI
13+
from dotenv import load_dotenv
14+
15+
from eval_protocol import Status, InitRequest, FireworksTracingHttpHandler, RolloutIdFilter
16+
17+
load_dotenv()
18+
19+
# Attach Fireworks tracing handler to root logger
20+
fireworks_handler = FireworksTracingHttpHandler()
21+
logging.getLogger().addHandler(fireworks_handler)
22+
23+
24+
class handler(BaseHTTPRequestHandler):
    """Vercel serverless entry point for SVGBench remote rollouts.

    POST /   -> parse an InitRequest, forward the chat completion to the
                configured model endpoint, and return the first choice.
    GET  /   -> health check.
    OPTIONS  -> CORS preflight.
    """

    def do_POST(self):
        """Handle a rollout request: validate, call the model, return the response."""
        # Bind these up front so the except block can safely test them,
        # instead of the fragile `"req" in locals()` pattern.
        req = None
        logger = None
        try:
            # Read and parse request body
            content_length = int(self.headers.get("Content-Length", 0))
            request_body = self.rfile.read(content_length).decode("utf-8")
            try:
                request_data = json.loads(request_body)
            except json.JSONDecodeError as e:
                # Malformed JSON is a client error, not a server failure.
                self._send_error(400, f"invalid JSON body: {e}")
                return

            # Parse as InitRequest
            req = InitRequest(**request_data)

            # Attach rollout_id filter so every record from this logger carries
            # the rollout_id (picked up by the Fireworks tracing handler that
            # the module attaches to the root logger).
            logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}")
            logger.addFilter(RolloutIdFilter(req.metadata.rollout_id))

            # Validate required fields
            if not req.messages:
                error_msg = "messages is required"
                logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
                self._send_error(400, error_msg)
                return

            # Prepare completion arguments
            completion_kwargs = {
                "model": req.model,
                "messages": req.messages,
            }

            # Add tools if present
            if req.tools:
                completion_kwargs["tools"] = req.tools

            # NOTE(review): the request's completion_params (temperature,
            # max_tokens, ...) are currently NOT forwarded to the model call —
            # confirm whether they should be merged into completion_kwargs.

            # Get API key (prefer request api_key, fallback to environment)
            api_key = req.api_key or os.environ.get("FIREWORKS_API_KEY")
            if not api_key:
                error_msg = "API key not provided in request or FIREWORKS_API_KEY environment variable"
                logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)})
                self._send_error(500, error_msg)
                return

            # Create OpenAI client pointed at the request's model base URL
            client = OpenAI(base_url=req.model_base_url, api_key=api_key)

            logger.info(f"Sending completion request to model {req.model}")

            # Make the model call
            completion = client.chat.completions.create(**completion_kwargs)

            logger.info(f"Completed response: {completion}")

            # Log completion status for the tracing backend
            logger.info(
                f"Rollout {req.metadata.rollout_id} completed",
                extra={"status": Status.rollout_finished()},
            )

            # Return only the first choice's message; the client does the
            # SVG extraction/evaluation locally.
            response_data = {
                "status": "completed",
                "rollout_id": req.metadata.rollout_id,
                "choices": [
                    {
                        "message": {
                            "role": completion.choices[0].message.role,
                            "content": completion.choices[0].message.content,
                        }
                    }
                ],
            }

            self._send_json_response(200, response_data)

        except Exception as e:
            # Boundary handler: report the failure to tracing when we got far
            # enough to know which rollout this was, then tell the client.
            if req is not None and logger is not None:
                logger.error(f"❌ Error in rollout {req.metadata.rollout_id}: {e}")
                logger.error(str(e), extra={"status": Status.rollout_error(str(e))})

            self._send_error(500, str(e))

    def do_GET(self):
        """Health check endpoint"""
        self._send_json_response(
            200,
            {
                "status": "ok",
                "message": "SVGBench Vercel Serverless Function",
                "endpoints": {"POST /": "Process SVGBench evaluation requests"},
            },
        )

    def do_OPTIONS(self):
        """Handle CORS preflight requests"""
        self.send_response(200)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()

    def _send_json_response(self, status_code: int, data: dict):
        """Send a JSON response with permissive CORS headers."""
        self.send_response(status_code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type")
        self.end_headers()
        self.wfile.write(json.dumps(data).encode("utf-8"))

    def _send_error(self, status_code: int, message: str):
        """Send an error response as JSON: {"error": message}."""
        self._send_json_response(status_code, {"error": message})
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
openai>=1.0.0
2+
python-dotenv>=0.19.0
3+
eval_protocol>=0.2.58
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"redirects": [{ "source": "/init", "destination": "/api/init" }]
3+
}

tests/remote_server/quickstart.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
1-
# MANUAL SERVER STARTUP REQUIRED:
1+
# REMOTE SERVER OPTIONS:
22
#
3-
# For Python server testing, start:
4-
# python -m tests.remote_server.remote_server (runs on http://127.0.0.1:3000)
3+
# Option 1: Use Vercel dev server locally (recommended for development)
4+
# cd eval_protocol/quickstart/svg_agent/vercel_svg_server
5+
# vercel dev
6+
# Then change remote_base_url to: "http://localhost:3000"
57
#
6-
# For TypeScript server testing, start:
7-
# cd tests/remote_server/typescript-server
8-
# npm install
9-
# npm start
8+
# Option 2: Use deployed Vercel production server (current configuration)
9+
# No setup needed - uses the deployed serverless function
10+
# Currently using: https://vercel-svg-server-qntltzfaq-xzrdereks-projects.vercel.app
1011
#
11-
# The TypeScript server should be running on http://127.0.0.1:3000
12-
# You only need to start one of the servers!
12+
# Option 3: Use local Python server (for testing)
13+
# python -m tests.remote_server.remote_server
14+
# Then change remote_base_url to: "http://127.0.0.1:3000"
1315

1416
import os
1517
from typing import List
@@ -34,14 +36,16 @@ def rows() -> List[EvaluationRow]:
3436
generators=[rows],
3537
),
3638
rollout_processor=RemoteRolloutProcessor(
37-
remote_base_url="http://127.0.0.1:3000",
39+
# For local Vercel dev: "http://localhost:3000"
40+
# For production Vercel: (current setting)
41+
remote_base_url="https://vercel-svg-server.vercel.app",
3842
timeout_seconds=30,
3943
),
4044
)
4145
async def test_remote_rollout_and_fetch_fireworks(row: EvaluationRow) -> EvaluationRow:
4246
"""
43-
End-to-end test:
44-
- REQUIRES MANUAL SERVER STARTUP: python -m tests.remote_server.remote_server
47+
End-to-end test with Vercel production server:
48+
- Uses deployed Vercel serverless function (no manual startup needed)
4549
- trigger remote rollout via RemoteRolloutProcessor (calls init/status)
4650
- fetch traces from Langfuse via Fireworks tracing proxy (uses default FireworksTracingAdapter)
4751
- FAIL if no traces found or rollout_id missing

0 commit comments

Comments
 (0)