|
9 | 9 | import os |
10 | 10 | import logging |
11 | 11 | import sys |
12 | | -from http.server import BaseHTTPRequestHandler |
| 12 | +import asyncio |
| 13 | +from flask import Flask, request, jsonify |
13 | 14 | from openai import OpenAI |
14 | 15 | from dotenv import load_dotenv |
15 | 16 |
|
@@ -44,119 +45,133 @@ def filter(self, record: logging.LogRecord) -> bool: |
44 | 45 | # Attach Fireworks tracing handler to root logger (non-stream HTTP sink) |
45 | 46 | root_logger.addHandler(FireworksTracingHttpHandler()) |
46 | 47 |
|
| 48 | +# Create the Flask application object that the @app.route handlers in this module register on |
| 49 | +app = Flask(__name__) |
| 50 | + |
| 51 | + |
| 52 | +async def execute_rollout_background(req, api_key): |
| 53 | + """Run the chat completion for one rollout and log its finished/error status.""" |
| 54 | + # Attach rollout_id filter to logger |
| 55 | + logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}") |
| 56 | + logger.addFilter(RolloutIdFilter(req.metadata.rollout_id)) |
| 57 | + |
| 58 | + try: |
| 59 | + model = req.completion_params.get("model") |
| 60 | + # Uncomment if you need to strip fireworks_ai/ prefix |
| 61 | + # if model and isinstance(model, str) and model.startswith("fireworks_ai/"): |
| 62 | + # model = model[len("fireworks_ai/"):] |
| 63 | + |
| 64 | + # Prepare completion arguments |
| 65 | + completion_kwargs = { |
| 66 | + "messages": req.messages, |
| 67 | + # "messages": [{"role": "user", "content": "Hello, how are you?"}], |
| 68 | + "model": model, |
| 69 | + "temperature": req.completion_params.get("temperature"), |
| 70 | + "max_tokens": req.completion_params.get("max_tokens"), |
| 71 | + } |
| 72 | + |
| 73 | + # Add tools if present |
| 74 | + if req.tools: |
| 75 | + completion_kwargs["tools"] = req.tools |
| 76 | + |
| 77 | + logger.info(  # the API key is masked: log records are shipped to an HTTP tracing sink |
| 78 | + f"DEBUG: {req.model_base_url}, COMPLETION_KWARGS: {completion_kwargs}, API_KEY: {'<set>' if api_key else '<missing>'}, MODEL: {model}" |
| 79 | + ) |
| 80 | + |
| 81 | + # Create a synchronous OpenAI client (the AsyncOpenAI variant is left commented out below) |
| 82 | + # client = AsyncOpenAI(base_url=req.model_base_url, api_key=api_key) |
| 83 | + client = OpenAI(base_url=req.model_base_url, api_key=api_key) |
| 84 | + |
| 85 | + logger.info(f"Sending completion request to model {model}") |
47 | 86 |
|
48 | | -class handler(BaseHTTPRequestHandler): |
49 | | - def do_POST(self): |
50 | | - try: |
51 | | - # Read and parse request body |
52 | | - content_length = int(self.headers.get("Content-Length", 0)) |
53 | | - request_body = self.rfile.read(content_length).decode("utf-8") |
54 | | - request_data = json.loads(request_body) |
55 | | - |
56 | | - # Parse as InitRequest |
57 | | - req = InitRequest(**request_data) |
58 | | - |
59 | | - # Attach rollout_id filter to logger |
60 | | - logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}") |
61 | | - logger.addFilter(RolloutIdFilter(req.metadata.rollout_id)) |
62 | | - |
63 | | - # Validate required fields |
64 | | - if not req.messages: |
65 | | - error_msg = "messages is required" |
66 | | - logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) |
67 | | - self._send_error(400, error_msg) |
68 | | - return |
69 | | - |
70 | | - model = req.completion_params.get("model") |
71 | | - if model and isinstance(model, str) and model.startswith("fireworks_ai/"): |
72 | | - model = model[len("fireworks_ai/") :] |
73 | | - |
74 | | - # Prepare completion arguments |
75 | | - completion_kwargs = { |
76 | | - "messages": req.messages, |
77 | | - "model": model, |
78 | | - "temperature": req.completion_params.get("temperature"), |
79 | | - "max_tokens": req.completion_params.get("max_tokens"), |
80 | | - } |
81 | | - |
82 | | - # Add tools if present |
83 | | - if req.tools: |
84 | | - completion_kwargs["tools"] = req.tools |
85 | | - |
86 | | - # Get API key (prefer request api_key, fallback to environment) |
87 | | - api_key = req.api_key or os.environ.get("FIREWORKS_API_KEY") |
88 | | - if not api_key: |
89 | | - error_msg = "API key not provided in request or FIREWORKS_API_KEY environment variable" |
90 | | - logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) |
91 | | - self._send_error(500, error_msg) |
92 | | - return |
93 | | - |
94 | | - # Create OpenAI client |
95 | | - client = OpenAI(base_url=req.model_base_url, api_key=api_key) |
96 | | - |
97 | | - logger.info(f"Sending completion request to model {req.completion_params.get('model')}") |
98 | | - |
99 | | - # Make the model call |
100 | | - completion = client.chat.completions.create(**completion_kwargs) |
101 | | - |
102 | | - logger.info(f"Completed response: {completion}") |
103 | | - |
104 | | - # Log completion status |
105 | | - logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()}) |
106 | | - |
107 | | - # Return the completion response |
108 | | - response_data = { |
109 | | - "status": "completed", |
110 | | - "rollout_id": req.metadata.rollout_id, |
111 | | - "choices": [ |
112 | | - { |
113 | | - "message": { |
114 | | - "role": completion.choices[0].message.role, |
115 | | - "content": completion.choices[0].message.content, |
116 | | - } |
117 | | - } |
118 | | - ], |
119 | | - } |
120 | | - |
121 | | - self._send_json_response(200, response_data) |
122 | | - |
123 | | - except Exception as e: |
124 | | - # Log error if we have the request context |
125 | | - if "req" in locals() and "logger" in locals(): |
126 | | - logger.error(f"❌ Error in rollout {req.metadata.rollout_id}: {e}") |
127 | | - logger.error(str(e), extra={"status": Status.rollout_error(str(e))}) |
128 | | - |
129 | | - self._send_error(500, str(e)) |
130 | | - |
131 | | - def do_GET(self): |
132 | | - """Health check endpoint""" |
133 | | - self._send_json_response( |
134 | | - 200, |
135 | | - { |
136 | | - "status": "ok", |
137 | | - "message": "SVGBench Vercel Serverless Function", |
138 | | - "endpoints": {"POST /": "Process SVGBench evaluation requests"}, |
139 | | - }, |
| 87 | + # Make the model call (synchronous client; no explicit timeout is passed here) |
| 88 | + import time |
| 89 | + |
| 90 | + logger.info(f"timing start: {time.time()}") |
| 91 | + completion = client.chat.completions.create(**completion_kwargs) |
| 92 | + logger.info(f"Completed response: {completion}") |
| 93 | + logger.info(f"timing end: {time.time()}") |
| 94 | + # Log successful completion - THIS IS WHAT RemoteRolloutProcessor POLLS FOR |
| 95 | + logger.info(f"Rollout {req.metadata.rollout_id} completed", extra={"status": Status.rollout_finished()}) |
| 96 | + |
| 97 | + except Exception as e: |
| 98 | + # Log error with structured status - THIS IS WHAT RemoteRolloutProcessor POLLS FOR |
| 99 | + logger.error( |
| 100 | + f"Rollout {req.metadata.rollout_id} failed: {e}", extra={"status": Status.rollout_error_from_exception(e)} |
140 | 101 | ) |
141 | 102 |
|
142 | | - def do_OPTIONS(self): |
143 | | - """Handle CORS preflight requests""" |
144 | | - self.send_response(200) |
145 | | - self.send_header("Access-Control-Allow-Origin", "*") |
146 | | - self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS") |
147 | | - self.send_header("Access-Control-Allow-Headers", "Content-Type") |
148 | | - self.end_headers() |
149 | | - |
150 | | - def _send_json_response(self, status_code: int, data: dict): |
151 | | - """Send a JSON response""" |
152 | | - self.send_response(status_code) |
153 | | - self.send_header("Content-Type", "application/json") |
154 | | - self.send_header("Access-Control-Allow-Origin", "*") |
155 | | - self.send_header("Access-Control-Allow-Methods", "POST, GET, OPTIONS") |
156 | | - self.send_header("Access-Control-Allow-Headers", "Content-Type") |
157 | | - self.end_headers() |
158 | | - self.wfile.write(json.dumps(data).encode("utf-8")) |
159 | | - |
160 | | - def _send_error(self, status_code: int, message: str): |
161 | | - """Send an error response""" |
162 | | - self._send_json_response(status_code, {"error": message}) |
| 103 | + |
| 104 | +@app.route("/init", methods=["POST"]) |
| 105 | +async def init():  # POST /init: validate the request, then schedule the rollout and reply "accepted" |
| 106 | + try: |
| 107 | + # Parse the JSON body into an InitRequest (any failure falls through to the except below) |
| 108 | + req = InitRequest(**request.get_json()) |
| 109 | + |
| 110 | + # Per-rollout logger, tagged with the rollout_id filter, used for validation logging |
| 111 | + logger = logging.getLogger(f"{__name__}.{req.metadata.rollout_id}") |
| 112 | + logger.addFilter(RolloutIdFilter(req.metadata.rollout_id)) |
| 113 | + |
| 114 | + # Validate required fields |
| 115 | + if not req.messages: |
| 116 | + error_msg = "messages is required" |
| 117 | + logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) |
| 118 | + return jsonify({"error": error_msg}), 400 |
| 119 | + |
| 120 | + # Get API key (prefer request api_key, fallback to environment) |
| 121 | + if req.api_key: |
| 122 | + logger.info("Using API key from request") |
| 123 | + api_key = req.api_key |
| 124 | + elif os.environ.get("FIREWORKS_API_KEY"): |
| 125 | + logger.info("Using API key from environment") |
| 126 | + api_key = os.environ.get("FIREWORKS_API_KEY") |
| 127 | + else: |
| 128 | + error_msg = "API key not provided in request or environment variable" |
| 129 | + logger.error(error_msg, extra={"status": Status.rollout_error(error_msg)}) |
| 130 | + return jsonify({"error": error_msg}), 401 |
| 131 | + |
| 132 | + # Build the acceptance payload sent back to the caller (NOTE(review): the "within 30s" requirement is not enforced here - confirm) |
| 133 | + response_data = { |
| 134 | + "status": "accepted", |
| 135 | + "rollout_id": req.metadata.rollout_id, |
| 136 | + "message": "Rollout processing started", |
| 137 | + } |
| 138 | + |
| 139 | + # Schedule the rollout as a task without awaiting it. NOTE(review): the task reference is dropped and the model call inside is blocking - confirm it completes as intended under Flask |
| 140 | + asyncio.create_task(execute_rollout_background(req, api_key)) |
| 141 | + |
| 142 | + return jsonify(response_data), 200 |
| 143 | + |
| 144 | + except Exception as e: |
| 145 | + # Any exception raised above (not only parsing failures) is reported to the caller as a 400 parsing error |
| 146 | + return jsonify({"error": f"Request parsing error: {str(e)}"}), 400 |
| 147 | + |
| 148 | + |
| 149 | +@app.route("/", methods=["GET"]) |
| 150 | +def health_check(): |
| 151 | + """Health check endpoint: report service status and the POST route that accepts rollouts.""" |
| 152 | + return jsonify( |
| 153 | + { |
| 154 | + "status": "ok", |
| 155 | + "message": "SVGBench Vercel Serverless Function", |
| 156 | + "endpoints": {"POST /init": "Process SVGBench evaluation requests"}, |
| 157 | + } |
| 158 | + ) |
| 159 | + |
| 160 | + |
| 161 | +@app.route("/", methods=["OPTIONS"]) |
| 162 | +def options_handler(): |
| 163 | + """Handle CORS preflight requests on "/" with an empty JSON body and permissive CORS headers""" |
| 164 | + response = jsonify({}) |
| 165 | + response.headers["Access-Control-Allow-Origin"] = "*" |
| 166 | + response.headers["Access-Control-Allow-Methods"] = "POST, GET, OPTIONS" |
| 167 | + response.headers["Access-Control-Allow-Headers"] = "Content-Type" |
| 168 | + return response |
| 169 | + |
| 170 | + |
| 171 | +# Add permissive CORS headers (any origin; POST/GET/OPTIONS; Content-Type) to every response |
| 172 | +@app.after_request |
| 173 | +def add_cors_headers(response): |
| 174 | + response.headers["Access-Control-Allow-Origin"] = "*" |
| 175 | + response.headers["Access-Control-Allow-Methods"] = "POST, GET, OPTIONS" |
| 176 | + response.headers["Access-Control-Allow-Headers"] = "Content-Type" |
| 177 | + return response |
0 commit comments