From 861c590ea0f339ddfd4ff7dee2b33942126dc79d Mon Sep 17 00:00:00 2001 From: "shaobo.xie" Date: Mon, 9 Feb 2026 17:18:50 +0800 Subject: [PATCH 1/4] feat: add claude-opus-4.6 model and fix SSE event stream format - Add claude-opus-4.6 to model mapping, KIRO_MODELS set, models endpoint, and webui - Update map_model_name to route opus 4.6 requests correctly - Prefix all SSE data lines with proper 'event:' fields in anthropic stream handler - Add ping event after content_block_start for SSE compliance --- kiro_proxy/config.py | 5 +++-- kiro_proxy/handlers/anthropic.py | 31 ++++++++++++++++--------------- kiro_proxy/main.py | 1 + kiro_proxy/web/webui.py | 1 + 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/kiro_proxy/config.py b/kiro_proxy/config.py index 8fb8bc5..f4d31b4 100644 --- a/kiro_proxy/config.py +++ b/kiro_proxy/config.py @@ -43,9 +43,10 @@ "sonnet": "claude-sonnet-4", "haiku": "claude-haiku-4.5", "opus": "claude-opus-4.5", + "opus-4.6": "claude-opus-4.6", } -KIRO_MODELS = {"auto", "claude-sonnet-4.5", "claude-sonnet-4", "claude-haiku-4.5", "claude-opus-4.5"} +KIRO_MODELS = {"auto", "claude-sonnet-4.5", "claude-sonnet-4", "claude-haiku-4.5", "claude-opus-4.5", "claude-opus-4.6"} def map_model_name(model: str) -> str: """将外部模型名称映射到 Kiro 支持的名称""" @@ -57,7 +58,7 @@ def map_model_name(model: str) -> str: return model model_lower = model.lower() if "opus" in model_lower: - return "claude-opus-4.5" + return "claude-opus-4.6" if "4.6" in model_lower else "claude-opus-4.5" if "haiku" in model_lower: return "claude-haiku-4.5" if "sonnet" in model_lower: diff --git a/kiro_proxy/handlers/anthropic.py b/kiro_proxy/handlers/anthropic.py index 40a6e40..28599a4 100644 --- a/kiro_proxy/handlers/anthropic.py +++ b/kiro_proxy/handlers/anthropic.py @@ -243,7 +243,7 @@ async def generate(): if flow_id: flow_monitor.fail_flow(flow_id, "rate_limit_error", "All accounts rate limited", 429) - yield f'data: 
{{"type":"error","error":{{"type":"rate_limit_error","message":"All accounts rate limited"}}}}\n\n' + yield f'event: error\ndata: {{"type":"error","error":{{"type":"rate_limit_error","message":"All accounts rate limited"}}}}\n\n' return # 处理可重试的服务端错误 @@ -256,7 +256,7 @@ async def generate(): continue if flow_id: flow_monitor.fail_flow(flow_id, "api_error", "Server error after retries", response.status_code) - yield f'data: {{"type":"error","error":{{"type":"api_error","message":"Server error after retries"}}}}\n\n' + yield f'event: error\ndata: {{"type":"error","error":{{"type":"api_error","message":"Server error after retries"}}}}\n\n' return if response.status_code != 200: @@ -326,7 +326,7 @@ async def api_caller(prompt: str) -> str: if flow_id: flow_monitor.fail_flow(flow_id, error_type, error_msg, response.status_code, error_str) - yield f'data: {{"type":"error","error":{{"type":"{error_type}","message":"{error_msg}"}}}}\n\n' + yield f'event: error\ndata: {{"type":"error","error":{{"type":"{error_type}","message":"{error_msg}"}}}}\n\n' return # 标记开始流式传输 @@ -335,8 +335,9 @@ async def api_caller(prompt: str) -> str: # 正常处理响应 msg_id = f"msg_{log_id}" - yield f'data: {{"type":"message_start","message":{{"id":"{msg_id}","type":"message","role":"assistant","content":[],"model":"{model}","stop_reason":null,"stop_sequence":null,"usage":{{"input_tokens":0,"output_tokens":0}}}}}}\n\n' - yield f'data: {{"type":"content_block_start","index":0,"content_block":{{"type":"text","text":""}}}}\n\n' + yield f'event: message_start\ndata: {{"type":"message_start","message":{{"id":"{msg_id}","type":"message","role":"assistant","content":[],"model":"{model}","stop_reason":null,"stop_sequence":null,"usage":{{"input_tokens":0,"output_tokens":0}}}}}}\n\n' + yield f'event: content_block_start\ndata: {{"type":"content_block_start","index":0,"content_block":{{"type":"text","text":""}}}}\n\n' + yield f'event: ping\ndata: {{"type":"ping"}}\n\n' full_response = b"" @@ -367,7 +368,7 @@ async 
def api_caller(prompt: str) -> str: full_content += content if flow_id: flow_monitor.add_chunk(flow_id, content) - yield f'data: {{"type":"content_block_delta","index":0,"delta":{{"type":"text_delta","text":{json.dumps(content)}}}}}\n\n' + yield f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":0,"delta":{{"type":"text_delta","text":{json.dumps(content)}}}}}\n\n' except Exception: pass pos += total_len @@ -376,17 +377,17 @@ async def api_caller(prompt: str) -> str: result = parse_event_stream_full(full_response) - yield f'data: {{"type":"content_block_stop","index":0}}\n\n' + yield f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":0}}\n\n' if result["tool_uses"]: for i, tool_use in enumerate(result["tool_uses"], 1): - yield f'data: {{"type":"content_block_start","index":{i},"content_block":{{"type":"tool_use","id":"{tool_use["id"]}","name":"{tool_use["name"]}","input":{{}}}}}}\n\n' - yield f'data: {{"type":"content_block_delta","index":{i},"delta":{{"type":"input_json_delta","partial_json":{json.dumps(json.dumps(tool_use["input"]))}}}}}\n\n' - yield f'data: {{"type":"content_block_stop","index":{i}}}\n\n' + yield f'event: content_block_start\ndata: {{"type":"content_block_start","index":{i},"content_block":{{"type":"tool_use","id":"{tool_use["id"]}","name":"{tool_use["name"]}","input":{{}}}}}}\n\n' + yield f'event: content_block_delta\ndata: {{"type":"content_block_delta","index":{i},"delta":{{"type":"input_json_delta","partial_json":{json.dumps(json.dumps(tool_use["input"]))}}}}}\n\n' + yield f'event: content_block_stop\ndata: {{"type":"content_block_stop","index":{i}}}\n\n' stop_reason = result["stop_reason"] - yield f'data: {{"type":"message_delta","delta":{{"stop_reason":"{stop_reason}","stop_sequence":null}},"usage":{{"output_tokens":100}}}}\n\n' - yield f'data: {{"type":"message_stop"}}\n\n' + yield f'event: message_delta\ndata: 
{{"type":"message_delta","delta":{{"stop_reason":"{stop_reason}","stop_sequence":null}},"usage":{{"output_tokens":100}}}}\n\n' + yield f'event: message_stop\ndata: {{"type":"message_stop"}}\n\n' # 完成 Flow if flow_id: @@ -416,7 +417,7 @@ async def api_caller(prompt: str) -> str: continue if flow_id: flow_monitor.fail_flow(flow_id, "timeout_error", "Request timeout after retries", 408) - yield f'data: {{"type":"error","error":{{"type":"api_error","message":"Request timeout after retries"}}}}\n\n' + yield f'event: error\ndata: {{"type":"error","error":{{"type":"api_error","message":"Request timeout after retries"}}}}\n\n' return except httpx.ConnectError: if retry_count < max_retries: @@ -427,7 +428,7 @@ async def api_caller(prompt: str) -> str: continue if flow_id: flow_monitor.fail_flow(flow_id, "connection_error", "Connection error after retries", 502) - yield f'data: {{"type":"error","error":{{"type":"api_error","message":"Connection error after retries"}}}}\n\n' + yield f'event: error\ndata: {{"type":"error","error":{{"type":"api_error","message":"Connection error after retries"}}}}\n\n' return except Exception as e: # 检查是否为可重试的网络错误 @@ -439,7 +440,7 @@ async def api_caller(prompt: str) -> str: continue if flow_id: flow_monitor.fail_flow(flow_id, "api_error", str(e), 500) - yield f'data: {{"type":"error","error":{{"type":"api_error","message":"{str(e)}"}}}}\n\n' + yield f'event: error\ndata: {{"type":"error","error":{{"type":"api_error","message":"{str(e)}"}}}}\n\n' return return StreamingResponse(generate(), media_type="text/event-stream") diff --git a/kiro_proxy/main.py b/kiro_proxy/main.py index cf63f67..00e224f 100644 --- a/kiro_proxy/main.py +++ b/kiro_proxy/main.py @@ -107,6 +107,7 @@ async def models(): {"id": "claude-sonnet-4", "object": "model", "owned_by": "kiro", "name": "Claude Sonnet 4"}, {"id": "claude-haiku-4.5", "object": "model", "owned_by": "kiro", "name": "Claude Haiku 4.5"}, {"id": "claude-opus-4.5", "object": "model", "owned_by": "kiro", 
"name": "Claude Opus 4.5"}, + {"id": "claude-opus-4.6", "object": "model", "owned_by": "kiro", "name": "Claude Opus 4.6"}, ]} diff --git a/kiro_proxy/web/webui.py b/kiro_proxy/web/webui.py index 1f81f9f..ad6bdf2 100644 --- a/kiro_proxy/web/webui.py +++ b/kiro_proxy/web/webui.py @@ -377,6 +377,7 @@ claude-sonnet-4.5⭐⭐⭐⭐ 更强gemini-1.5-pro claude-haiku-4.5⚡ 快速gpt-4o-mini, gpt-3.5-turbo, haiku claude-opus-4.5⭐⭐⭐⭐⭐ 最强o1, o1-preview, opus + claude-opus-4.6🚀 旗舰opus-4.6 auto🤖 自动auto From 7355c2a8d25ff933b734b5b4c5d2c6f7988359b0 Mon Sep 17 00:00:00 2001 From: "shaobo.xie" Date: Mon, 9 Feb 2026 17:30:23 +0800 Subject: [PATCH 2/4] fix: default opus model mapping to claude-opus-4.6 Change the model name mapping logic so that 'opus' defaults to claude-opus-4.6 unless '4.5' is explicitly specified, instead of the previous behavior that defaulted to 4.5 unless '4.6' was specified. --- kiro_proxy/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kiro_proxy/config.py b/kiro_proxy/config.py index f4d31b4..bf56ec4 100644 --- a/kiro_proxy/config.py +++ b/kiro_proxy/config.py @@ -58,7 +58,7 @@ def map_model_name(model: str) -> str: return model model_lower = model.lower() if "opus" in model_lower: - return "claude-opus-4.6" if "4.6" in model_lower else "claude-opus-4.5" + return "claude-opus-4.5" if "4.5" in model_lower else "claude-opus-4.6" if "haiku" in model_lower: return "claude-haiku-4.5" if "sonnet" in model_lower: From 7678b5ed3c9af2f8b79a480bc0fb0b99bf11d2a1 Mon Sep 17 00:00:00 2001 From: "shaobo.xie" Date: Mon, 9 Feb 2026 18:40:11 +0800 Subject: [PATCH 3/4] feat: add auto-refresh for logs tab with 3s interval - Add startLogsAutoRefresh/stopLogsAutoRefresh functions - Auto-refresh logs every 3 seconds when logs tab is active - Stop auto-refresh when switching to other tabs --- kiro_proxy/web/webui.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kiro_proxy/web/webui.py b/kiro_proxy/web/webui.py index ad6bdf2..0f08dc9 
100644 --- a/kiro_proxy/web/webui.py +++ b/kiro_proxy/web/webui.py @@ -603,8 +603,9 @@ $$('.panel').forEach(x=>x.classList.remove('active')); t.classList.add('active'); $('#'+t.dataset.tab).classList.add('active'); + stopLogsAutoRefresh(); if(t.dataset.tab==='monitor'){loadStats();loadQuota();} - if(t.dataset.tab==='logs')loadLogs(); + if(t.dataset.tab==='logs')startLogsAutoRefresh(); if(t.dataset.tab==='accounts')loadAccounts(); if(t.dataset.tab==='flows'){loadFlowStats();loadFlows();} }); @@ -769,6 +770,7 @@ JS_LOGS = ''' // Logs +let logsInterval; async function loadLogs(){ try{ const r=await fetch('/api/logs?limit=50'); @@ -785,6 +787,14 @@ `).join(''); }catch(e){console.error(e)} } +function startLogsAutoRefresh(){ + if(logsInterval)clearInterval(logsInterval); + loadLogs(); + logsInterval=setInterval(loadLogs,3000); +} +function stopLogsAutoRefresh(){ + if(logsInterval){clearInterval(logsInterval);logsInterval=null;} +} ''' From aa8c9d2c8042d1c84ed76e467cda9ad61a0540c5 Mon Sep 17 00:00:00 2001 From: "shaobo.xie" Date: Tue, 10 Feb 2026 11:37:02 +0800 Subject: [PATCH 4/4] fix: ensure request logging in all error/success paths across handlers - Add inline logging for all stream error/success exit points in anthropic handler - Wrap retry loops in try/finally for gemini and openai handlers to guarantee logging - Add comprehensive logging for responses handler (both stream and non-stream) - Use should_log flag in anthropic non-stream handler to avoid duplicate logs --- kiro_proxy/handlers/anthropic.py | 58 +++++++++++++++- kiro_proxy/handlers/gemini.py | 33 +++++----- kiro_proxy/handlers/openai.py | 49 +++++++------- kiro_proxy/handlers/responses.py | 110 +++++++++++++++++++++++++++---- 4 files changed, 197 insertions(+), 53 deletions(-) diff --git a/kiro_proxy/handlers/anthropic.py b/kiro_proxy/handlers/anthropic.py index 28599a4..2222a04 100644 --- a/kiro_proxy/handlers/anthropic.py +++ b/kiro_proxy/handlers/anthropic.py @@ -244,6 +244,13 @@ async def generate(): 
if flow_id: flow_monitor.fail_flow(flow_id, "rate_limit_error", "All accounts rate limited", 429) yield f'event: error\ndata: {{"type":"error","error":{{"type":"rate_limit_error","message":"All accounts rate limited"}}}}\n\n' + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, timestamp=time.time(), method="POST", path="/v1/messages", + model=model, account_id=current_account.id if current_account else None, + status=429, duration_ms=duration, error="All accounts rate limited" + )) + stats_manager.record_request(account_id=current_account.id if current_account else "unknown", model=model, success=False, latency_ms=duration) return # 处理可重试的服务端错误 @@ -257,6 +264,13 @@ async def generate(): if flow_id: flow_monitor.fail_flow(flow_id, "api_error", "Server error after retries", response.status_code) yield f'event: error\ndata: {{"type":"error","error":{{"type":"api_error","message":"Server error after retries"}}}}\n\n' + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, timestamp=time.time(), method="POST", path="/v1/messages", + model=model, account_id=current_account.id if current_account else None, + status=response.status_code, duration_ms=duration, error="Server error after retries" + )) + stats_manager.record_request(account_id=current_account.id if current_account else "unknown", model=model, success=False, latency_ms=duration) return if response.status_code != 200: @@ -327,6 +341,13 @@ async def api_caller(prompt: str) -> str: if flow_id: flow_monitor.fail_flow(flow_id, error_type, error_msg, response.status_code, error_str) yield f'event: error\ndata: {{"type":"error","error":{{"type":"{error_type}","message":"{error_msg}"}}}}\n\n' + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, timestamp=time.time(), method="POST", path="/v1/messages", + model=model, account_id=current_account.id if current_account else None, + status=response.status_code, 
duration_ms=duration, error=error_msg + )) + stats_manager.record_request(account_id=current_account.id if current_account else "unknown", model=model, success=False, latency_ms=duration) return # 标记开始流式传输 @@ -406,6 +427,13 @@ async def api_caller(prompt: str) -> str: current_account.request_count += 1 current_account.last_used = time.time() get_rate_limiter().record_request(current_account.id) + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, timestamp=time.time(), method="POST", path="/v1/messages", + model=model, account_id=current_account.id if current_account else None, + status=200, duration_ms=duration, error=None + )) + stats_manager.record_request(account_id=current_account.id if current_account else "unknown", model=model, success=True, latency_ms=duration) return except httpx.TimeoutException: @@ -418,6 +446,13 @@ async def api_caller(prompt: str) -> str: if flow_id: flow_monitor.fail_flow(flow_id, "timeout_error", "Request timeout after retries", 408) yield f'event: error\ndata: {{"type":"error","error":{{"type":"api_error","message":"Request timeout after retries"}}}}\n\n' + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, timestamp=time.time(), method="POST", path="/v1/messages", + model=model, account_id=current_account.id if current_account else None, + status=408, duration_ms=duration, error="Request timeout after retries" + )) + stats_manager.record_request(account_id=current_account.id if current_account else "unknown", model=model, success=False, latency_ms=duration) return except httpx.ConnectError: if retry_count < max_retries: @@ -429,6 +464,13 @@ async def api_caller(prompt: str) -> str: if flow_id: flow_monitor.fail_flow(flow_id, "connection_error", "Connection error after retries", 502) yield f'event: error\ndata: {{"type":"error","error":{{"type":"api_error","message":"Connection error after retries"}}}}\n\n' + duration = (time.time() - start_time) * 1000 + 
state.add_log(RequestLog( + id=log_id, timestamp=time.time(), method="POST", path="/v1/messages", + model=model, account_id=current_account.id if current_account else None, + status=502, duration_ms=duration, error="Connection error after retries" + )) + stats_manager.record_request(account_id=current_account.id if current_account else "unknown", model=model, success=False, latency_ms=duration) return except Exception as e: # 检查是否为可重试的网络错误 @@ -441,6 +483,13 @@ async def api_caller(prompt: str) -> str: if flow_id: flow_monitor.fail_flow(flow_id, "api_error", str(e), 500) yield f'event: error\ndata: {{"type":"error","error":{{"type":"api_error","message":"{str(e)}"}}}}\n\n' + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, timestamp=time.time(), method="POST", path="/v1/messages", + model=model, account_id=current_account.id if current_account else None, + status=500, duration_ms=duration, error=str(e) + )) + stats_manager.record_request(account_id=current_account.id if current_account else "unknown", model=model, success=False, latency_ms=duration) return return StreamingResponse(generate(), media_type="text/event-stream") @@ -453,8 +502,10 @@ async def _handle_non_stream(kiro_request, headers, account, model, log_id, star current_account = account max_retries = 2 retry_ctx = RetryableRequest(max_retries=2) + should_log = False for retry in range(max_retries + 1): + should_log = False try: async with httpx.AsyncClient(verify=False, timeout=300) as client: response = await client.post(KIRO_API_URL, json=kiro_request, headers=headers) @@ -548,9 +599,11 @@ async def api_caller(prompt: str) -> str: ), ) + should_log = True return convert_kiro_response_to_anthropic(result, model, f"msg_{log_id}") except HTTPException: + should_log = True raise except httpx.TimeoutException as e: error_msg = f"Request timeout: {e}" @@ -561,6 +614,7 @@ async def api_caller(prompt: str) -> str: continue if flow_id: flow_monitor.fail_flow(flow_id, 
"timeout_error", "Request timeout after retries", 408) + should_log = True raise HTTPException(408, "Request timeout after retries") except httpx.ConnectError as e: error_msg = f"Connection error: {e}" @@ -571,6 +625,7 @@ async def api_caller(prompt: str) -> str: continue if flow_id: flow_monitor.fail_flow(flow_id, "connection_error", "Connection error after retries", 502) + should_log = True raise HTTPException(502, "Connection error after retries") except Exception as e: error_msg = str(e) @@ -582,9 +637,10 @@ async def api_caller(prompt: str) -> str: continue if flow_id: flow_monitor.fail_flow(flow_id, "api_error", str(e), 500) + should_log = True raise HTTPException(500, str(e)) finally: - if retry == max_retries or status_code == 200: + if should_log: duration = (time.time() - start_time) * 1000 state.add_log(RequestLog( id=log_id, diff --git a/kiro_proxy/handlers/gemini.py b/kiro_proxy/handlers/gemini.py index 3e72fa9..6e4191a 100644 --- a/kiro_proxy/handlers/gemini.py +++ b/kiro_proxy/handlers/gemini.py @@ -119,8 +119,9 @@ async def call_summary(prompt: str) -> str: content = "" current_account = account max_retries = 2 - - for retry in range(max_retries + 1): + + try: + for retry in range(max_retries + 1): try: async with httpx.AsyncClient(verify=False, timeout=120) as client: resp = await client.post(KIRO_API_URL, json=kiro_request, headers=headers) @@ -239,20 +240,20 @@ async def call_summary(prompt: str) -> str: await asyncio.sleep(0.5 * (2 ** retry)) continue raise HTTPException(500, str(e)) - - # 记录日志 - duration = (time.time() - start_time) * 1000 - state.add_log(RequestLog( - id=log_id, - timestamp=time.time(), - method="POST", - path=f"/v1/models/{model_name}:generateContent", - model=model, - account_id=current_account.id if current_account else None, - status=status_code, - duration_ms=duration, - error=error_msg - )) + finally: + # 记录日志 + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, + timestamp=time.time(), 
+ method="POST", + path=f"/v1/models/{model_name}:generateContent", + model=model, + account_id=current_account.id if current_account else None, + status=status_code, + duration_ms=duration, + error=error_msg + )) # 使用转换函数生成 Gemini 格式响应 return convert_kiro_response_to_gemini(result, model) diff --git a/kiro_proxy/handlers/openai.py b/kiro_proxy/handlers/openai.py index f41b79f..a1b4f8e 100644 --- a/kiro_proxy/handlers/openai.py +++ b/kiro_proxy/handlers/openai.py @@ -118,8 +118,9 @@ async def call_summary(prompt: str) -> str: content = "" current_account = account max_retries = 2 - - for retry in range(max_retries + 1): + + try: + for retry in range(max_retries + 1): try: async with httpx.AsyncClient(verify=False, timeout=120) as client: resp = await client.post(KIRO_API_URL, json=kiro_request, headers=headers) @@ -239,28 +240,28 @@ async def call_summary(prompt: str) -> str: await asyncio.sleep(0.5 * (2 ** retry)) continue raise HTTPException(500, str(e)) - - # 记录日志 - duration = (time.time() - start_time) * 1000 - state.add_log(RequestLog( - id=log_id, - timestamp=time.time(), - method="POST", - path="/v1/chat/completions", - model=model, - account_id=current_account.id if current_account else None, - status=status_code, - duration_ms=duration, - error=error_msg - )) - - # 记录统计 - stats_manager.record_request( - account_id=current_account.id if current_account else "unknown", - model=model, - success=status_code == 200, - latency_ms=duration - ) + finally: + # 记录日志 + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, + timestamp=time.time(), + method="POST", + path="/v1/chat/completions", + model=model, + account_id=current_account.id if current_account else None, + status=status_code, + duration_ms=duration, + error=error_msg + )) + + # 记录统计 + stats_manager.record_request( + account_id=current_account.id if current_account else "unknown", + model=model, + success=status_code == 200, + latency_ms=duration + ) if stream: async def 
generate(): diff --git a/kiro_proxy/handlers/responses.py index 95342b6..5fbe443 100644 --- a/kiro_proxy/handlers/responses.py +++ b/kiro_proxy/handlers/responses.py @@ -515,17 +515,47 @@ async def api_caller(prompt: str) -> str: return await _handle_stream(kiro_request, headers, account, model, log_id, start_time) # 非流式 - async with httpx.AsyncClient(verify=False, timeout=120) as client: - resp = await client.post(KIRO_API_URL, json=kiro_request, headers=headers) - if resp.status_code != 200: - raise HTTPException(resp.status_code, resp.text) - - result = parse_event_stream_full(resp.content) - account.request_count += 1 - account.last_used = time.time() - get_rate_limiter().record_request(account.id) - - return _build_response(result, model, log_id) + status_code = 0 + error_msg = None + try: + async with httpx.AsyncClient(verify=False, timeout=120) as client: + resp = await client.post(KIRO_API_URL, json=kiro_request, headers=headers) + status_code = resp.status_code + if resp.status_code != 200: + error_msg = resp.text[:500] + raise HTTPException(resp.status_code, resp.text) + + result = parse_event_stream_full(resp.content) + account.request_count += 1 + account.last_used = time.time() + get_rate_limiter().record_request(account.id) + + return _build_response(result, model, log_id) + except HTTPException: + raise + except Exception as e: + error_msg = str(e) + status_code = 500 + raise + finally: + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, + timestamp=time.time(), + method="POST", + path="/v1/responses", + model=model, + account_id=account.id if account else None, + status=status_code, + duration_ms=duration, + error=error_msg + )) + stats_manager.record_request( + account_id=account.id if account else "unknown", + model=model, + success=status_code == 200, + latency_ms=duration + ) def _build_response(result: dict, model: str, response_id: str) -> dict: @@ -655,6 +685,24 @@ async
def generate(): "error": {"code": error_code, "message": error_msg[:200]} } }) + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, + timestamp=time.time(), + method="POST", + path="/v1/responses (stream)", + model=model, + account_id=account.id if account else None, + status=response.status_code, + duration_ms=duration, + error=error_msg[:200] + )) + stats_manager.record_request( + account_id=account.id if account else "unknown", + model=model, + success=False, + latency_ms=duration + ) return # 1. response.created @@ -720,6 +768,24 @@ async def generate(): "error": {"code": "internal_error", "message": str(e)[:200]} } }) + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, + timestamp=time.time(), + method="POST", + path="/v1/responses (stream)", + model=model, + account_id=account.id if account else None, + status=500, + duration_ms=duration, + error=str(e)[:200] + )) + stats_manager.record_request( + account_id=account.id if account else "unknown", + model=model, + success=False, + latency_ms=duration + ) return # 4. response.output_item.done - 消息完成 @@ -789,7 +855,27 @@ async def generate(): } } }) - + + # 记录成功的流式请求日志 + duration = (time.time() - start_time) * 1000 + state.add_log(RequestLog( + id=log_id, + timestamp=time.time(), + method="POST", + path="/v1/responses (stream)", + model=model, + account_id=account.id if account else None, + status=200, + duration_ms=duration, + error=None + )) + stats_manager.record_request( + account_id=account.id if account else "unknown", + model=model, + success=True, + latency_ms=duration + ) + return StreamingResponse(generate(), media_type="text/event-stream")