|
7 | 7 | but cannot modify any files. |
8 | 8 | """ |
9 | 9 |
|
| 10 | +import asyncio |
10 | 11 | import json |
11 | 12 | import logging |
12 | 13 | import os |
|
25 | 26 | create_conversation, |
26 | 27 | get_messages, |
27 | 28 | ) |
28 | | -from .chat_constants import ROOT_DIR |
| 29 | +from .chat_constants import ( |
| 30 | + MAX_CHAT_RATE_LIMIT_RETRIES, |
| 31 | + ROOT_DIR, |
| 32 | + calculate_rate_limit_backoff, |
| 33 | + check_rate_limit_error, |
| 34 | +) |
29 | 35 |
|
30 | 36 | # Load environment variables from .env file if present |
31 | 37 | load_dotenv() |
@@ -393,39 +399,66 @@ async def _query_claude(self, message: str) -> AsyncGenerator[dict, None]: |
393 | 399 |
|
394 | 400 | full_response = "" |
395 | 401 |
|
396 | | - # Stream the response |
397 | | - async for msg in self.client.receive_response(): |
398 | | - msg_type = type(msg).__name__ |
399 | | - |
400 | | - if msg_type == "AssistantMessage" and hasattr(msg, "content"): |
401 | | - for block in msg.content: |
402 | | - block_type = type(block).__name__ |
403 | | - |
404 | | - if block_type == "TextBlock" and hasattr(block, "text"): |
405 | | - text = block.text |
406 | | - if text: |
407 | | - full_response += text |
408 | | - yield {"type": "text", "content": text} |
409 | | - |
410 | | - elif block_type == "ToolUseBlock" and hasattr(block, "name"): |
411 | | - tool_name = block.name |
412 | | - tool_input = getattr(block, "input", {}) |
| 402 | + # Stream the response (with rate-limit retry) |
| 403 | + for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1): |
| 404 | + try: |
| 405 | + async for msg in self.client.receive_response(): |
| 406 | + msg_type = type(msg).__name__ |
| 407 | + |
| 408 | + if msg_type == "AssistantMessage" and hasattr(msg, "content"): |
| 409 | + for block in msg.content: |
| 410 | + block_type = type(block).__name__ |
| 411 | + |
| 412 | + if block_type == "TextBlock" and hasattr(block, "text"): |
| 413 | + text = block.text |
| 414 | + if text: |
| 415 | + full_response += text |
| 416 | + yield {"type": "text", "content": text} |
| 417 | + |
| 418 | + elif block_type == "ToolUseBlock" and hasattr(block, "name"): |
| 419 | + tool_name = block.name |
| 420 | + tool_input = getattr(block, "input", {}) |
| 421 | + |
| 422 | + # Intercept ask_user tool calls -> yield as question message |
| 423 | + if tool_name == "mcp__features__ask_user": |
| 424 | + questions = tool_input.get("questions", []) |
| 425 | + if questions: |
| 426 | + yield { |
| 427 | + "type": "question", |
| 428 | + "questions": questions, |
| 429 | + } |
| 430 | + continue |
413 | 431 |
|
414 | | - # Intercept ask_user tool calls -> yield as question message |
415 | | - if tool_name == "mcp__features__ask_user": |
416 | | - questions = tool_input.get("questions", []) |
417 | | - if questions: |
418 | 432 | yield { |
419 | | - "type": "question", |
420 | | - "questions": questions, |
| 433 | + "type": "tool_call", |
| 434 | + "tool": tool_name, |
| 435 | + "input": tool_input, |
421 | 436 | } |
422 | | - continue |
423 | | - |
424 | | - yield { |
425 | | - "type": "tool_call", |
426 | | - "tool": tool_name, |
427 | | - "input": tool_input, |
428 | | - } |
| 437 | + # Completed successfully — break out of retry loop |
| 438 | + break |
| 439 | + except Exception as exc: |
| 440 | + is_rate_limit, retry_secs = check_rate_limit_error(exc) |
| 441 | + if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES: |
| 442 | + delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt) |
| 443 | + logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s") |
| 444 | + yield { |
| 445 | + "type": "rate_limited", |
| 446 | + "retry_in": delay, |
| 447 | + "attempt": _attempt + 1, |
| 448 | + "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES, |
| 449 | + } |
| 450 | + await asyncio.sleep(delay) |
| 451 | + await self.client.query(message) |
| 452 | + continue |
| 453 | + if is_rate_limit: |
| 454 | + logger.error("Rate limit retries exhausted for assistant chat") |
| 455 | + yield {"type": "error", "content": "Rate limited. Please try again later."} |
| 456 | + return |
| 457 | + # Non-rate-limit MessageParseError: log and break (don't crash) |
| 458 | + if type(exc).__name__ == "MessageParseError": |
| 459 | + logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}") |
| 460 | + break |
| 461 | + raise |
429 | 462 |
|
430 | 463 | # Store the complete response in the database |
431 | 464 | if full_response and self.conversation_id: |
|
0 commit comments