|
20 | 20 | from anton.core.tools.tool_defs import SCRATCHPAD_TOOL, MEMORIZE_TOOL, RECALL_TOOL, ToolDef |
21 | 21 | from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result |
22 | 22 |
|
| 23 | +from anton.explainability import ExplainabilityCollector, ExplainabilityStore |
| 24 | +from anton.llm.openai import build_chat_completion_kwargs |
| 25 | + |
23 | 26 | from anton.utils.datasources import ( |
24 | 27 | build_datasource_context, |
25 | 28 | scrub_credentials, |
@@ -136,6 +139,10 @@ def __init__( |
136 | 139 | workspace_path=workspace.base if workspace else None, |
137 | 140 | ) |
138 | 141 | self.tool_registry = ToolRegistry() |
| 142 | + self._explainability_store = ( |
| 143 | + ExplainabilityStore(workspace.base) if workspace is not None else None |
| 144 | + ) |
| 145 | + self._active_explainability: ExplainabilityCollector | None = None |
139 | 146 |
|
140 | 147 | @property |
141 | 148 | def history(self) -> list[dict]: |
@@ -184,6 +191,44 @@ def _persist_history(self) -> None: |
184 | 191 | if self._history_store and self._session_id: |
185 | 192 | self._history_store.save(self._session_id, self._history) |
186 | 193 |
|
def _record_cell_explainability(
    self, *, pad_name: str, description: str, cell
) -> None:
    """Forward one scratchpad cell's execution details to the active collector.

    Silently does nothing when no explainability collector is active for
    the current turn.

    Keyword Args:
        pad_name: Name of the scratchpad the cell ran in; used for a generic
            step label when no description is available.
        description: Human-readable description of what the cell did.
        cell: Executed cell object; ``explainability_queries``, ``code``,
            ``stdout`` and ``logs`` are read via ``getattr`` with defaults,
            so a cell missing any of them is tolerated.
    """
    collector = self._active_explainability
    if collector is None:
        return

    # Prefer the cell's own description; otherwise fall back to a
    # generic per-pad label. If both are empty, record no step.
    if description:
        collector.add_scratchpad_step(description)
    elif pad_name:
        collector.add_scratchpad_step(f"work in scratchpad {pad_name}")

    # Structured queries reported by the cell itself; anything that is
    # not a dict is skipped rather than crashing the turn.
    for entry in getattr(cell, "explainability_queries", []) or []:
        if not isinstance(entry, dict):
            continue
        raw_engine = entry.get("engine")
        raw_error = entry.get("error_message")
        collector.add_query(
            datasource=str(entry.get("datasource", "")),
            sql=str(entry.get("sql", "")),
            engine=None if raw_engine is None else str(raw_engine),
            status=str(entry.get("status", "ok")),
            error_message=None if raw_error is None else str(raw_error),
        )

    # Best-effort extraction from the cell's text artifacts, covering
    # sources/queries the cell did not report explicitly.
    collector.add_sources_from_text(
        getattr(cell, "code", ""),
        getattr(cell, "stdout", ""),
        getattr(cell, "logs", ""),
    )
    collector.add_inferred_queries_from_code(getattr(cell, "code", ""))
| 231 | + |
187 | 232 | async def _build_system_prompt(self, user_message: str = "") -> str: |
188 | 233 | import datetime as _dt |
189 | 234 | _now = _dt.datetime.now() |
@@ -566,64 +611,75 @@ async def turn_stream( |
566 | 611 | assistant_text_parts: list[str] = [] |
567 | 612 | _max_auto_retries = 2 |
568 | 613 | _retry_count = 0 |
| 614 | + self._active_explainability = ExplainabilityCollector( |
| 615 | + self._explainability_store, |
| 616 | + turn=self._turn_count + 1, |
| 617 | + user_message=user_msg_str, |
| 618 | + ) |
569 | 619 |
|
570 | | - while True: |
571 | | - try: |
572 | | - async for event in self._stream_and_handle_tools(user_msg_str): |
573 | | - if isinstance(event, StreamTextDelta): |
574 | | - assistant_text_parts.append(event.text) |
575 | | - yield event |
576 | | - break # completed successfully |
577 | | - except Exception as _agent_exc: |
578 | | - # Token/billing limit — don't retry, let the chat loop handle it |
579 | | - if isinstance(_agent_exc, TokenLimitExceeded): |
580 | | - raise |
581 | | - _retry_count += 1 |
582 | | - if _retry_count <= _max_auto_retries: |
583 | | - # Inject the error into history and let the LLM try to recover |
584 | | - self._history.append( |
585 | | - { |
586 | | - "role": "user", |
587 | | - "content": ( |
588 | | - f"SYSTEM: An error interrupted execution: {_agent_exc}\n\n" |
589 | | - "If you can diagnose and fix the issue, continue working on the task. " |
590 | | - "Adjust your approach to avoid the same error. " |
591 | | - "If this is unrecoverable, summarize what you accomplished and suggest next steps." |
592 | | - ), |
593 | | - } |
594 | | - ) |
595 | | - # Continue the while loop — _stream_and_handle_tools will be called |
596 | | - # again with the error context now in history |
597 | | - continue |
598 | | - else: |
599 | | - # Exhausted retries — stop and summarize for the user |
600 | | - self._history.append( |
601 | | - { |
602 | | - "role": "user", |
603 | | - "content": ( |
604 | | - f"SYSTEM: The task has failed {_retry_count} times. Latest error: {_agent_exc}\n\n" |
605 | | - "Stop retrying. Please:\n" |
606 | | - "1. Summarize what you accomplished so far.\n" |
607 | | - "2. Explain what went wrong in plain language.\n" |
608 | | - "3. Suggest next steps — what the user can try (e.g. rephrase, " |
609 | | - "simplify the request, or ask you to continue from where you left off).\n" |
610 | | - "Be concise and helpful." |
611 | | - ), |
612 | | - } |
613 | | - ) |
614 | | - try: |
615 | | - async for event in self._llm.plan_stream( |
616 | | - system=await self._build_system_prompt(user_msg_str), |
617 | | - messages=self._history, |
618 | | - ): |
619 | | - if isinstance(event, StreamTextDelta): |
620 | | - assistant_text_parts.append(event.text) |
621 | | - yield event |
622 | | - except Exception: |
623 | | - fallback = f"An unexpected error occurred: {_agent_exc}. Please try again or rephrase your request." |
624 | | - assistant_text_parts.append(fallback) |
625 | | - yield StreamTextDelta(text=fallback) |
626 | | - break |
| 620 | + try: |
| 621 | + while True: |
| 622 | + try: |
| 623 | + async for event in self._stream_and_handle_tools(user_msg_str): |
| 624 | + if isinstance(event, StreamTextDelta): |
| 625 | + assistant_text_parts.append(event.text) |
| 626 | + yield event |
| 627 | + break # completed successfully |
| 628 | + except Exception as _agent_exc: |
| 629 | + # Token/billing limit — don't retry, let the chat loop handle it |
| 630 | + if isinstance(_agent_exc, TokenLimitExceeded): |
| 631 | + raise |
| 632 | + _retry_count += 1 |
| 633 | + if _retry_count <= _max_auto_retries: |
| 634 | + # Inject the error into history and let the LLM try to recover |
| 635 | + self._history.append( |
| 636 | + { |
| 637 | + "role": "user", |
| 638 | + "content": ( |
| 639 | + f"SYSTEM: An error interrupted execution: {_agent_exc}\n\n" |
| 640 | + "If you can diagnose and fix the issue, continue working on the task. " |
| 641 | + "Adjust your approach to avoid the same error. " |
| 642 | + "If this is unrecoverable, summarize what you accomplished and suggest next steps." |
| 643 | + ), |
| 644 | + } |
| 645 | + ) |
| 646 | + # Continue the while loop — _stream_and_handle_tools will be called |
| 647 | + # again with the error context now in history |
| 648 | + continue |
| 649 | + else: |
| 650 | + # Exhausted retries — stop and summarize for the user |
| 651 | + self._history.append( |
| 652 | + { |
| 653 | + "role": "user", |
| 654 | + "content": ( |
| 655 | + f"SYSTEM: The task has failed {_retry_count} times. Latest error: {_agent_exc}\n\n" |
| 656 | + "Stop retrying. Please:\n" |
| 657 | + "1. Summarize what you accomplished so far.\n" |
| 658 | + "2. Explain what went wrong in plain language.\n" |
| 659 | + "3. Suggest next steps — what the user can try (e.g. rephrase, " |
| 660 | + "simplify the request, or ask you to continue from where you left off).\n" |
| 661 | + "Be concise and helpful." |
| 662 | + ), |
| 663 | + } |
| 664 | + ) |
| 665 | + try: |
| 666 | + async for event in self._llm.plan_stream( |
| 667 | + system=await self._build_system_prompt(user_msg_str), |
| 668 | + messages=self._history, |
| 669 | + ): |
| 670 | + if isinstance(event, StreamTextDelta): |
| 671 | + assistant_text_parts.append(event.text) |
| 672 | + yield event |
| 673 | + except Exception: |
| 674 | + fallback = f"An unexpected error occurred: {_agent_exc}. Please try again or rephrase your request." |
| 675 | + assistant_text_parts.append(fallback) |
| 676 | + yield StreamTextDelta(text=fallback) |
| 677 | + break |
| 678 | + finally: |
| 679 | + if self._active_explainability is not None: |
| 680 | + self._active_explainability.finalize( |
| 681 | + "".join(assistant_text_parts)[:2000] |
| 682 | + ) |
627 | 683 |
|
628 | 684 | # Log assistant response to episodic memory |
629 | 685 | if self._episodic is not None and assistant_text_parts: |
|
0 commit comments