diff --git a/loops/model-evaluation/plan.md b/loops/model-evaluation/plan.md new file mode 100644 index 00000000..1fd19bf3 --- /dev/null +++ b/loops/model-evaluation/plan.md @@ -0,0 +1,66 @@ +# Loop — Open-Source (Ollama) Model Evaluation + +**High-level plan.** Execute end-to-end, making reasonable decisions when details are +ambiguous, and record them in the final report (`report.md`; all working artifacts go +under `work/`). + +## Goal + +Benchmark open-source (Ollama) models that drive Data Formulator's analyst agents — +inspect tabular data, write transformation code, and commit a visualization — and report +**two independent axes**: + +1. **Success rate** — does the agent actually produce a rendered chart? (reliability) +2. **Quality when produced** — how good is the chart when it finishes, scored 0-100 by a + code + vision grader? (competence) + +Keep them separate: a model can write good code yet fail to deliver it through the +protocol. The dominant open-model failure mode is **driving the tool/transport, not +analyzing the data**, so each model runs through more than one agent transport: + +- `analyst` — native function/tool calls (with a content-JSON salvage fallback). +- `mini` — single-decision, pure-prompt JSON contract; the production low-cost agent. + +Always include the Azure references `gpt-5.5`, `gpt-5-mini` as the baseline. + +## Data + +A frozen **45-question** set across **15 datasets** from the `../visbench` benchmark, fed +as the **raw / grouped source tables** (not VisBench's derived single-table `data.csv`) so +the agent must do its own joins: + +- **vega_datasets** single tables — 9 single-table questions. +- **TidyTuesday** multi-CSV weeks — 18 multi-table questions. +- **Spider** databases grouped by DB — 18 multi-table questions. + +Reuse VisBench's quality-filtered question and reference chart for each item. The single- +vs multi-table split (9 / 36) is the axis along which models diverge most. + +## Steps + +1. 
**Select & pull models** — the open roster across size tiers (1B → 120B) plus the two + Azure references. +2. **Prepare the benchmark** — materialize the 45 questions as raw/grouped tables and + freeze the VisBench questions + reference charts, reused identically across every model + and agent. +3. **Run agents** — every `(agent, model, question)` cell with `--agent` in `analyst` + and `mini`; capture the event stream and render each chart to PNG. Frozen controls: + `max_iterations = 5`, 240 s timeout, resumable. +4. **Score (two phases, GPT-5.5 grader):** + - **Phase 1 — reliability:** five sequential gates (responded → emitted action → code + ran → output → **produced chart**). The chart gate is decisive and defines the + success rate; only those runs proceed. + - **Phase 2 — quality (0-100, produced charts only):** code review vs the question + (0-50) + vision review of the rendered PNG vs the reference chart (0-50). +5. **Aggregate & report** — report the two axes separately (never collapse them); for + ranking only, derive success-weighted quality (Phase 2 over all 45, no-chart = 0) and + combined = `0.3 × (success_rate × 100) + 0.7 × success-weighted quality`. Always show + the single- vs multi-table split, the per-gate drop-off, comparison to the references, + and recommendations per size tier (with which `--agent`). + +## Principles + +- **Two axes stay separate** — `combined` is for ranking only. +- **Freeze controls** — same questions, grader, `max_iterations`, and timeout across every cell. +- **`mini` is the production low-cost agent** — `simple` was removed; don't run `--agent simple`. +- **`uv` only**, no secrets (Azure auth via Entra ID), resumable, all artifacts under `work/`. 
diff --git a/py-src/data_formulator/agent_config.py b/py-src/data_formulator/agent_config.py index bec4c670..3e4c2b51 100644 --- a/py-src/data_formulator/agent_config.py +++ b/py-src/data_formulator/agent_config.py @@ -48,15 +48,14 @@ # ── Heavy: code-gen, multi-step, tool-using ───────────────────────────── "data_transform": "low", # generates Python transform scripts "data_rec": "low", # chart / transformation recommendation - "data_agent": "low", # multi-step exploration agent - "report_gen": "low", # narrative + inspect/embed tools + "analyst": "low", # unified multi-step exploration + report agent "interactive_explore": "low", # exploration idea agent "data_loading_chat": "low", # conversational data loading w/ tools # ── Light: single-turn extractors / classifiers / formatters ──────────── "data_load": "minimal", # one-shot type inference "data_clean": "minimal", # extract tables from text - "experience_distill": "minimal", # summarise an analysis context + "workflow_distill": "minimal", # summarise an analysis context "chart_insight": "minimal", # title + 1–3 takeaways from a chart "chart_restyle": "minimal", # apply style edits to a Vega-Lite spec "code_explanation": "minimal", # describe derived fields diff --git a/py-src/data_formulator/agents/__init__.py b/py-src/data_formulator/agents/__init__.py index e602fd67..2431c539 100644 --- a/py-src/data_formulator/agents/__init__.py +++ b/py-src/data_formulator/agents/__init__.py @@ -1,22 +1,13 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-from data_formulator.agents.agent_data_transform import DataTransformationAgent -from data_formulator.agents.agent_data_rec import DataRecAgent - from data_formulator.agents.agent_data_load import DataLoadAgent from data_formulator.agents.agent_sort_data import SortDataAgent from data_formulator.agents.agent_simple import SimpleAgents -from data_formulator.agents.agent_interactive_explore import InteractiveExploreAgent -from data_formulator.agents.agent_chart_insight import ChartInsightAgent from data_formulator.agents.agent_chart_restyle import ChartRestyleAgent __all__ = [ - "DataTransformationAgent", - "DataRecAgent", "DataLoadAgent", "SortDataAgent", - "InteractiveExploreAgent", - "ChartInsightAgent", "ChartRestyleAgent", ] diff --git a/py-src/data_formulator/agents/agent_chart_insight.py b/py-src/data_formulator/agents/agent_chart_insight.py deleted file mode 100644 index a3ae8aba..00000000 --- a/py-src/data_formulator/agents/agent_chart_insight.py +++ /dev/null @@ -1,150 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -from data_formulator.agent_config import reasoning_effort_for -from data_formulator.agents.agent_utils import generate_data_summary, extract_json_objects -from data_formulator.agents.agent_language import inject_language_instruction - -import logging - -logger = logging.getLogger(__name__) - -_AGENT_ID = "chart_insight" - - -SYSTEM_PROMPT = r'''You are a data analyst helping users understand their visualizations. -You are given a chart image along with metadata about the chart type, data fields used, and a summary of the underlying data (including schema, value ranges, and sample rows). - -Use both the chart image and the data summary to produce: - -1. **title**: A short, descriptive title for the chart (5-10 words). It should summarize what the chart is about — the subject, the dimensions compared, and the scope. Do not include the chart type in the title. Write it in title case. - -2. 
**takeaways**: A list of 1-3 key findings or insights from the chart. Each takeaway should be one sentence. Highlight notable patterns, trends, outliers, or comparisons visible in the chart. Be specific — reference actual values, categories, or trends from the data when possible. - -Respond with a JSON object in exactly this format (no markdown fences): - -{"title": "...", "takeaways": ["...", "..."]} -''' - - -class ChartInsightAgent(object): - - def __init__(self, client, workspace=None, language_instruction="", knowledge_store=None): - self.client = client - self.workspace = workspace - self.language_instruction = language_instruction - self._knowledge_store = knowledge_store - - def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=1): - """ - Generate insight for a chart. - - Args: - chart_image_base64: Base64-encoded PNG data URL of the chart - chart_type: The type of chart (e.g., "Bar Chart", "Scatter Plot") - field_names: List of field names used in the chart encodings - input_tables: Optional list of input table dicts for data context - n: Number of candidates to generate - """ - - # Build context about the chart - context_parts = [f"Chart type: {chart_type}"] - context_parts.append(f"Fields used: {', '.join(field_names)}") - - if input_tables and self.workspace: - data_summary = generate_data_summary( - input_tables, workspace=self.workspace, - include_data_samples=True, row_sample_size=3, - ) - context_parts.append(f"\nData summary:\n{data_summary}") - - # Search relevant knowledge for analysis context - if self._knowledge_store: - try: - search_query = " ".join([chart_type] + field_names[:5]).strip() - if search_query: - relevant = self._knowledge_store.search( - search_query, categories=["experiences"], max_results=3, - ) - if relevant: - kb_parts = ["Relevant analysis knowledge:"] - for item in relevant: - kb_parts.append(f"- {item['title']}: {item['snippet'][:200]}") - context_parts.append("\n".join(kb_parts)) - except 
Exception: - logger.warning("Failed to search knowledge experiences", exc_info=True) - - context = "\n".join(context_parts) - - # Build the message with image - user_content = [ - { - "type": "text", - "text": f"[CHART METADATA]\n{context}\n\n[CHART IMAGE]\nHere is the chart to analyze:" - }, - { - "type": "image_url", - "image_url": { - "url": f"data:image/png;base64,{chart_image_base64}", - "detail": "high" - } - } - ] - - system_prompt = SYSTEM_PROMPT - - if self._knowledge_store: - system_prompt += self._knowledge_store.format_rules_block() - - system_prompt = inject_language_instruction(system_prompt, self.language_instruction) - - messages = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_content} - ] - - logger.debug(f"ChartInsightAgent: analyzing {chart_type} chart with fields {field_names}") - logger.info(f"[ChartInsightAgent] run start | chart_type={chart_type}") - - response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model)) - - candidates = [] - for choice in response.choices: - logger.debug("\n=== Chart insight result ===>\n") - logger.debug(choice.message.content + "\n") - - response_content = choice.message.content - title = "" - takeaways = [] - - # Parse JSON response - json_blocks = extract_json_objects(response_content + "\n") - for parsed in json_blocks: - title = parsed.get('title', '') - takeaways = parsed.get('takeaways', []) - if isinstance(takeaways, str): - takeaways = [takeaways] - if title or takeaways: - break - - if title or takeaways: - result = { - 'status': 'ok', - 'title': title, - 'takeaways': takeaways, - } - else: - logger.error(f"unable to parse insight from response: {response_content}") - result = { - 'status': 'other error', - 'content': 'unable to generate chart insight' - } - - result['dialog'] = [*messages, {"role": choice.message.role, "content": choice.message.content}] - result['agent'] = 'ChartInsightAgent' - - 
candidates.append(result) - - status = candidates[0].get('status', '?') if candidates else 'empty' - logger.info(f"[ChartInsightAgent] run done | status={status}") - return candidates diff --git a/py-src/data_formulator/agents/agent_chart_restyle.py b/py-src/data_formulator/agents/agent_chart_restyle.py index 61edc4d6..657d0ec5 100644 --- a/py-src/data_formulator/agents/agent_chart_restyle.py +++ b/py-src/data_formulator/agents/agent_chart_restyle.py @@ -50,17 +50,46 @@ Hard rules: 1. Do not include a `data` block in your output. The caller re-attaches live rows. 2. Only reference columns that exist in the data sample. +3. Preserve field-name escaping EXACTLY. Column names containing `.`, `[`, or `]` are escaped with a backslash in `field` references (e.g. a column literally named `Tomatoes, per lb.` appears as `"field": "Tomatoes, per lb\\."`). Keep those backslashes intact — do not drop or add them. An unescaped `.` makes Vega-Lite read it as a nested-object path, which breaks the chart (empty plot). -Out-of-scope: only refuse if the request genuinely needs data that isn't in the table — e.g. joining another dataset, a column that doesn't exist and can't be derived from existing ones. In that case return: +Out-of-scope: refuse if the request genuinely needs data that simply isn't there and can't be derived in Vega-Lite — e.g. joining a separate dataset, or a column that doesn't exist and can't be computed from the existing ones. Anything expressible with a Vega-Lite `transform` (aggregations, calculated fields, filters, folds, window/joinaggregate, etc.) is in scope — add the transforms you need. If you do refuse, return: {"out_of_scope": true, "rationale": ""} Otherwise return: { "vlSpec": , "label": "", - "rationale": "" + "rationale": "", + "configUI": } +[configUI — generative follow-up controls] +After you produce the new spec, design 2-4 small UI controls that let the user keep tweaking THIS specific variant without re-prompting. 
Pick knobs that are meaningful for the chart you just made (e.g. mark opacity, corner radius, point size, font size, gridlines on/off, label angle, color scheme, legend position). + +Each control declares WHERE in the spec it writes and the allowed VALUES — there is NO code, just a `path` (the location in the spec) plus the value the chosen option writes there. Shapes: +- "key": short unique id, lowercase no spaces, e.g. "opacity" +- "label": short human label, e.g. "opacity" +- "path": array describing the location in the vlSpec to write the value to, e.g. ["mark","opacity"] or ["encoding","x","axis","labelAngle"] or ["config","legend","orient"]. Use array indices (numbers) for arrays, e.g. ["layer",0,"mark","color"]. Intermediate objects are created if missing. +- "type": one of "continuous" | "binary" | "discrete" +- for "continuous": "min", "max", optional "step", and "defaultValue" (number) — the value written at `path` is the number itself +- for "binary": "defaultValue" (true/false) — the boolean is written at `path` +- for "discrete": "options" (array of {"value": , "label": ""}) and "defaultValue" — the chosen option's `value` is written at `path`. The `value` may be a scalar OR a whole object (e.g. a full mark sub-spec or a color array), which the app sets wholesale at `path`. + +Rules for configUI: +- `defaultValue` MUST equal what the spec you returned already encodes at that `path`, so the controls start in sync with the chart. +- Make sure `path` points at a real location in the spec you returned (so toggling actually changes the visible chart). +- Never use "__proto__", "prototype", or "constructor" as a path segment. 
+ +Example configUI: +[ + {"key": "opacity", "label": "opacity", "type": "continuous", "min": 0.2, "max": 1, "step": 0.05, "defaultValue": 0.9, "path": ["mark", "opacity"]}, + {"key": "grid", "label": "gridlines", "type": "binary", "defaultValue": true, "path": ["encoding", "y", "axis", "grid"]}, + {"key": "scheme", "label": "palette", "type": "discrete", "defaultValue": "tableau10", "path": ["encoding", "color", "scale", "scheme"], + "options": [{"value": "tableau10", "label": "tableau"}, {"value": "category10", "label": "category"}, {"value": "set2", "label": "set2"}]} +] + +If no meaningful per-variant control fits, return "configUI": []. + Return ONLY the JSON object — no markdown fences, no commentary. ''' @@ -167,6 +196,7 @@ def run( "vlSpec": cleaned, "rationale": str(parsed.get("rationale", "")).strip(), "label": str(parsed.get("label", "")).strip(), + "configUI": self._sanitize_config_ui(parsed.get("configUI")), } # No usable response. @@ -180,6 +210,93 @@ def run( # Guardrails # ------------------------------------------------------------------ + _FORBIDDEN_PATH_SEGMENTS = {"__proto__", "prototype", "constructor"} + + def _sanitize_config_ui(self, raw: Any) -> list[dict]: + """Validate the LLM-authored configUI array into a clean list. + + Each control is a declarative "write value at path" knob — there is no + code. We validate the path (non-empty, no prototype-polluting segments) + and the per-type params, dropping anything malformed. Returns [] when + nothing is usable. The frontend re-validates as well. + """ + if not isinstance(raw, list): + return [] + out: list[dict] = [] + seen: set[str] = set() + for c in raw: + if not isinstance(c, dict): + continue + key = str(c.get("key", "")).strip() + label = str(c.get("label", "")).strip() + ctype = c.get("type") + if not key or not label or key in seen: + continue + + # Validate path: non-empty list of str / non-negative int, no + # prototype-polluting segments. 
+ raw_path = c.get("path") + if not isinstance(raw_path, list) or len(raw_path) == 0: + continue + path: list = [] + path_ok = True + for seg in raw_path: + if isinstance(seg, bool): + path_ok = False + break + if isinstance(seg, int) and seg >= 0: + path.append(seg) + elif isinstance(seg, str) and seg and seg not in self._FORBIDDEN_PATH_SEGMENTS: + path.append(seg) + else: + path_ok = False + break + if not path_ok: + continue + + if ctype == "binary": + out.append({"key": key, "label": label, "type": "binary", + "path": path, "defaultValue": bool(c.get("defaultValue"))}) + elif ctype == "continuous": + try: + cmin = float(c.get("min")) + cmax = float(c.get("max")) + except (TypeError, ValueError): + continue + if not (cmax > cmin): + continue + entry = {"key": key, "label": label, "type": "continuous", + "path": path, "min": cmin, "max": cmax} + try: + step = float(c.get("step")) + if step > 0: + entry["step"] = step + except (TypeError, ValueError): + pass + try: + entry["defaultValue"] = float(c.get("defaultValue")) + except (TypeError, ValueError): + entry["defaultValue"] = cmin + out.append(entry) + elif ctype == "discrete": + opts_raw = c.get("options") + if not isinstance(opts_raw, list): + continue + options = [ + {"value": o.get("value"), "label": str(o.get("label", "")).strip()} + for o in opts_raw + if isinstance(o, dict) and str(o.get("label", "")).strip() + ] + if not options: + continue + default = c.get("defaultValue", options[0]["value"]) + out.append({"key": key, "label": label, "type": "discrete", + "path": path, "options": options, "defaultValue": default}) + else: + continue + seen.add(key) + return out + def _enforce_guardrails(self, original: dict, candidate: dict) -> dict | None: """Apply post-hoc guardrails to a candidate spec. 
diff --git a/py-src/data_formulator/agents/agent_code_explanation.py b/py-src/data_formulator/agents/agent_code_explanation.py index 43d78535..d783845a 100644 --- a/py-src/data_formulator/agents/agent_code_explanation.py +++ b/py-src/data_formulator/agents/agent_code_explanation.py @@ -17,12 +17,23 @@ For each non-trivial derived field, output: 1. the field name(s) - 2. a short formula — use actual field names (e.g. `Profit = Revenue - Cost`), - and reach for formal math (\sum, \frac, etc.) only when it's the clearest - way to express the computation. + 2. a short formula explaining the computation. + +Pick ONE format per formula — never mix the two: + +- Code span (default, use this for almost everything): wrap the formula in + single backticks and write field names exactly as they appear in the data. + Underscores stay literal — never add backslashes. + e.g. `basket_cost = Bananas + Bread + Milk` (NOT `basket\_cost`) + +- LaTeX (only when you genuinely need math notation such as a summation, + fraction, square root, or a statistical model's defining equation): inline + `\( ... \)` or block `\[ ... \]`. Use short abstract variables (x, n, ...) + so you never need underscores or escaping inside the math. + e.g. \[ \text{Normalized} = \frac{x - \min(x)}{\max(x) - \min(x)} \] A brief one-line description before the formula is allowed when it adds clarity -(e.g. "Within each Major\_category:"). Otherwise keep it to just the formula. +(e.g. "Within each category:"). Otherwise keep it to just the formula. Skip fields whose computation is trivial or obvious from the name (count/min/max/avg/sum, year/decade extraction, simple rename, etc.). @@ -31,10 +42,7 @@ For statistical-analysis code (regression, clustering, hypothesis tests), emit a single entry with `"field": "Statistical Analysis"` containing the -model's defining equation(s). - -LaTeX: inline `\( ... \)`, block `\[ ... \]`, escape underscores as `\_`. 
-Prefer inline for short formulas, block when there's vertical structure. +model's defining equation(s) in LaTeX. If nothing is worth showing, return an empty list. @@ -131,11 +139,11 @@ def extract_decade(date_str): [ { "field": "Norm_Rating, Norm_Gross", - "explanation": "-BSLASH-[ -BSLASH-text{Normalized} = -BSLASH-frac{x - -BSLASH-min(x)}{-BSLASH-max(x) - -BSLASH-min(x)} -BSLASH-]" + "explanation": "\\[ \\text{Normalized} = \\frac{x - \\min(x)}{\\max(x) - \\min(x)} \\]" }, { "field": "Critical_Commercial_Score", - "explanation": "-BSLASH-[ -BSLASH-text{Critical-BSLASH-_Commercial-BSLASH-_Score} = -BSLASH-text{Norm-BSLASH-_Rating} -BSLASH-times -BSLASH-text{Norm-BSLASH-_Gross} -BSLASH-]" + "explanation": "`Critical_Commercial_Score = Norm_Rating * Norm_Gross`" } ] ''' diff --git a/py-src/data_formulator/agents/agent_data_loading_chat.py b/py-src/data_formulator/agents/agent_data_loading_chat.py index 61d3a0e6..55f2640a 100644 --- a/py-src/data_formulator/agents/agent_data_loading_chat.py +++ b/py-src/data_formulator/agents/agent_data_loading_chat.py @@ -1292,7 +1292,7 @@ def _build_system_prompt(self, last_user_text: str = ""): """Build the system prompt with current workspace context. *last_user_text* is used to search the knowledge store for - experiences relevant to the user's current request. Falls back + workflows relevant to the user's current request. Falls back to a generic query when empty. 
""" table_names = "none" @@ -1324,7 +1324,7 @@ def _build_system_prompt(self, last_user_text: str = ""): if self._knowledge_store: prompt += self._knowledge_store.format_rules_block() - # Inject relevant experiences from knowledge store + # Inject relevant workflows from knowledge store if self._knowledge_store: try: search_query = ( @@ -1334,7 +1334,7 @@ def _build_system_prompt(self, last_user_text: str = ""): ) relevant = self._knowledge_store.search( search_query, - categories=["experiences"], + categories=["workflows"], max_results=3, ) if relevant: @@ -1343,7 +1343,7 @@ def _build_system_prompt(self, last_user_text: str = ""): knowledge_block += f"\n### {item['title']}\n{item['snippet']}\n" prompt += "\n\n" + knowledge_block except Exception: - logger.warning("Failed to search knowledge experiences", exc_info=True) + logger.warning("Failed to search knowledge workflows", exc_info=True) if self.language_instruction: prompt += "\n\n" + self.language_instruction diff --git a/py-src/data_formulator/agents/agent_data_rec.py b/py-src/data_formulator/agents/agent_data_rec.py deleted file mode 100644 index 8bd9f054..00000000 --- a/py-src/data_formulator/agents/agent_data_rec.py +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -import json -import time - -from data_formulator.agent_config import reasoning_effort_for -from data_formulator.agents.agent_utils import extract_json_objects, extract_code_from_gpt_response, generate_data_summary, supplement_missing_block, ensure_output_variable_in_code, compose_system_prompt -from data_formulator.agents.agent_diagnostics import AgentDiagnostics -from data_formulator.datalake.parquet_utils import df_to_safe_records -from data_formulator.security.sanitize import sanitize_error_message - -import pandas as pd - -import logging - -logger = logging.getLogger(__name__) - -_AGENT_ID = "data_rec" - -from data_formulator.agents.chart_creation_guide import ( - SHARED_ENVIRONMENT, - SHARED_SEMANTIC_TYPE_REFERENCE, - SHARED_CHART_REFERENCE, - SHARED_STATISTICAL_ANALYSIS, - SHARED_DUCKDB_NOTES, -) - -# ============================================================================= -# DataRecAgent system prompt -# ============================================================================= - -SYSTEM_PROMPT = f'''You are a data scientist who recommends data and visualizations. -Given [CONTEXT] (dataset summaries) and [GOAL] (user intent), recommend a transformed dataset and visualization, then write a Python script to produce it. - -{SHARED_ENVIRONMENT} - -You will produce two outputs: a JSON spec (```json```) and a Python script (```python```). No extra text. - -**Step 1: JSON spec** — infer user intent and recommend a visualization. - -```json -{{{{ - "display_instruction": "", // short verb phrase (<12 words) capturing computation intent. Bold **column names** (semantic matches count). For follow-ups, describe only the new part. 
- "input_tables": [...], // table names from [CONTEXT] to use - "output_fields": [...], // desired output fields (include intermediate fields) - "chart": {{{{ - "chart_type": "", // from [CHART TYPE REFERENCE] - "encodings": {{{{}}}}, // visual channels → output field names - "config": {{{{}}}} // optional styling - }}}}, - "field_metadata": {{{{ // semantic type for each encoding field - "": "Category" // from [SEMANTIC TYPE REFERENCE] - }}}}, - "output_variable": "" // descriptive snake_case name (e.g. "sales_by_region"), not "result_df" -}}}} -``` - -**Data format rules:** -- Output must be tidy (one field per visual channel, like VegaLite/ggplot2). -- For multiple similar columns: reshape to long format (only same semantic type in one column). -- For derived metrics: compute new fields (correlation, difference, profit, etc.). -- Keep encodings to 2–3 channels (x, y, color/size). Add facet only when needed. - -{SHARED_SEMANTIC_TYPE_REFERENCE} - -{SHARED_CHART_REFERENCE} - -{SHARED_STATISTICAL_ANALYSIS} - -**Step 2: Python script** — transform input data to produce a DataFrame with all "output_fields". Keep it simple and readable. The script MUST assign the final result to the variable named in `"output_variable"` from Step 1. - -**Datetime handling:** -- Year → number. Year-month / year-month-day → string ("2020-01" / "2020-01-01"). -- Hour alone → number. Hour:min or h:m:s → string. Never return raw datetime objects. 
- -{SHARED_DUCKDB_NOTES}''' - - -def _combine_rules(text_rules: str, knowledge_rules: list[dict]) -> str: - """Merge text rules and knowledge-file rules into a single string.""" - parts = [] - if text_rules and text_rules.strip(): - parts.append(text_rules.strip()) - for rule in knowledge_rules: - parts.append(f"### {rule['title']}\n{rule['body']}") - return "\n\n".join(parts) - - -class DataRecAgent(object): - - def __init__(self, client, workspace, system_prompt=None, agent_coding_rules="", language_instruction="", max_display_rows=10000, model_info=None, knowledge_store=None): - self.client = client - self.workspace = workspace - self.max_display_rows = max_display_rows - self._model_info = model_info or {} - self._agent_coding_rules = agent_coding_rules - self._language_instruction = language_instruction - - knowledge_rules = knowledge_store.load_always_apply_rules() if knowledge_store else [] - combined_rules = _combine_rules(agent_coding_rules, knowledge_rules) - - if system_prompt is not None: - self._base_prompt = system_prompt - else: - self._base_prompt = SYSTEM_PROMPT - - # Insert language instruction early (after role definition, before technical - # sections) so the LLM's "last impression" remains chart/code rules, - # reducing recency-bias interference on chart-type selection. 
- self.system_prompt = compose_system_prompt( - self._base_prompt, - agent_coding_rules=combined_rules if system_prompt is None else "", - language_instruction=language_instruction, - language_marker="**About the execution environment:**", - ) - - self._diag = AgentDiagnostics( - agent_name="DataRecAgent", - model_info=self._model_info, - base_system_prompt=self._base_prompt, - agent_coding_rules=self._agent_coding_rules, - language_instruction=self._language_instruction, - assembled_system_prompt=self.system_prompt, - ) - - def process_gpt_response(self, input_tables, messages, response, t_llm=None): - """Process GPT response to handle Python code execution""" - t_start = time.time() - t_exec_total = 0.0 - - if isinstance(response, Exception): - raw_error = str(getattr(response, "body", response)) - safe_error = sanitize_error_message(raw_error) - result = {'status': 'other error', 'content': safe_error, - 'diagnostics': self._diag.for_error(messages, error=safe_error)} - return [result] - - candidates = [] - for choice in response.choices: - - logger.debug("\n=== Data recommendation result ===>\n") - logger.debug(choice.message.content + "\n") - - # --- Parse JSON spec and Python code --- - json_blocks = extract_json_objects(choice.message.content + "\n") - refined_goal = None - for jb in json_blocks: - if isinstance(jb, dict): - refined_goal = jb - break - code_blocks = extract_code_from_gpt_response(choice.message.content + "\n", "python") - - # If only one block was produced, request the missing one - refined_goal, code_blocks, _supplement_content, t_supplement = supplement_missing_block( - self.client, messages, choice.message.content, - refined_goal, code_blocks, prefix="[DataRecAgent]" - ) - - # Apply fallbacks for missing JSON - json_fallback_used = refined_goal is None - if refined_goal is None: - refined_goal = {'output_fields': [], 'chart': {'chart_type': "", 'encodings': {}, 'config': {}}, 'output_variable': 'result_df'} - logger.warning( - 
"[DataRecAgent] JSON spec parsing failed — using fallback defaults. " - f"Response snippet: {choice.message.content[:300]!r}" - ) - output_variable = refined_goal.get('output_variable', 'result_df') or 'result_df' - logger.info(f"[DataRecAgent] extracted output_variable={output_variable!r}") - - # Diagnostics tracking - import re as _re - _diag_code = code_blocks[-1] if code_blocks else None - _diag_output_var_in_code = bool( - _diag_code and output_variable - and _re.search(rf'(?:^|\n)\s*{_re.escape(output_variable)}\s*=(?!=)', _diag_code) - ) - _diag_sandbox_mode = None - _diag_exec = {"status": None} - _diag_code_patched = False - - if len(code_blocks) > 0: - code = code_blocks[-1] - - if output_variable and not _diag_output_var_in_code: - code, was_patched, detected_var = ensure_output_variable_in_code(code, output_variable) - _diag_code_patched = was_patched - if was_patched: - logger.info( - f"[DataRecAgent] output_variable {output_variable!r} not in code — " - f"patched: appended `{output_variable} = {detected_var}`" - ) - else: - logger.warning( - f"[DataRecAgent] output_variable {output_variable!r} not in code " - f"and auto-patch found no candidate variable." 
- ) - - try: - from data_formulator.sandbox import create_sandbox - - try: - from flask import current_app - sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local') - except (ImportError, RuntimeError): - sandbox_mode = 'local' - _diag_sandbox_mode = sandbox_mode - - t_exec_start = time.time() - sandbox = create_sandbox(sandbox_mode) - execution_result = sandbox.run_python_code( - code=code, - workspace=self.workspace, - output_variable=output_variable, - ) - t_exec_total += time.time() - t_exec_start - - if execution_result['status'] != 'ok': - diagnostics = execution_result.get("diagnostics", {}) - raw_exec_error = diagnostics.get( - "safe_detail", - execution_result.get('content', execution_result.get('error_message', 'Unknown error')), - ) - safe_exec_error = sanitize_error_message(raw_exec_error) - else: - safe_exec_error = None - _diag_exec = { - "status": execution_result['status'], - "error_message": safe_exec_error, - "available_dataframes": execution_result.get('df_names', []), - } - - if execution_result['status'] == 'ok': - full_df = execution_result['content'] - row_count = len(full_df) - - output_table_name = self.workspace.get_fresh_name(f"d-{output_variable}") - self.workspace.write_parquet(full_df, output_table_name) - - if row_count > self.max_display_rows: - query_output = full_df.head(self.max_display_rows) - else: - query_output = full_df - query_output = query_output.loc[:, ~query_output.columns.duplicated()] - - result = { - "status": "ok", - "code": code, - "content": { - 'rows': df_to_safe_records(query_output), - 'virtual': { - 'table_name': output_table_name, - 'row_count': row_count - } - }, - } - else: - result = { - 'status': 'error', - 'code': code, - 'content': safe_exec_error or 'Unknown error' - } - - except Exception as e: - logger.exception('Error occurred during code execution') - safe_error = sanitize_error_message(f"{type(e).__name__}: {e}") - result = { - 'status': 'other error', - 'code': code, - 
'content': "Unexpected error during code execution.", - 'content_code': 'agent.unexpectedError' - } - _diag_exec = {"status": "exception", "error_message": safe_error} - else: - result = {'status': 'error', 'code': "", 'content': "No code block found in the response. The model is unable to generate code to complete the task.", 'content_code': 'agent.noCodeBlock'} - - _effective_content = choice.message.content - if _supplement_content: - _effective_content += "\n\n" + _supplement_content - result['dialog'] = [*messages, {"role": choice.message.role, "content": _effective_content}] - result['agent'] = 'DataRecAgent' - result['refined_goal'] = refined_goal - - # --- Build diagnostics --- - usage = getattr(response, 'usage', None) - result['diagnostics'] = self._diag.for_response( - messages, - raw_content=choice.message.content, - finish_reason=getattr(choice, 'finish_reason', None), - json_spec=refined_goal, - json_fallback_used=json_fallback_used, - code_found=len(code_blocks) > 0, - code=_diag_code, - output_variable=output_variable, - output_variable_in_code=_diag_output_var_in_code, - code_patched=_diag_code_patched, - supplemented=_supplement_content is not None, - sandbox_mode=_diag_sandbox_mode, - exec_status=_diag_exec.get("status"), - exec_error=_diag_exec.get("error_message"), - exec_df_names=_diag_exec.get("available_dataframes"), - t_llm=t_llm or 0, - t_supplement=t_supplement, - t_exec=t_exec_total, - prompt_tokens=getattr(usage, 'prompt_tokens', None) if usage else None, - completion_tokens=getattr(usage, 'completion_tokens', None) if usage else None, - ) - - candidates.append(result) - - t_total = time.time() - t_start - t_llm_val = t_llm or 0.0 - - logger.debug("=== Recommendation Candidates ===>") - for candidate in candidates: - for key, value in candidate.items(): - if key in ['dialog', 'content', 'diagnostics']: - logger.debug(f"##{key}:\n{str(value)[:1000]}...") - else: - logger.debug(f"## {key}:\n{value}") - - usage = getattr(response, 'usage', 
None) - usage_str = "" - if usage: - usage_str = f" | tokens: in={getattr(usage, 'prompt_tokens', None)}, out={getattr(usage, 'completion_tokens', None)}" - logger.info(f"[DataRecAgent] timing: llm={t_llm_val:.3f}s, supplement={t_supplement:.3f}s, exec={t_exec_total:.3f}s, total={t_total + t_llm_val:.3f}s{usage_str}") - return candidates - - def run(self, input_tables, description, n=1, prev_messages: list[dict] = [], primary_tables=None): - """ - Args: - input_tables: list[dict], each dict contains 'name' (table name in workspace) and 'rows' - description: str, the description of what the user wants - n: int, the number of candidates - prev_messages: list[dict], the previous messages - primary_tables: list[str], names of the primary (focused) tables for context prioritization - """ - table_names = [t.get('name', '?') for t in input_tables] - logger.info(f"[DataRecAgent] run start | tables={table_names} | primary={primary_tables}") - - # Generate data summary with file references - data_summary = generate_data_summary( - input_tables, - workspace=self.workspace, - primary_tables=primary_tables, - ) - - user_query = f"[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}" - if len(prev_messages) > 0: - user_query = f"The user wants a new recommendation based off the following updated context and goal:\n\n[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}" - - logger.debug(user_query) - - # Filter out system messages from prev_messages - filtered_prev_messages = [msg for msg in prev_messages if msg.get("role") != "system"] - - messages = [{"role":"system", "content": self.system_prompt}, - *filtered_prev_messages, - {"role":"user","content": user_query}] - - t_llm_start = time.time() - response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model)) - t_llm = time.time() - t_llm_start - - candidates = self.process_gpt_response(input_tables, messages, response, t_llm=t_llm) - status = 
candidates[0].get('status', '?') if candidates else 'empty' - logger.info(f"[DataRecAgent] run done | status={status}") - return candidates - - - def followup(self, input_tables, dialog, latest_data_sample, new_instruction: str, n=1): - """ - Followup recommendation based on previous dialog and new instruction. - - Args: - input_tables: list of input tables - dialog: previous conversation history - latest_data_sample: sample of the latest transformation result - new_instruction: new user instruction for followup - n: number of candidates - """ - logger.debug(f"GOAL: \n\n{new_instruction}") - logger.info(f"[DataRecAgent] followup start") - - # Format sample data - sample_data_str = pd.DataFrame(latest_data_sample).head(10).to_string() + '\n......' - - # Replace the old system prompt with the current one so that - # conversations continued from older threads pick up prompt changes. - updated_dialog = [{"role": "system", "content": self.system_prompt}, *dialog[1:]] - - messages = [*updated_dialog, - {"role":"user", - "content": f"This is the result from the latest transformation:\n\n{sample_data_str}\n\nUpdate the Python script above based on the following instruction:\n\n{new_instruction}"}] - - t_llm_start = time.time() - response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model)) - t_llm = time.time() - t_llm_start - - return self.process_gpt_response(input_tables, messages, response, t_llm=t_llm) diff --git a/py-src/data_formulator/agents/agent_data_transform.py b/py-src/data_formulator/agents/agent_data_transform.py deleted file mode 100644 index bdd842d2..00000000 --- a/py-src/data_formulator/agents/agent_data_transform.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
import json
import logging
import re
import time
from typing import Optional

import pandas as pd

from data_formulator.agent_config import reasoning_effort_for
from data_formulator.agents.agent_utils import (
    extract_json_objects,
    extract_code_from_gpt_response,
    supplement_missing_block,
    ensure_output_variable_in_code,
    compose_system_prompt,
    generate_data_summary,
)
from data_formulator.agents.agent_diagnostics import AgentDiagnostics
from data_formulator.datalake.parquet_utils import df_to_safe_records
from data_formulator.security.sanitize import sanitize_error_message
from data_formulator.agents.chart_creation_guide import (
    SHARED_ENVIRONMENT,
    SHARED_SEMANTIC_TYPE_REFERENCE,
    SHARED_CHART_REFERENCE,
    SHARED_STATISTICAL_ANALYSIS,
    SHARED_DUCKDB_NOTES,
)

logger = logging.getLogger(__name__)

_AGENT_ID = "data_transform"

# NOTE(review): this module was recovered from a whitespace-collapsed dump, so the
# indentation *inside* the prompt text below is reconstructed — confirm against the
# repository copy before relying on exact prompt bytes.
SYSTEM_PROMPT = f'''You are a data scientist who transforms data for visualization.
Given [CONTEXT] (dataset summaries) and [GOAL] (user intent + chart spec), refine the goal and write a Python script to produce the transformed data.

The user's [GOAL] includes a "chart" object (chart_type, encodings, config) describing the desired visualization and a natural language "goal".

{SHARED_ENVIRONMENT}

You will produce two outputs: a JSON spec (```json```) and a Python script (```python```). No extra text.

**Step 1: JSON spec** — refine the user's goal and finalize the chart.

Check if the user's "chart" (chart_type + encodings) is sufficient for their "goal":
- If encodings are sufficient, copy them.
- If encodings are missing fields, add minimal fields needed (aim for ≤3 channels: x, y, color/size).
- If encodings can be optimized, reorder for better visualization.
- If the user says "use B instead of A" while A is in encodings, update accordingly.
- For lat/lon data, use "latitude"/"longitude" as channel names, not "x"/"y".
- The user's chart_type may not be in [CHART TYPE REFERENCE] (e.g., "Radar Chart", "Bump Chart"). Preserve it as-is and infer valid encodings from channel names in the input.

```json
{{{{
    "input_tables": [...], // table names from [CONTEXT]. Table 1 = currently viewed — prioritize it.
    "detailed_instruction": "", // elaborated user instruction with details
    "display_instruction": "", // short verb phrase (<12 words) capturing computation intent. Bold **column names** (semantic matches count). For follow-ups, describe only the new part.
    "output_fields": [...], // desired output fields (include intermediate fields)
    "chart": {{{{
        "chart_type": "", // from [CHART TYPE REFERENCE], or keep the user's chart_type as-is if not listed
        "encodings": {{{{}}}}, // visual channels → output field names
        "config": {{{{}}}} // optional styling
    }}}},
    "field_metadata": {{{{ // semantic type for each encoding field
        "": "Category" // from [SEMANTIC TYPE REFERENCE]
    }}}},
    "output_variable": "", // descriptive snake_case name (e.g. "sales_by_region"), not "result_df"
    "reason": "" // why this refinement is made
}}}}
```

{SHARED_SEMANTIC_TYPE_REFERENCE}

{SHARED_CHART_REFERENCE}

{SHARED_STATISTICAL_ANALYSIS}

**Step 2: Python script** — transform input data to produce a DataFrame with all "output_fields". Keep it simple and readable. The script MUST assign the final result to the variable named in `"output_variable"` from Step 1.

**Datetime handling:**
- Year → number. Year-month / year-month-day → string ("2020-01" / "2020-01-01").
- Hour alone → number. Hour:min or h:m:s → string. Never return raw datetime objects.

{SHARED_DUCKDB_NOTES}'''


def _combine_rules(text_rules: str, knowledge_rules: list[dict]) -> str:
    """Merge text rules and knowledge-file rules into a single string.

    Args:
        text_rules: Free-form rules text; ignored when empty or whitespace-only.
        knowledge_rules: Dicts with ``'title'`` and ``'body'`` keys, each rendered
            as a ``### title`` section.

    Returns:
        The non-empty parts joined by blank lines (``""`` when there are none).
    """
    parts = []
    if text_rules and text_rules.strip():
        parts.append(text_rules.strip())
    parts.extend(f"### {rule['title']}\n{rule['body']}" for rule in knowledge_rules)
    return "\n\n".join(parts)


class DataTransformationAgent(object):
    """Agent that asks an LLM for a JSON chart spec plus a Python transformation
    script, executes the script in a sandbox and stores the resulting table in the
    workspace."""

    def __init__(self, client, workspace, system_prompt=None, agent_coding_rules="", language_instruction="", max_display_rows=10000, model_info=None, knowledge_store=None):
        """Assemble the system prompt and the diagnostics helper.

        Args:
            client: LLM client exposing ``get_completion(...)`` and ``model``.
            workspace: Workspace used to name and persist result tables.
            system_prompt: Optional replacement for ``SYSTEM_PROMPT``. When given,
                coding rules are NOT appended to it.
            agent_coding_rules: Extra free-form coding rules.
            language_instruction: Language instruction forwarded to
                ``compose_system_prompt``.
            max_display_rows: Maximum number of rows returned in the response payload.
            model_info: Optional model metadata recorded in diagnostics.
            knowledge_store: Optional store providing ``load_always_apply_rules()``.
        """
        self.client = client
        self.workspace = workspace
        self.max_display_rows = max_display_rows
        self._model_info = model_info or {}
        self._agent_coding_rules = agent_coding_rules
        self._language_instruction = language_instruction

        knowledge_rules = knowledge_store.load_always_apply_rules() if knowledge_store else []
        combined_rules = _combine_rules(agent_coding_rules, knowledge_rules)

        self._base_prompt = system_prompt if system_prompt is not None else SYSTEM_PROMPT

        self.system_prompt = compose_system_prompt(
            self._base_prompt,
            # A caller-supplied prompt is used as-is: no coding rules are appended.
            agent_coding_rules=combined_rules if system_prompt is None else "",
            language_instruction=language_instruction,
            language_marker="**About the execution environment:**",
        )

        self._diag = AgentDiagnostics(
            agent_name="DataTransformationAgent",
            model_info=self._model_info,
            base_system_prompt=self._base_prompt,
            agent_coding_rules=self._agent_coding_rules,
            language_instruction=self._language_instruction,
            assembled_system_prompt=self.system_prompt,
        )

    def process_gpt_response(self, response, messages, t_llm=None):
        """Turn an LLM response into executed transformation candidates.

        For every choice: parse the JSON spec and the Python block, ask for a
        missing block via ``supplement_missing_block``, make sure the script
        assigns the declared output variable, run it in the sandbox and, on
        success, write the full result to the workspace as parquet.

        Args:
            response: Completion object with ``choices``, or an ``Exception``
                instance describing a failed request.
            messages: The messages that produced ``response``.
            t_llm: Seconds spent in the LLM call (optional, used for diagnostics).

        Returns:
            A list of result dicts (``status``, ``code``, ``content``, ``dialog``,
            ``agent``, ``refined_goal``, ``diagnostics``). A failed request yields
            a single ``'other error'`` entry.
        """
        t_start = time.time()
        t_exec_total = 0.0
        # Accumulated outside the loop so the timing log below is always defined,
        # even when the response carries no choices.
        t_supplement_total = 0.0

        if isinstance(response, Exception):
            raw_error = str(getattr(response, "body", response))
            safe_error = sanitize_error_message(raw_error)
            result = {'status': 'other error', 'content': safe_error,
                      'diagnostics': self._diag.for_error(messages, error=safe_error)}
            return [result]

        candidates = []
        for choice in response.choices:
            # The content may be None (e.g. an empty completion); treat it as
            # empty text instead of failing on string concatenation / slicing.
            raw_content = choice.message.content or ""

            logger.debug("=== Python script result ===>")
            logger.debug(raw_content + "\n")

            # --- Parse JSON spec and Python code ---
            json_blocks = extract_json_objects(raw_content + "\n")
            refined_goal = next((jb for jb in json_blocks if isinstance(jb, dict)), None)
            code_blocks = extract_code_from_gpt_response(raw_content + "\n", "python")

            # If only one block was produced, request the missing one
            refined_goal, code_blocks, _supplement_content, t_supplement = supplement_missing_block(
                self.client, messages, raw_content,
                refined_goal, code_blocks, prefix="[DataTransformAgent]"
            )
            t_supplement_total += t_supplement

            # Apply fallbacks for missing JSON
            json_fallback_used = refined_goal is None
            if refined_goal is None:
                refined_goal = {'chart': {'chart_type': '', 'encodings': {}, 'config': {}}, 'instruction': '', 'reason': '', 'output_variable': 'result_df'}
                logger.warning(
                    "[DataTransformAgent] JSON spec parsing failed — using fallback defaults. "
                    f"Response snippet: {raw_content[:300]!r}"
                )
            output_variable = refined_goal.get('output_variable', 'result_df') or 'result_df'
            logger.info(f"[DataTransformAgent] extracted output_variable={output_variable!r}")

            # Diagnostics tracking
            _diag_code = code_blocks[-1] if code_blocks else None
            # True when some line of the script assigns to output_variable
            # ("name =" at line start, but not the comparison "name ==").
            _diag_output_var_in_code = bool(
                _diag_code and output_variable
                and re.search(rf'(?:^|\n)\s*{re.escape(output_variable)}\s*=(?!=)', _diag_code)
            )
            _diag_sandbox_mode = None
            _diag_exec = {"status": None}
            _diag_code_patched = False

            if len(code_blocks) > 0:
                code = code_blocks[-1]

                if output_variable and not _diag_output_var_in_code:
                    code, was_patched, detected_var = ensure_output_variable_in_code(code, output_variable)
                    _diag_code_patched = was_patched
                    if was_patched:
                        logger.info(
                            f"[DataTransformAgent] output_variable {output_variable!r} not in code — "
                            f"patched: appended `{output_variable} = {detected_var}`"
                        )
                    else:
                        logger.warning(
                            f"[DataTransformAgent] output_variable {output_variable!r} not in code "
                            f"and auto-patch found no candidate variable."
                        )

                try:
                    from data_formulator.sandbox import create_sandbox

                    # Outside a Flask app context (or without Flask) use 'local'.
                    try:
                        from flask import current_app
                        sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local')
                    except (ImportError, RuntimeError):
                        sandbox_mode = 'local'
                    _diag_sandbox_mode = sandbox_mode

                    t_exec_start = time.time()
                    sandbox = create_sandbox(sandbox_mode)
                    execution_result = sandbox.run_python_code(
                        code=code,
                        workspace=self.workspace,
                        output_variable=output_variable,
                    )
                    t_exec_total += time.time() - t_exec_start

                    if execution_result['status'] != 'ok':
                        diagnostics = execution_result.get("diagnostics", {})
                        raw_exec_error = diagnostics.get(
                            "safe_detail",
                            execution_result.get('content', execution_result.get('error_message', 'Unknown error')),
                        )
                        safe_exec_error = sanitize_error_message(raw_exec_error)
                    else:
                        safe_exec_error = None
                    _diag_exec = {
                        "status": execution_result['status'],
                        "error_message": safe_exec_error,
                        "available_dataframes": execution_result.get('df_names', []),
                    }

                    if execution_result['status'] == 'ok':
                        full_df = execution_result['content']
                        row_count = len(full_df)

                        # Persist the FULL result; only a bounded preview is returned.
                        output_table_name = self.workspace.get_fresh_name(f"d-{output_variable}")
                        self.workspace.write_parquet(full_df, output_table_name)

                        if row_count > self.max_display_rows:
                            query_output = full_df.head(self.max_display_rows)
                        else:
                            query_output = full_df
                        # Keep only the first occurrence of each column name.
                        query_output = query_output.loc[:, ~query_output.columns.duplicated()]

                        result = {
                            "status": "ok",
                            "code": code,
                            "content": {
                                'rows': df_to_safe_records(query_output),
                                'virtual': {
                                    'table_name': output_table_name,
                                    'row_count': row_count
                                }
                            },
                        }
                    else:
                        result = {
                            'status': 'error',
                            'code': code,
                            'content': safe_exec_error or 'Unknown error'
                        }

                except Exception as e:
                    # Boundary handler: the detailed (sanitized) error goes to
                    # diagnostics, the caller only sees a generic message.
                    logger.exception('Error occurred during code execution')
                    safe_error = sanitize_error_message(f"{type(e).__name__}: {e}")
                    result = {
                        'status': 'error',
                        'code': code,
                        'content': "An error occurred during code execution."
                    }
                    _diag_exec = {"status": "exception", "error_message": safe_error}

            else:
                result = {'status': 'error', 'code': "", 'content': "No code block found in the response. The model is unable to generate code to complete the task.", 'content_code': 'agent.noCodeBlock'}

            _effective_content = raw_content
            if _supplement_content:
                _effective_content += "\n\n" + _supplement_content
            result['dialog'] = [*messages, {"role": choice.message.role, "content": _effective_content}]
            result['agent'] = 'DataTransformationAgent'
            result['refined_goal'] = refined_goal

            # --- Build diagnostics ---
            usage = getattr(response, 'usage', None)
            result['diagnostics'] = self._diag.for_response(
                messages,
                raw_content=raw_content,
                finish_reason=getattr(choice, 'finish_reason', None),
                json_spec=refined_goal,
                json_fallback_used=json_fallback_used,
                code_found=len(code_blocks) > 0,
                code=_diag_code,
                output_variable=output_variable,
                output_variable_in_code=_diag_output_var_in_code,
                code_patched=_diag_code_patched,
                supplemented=_supplement_content is not None,
                sandbox_mode=_diag_sandbox_mode,
                exec_status=_diag_exec.get("status"),
                exec_error=_diag_exec.get("error_message"),
                exec_df_names=_diag_exec.get("available_dataframes"),
                t_llm=t_llm or 0,
                t_supplement=t_supplement,
                t_exec=t_exec_total,
                prompt_tokens=getattr(usage, 'prompt_tokens', None) if usage else None,
                completion_tokens=getattr(usage, 'completion_tokens', None) if usage else None,
            )

            candidates.append(result)

        t_total = time.time() - t_start
        t_llm_val = t_llm or 0.0

        logger.debug("=== Transform Candidates ===>")
        for candidate in candidates:
            for key, value in candidate.items():
                if key in ['dialog', 'content', 'diagnostics']:
                    logger.debug(f"##{key}:\n{str(value)[:1000]}...")
                else:
                    logger.debug(f"## {key}:\n{value}")

        usage = getattr(response, 'usage', None)
        usage_str = ""
        if usage:
            usage_str = f" | tokens: in={getattr(usage, 'prompt_tokens', None)}, out={getattr(usage, 'completion_tokens', None)}"
        logger.info(f"[DataTransformAgent] timing: llm={t_llm_val:.3f}s, supplement={t_supplement_total:.3f}s, exec={t_exec_total:.3f}s, total={t_total + t_llm_val:.3f}s{usage_str}")
        return candidates

    @staticmethod
    def _build_vis_section(current_visualization, expected_visualization):
        """Render the chart-spec section of the prompt ('' when neither is given)."""
        if current_visualization:
            return f"\n\n[CURRENT VISUALIZATION] This is the current visualization the user has:\n\n{json.dumps(current_visualization.get('chart_spec', {}), indent=4, ensure_ascii=False)}"
        if expected_visualization:
            return f"\n\n[EXPECTED VISUALIZATION] This is the visualization expected by the user:\n\n{json.dumps(expected_visualization.get('chart_spec', {}), indent=4, ensure_ascii=False)}"
        return ""

    def _complete_with_image_fallback(self, history, text, chart_image):
        """Call the LLM, retrying text-only if the image-bearing request raises.

        Returns ``(messages, response, t_llm)`` for the request that was answered.
        """
        effort = reasoning_effort_for(_AGENT_ID, self.client.model)

        if chart_image:
            image_messages = [*history, {"role": "user", "content": [
                {"type": "text", "text": text},
                {"type": "image_url", "image_url": {"url": chart_image, "detail": "low"}}
            ]}]
            t_llm_start = time.time()
            try:
                response = self.client.get_completion(messages=image_messages, reasoning_effort=effort)
                return image_messages, response, time.time() - t_llm_start
            except Exception as e:
                # Fallback to text-only if model doesn't support images
                logger.warning(f"Image-based completion failed, falling back to text-only: {e}")

        # Without an image there is nothing to fall back to: a failure here
        # propagates instead of silently repeating the identical request.
        text_messages = [*history, {"role": "user", "content": text}]
        t_llm_start = time.time()
        response = self.client.get_completion(messages=text_messages, reasoning_effort=effort)
        return text_messages, response, time.time() - t_llm_start

    def run(self, input_tables, description, prev_messages: Optional[list[dict]] = None, n=1,
            current_visualization=None, expected_visualization=None):
        """Start a transformation from a goal description.

        Args:
            input_tables: list[dict], each dict contains 'name' (table name in workspace)
            description: str, the description of the data transformation
            prev_messages: list[dict] or None, the previous messages; system
                messages among them are dropped
            n: int, the number of candidates (accepted but not used by this method)
            current_visualization: dict or None, contains chart_spec and optional chart_image for complete charts
            expected_visualization: dict or None, contains chart_spec for incomplete charts

        Returns:
            The candidate list produced by ``process_gpt_response``.
        """
        prev_messages = prev_messages or []

        table_names = [t.get('name', '?') for t in input_tables]
        logger.info(f"[DataTransformAgent] run start | tables={table_names}")

        # Generate data summary with file references
        data_summary = generate_data_summary(input_tables, workspace=self.workspace)
        vis_section = self._build_vis_section(current_visualization, expected_visualization)

        # Order: context → visualization → goal
        if len(prev_messages) > 0:
            user_query = f"The user wants a new transformation based off the following updated context and goal:\n\n[CONTEXT]\n\n{data_summary}{vis_section}\n\n[GOAL]\n\n{description}"
        else:
            user_query = f"[CONTEXT]\n\n{data_summary}{vis_section}\n\n[GOAL]\n\n{description}"

        logger.debug(user_query)

        # Filter out system messages from prev_messages
        filtered_prev_messages = [msg for msg in prev_messages if msg.get("role") != "system"]

        chart_image = current_visualization.get('chart_image') if current_visualization else None
        has_image = bool(chart_image)
        logger.info(f"[DataTransformAgent] run LLM call | messages={1 + len(filtered_prev_messages) + 1}, has_image={has_image}")

        history = [{"role": "system", "content": self.system_prompt}, *filtered_prev_messages]
        messages, response, t_llm = self._complete_with_image_fallback(history, user_query, chart_image)

        candidates = self.process_gpt_response(response, messages, t_llm=t_llm)
        status = candidates[0].get('status', '?') if candidates else 'empty'
        logger.info(f"[DataTransformAgent] run done | status={status}")
        return candidates

    def followup(self, input_tables, dialog, latest_data_sample, new_instruction: str, n=1,
                 current_visualization=None, expected_visualization=None):
        """
        Followup transformation based on previous dialog and new instruction.

        Args:
            input_tables: list of input tables (accepted but not used by this method)
            dialog: previous conversation history
            latest_data_sample: sample of the latest transformation result
            new_instruction: new user instruction for followup; a generic
                instruction is substituted when empty
            n: number of candidates (accepted but not used by this method)
            current_visualization: dict or None, contains chart_spec and optional chart_image for complete charts
            expected_visualization: dict or None, contains chart_spec for incomplete charts

        Returns:
            The candidate list produced by ``process_gpt_response``.
        """
        if not new_instruction or not new_instruction.strip():
            new_instruction = "Update the transformation based on the updated visualization context."

        logger.debug(f"GOAL: \n\n{new_instruction}")
        logger.info("[DataTransformAgent] followup start")

        # Replace any stored system prompt with the current one so conversations
        # continued from older threads pick up prompt changes. Filtering by role
        # (as run() does) avoids dropping a leading non-system message.
        updated_dialog = [{"role": "system", "content": self.system_prompt},
                          *[msg for msg in dialog if msg.get("role") != "system"]]

        # Format sample data
        sample_data_str = pd.DataFrame(latest_data_sample).head(10).to_string() + '\n......'

        vis_section = self._build_vis_section(current_visualization, expected_visualization)

        # Order: data sample → visualization → instruction
        followup_text = f"This is the result from the latest transformation:\n\n{sample_data_str}{vis_section}\n\nUpdate the Python script above based on the following instruction:\n\n{new_instruction}"

        logger.debug(followup_text)

        chart_image = current_visualization.get('chart_image') if current_visualization else None
        has_image = bool(chart_image)
        logger.info(f"[DataTransformAgent] followup LLM call | messages={len(updated_dialog) + 1}, has_image={has_image}")

        messages, response, t_llm = self._complete_with_image_fallback(updated_dialog, followup_text, chart_image)

        candidates = self.process_gpt_response(response, messages, t_llm=t_llm)
        status = candidates[0].get('status', 'unknown') if candidates else 'empty'
        logger.info(f"[DataTransformAgent] followup done | status={status}")
        return candidates


# ---------------------------------------------------------------------------
# Patch metadata that followed this module in the original dump (next deleted file):
# diff --git a/py-src/data_formulator/agents/agent_interactive_explore.py b/py-src/data_formulator/agents/agent_interactive_explore.py
# deleted file mode 100644
# index 67847ec2..00000000
# --- a/py-src/data_formulator/agents/agent_interactive_explore.py
# +++ /dev/null
# @@ -1,343 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
- -import json -import logging -import time - -import pandas as pd - -from data_formulator.agent_config import reasoning_effort_for -from data_formulator.agents.agent_utils import ( - attach_reasoning_content, - extract_json_objects, - generate_data_summary, -) -from data_formulator.agents.agent_language import inject_language_instruction -from data_formulator.agents.context import ( - build_focused_thread_context, - build_lightweight_table_context, - build_peripheral_thread_context, - handle_inspect_source_data, -) - -logger = logging.getLogger(__name__) - -_AGENT_ID = "interactive_explore" - -# ── Tool definition (inspect only) ──────────────────────────────────────── - -INSPECT_TOOL = { - "type": "function", - "function": { - "name": "inspect_source_data", - "description": ( - "Get a detailed summary of one or more source tables — schema, " - "field-level statistics, and sample rows. Call this before suggesting " - "questions if you need to understand a table's contents." - ), - "parameters": { - "type": "object", - "properties": { - "table_names": { - "type": "array", - "items": {"type": "string"}, - "description": "List of table names to inspect.", - }, - }, - "required": ["table_names"], - }, - }, -} - -# ── Intent tags ─────────────────────────────────────────────────────────── - -INTENT_TAGS = ['deep-dive', 'pivot', 'broaden', 'cross-data', 'statistical'] - -# ── System prompt ───────────────────────────────────────────────────────── - -SYSTEM_PROMPT = '''You are a data exploration expert who suggests interesting questions to help users explore their datasets. - -The user message contains tiered context: -- **[PRIMARY TABLE(S)]** / **[OTHER AVAILABLE TABLES]**: Compact dataset context with schema, metadata descriptions, representative field values, numeric stats, and bounded sample rows. -- **[FOCUSED THREAD]** (optional): The exploration thread the user is continuing — each step shows what was asked, what was created, and what chart was made. 
-- **[OTHER THREADS]** (optional): Brief summaries of other exploration threads in the workspace. -- **[CURRENT CHART]** (optional): Image of the chart the user is currently viewing. -- **[START QUESTION]** (optional): A seed question from the user for context. - -Your task is to suggest 4 exploration questions that users can follow to gain insights from their data. - -Guidelines: -1. Suggest interesting analytical questions that can uncover new insights from the data. -2. Use a diverse language style (questions, statements, etc). -3. If there are multiple datasets, consider relationships between them. -4. CONCISENESS: questions should be concise and to the point. -5. QUESTION QUALITY: - - If no exploration thread is provided, start with high-level overview questions. - - If a thread exists, build on it — do not repeat questions already explored. - - If the current analysis is already very specialized, suggest broadening or pivoting rather than drilling deeper into a tiny subset. - - Leverage other tables in the workspace to suggest cross-data questions. -6. DIVERSITY: each question MUST have a different intent tag. Cover diverse exploration directions: - - `deep-dive`: Zoom in — refine, filter, drill down, focus on outliers or sub-dimensions. - - `pivot`: Same data, different analytical angle — change the metric, aggregation, or chart type. - - `broaden`: Zoom out — higher-level view, remove filters, return to an earlier table. - - `cross-data`: Bring in another workspace table not yet used in this thread. Only suggest when other tables are available. - - `statistical`: Apply a statistical technique — forecasting, regression, clustering, anomaly detection. -7. VISUALIZATION: each question should be visualizable with a chart. -8. FORMATTING: for each question, include: - - `text`: The full question text. - - `goal`: A concise summary (<10 words) with **bold** keywords for key attributes/metrics. 
- - `tag`: One of: `deep-dive`, `pivot`, `broaden`, `cross-data`, `statistical`. - -Output a list of JSON objects, one per line (NDJSON format). Each line must be valid JSON with NO prefix: - -{"type": "question", "text": ..., "goal": ..., "tag": ...} -{"type": "question", "text": ..., "goal": ..., "tag": ...} -... -''' - -class InteractiveExploreAgent(object): - - def __init__(self, client, workspace, agent_exploration_rules="", language_instruction="", knowledge_store=None): - self.client = client - self.agent_exploration_rules = agent_exploration_rules - self.workspace = workspace - self.language_instruction = language_instruction - self._knowledge_store = knowledge_store - - def run(self, input_tables, start_question=None, - focused_thread=None, other_threads=None, - primary_tables=None, - current_chart=None, - # Legacy params — kept for backward compatibility - exploration_thread=None, current_data_sample=None, - enable_inspect_round=False, - **kwargs): - """ - Suggest exploration questions for a dataset or exploration thread. - - Args: - input_tables: List of dataset objects with name, rows, description - start_question: Optional seed question for context - focused_thread: Rich thread context (list of step dicts from frontend) - other_threads: Peripheral thread summaries - primary_tables: List of primary table names for prioritization - current_chart: PNG data URL of the current visualization - exploration_thread: Legacy — flat list of tables (used if focused_thread not provided) - current_data_sample: Legacy — raw rows (ignored when focused_thread is provided) - enable_inspect_round: Optional fallback for unusual cases where an - extra inspect_source_data tool round is explicitly requested. 
- """ - - # ── Progress: context building ───────────────────────────────── - yield {"type": "progress", "phase": "building_context"} - - # ── Build tiered context ────────────────────────────────────── - t_ctx = time.time() - - context = build_lightweight_table_context( - input_tables, self.workspace, primary_tables=primary_tables, - ) - - if focused_thread: - context += "\n\n" + build_focused_thread_context(focused_thread) - elif exploration_thread: - # Legacy fallback: build a simple thread summary from flat table list - thread_summary = generate_data_summary( - [{ - 'name': table.get('name', f'Table {i}'), - 'rows': table.get('rows', []), - } for i, table in enumerate(exploration_thread, 1)], - self.workspace, - table_name_prefix="Thread Table", - ) - context += f"\n\n[EXPLORATION THREAD]\n\n{thread_summary}" - - if other_threads: - context += "\n\n" + build_peripheral_thread_context(other_threads) - - if current_data_sample and not focused_thread: - context += f"\n\n[CURRENT DATA SAMPLE]\n\n{pd.DataFrame(current_data_sample).head(10).to_string()}" - - if start_question: - context += f"\n\n[START QUESTION]\n\n{start_question}" - - # ── Inject relevant experiences from knowledge store ────────── - if self._knowledge_store: - try: - query = start_question or "" - table_names = [t.get("name", "") for t in input_tables if t.get("name")] - search_query = " ".join([query] + table_names[:5]).strip() - if search_query: - relevant = self._knowledge_store.search( - search_query, categories=["experiences"], max_results=3, - ) - if relevant: - knowledge_block = "[RELEVANT KNOWLEDGE]\n" - for item in relevant: - knowledge_block += f"\n### {item['title']}\n{item['snippet']}\n" - context += f"\n\n{knowledge_block}" - except Exception: - logger.warning("Failed to search knowledge experiences", exc_info=True) - - # ── Build system prompt ─────────────────────────────────────── - system_prompt = SYSTEM_PROMPT - - if self.agent_exploration_rules and 
self.agent_exploration_rules.strip(): - system_prompt += "\n\n[AGENT EXPLORATION RULES]\n\n" + self.agent_exploration_rules.strip() + "\n\nPlease follow the above agent exploration rules when suggesting questions." - - if self._knowledge_store: - system_prompt += self._knowledge_store.format_rules_block() - - system_prompt = inject_language_instruction(system_prompt, self.language_instruction) - - ctx_elapsed = time.time() - t_ctx - logger.info( - "[InteractiveExploreAgent] context: %d chars (~%d tokens), " - "tables=%d, primary=%s, built in %.2fs", - len(context), len(context) // 4, - len(input_tables), - primary_tables, - ctx_elapsed, - ) - logger.debug("Interactive explore agent input: %s", context) - logger.info("[InteractiveExploreAgent] run start") - - # ── Build initial messages ──────────────────────────────────── - if current_chart: - user_content = [ - {"type": "text", "text": context}, - {"type": "image_url", "image_url": {"url": current_chart, "detail": "low"}} - ] - else: - user_content = context - - messages = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_content}, - ] - - # ── Optional inspect_source_data fallback ───────────────────── - if enable_inspect_round: - messages = self._run_inspect_round(messages, input_tables) - - # ── Progress: generating ────────────────────────────────────── - yield {"type": "progress", "phase": "generating"} - - # ── Stream the final response ───────────────────────────────── - try: - stream = self.client.get_completion(messages=messages, stream=True, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model)) - except Exception as e: - # If image fails, retry without it - if current_chart: - messages[1] = {"role": "user", "content": context} - stream = self.client.get_completion(messages=messages, stream=True) - else: - raise - - t_llm = time.time() - first_token = False - for part in stream: - if hasattr(part, 'choices') and len(part.choices) > 0: - delta = 
part.choices[0].delta - if hasattr(delta, 'content') and delta.content: - if not first_token: - first_token = True - logger.info( - "[InteractiveExploreAgent] TTFB: %.2fs", - time.time() - t_llm, - ) - yield delta.content - - logger.info( - "[InteractiveExploreAgent] LLM total: %.2fs, run done", - time.time() - t_llm, - ) - - def _run_inspect_round(self, messages, input_tables): - """Run one non-streaming LLM call with the inspect_source_data tool. - - If the model calls the tool, execute it and append the result. - If the model produces text without tool calls, skip (the main - streaming call will generate the final output). - - Returns the updated messages list. - """ - max_rounds = 3 - tools = [INSPECT_TOOL] - - for _ in range(max_rounds): - try: - response = self._call_llm_with_tools(messages, tools) - except Exception as e: - logger.warning(f"[InteractiveExploreAgent] Inspect round failed: {e}") - from data_formulator.error_handler import collect_stream_warning - collect_stream_warning( - "Data inspection round failed — results may be less accurate", - detail=str(e), - message_code="INSPECT_ROUND_FAILED", - ) - break - - if not response or not response.choices: - break - - choice = response.choices[0] - content = choice.message.content or "" - tool_calls = getattr(choice.message, 'tool_calls', None) - - if not tool_calls: - # No tool call — model is ready to answer. - # Don't append its text; we'll re-stream for the final response. 
- break - - # Append assistant message with tool calls - assistant_msg = { - "role": "assistant", - "content": content or None, - "tool_calls": [ - { - "id": tc.id, - "type": "function", - "function": { - "name": tc.function.name, - "arguments": tc.function.arguments, - }, - } - for tc in tool_calls - ], - } - attach_reasoning_content(assistant_msg, choice.message) - messages.append(assistant_msg) - - # Execute each tool call - for tc in tool_calls: - tool_name = tc.function.name - try: - tool_args = json.loads(tc.function.arguments) - except json.JSONDecodeError: - tool_args = {} - - if tool_name == "inspect_source_data": - table_names = tool_args.get("table_names", []) - tool_content = handle_inspect_source_data( - table_names, input_tables, self.workspace - ) - else: - tool_content = f"Unknown tool: {tool_name}" - - messages.append({ - "role": "tool", - "tool_call_id": tc.id, - "content": tool_content, - }) - - logger.info(f"[InteractiveExploreAgent] Inspect round: executed {len(tool_calls)} tool call(s)") - - return messages - - def _call_llm_with_tools(self, messages, tools): - """Non-streaming LLM call with tool definitions.""" - return self.client.get_completion_with_tools( - messages, tools=tools, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model), - ) \ No newline at end of file diff --git a/py-src/data_formulator/agents/agent_report_gen.py b/py-src/data_formulator/agents/agent_report_gen.py deleted file mode 100644 index 9d936b28..00000000 --- a/py-src/data_formulator/agents/agent_report_gen.py +++ /dev/null @@ -1,456 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Report generation agent with tool-calling for inspect + embed. - -Two-phase architecture: - - **Phase 1 (Inspect)**: Non-streaming LLM call with inspection tools. - Agent calls inspect_chart / inspect_source_data to gather information. - Results are fed back as context. Invisible to the user. 
- - **Phase 2 (Generate)**: Streaming LLM call with embedding tools. - Agent writes the report narrative token-by-token. - embed_chart / embed_table tool calls produce structured blocks - in the output stream — rendered by the frontend as inline content. -""" - -import json -import logging -from typing import Any, Generator - -import pandas as pd - -from data_formulator.agent_config import reasoning_effort_for -from data_formulator.agents.agent_utils import ( - attach_reasoning_content, - generate_data_summary, -) -from data_formulator.agents.agent_language import inject_language_instruction -from data_formulator.datalake.parquet_utils import df_to_safe_records -from data_formulator.agents.context import ( - build_focused_thread_context, - build_lightweight_table_context, - build_peripheral_thread_context, - handle_inspect_source_data, -) -from data_formulator.workflows.create_vl_plots import ( - assemble_vegailte_chart, - coerce_field_type, - resolve_field_type, - spec_to_base64, - field_metadata_to_semantic_types, -) - -logger = logging.getLogger(__name__) - -_AGENT_ID = "report_gen" - -# ── Tool definitions ────────────────────────────────────────────────────── - -INSPECT_TOOLS = [ - { - "type": "function", - "function": { - "name": "inspect_chart", - "description": ( - "Get the visualization image and underlying data for one or more charts. " - "Returns the chart image (PNG), a sample of the chart's data, " - "and the transformation code that created it." - ), - "parameters": { - "type": "object", - "properties": { - "chart_ids": { - "type": "array", - "items": {"type": "string"}, - "description": "List of chart IDs from [AVAILABLE CHARTS] to inspect.", - }, - }, - "required": ["chart_ids"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "inspect_source_data", - "description": ( - "Get a detailed summary of one or more source tables — schema, " - "field-level statistics, and sample rows." 
- ), - "parameters": { - "type": "object", - "properties": { - "table_names": { - "type": "array", - "items": {"type": "string"}, - "description": "List of table names to inspect.", - }, - }, - "required": ["table_names"], - }, - }, - }, -] - - -# ── System prompt ───────────────────────────────────────────────────────── - -SYSTEM_PROMPT = """\ -You are a data journalist / analyst who creates insightful, well-organized reports -based on data explorations. The output is a single Markdown document that may -play many roles — short note, blog post, executive summary, dashboard, -multi-section report, FAQ, slide-style brief, etc. Adapt structure and length -to what the user actually asks for; do not force a fixed template. - -The user message contains context about the workspace: -- **[PRIMARY TABLE(S)]** / **[OTHER AVAILABLE TABLES]**: Lightweight schema of datasets. -- **[FOCUSED THREAD]** (optional): The exploration thread the user is continuing. -- **[OTHER THREADS]** (optional): Brief summaries of other exploration threads. -- **[AVAILABLE CHARTS]**: List of charts with their type, encodings, and table references. - -## Phase 1 — Inspect -Before writing, use `inspect_chart` and `inspect_source_data` to gather information -about the charts and data you want to include. Inspect only what you actually need -to ground your narrative — don't fetch everything. - -## Phase 2 — Write the report - -### Embedding charts (REQUIRED FORMAT — do not change this) -To embed a chart image, use markdown image syntax with a `chart://` URL: - ![Caption describing the chart](chart://chart_id) - -Example: `![Monthly trade balance trend](chart://chart-123)` - -The chart_id must match one from [AVAILABLE CHARTS]. Place each chart embed on -its own line (it renders as a block). You can embed the same chart at most -once. Captions are short — one line describing what the chart shows. 
- -### Tables -For data tables, write standard markdown tables directly: -| date | value | -| --- | --- | -| 2020-01 | -43.5 | - -### Style & structure — adapt to the user's request -The user may ask for any of: -- a short note or social-style summary (a few sentences, maybe one chart), -- a blog post / narrative report (intro → findings → takeaway), -- an executive summary (key numbers up top, then context), -- a KPI dashboard / multi-section overview (headings per topic, multiple charts - arranged with short commentary between them), -- a slide-style brief (compact sections with bullet points and embedded charts), -- a deeper analytical report with sub-sections, methodology notes, and caveats. - -Pick the structure that fits the request and the available material. Reasonable -defaults if the user is vague: -- Start with a `# Title` that reflects the topic. -- Group related findings under `##` (and `###` if useful) headings. -- Around each embedded chart, briefly explain what it shows and the key insight. -- Use bullets / short paragraphs / tables where they help; don't pad. -- Close with a brief takeaway or summary section if the report is more than a - few paragraphs. For very short outputs (notes, single-chart blurbs), a closing - summary is optional. - -### Guardrails -- Write in Markdown. Keep prose tight; let the data and charts carry the weight. -- Stay faithful to the data — do not invent numbers, comparisons, or causation - that the data does not actually support. -- It is fine to flag uncertainty ("based on the sample shown…") when appropriate. -- Embed every chart you discuss; don't reference a chart in prose without showing it. 
-""" - - -class ReportGenAgent: - """Tool-calling report generation agent with two-phase streaming.""" - - def __init__(self, client, workspace, language_instruction=""): - self.client = client - self.workspace = workspace - self.language_instruction = language_instruction - - def run( - self, - input_tables: list[dict[str, Any]], - charts: list[dict[str, Any]], - user_prompt: str = "Create a report summarizing the exploration.", - focused_thread: list[dict[str, Any]] | None = None, - other_threads: list[dict[str, Any]] | None = None, - primary_tables: list[str] | None = None, - ) -> Generator[dict[str, Any], None, None]: - """Generate a report via two-phase tool-calling. - - Yields SSE-style dicts: - {"type": "text_delta", "content": "..."} - {"type": "embed_chart", "chart_id": "...", "caption": "..."} - {"type": "embed_table", "table_id": "...", ...} - - Args: - input_tables: Source table objects with name (rows optional for lightweight mode) - charts: Chart descriptors: {chart_id, chart_type, encodings, table_ref, code?, chart_data?, chart_image?} - user_prompt: The user's report request - focused_thread: Rich thread context (from buildFocusedThread) - other_threads: Peripheral thread summaries - primary_tables: List of primary table names for prioritization - """ - # Build context - context = build_lightweight_table_context( - input_tables, self.workspace, primary_tables=primary_tables, - ) - if focused_thread: - context += "\n\n" + build_focused_thread_context(focused_thread) - if other_threads: - context += "\n\n" + build_peripheral_thread_context(other_threads) - - # Build available charts section - if charts: - chart_lines = ["[AVAILABLE CHARTS]"] - for c in charts: - enc_str = ", ".join(f"{k}: {v}" for k, v in c.get("encodings", {}).items() if v) - chart_lines.append( - f" - {c['chart_id']}: {c.get('chart_type', 'Unknown')} " - f"({enc_str}) → table: {c.get('table_ref', '?')}" - ) - context += "\n\n" + "\n".join(chart_lines) - - # Build system prompt - 
system_prompt = SYSTEM_PROMPT - system_prompt = inject_language_instruction(system_prompt, self.language_instruction) - - messages: list[dict] = [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": f"{context}\n\n[USER REQUEST]\n\n{user_prompt}"}, - ] - - # ── Phase 1: Inspect (non-streaming) ────────────────────────── - messages = self._run_inspect_phase(messages, input_tables, charts) - - # ── Phase 2: Generate (streaming with embed tools) ──────────── - yield from self._run_generate_phase(messages, charts, input_tables) - - # ------------------------------------------------------------------ - # Phase 1: Inspection loop - # ------------------------------------------------------------------ - - def _run_inspect_phase( - self, - messages: list[dict], - input_tables: list[dict[str, Any]], - charts: list[dict[str, Any]], - ) -> list[dict]: - """Run non-streaming inspect calls. Returns updated messages.""" - max_rounds = 5 - - for _ in range(max_rounds): - try: - response = self._call_llm(messages, tools=INSPECT_TOOLS) - except Exception as e: - logger.warning(f"[ReportAgent] Inspect phase error: {e}") - from data_formulator.error_handler import collect_stream_warning - collect_stream_warning( - "Report data inspection failed — report may be incomplete", - detail=str(e), - message_code="INSPECT_PHASE_FAILED", - ) - break - - if not response or not response.choices: - break - - choice = response.choices[0] - content = choice.message.content or "" - tool_calls = getattr(choice.message, "tool_calls", None) - - if not tool_calls: - # Agent is ready to write — don't append its text yet, - # Phase 2 will re-prompt with embed tools - break - - # Append assistant message - assistant_msg: dict[str, Any] = { - "role": "assistant", - "content": content or None, - "tool_calls": [ - { - "id": tc.id, - "type": "function", - "function": { - "name": tc.function.name, - "arguments": tc.function.arguments, - }, - } - for tc in tool_calls - ], - } - 
attach_reasoning_content(assistant_msg, choice.message) - messages.append(assistant_msg) - - # Execute each tool - for tc in tool_calls: - tool_name = tc.function.name - try: - tool_args = json.loads(tc.function.arguments) - except json.JSONDecodeError: - tool_args = {} - - if tool_name == "inspect_chart": - tool_content = self._handle_inspect_chart( - tool_args.get("chart_ids", []), charts - ) - elif tool_name == "inspect_source_data": - tool_content = handle_inspect_source_data( - tool_args.get("table_names", []), - input_tables, - self.workspace, - ) - else: - tool_content = f"Unknown tool: {tool_name}" - - messages.append({ - "role": "tool", - "tool_call_id": tc.id, - "content": tool_content, - }) - - logger.info(f"[ReportAgent] Inspect phase: executed {len(tool_calls)} tool call(s)") - - return messages - - # ------------------------------------------------------------------ - # Phase 2: Streaming generation with embed tools - # ------------------------------------------------------------------ - - def _run_generate_phase( - self, - messages: list[dict], - charts: list[dict[str, Any]], - input_tables: list[dict[str, Any]], - ) -> Generator[dict[str, Any], None, None]: - """Stream the report as plain text with [IMAGE()] placeholders.""" - - # Add a nudge to start writing - messages.append({ - "role": "user", - "content": ( - "Now write the report in markdown. " - "Use ![caption](chart://chart_id) to embed charts." 
- ), - }) - - try: - stream = self._call_llm_streaming(messages, tools=None) - except Exception as e: - logger.error(f"[ReportAgent] Generate phase error: {e}") - yield {"type": "text_delta", "content": f"Error generating report: {e}"} - return - - for chunk in stream: - if not chunk.choices: - continue - delta = chunk.choices[0].delta - if hasattr(delta, "content") and delta.content: - yield {"type": "text_delta", "content": delta.content} - - # ------------------------------------------------------------------ - # Tool handlers - # ------------------------------------------------------------------ - - def _handle_inspect_chart( - self, - chart_ids: list[str], - charts: list[dict[str, Any]], - ) -> str: - """Return chart details as text + image content for inspection.""" - results = [] - for chart_id in chart_ids: - chart = next((c for c in charts if c["chart_id"] == chart_id), None) - if not chart: - results.append(f"Chart {chart_id}: not found") - continue - - parts = [f"Chart: {chart_id}"] - parts.append(f" Type: {chart.get('chart_type', 'Unknown')}") - - encodings = chart.get("encodings", {}) - if encodings: - enc_str = ", ".join(f"{k}: {v}" for k, v in encodings.items() if v) - parts.append(f" Encodings: {enc_str}") - - if chart.get("code"): - parts.append(f" Code:\n```python\n{chart['code']}\n```") - - # Data sample - chart_data = chart.get("chart_data") - if chart_data and chart_data.get("rows"): - df = pd.DataFrame(chart_data["rows"]) - parts.append(f" Data ({len(df)} rows, {len(df.columns)} cols):") - parts.append(f" Columns: {', '.join(df.columns.tolist())}") - parts.append(f" Sample:\n{df.head(5).to_string()}") - - # Chart image — return as base64 reference - if chart.get("chart_image"): - parts.append(" [Chart image available — shown below]") - - results.append("\n".join(parts)) - - return "\n\n".join(results) - - def _resolve_table_data( - self, - table_id: str, - input_tables: list[dict[str, Any]], - charts: list[dict[str, Any]], - columns: list[str] 
| None = None, - max_rows: int = 10, - sort_by: str | None = None, - ) -> dict[str, Any]: - """Resolve table data for embed_table — check both source tables and chart data tables.""" - # Check input tables - table = next((t for t in input_tables if t.get("name") == table_id), None) - - # Check chart data tables - if not table: - for c in charts: - cd = c.get("chart_data", {}) - if cd.get("name") == table_id: - table = cd - break - - if not table or not table.get("rows"): - return {"columns": [], "rows": []} - - try: - df = pd.DataFrame(table["rows"]) - if sort_by and sort_by in df.columns: - df = df.sort_values(sort_by, ascending=False) - if columns: - valid_cols = [c for c in columns if c in df.columns] - if valid_cols: - df = df[valid_cols] - df = df.head(max_rows) - return { - "columns": df.columns.tolist(), - "rows": df_to_safe_records(df), - } - except Exception as e: - logger.error(f"[ReportAgent] resolve_table_data error: {e}") - return {"columns": [], "rows": []} - - # ------------------------------------------------------------------ - # LLM call helpers - # ------------------------------------------------------------------ - - def _call_llm(self, messages: list[dict], tools: list[dict] | None = None): - """Non-streaming LLM call with optional tool definitions.""" - if tools: - return self.client.get_completion_with_tools( - messages, tools=tools, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model), - ) - return self.client.get_completion(messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model)) - - def _call_llm_streaming(self, messages: list[dict], tools: list[dict] | None = None): - """Streaming LLM call with optional tool definitions.""" - if tools: - return self.client.get_completion_with_tools( - messages, tools=tools, stream=True, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model), - ) - return self.client.get_completion(messages, stream=True, reasoning_effort=reasoning_effort_for(_AGENT_ID, 
self.client.model)) diff --git a/py-src/data_formulator/agents/agent_experience_distill.py b/py-src/data_formulator/agents/agent_workflow_distill.py similarity index 55% rename from py-src/data_formulator/agents/agent_experience_distill.py rename to py-src/data_formulator/agents/agent_workflow_distill.py index cc738495..0d86aa78 100644 --- a/py-src/data_formulator/agents/agent_experience_distill.py +++ b/py-src/data_formulator/agents/agent_workflow_distill.py @@ -1,17 +1,17 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Experience distillation agent — extracts reusable knowledge from analysis context. +"""Workflow distillation agent — extracts a replayable workflow from analysis context. Given a user-visible analysis context (timeline of events) plus an optional user instruction, this agent calls an LLM to produce a structured Markdown -experience document with YAML front matter suitable for storage in the +workflow document with YAML front matter suitable for storage in the knowledge base. Usage:: - agent = ExperienceDistillAgent(client) - md_content = agent.run(experience_context, user_instruction="...") + agent = WorkflowDistillAgent(client) + md_content = agent.run(workflow_context, user_instruction="...") """ from __future__ import annotations @@ -25,18 +25,38 @@ logger = logging.getLogger(__name__) -_AGENT_ID = "experience_distill" +_AGENT_ID = "workflow_distill" SYSTEM_PROMPT = """\ -You are a knowledge distiller. Given the chronological events of a data -analysis session plus an optional user instruction, write a short reusable -Markdown note that will help with similar future tasks. +You are a workflow distiller. Given the chronological events of a data +analysis session plus an optional user distillation instruction, extract a **replayable +workflow** that captures *what the user wanted and got* — and write it at +TWO levels so it can be reused in two different situations: + +1. 
An **Abstract workflow** — dataset-independent. The underlying analytical + pattern, stripped of this dataset's subject matter: the sequence of + questions, computations, and chart kinds, phrased in domain-neutral terms. + Following it on a *different and possibly very differently-shaped* dataset + should walk the same process and arrive at structurally similar + visualizations. +2. A **Concrete workflow** — for *similar* data (same shape, only minor + differences — a different period, region, or filter). It names the real + fields, aggregations, filters, and chart encodings used here, so the + analysis can be replayed closely with minimal thought. + +Both describe the SAME analysis at different distances. They should be +consistent, but they do NOT need an exact 1:1 step mapping — let each be as +long as it needs (typically 3-7 steps each). + +Where the analysis hinges on a few choices a user might change on replay (a +period, a filter, a top-N), surface them as named **parameters** with +`{{token}}` placeholders in the steps — see the `## Parameters` section below. The session contains one or more threads (separate analysis branches in the same session) each rendered under a `### Thread N` header. When -multiple threads are provided, synthesise lessons that hold across them -— do NOT enumerate per-thread. +multiple threads are provided, merge them into one coherent ordered +workflow — do NOT enumerate per-thread. The events use three types: - `message` — directed speech, formatted as `[/] `. @@ -46,56 +66,129 @@ (followed by columns, row count, sample, and code). - `create_chart` — a chart emitted on a table (mark + encoding summary). -If a user instruction is provided, focus the note on that instruction. -Otherwise, distill the most transferable methodology from the events. +CRITICAL extraction rules — keep only what the user wanted and got: +- Recover the ORDERED list of requests the user actually wanted, and the + outputs (tables/charts) they kept. 
Each step states BOTH the question + explored AND what was produced to answer it — including the chart and the + key fields it uses. +- DROP corrective back-and-forth. If the user changed their mind + ("no, it should be…", "actually use median instead"), keep ONLY the + final resolved intent — not the wrong first attempt or the correction. +- DROP abandoned work. If a chart or table was created and then deleted + or never kept, leave it out entirely. +- DROP mechanics. Do NOT include error-repair loops, dtype fixes, tool + call noise, or low-level code dumps. Describe intent, not implementation. +- Capture genuine gotchas as short Notes (advisory warnings to carry + forward), NOT as steps to re-perform. + +If a user instruction is provided, let it steer what to keep or emphasise. Output format (Markdown with YAML front matter, nothing else): ``` --- -subtitle: -tags: [] +subtitle: +filename: created: updated: source: distill source_context: --- -## When to Use - - -## Method - - -## Pitfalls & Tips - +## Goal + + +## Parameters + +- `{{period}}` — the time range analysed; used here: 2024; on replay: ask. +- `{{top_n}}` — how many top categories to keep; used here: 10; on replay: keep. +- `{{region}}` — geographic filter applied; used here: National; on replay: ask. + +## Abstract workflow + +1. +2. +3. <…> + +## Concrete workflow + +1. +2. <…> + +## Notes + ``` Rules: -- Subtitle must be a short, scannable noun phrase (3-8 words) that captures - the technique or pattern. The hosting application prefixes it with the - session name to form the full title (e.g. "Experience from : "), - so do NOT include the session name in the subtitle. Do NOT pack scenario, - takeaway, and steps into the subtitle — leave details for `## When to Use` - and `## Method`. - Good: "Year-over-year volatility comparison". "Repairing pandas dtype mismatches". - Bad: "Time series analysis workflow: aggregate, visualize trends, quantify YoY spikes, and compare volatility across periods". 
-- Focus on *transferable* methods and caveats, not case-specific details. -- Keep the body under 500 words. -- No raw data, PII, secrets, or specific values unless they show a universal pattern. -- Write the subtitle, headings, body, and tags in {output_language}. +- The subtitle is the workflow's display TITLE. Make it ABSTRACT and + library-friendly: name the *kind of analysis* — a technique plus a GENERIC + subject (KPI, metric, category, event, cohort) — so someone browsing the + workflow library can tell whether this is the KIND of analysis they want to + reuse. Do NOT pin it to this dataset's specific subject, period, or column + names, and do NOT prefix it with the session name. + - Pair a real technique with a generic subject; avoid bare category words. + Good: "Year-over-year KPI volatility analysis". + "Category contribution-to-total breakdown". + "Time-windowed event composition analysis". + Bad: "Plot monthly gasoline prices in 2024 and compare each year". (too specific) + "Time series analysis". "Data workflow". "Chart exploration". (too vague) + The dataset-grounded, full-sentence explanation goes in `## Goal`, NOT the title. +- Filename must be a SHORT (2-5 word) lowercase name for the file — just + the technique/subject, e.g. "kpi volatility analysis", "region revenue + breakdown". No dates, no file extension, no session name. It only names the + file on disk; the subtitle is what users see. +- Abstract workflow must be domain-neutral — strip this dataset's subject + matter and column names; describe only the transferable pattern (question + types, computations, chart kinds). Concrete workflow must be runnable on a + near-identical dataset: real field names, the aggregation, the filter to + vary, the chart mark + key encodings. Do NOT have the two sections merely + repeat each other — each adds its own grain of reuse guidance. 
+- Parameters are optional and a judgment call: surface only the FEW knobs + that materially change the outcome and that a user would revisit on replay + (often 0-4). When in doubt, leave the value inline — a spurious `{{token}}` + is worse than none. Knobs may be run-specific (period, region, top-N — + usually `ask`) or dataset-specific (a domain value/column — usually `keep`, + and may be skipped in the Abstract workflow). Every `{{token}}` in the steps + must be listed in `## Parameters` and vice versa. +- Steps in both sections must be ordered and reproducible. +- Be as long as the analysis needs — do not omit meaningful steps, questions, + or charts just to stay short. Stay focused, but completeness matters more + than brevity. +- No raw data, PII, secrets, or specific values unless essential to a request. +- Write the subtitle, headings, and body in {output_language}. YAML front-matter keys stay in English. {language_instruction} """ -class ExperienceDistillAgent: - """Distills analysis context into a reusable experience document.""" - # Language display names for experience-specific prompts +class WorkflowDistillAgent: + """Distills analysis context into a reusable workflow document.""" + + # Language display names for workflow-specific prompts _LANG_NAMES: dict[str, str] = { "zh": "Simplified Chinese (简体中文)", "ja": "Japanese (日本語)", @@ -121,7 +214,7 @@ def __init__( self.timeout_seconds = int(timeout_seconds) if timeout_seconds else self.DEFAULT_TIMEOUT def run(self, context: dict[str, Any], user_instruction: str = "") -> str: - """Distill an experience document from user-visible session context.""" + """Distill a workflow document from user-visible session context.""" summary = self._extract_context_summary(context) today = datetime.now(timezone.utc).strftime("%Y-%m-%d") context_id = str(context.get("context_id", "") or "") @@ -130,7 +223,7 @@ def run(self, context: dict[str, Any], user_instruction: str = "") -> str: instruction_block = ( f"\n[USER 
INSTRUCTION]\n{user_instruction.strip()}\n" - f"Focus the distilled experience on the above instruction.\n" + f"Focus the distilled workflow on the above instruction.\n" ) if user_instruction and user_instruction.strip() else "" workspace_block = ( @@ -158,9 +251,11 @@ def run(self, context: dict[str, Any], user_instruction: str = "") -> str: {"role": "user", "content": user_msg}, ] - from data_formulator.knowledge.store import KNOWLEDGE_LIMITS + from data_formulator.knowledge.store import KNOWLEDGE_LIMITS, WORKFLOW_HARD_MAX content = self._call_with_length_retry( - messages, KNOWLEDGE_LIMITS.get("experiences", 2000), + messages, + KNOWLEDGE_LIMITS.get("workflows", 6000), + WORKFLOW_HARD_MAX, ) if not content.strip().startswith("---"): @@ -182,7 +277,7 @@ def _prompt_format_kwargs(self) -> dict[str, str]: lang_block = ( f"[LANGUAGE INSTRUCTION]\n" f"The user's language is **{display_name}**.\n" - f"Write the title, all section headings, all body text, and tags " + f"Write the title, all section headings, and all body text " f"in {display_name}. YAML front-matter keys stay in English." ) return { @@ -199,39 +294,43 @@ def _prompt_format_kwargs(self) -> dict[str, str]: def _call_with_length_retry( self, messages: list[dict], - body_limit: int, + soft_limit: int, + hard_limit: int, ) -> str: - """Call LLM and retry once if the body exceeds *body_limit* characters. + """Call the LLM, nudging it to stay near *soft_limit* characters. - If the retry *still* overshoots, hard-truncate the body so the - document is saved instead of the entire distillation being lost. + ``soft_limit`` is advisory guidance: if the first response overshoots + it we retry once asking the model to condense. We only ever + hard-truncate at ``hard_limit`` — a much larger safety ceiling — so + rich, multi-section workflows are kept intact while runaway output + is still bounded. 
""" from data_formulator.knowledge.store import parse_front_matter content = self._call_llm(messages) _, body = parse_front_matter(content) - if len(body.strip()) <= body_limit: + if len(body.strip()) <= soft_limit: return content - retry_target = max(body_limit - self.RETRY_MARGIN, 1) + retry_target = max(soft_limit - self.RETRY_MARGIN, 1) logger.info( - "Distilled content too long (%d > %d), retrying with condensation prompt (target ≤ %d)", - len(body.strip()), body_limit, retry_target, + "Distilled content over soft target (%d > %d), retrying with condensation prompt (target ≤ %d)", + len(body.strip()), soft_limit, retry_target, ) messages = messages + [ {"role": "assistant", "content": content}, {"role": "user", "content": ( - f"Your output body is {len(body.strip())} characters, which exceeds " - f"the limit of {body_limit}. Please condense the document to fit " - f"within {retry_target} characters while keeping the most important " - f"insights. Output ONLY the revised Markdown document." + f"Your output body is {len(body.strip())} characters, which is " + f"longer than ideal. Please tighten the document to around " + f"{retry_target} characters while keeping the most important " + f"insights and all sections. Output ONLY the revised Markdown document." )}, ] retried = self._call_llm(messages) - # Hard-trim if the retry still overshoots — better a slightly - # truncated experience than a save failure. - return self._truncate_body_to_limit(retried, body_limit) + # Hard-trim only if the retry blows past the absolute ceiling — + # better a slightly truncated workflow than a save failure. 
+ return self._truncate_body_to_limit(retried, hard_limit) @classmethod def _truncate_body_to_limit(cls, content: str, body_limit: int) -> str: @@ -385,7 +484,7 @@ def _render_events(cls, events: list[Any]) -> str: return "\n".join(parts) if parts else "(empty context)" def _call_llm(self, messages: list[dict]) -> str: - """Single LLM call to generate the experience document.""" + """Single LLM call to generate the workflow document.""" resp = self.client.get_completion( messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model), timeout=self.timeout_seconds, ) @@ -401,7 +500,6 @@ def _add_fallback_front_matter( header = ( f"---\ntitle: {title}\n" - f"tags: []\n" f"created: {today}\n" f"updated: {today}\n" f"source: distill\n" diff --git a/py-src/data_formulator/agents/chart_creation_guide.py b/py-src/data_formulator/agents/chart_creation_guide.py deleted file mode 100644 index 20a6ab94..00000000 --- a/py-src/data_formulator/agents/chart_creation_guide.py +++ /dev/null @@ -1,153 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Chart creation guide and shared prompt fragments. - -This module is the canonical source of truth for the prompt fragments -that describe how the agent should write code, choose chart types, -annotate semantic types, run statistical analyses, and use DuckDB. - -The individual ``SHARED_*`` fragments are imported by ``DataRecAgent`` -and ``DataTransformationAgent`` (which weave them into their system -prompts) and are also composed into ``CHART_CREATION_GUIDE``, the -single block injected lazily on the first ``visualize`` tool call. -""" - - -SHARED_ENVIRONMENT = '''**About the execution environment:** -- You can use BOTH DuckDB SQL and pandas operations in the same script -- The script will run in the workspace data directory (all data files are in the current directory) -- Each table in [CONTEXT] has a **file path** (e.g., `student_exam.parquet`, `sales.csv`). 
Use EXACTLY that path to load data: - - `.parquet`: `pd.read_parquet('file.parquet')` or DuckDB `read_parquet('file.parquet')` - - `.csv`: `pd.read_csv('file.csv')` or DuckDB `read_csv_auto('file.csv')` - - `.json`: `pd.read_json('file.json')` - - `.xlsx`/`.xls`: `pd.read_excel('file.xlsx')` - - `.txt`: `pd.read_csv('file.txt', sep='\\t')` -- **IMPORTANT:** Use the exact filename from the context — do NOT change the file extension or assume all files are parquet. -- **Allowed libraries:** pandas, numpy, duckdb, math, datetime, json, statistics, collections, re, sklearn, scipy, random, itertools, functools, operator, time -- **Not allowed:** matplotlib, plotly, seaborn, requests, subprocess, os, sys, io, or any other library not listed above. -- File system access (open, write) and network access are also forbidden. - -**When to use DuckDB vs pandas:** -- **Prefer plain pandas** for most tasks — it's simpler and more readable. -- Only use DuckDB when the dataset is very large and you need efficient SQL aggregations, filtering, joins, or window functions. -- You can combine both: DuckDB for initial loading/filtering on large files, then pandas for complex operations. - -**Code structure:** standalone script (no function wrapper), imports at top. **CRITICAL:** The final result DataFrame MUST be assigned to the exact variable name you specified in `"output_variable"` in the JSON spec — the system uses this name to extract the result. For example, if your output_variable is `sales_by_region`, the script must contain `sales_by_region = ...`.''' - - -SHARED_SEMANTIC_TYPE_REFERENCE = '''**[SEMANTIC TYPE REFERENCE]** - -Choose the most specific type that fits. Only annotate fields used in chart encodings. 
- -| Category | Types | -|---|---| -| Temporal | DateTime, Date, Time, Timestamp, Year, Quarter, Month, Week, Day, Hour, YearMonth, YearQuarter, YearWeek, Decade, Duration | -| Monetary measures | Amount, Price | -| Physical measures | Quantity, Temperature | -| Proportion | Percentage | -| Signed/diverging | Profit, PercentageChange, Sentiment, Correlation | -| Generic measures | Count, Number | -| Discrete numeric | Rank, Score | -| Identifier | ID | -| Geographic | Latitude, Longitude, Country, State, City, Region, Address, ZipCode | -| Entity names | Category, Name | -| Coded categorical | Status, Boolean, Direction | -| Binned ranges | Range | -| Fallback | Unknown | - -Key guidelines: -- Use **Amount** for summed monetary totals, **Price** for per-unit prices, **Profit** for values that can be negative. -- Use **Temperature** (not Quantity) for temperature — it has special diverging behavior. -- Use **Year** (not Number) for columns like "year" with values 2020, 2021.''' - - -SHARED_CHART_REFERENCE = '''**[CHART TYPE REFERENCE]** - -The `chart_type` value in the `visualize` action MUST be one of the names listed -in the first column below (exact spelling, including capitalization). When a row -lists multiple names, pick whichever fits the "when to use" hint best. - -| chart_type | encodings | config | when to use | -|---|---|---|---| -| Scatter Plot | x, y, color, size, facet | opacity (0.1–1.0) | Relationships between two quantitative fields | -| Regression | x, y, color, size, facet | regressionMethod ("linear","log","exp","pow","quad","poly"), polyOrder (2–10) | Trend line over scatter; one line per color group | -| Bar Chart / Lollipop Chart / Waterfall Chart | x, y, color, facet | — | Bar: default categorical comparison. Lollipop: cleaner for ranked lists / sparse categories. 
Waterfall: cumulative gain/loss, each bar starts where the previous ended | -| Grouped Bar Chart | x, y, group, facet | — | Side-by-side bars across a second categorical dimension | -| Histogram / Density Plot | x, color, facet | — | Distribution of one quantitative field. Histogram: discrete bins, auto-binned. Density Plot: smooth KDE curve | -| Boxplot | x, y, color, facet | — | Distribution summary (median/quartiles/outliers) by category | -| Ranged Dot Plot | x, y, color, facet | — | Min–max range or two-point comparison per category | -| Line Chart | x, y, color, strokeDash, facet | interpolate ("linear","monotone","step") | Trends over an ordered (usually temporal) x-axis | -| Area Chart | x, y, color, facet | — | Magnitude over ordered x; auto-stacks when color is set | -| Pie Chart | size, color, facet | innerRadius (0–100; 0=pie, >0=donut) | Part-of-whole with ≤7 categories. Wedge value goes on **size**, not **theta** | -| Radar Chart | x, y, color, facet | — | Multi-metric profile/comparison; x = metric name, y = value, color = entity (long-form data) | -| Heatmap | x, y, color, facet | colorScheme ("viridis","blues","reds","oranges","greens","blueorange","redblue") | Matrix / 2D density; color encodes the quantitative cell value | -| Bar Table | x, y, color, facet | — | Ranked horizontal table with inline bars; one row per category. y = category, x = value | -| KPI Card | metric, value, goal | — | "Big number" dashboard tile(s); one row per tile. 
`value` must be pre-aggregated; `goal` is optional | -| Candlestick Chart | x, open, high, low, close, facet | — | OHLC financial data | -| World Map | longitude, latitude, color, size | projection ("mercator","equalEarth","naturalEarth1","orthographic"), projectionCenter ([lon,lat]) | Geographic points/regions on a world projection | -| US Map | longitude, latitude, color, size | — (fixed albersUsa) | US-only points/regions (albersUsa projection) | - -**Critical chart rules:** -- **Scatter Plot**: use config opacity (0.1–1.0) for dense data instead of encoding opacity. -- **Regression**: trend line is automatic — do NOT compute regression coefficients/predictions in Python. Use `color` to get separate trend lines per group. -- **Bar Chart**: x=categorical, y=quantitative (vertical bars). Swap x↔y for horizontal bars. Same-x rows are auto-stacked when `color` is set. -- **Grouped Bar Chart**: use the `group` channel (not `color`) for side-by-side bars. -- **Histogram**: do NOT pre-bin in Python — pass the raw quantitative field on `x` and the chart bins automatically. Pre-aggregating gives wrong bin widths. -- **Line Chart**: use `strokeDash` to differentiate line styles (e.g. actual vs forecast). -- **Pie Chart**: use the `size` channel (not `theta`) for wedge values. Avoid when >7–8 categories. -- **Radar Chart**: data must be long-form — one row per (entity, metric, value). If your data is wide-form (one column per metric), melt it first in the Python step. -- **Bar Table**: y is the category column to rank; x is the quantitative value driving bar length. Don't sort in Python — the template sorts. -- **KPI Card**: channels are `metric`, `value`, `goal` (not x/y). One DataFrame row = one tile. The `value` column must already contain the final number to display (aggregate upstream in the Python step). -- **Candlestick Chart**: requires `open`, `high`, `low`, `close` columns. -- **World Map / US Map**: channel names are `longitude` / `latitude`, not `x` / `y`. 
-- **facet**: available for nearly all chart types; use a low-cardinality categorical field. -- All fields in `encodings` must also appear in `output_fields`. Typically use 2–3 channels (x, y, color/size).''' - - -SHARED_STATISTICAL_ANALYSIS = '''**Statistical analysis guide:** -- **Regression**: use chart_type "Regression" — the trend line is automatic, do NOT compute regression values in Python code. Configure method via `{"regressionMethod": "linear"}` (options: "linear", "log", "exp", "pow", "quad", "poly"; for poly add `{"polyOrder": 3}`). -- **Forecasting**: compute predicted future values in Python. Use Line Chart with strokeDash to distinguish actual vs forecast, and color for series grouping. -- **Clustering**: compute cluster assignments in Python. Output [x, y, cluster_id]. Use Scatter Plot with color → cluster_id.''' - - -SHARED_DUCKDB_NOTES = '''**DuckDB notes:** -- Escape single quotes with '' (not \\') -- No Unicode escapes (\\u0400); use character ranges directly: [а-яА-Я] -- Cast date columns explicitly: `CAST(col AS DATE)`, `CAST(col AS TIMESTAMP)` -- For complex datetime operations, load data first then use pandas datetime functions -- Critical identifier quoting rule: - * If a table/column name contains non-ASCII characters (e.g., Chinese, Japanese, Korean, Cyrillic, etc.), spaces, or punctuation, - you MUST wrap it in double quotes, e.g. SELECT "金额" FROM "客户表". - * Never output placeholder identifiers like your_table_name, your_column, your_condition.''' - - -CHART_CREATION_GUIDE = f"""\ -## Chart Creation Guide - -The following reference material applies when you call the `visualize` tool. - -### A. Code Execution Rules - -{SHARED_ENVIRONMENT} - -{SHARED_DUCKDB_NOTES} - -**Datetime handling:** -- `date` columns contain date-only values (YYYY-MM-DD). `datetime` columns contain date+time (ISO 8601). -- `time` columns contain time-only values (HH:mm:ss). `duration` columns are time intervals. -- Year → number. 
Year-month / year-month-day → string ("2020-01" / "2020-01-01"). -- Hour alone → number. Hour:min or h:m:s → string. Never return raw datetime objects. - -### B. Chart Type Reference - -{SHARED_CHART_REFERENCE} - -### C. Semantic Type Reference - -{SHARED_SEMANTIC_TYPE_REFERENCE} - -### D. Statistical Analysis Guide - -{SHARED_STATISTICAL_ANALYSIS} -""" diff --git a/py-src/data_formulator/agents/client_utils.py b/py-src/data_formulator/agents/client_utils.py index 0069c4b4..efd99d51 100644 --- a/py-src/data_formulator/agents/client_utils.py +++ b/py-src/data_formulator/agents/client_utils.py @@ -1,7 +1,220 @@ +import json import litellm +from types import SimpleNamespace + from azure.identity import DefaultAzureCredential, get_bearer_token_provider +def _synthesize_stream(response): + """Yield LiteLLM-style streaming chunks reconstructed from a *buffered* + response, so a caller that consumes a stream sees the same data. + + Used for Ollama: LiteLLM's Ollama streaming path does not parse native + tool calls (it leaks the call as raw JSON ``content`` with + ``finish_reason='stop'``), whereas the buffered path parses them correctly. + We therefore call Ollama non-streaming and replay the result as a stream. 
+ """ + try: + choice0 = response.choices[0] + message = choice0.message + finish_reason = getattr(choice0, "finish_reason", "stop") or "stop" + except (AttributeError, IndexError): + return + + reasoning = getattr(message, "reasoning_content", None) + if reasoning: + yield SimpleNamespace(choices=[SimpleNamespace( + delta=SimpleNamespace(content=None, tool_calls=None, + reasoning_content=reasoning), + finish_reason=None)]) + + content = getattr(message, "content", None) + if content: + yield SimpleNamespace(choices=[SimpleNamespace( + delta=SimpleNamespace(content=content, tool_calls=None, + reasoning_content=None), + finish_reason=None)]) + + for idx, tc in enumerate(getattr(message, "tool_calls", None) or []): + fn = getattr(tc, "function", None) + yield SimpleNamespace(choices=[SimpleNamespace( + delta=SimpleNamespace( + content=None, reasoning_content=None, + tool_calls=[SimpleNamespace( + index=idx, id=getattr(tc, "id", None) or f"call_{idx}", + function=SimpleNamespace( + name=getattr(fn, "name", None), + arguments=getattr(fn, "arguments", "") or ""))]), + finish_reason=None)]) + + yield SimpleNamespace(choices=[SimpleNamespace( + delta=SimpleNamespace(content=None, tool_calls=None, + reasoning_content=None), + finish_reason=finish_reason)]) + + +def _extract_json_objects(text): + """Return top-level brace-balanced JSON object substrings found in ``text``. + + String-aware (ignores braces inside quoted strings) so it survives code + payloads that contain ``{`` / ``}``. Used to recover an action that a weak + model emitted as plain content instead of a native tool call. 
+ """ + objs = [] + depth = 0 + start = -1 + in_str = False + esc = False + for i, ch in enumerate(text): + if in_str: + if esc: + esc = False + elif ch == "\\": + esc = True + elif ch == '"': + in_str = False + continue + if ch == '"': + in_str = True + elif ch == "{": + if depth == 0: + start = i + depth += 1 + elif ch == "}": + if depth > 0: + depth -= 1 + if depth == 0 and start >= 0: + objs.append(text[start:i + 1]) + start = -1 + return objs + + +def _match_tool_from_obj(obj, tools, _depth=0): + """Map a parsed JSON object to ``(tool_name, arguments_dict)`` if it matches + one of ``tools``' schemas, else ``None``. + + Handles three shapes weak models emit instead of a native tool call: + * nested wrapper — ``{"thought": ..., "action": {"name": "visualize", + "arguments": {...}}}`` (a key points to an object describing the call); + * flat explicit wrapper — ``{"name"/"tool"/"action": "visualize", + "arguments": {...}}`` (the object names the tool directly); + * bare arguments — ``{"code": ..., "output_variable": ..., "chart": ...}`` + (no tool named; keys matched against each tool's ``required`` params, + most specific tool wins). + """ + if not isinstance(obj, dict) or _depth > 4: + return None + + tool_by_name = {} + for t in tools or []: + fn = (t or {}).get("function") or {} + name = fn.get("name") + if name: + tool_by_name[name] = fn + + # Nested wrapper: a key points to an object that itself describes the call + # (e.g. {"action": {"name": "visualize", "arguments": {...}}}). Recurse. + for wrap_key in ("action", "tool", "function", "tool_call", "call", + "function_call"): + inner = obj.get(wrap_key) + if isinstance(inner, dict): + got = _match_tool_from_obj(inner, tools, _depth + 1) + if got is not None: + return got + + # OpenAI tool-call wire format echoed as content: {"tool_calls": [{...}]}. 
+ tc_list = obj.get("tool_calls") + if isinstance(tc_list, list) and tc_list: + got = _match_tool_from_obj(tc_list[0], tools, _depth + 1) + if got is not None: + return got + + # Flat explicit wrapper: the object names the tool as a string. + for name_key in ("name", "tool", "action", "function", "tool_name"): + cand = obj.get(name_key) + if isinstance(cand, str) and cand in tool_by_name: + args = obj.get("arguments") + if isinstance(args, str): + try: + args = json.loads(args) + except (ValueError, TypeError): + args = None + if not isinstance(args, dict): + args = obj.get("parameters") if isinstance(obj.get("parameters"), dict) else None + if not isinstance(args, dict): + args = obj.get("args") if isinstance(obj.get("args"), dict) else None + if not isinstance(args, dict): + args = {k: v for k, v in obj.items() + if k not in (name_key, "arguments", "parameters", "args")} + return cand, args + + # Bare arguments: match by required-key coverage, most specific tool wins. + keys = set(obj.keys()) + best = None + best_score = None + for name, fn in tool_by_name.items(): + params = fn.get("parameters") or {} + required = set(params.get("required") or []) + props = set((params.get("properties") or {}).keys()) + if not required or not required.issubset(keys): + continue + score = (len(required), len(keys & props), -len(keys - props)) + if best_score is None or score > best_score: + best_score, best = score, name + if best is not None: + return best, dict(obj) + return None + + +def _salvage_tool_calls_from_content(response, tools): + """If ``response`` carries an action as JSON *content* but no native + ``tool_calls``, rewrite it into a proper tool call in place. + + Weak / open models under a long system prompt frequently emit the action + (e.g. ``visualize``/``ask_user``) as a JSON object in the assistant content + channel rather than as a native function call. 
This recovers that action so + the agent — which only consumes native ``tool_calls`` — can proceed.""" + if not tools: + return response + try: + choice0 = response.choices[0] + message = choice0.message + except (AttributeError, IndexError): + return response + if getattr(message, "tool_calls", None): + return response + content = getattr(message, "content", None) + if not isinstance(content, str) or "{" not in content: + return response + + for blob in _extract_json_objects(content): + try: + obj = json.loads(blob) + except (ValueError, TypeError): + continue + matched = _match_tool_from_obj(obj, tools) + if matched is None: + continue + name, args = matched + try: + from litellm.types.utils import ChatCompletionMessageToolCall, Function + tc = ChatCompletionMessageToolCall( + function=Function(name=name, arguments=json.dumps(args)), + id="call_salvage_0", type="function") + except Exception: + tc = SimpleNamespace( + id="call_salvage_0", type="function", + function=SimpleNamespace(name=name, arguments=json.dumps(args))) + message.tool_calls = [tc] + message.content = None + try: + choice0.finish_reason = "tool_calls" + except (AttributeError, TypeError): + pass + break + return response + + class Client(object): """ Returns a LiteLLM client configured for the specified endpoint and model. @@ -91,8 +304,14 @@ def _is_reasoning_effort_error(self, error_text: str) -> bool: """Detect provider errors caused by an unsupported ``reasoning_effort`` value (e.g. ``"minimal"`` on a model that only accepts ``none/low/medium/high/xhigh``). The provider message reliably - mentions the parameter name.""" - return "reasoning_effort" in error_text.lower() + mentions the parameter name. + + Also covers Ollama models that lack reasoning support: LiteLLM maps + ``reasoning_effort`` to Ollama's ``think`` flag, and such models reject + it with ``" does not support thinking"``. 
Retrying without + ``reasoning_effort`` (which drops ``think``) lets these models run.""" + lowered = error_text.lower() + return "reasoning_effort" in lowered or "does not support thinking" in lowered @classmethod def from_config(cls, model_config: dict[str, str]): @@ -129,6 +348,27 @@ def ping(self, timeout: int = 10): max_tokens=3, drop_params=True, **params, ) + def _dispatch(self, *, messages, stream, params, tools=None, extra=None): + """Issue the LiteLLM call, transparently handling Ollama streaming. + + Ollama's streaming path in LiteLLM fails to parse native tool calls, so + for Ollama we always call non-streaming and, when the caller asked for a + stream, replay the buffered response as streaming chunks via + ``_synthesize_stream``. All other providers stream natively.""" + is_ollama = self.endpoint == "ollama" + effective_stream = stream and not is_ollama + call_kwargs = dict(model=self.model, messages=messages, + drop_params=True, stream=effective_stream, + **params, **(extra or {})) + if tools is not None: + call_kwargs["tools"] = tools + resp = litellm.completion(**call_kwargs) + if is_ollama and tools: + resp = _salvage_tool_calls_from_content(resp, tools) + if is_ollama and stream: + return _synthesize_stream(resp) + return resp + def get_completion(self, messages, stream=False, reasoning_effort="low", **kwargs): """Send a chat completion request via LiteLLM. 
@@ -142,24 +382,15 @@ def get_completion(self, messages, stream=False, reasoning_effort="low", params["reasoning_effort"] = reasoning_effort params.update(kwargs) try: - return litellm.completion( - model=self.model, messages=messages, - drop_params=True, stream=stream, **params, - ) + return self._dispatch(messages=messages, stream=stream, params=params) except Exception as e: err = str(e) if self._is_reasoning_effort_error(err): params.pop("reasoning_effort", None) - return litellm.completion( - model=self.model, messages=messages, - drop_params=True, stream=stream, **params, - ) + return self._dispatch(messages=messages, stream=stream, params=params) if self._is_image_deserialize_error(err): sanitized = self._strip_images_from_messages(messages) - return litellm.completion( - model=self.model, messages=sanitized, - drop_params=True, stream=stream, **params, - ) + return self._dispatch(messages=sanitized, stream=stream, params=params) raise def get_completion_with_tools(self, messages, tools, stream=False, @@ -172,22 +403,16 @@ def get_completion_with_tools(self, messages, tools, stream=False, params = self.params.copy() params["reasoning_effort"] = reasoning_effort try: - return litellm.completion( - model=self.model, messages=messages, tools=tools, - drop_params=True, stream=stream, **params, **kwargs, - ) + return self._dispatch(messages=messages, stream=stream, + params=params, tools=tools, extra=kwargs) except Exception as e: err = str(e) if self._is_reasoning_effort_error(err): params.pop("reasoning_effort", None) - return litellm.completion( - model=self.model, messages=messages, tools=tools, - drop_params=True, stream=stream, **params, **kwargs, - ) + return self._dispatch(messages=messages, stream=stream, + params=params, tools=tools, extra=kwargs) if self._is_image_deserialize_error(err): sanitized = self._strip_images_from_messages(messages) - return litellm.completion( - model=self.model, messages=sanitized, tools=tools, - drop_params=True, 
stream=stream, **params, **kwargs, - ) + return self._dispatch(messages=sanitized, stream=stream, + params=params, tools=tools, extra=kwargs) raise \ No newline at end of file diff --git a/py-src/data_formulator/agents/data_agent.py b/py-src/data_formulator/agents/data_agent.py deleted file mode 100644 index 8e9cd39a..00000000 --- a/py-src/data_formulator/agents/data_agent.py +++ /dev/null @@ -1,2114 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Hybrid data exploration agent (Option A with tool-calling for data inspection). - -Architecture: - - **Tools** (explore, inspect_source_data): Called via OpenAI tool-calling - API within a single LLM turn. The agent gathers data silently — these - are internal to the agent and not surfaced to the user. - - **Actions** (visualize, clarify, explain, summary, delegate): Structured - JSON output in the LLM's text response. These are externalized to the - user — each one ends the current turn and produces visible output. - -The server-side while loop handles one action per iteration: - 1. Call LLM (with tools) → agent may call tools internally - 2. Parse the structured JSON action from the text response - 3. Execute the action (sandbox, chart assembly, etc.) - 4. Append rich observation to trajectory - 5. 
Repeat or terminate -""" - -import json -import logging -import time -import uuid -from pathlib import Path -from typing import Any, Generator - -import pandas as pd - -from data_formulator.agent_config import reasoning_effort_for -from data_formulator.agents.agent_utils import ( - attach_reasoning_content, - ensure_output_variable_in_code, - extract_json_objects, - generate_data_summary, -) -from data_formulator.agents.context import ( - build_focused_thread_context, - build_lightweight_table_context, - build_peripheral_thread_context, - handle_inspect_source_data, -) -from data_formulator.agents.client_utils import Client -from data_formulator.datalake.parquet_utils import df_to_safe_records -from data_formulator.agents.chart_creation_guide import CHART_CREATION_GUIDE -from data_formulator.security.code_signing import sign_result -from data_formulator.workflows.create_vl_plots import ( - assemble_vegailte_chart, - coerce_field_type, - resolve_field_type, - spec_to_base64, - field_metadata_to_semantic_types, -) - -logger = logging.getLogger(__name__) - -_AGENT_ID = "data_agent" - -# ── Weak-model rescue helpers ───────────────────────────────────────────── -# When a weaker LLM calls visualize/clarify/explain/summary as a tool instead -# of outputting JSON in text, these helpers validate and normalise the args -# so the action can be rescued without wasting rounds. - -_ACTION_REQUIRED_FIELDS: dict[str, list[str]] = { - "visualize": ["code", "output_variable", "chart"], - "clarify": ["questions"], - "explain": ["explanation"], - "summary": ["summary"], - "delegate": ["target", "options"], -} - -# Valid targets for a `delegate` action. -_DELEGATE_TARGETS: tuple[str, ...] = ("data_loading", "report_gen") - - -def _rescue_unpack_json_strings(data: dict) -> None: - """In-place: parse values that are JSON-encoded strings back to objects. - - Weak models sometimes double-serialise nested fields, e.g. 
- ``"chart": "{\\"chart_type\\": \\"Scatter Plot\\"}"`` instead of a dict. - """ - for key in ("chart", "input_tables", "questions", "options", "followups", "field_metadata", "field_display_names"): - val = data.get(key) - if isinstance(val, str) and val.strip()[:1] in ("{", "["): - try: - data[key] = json.loads(val) - except (json.JSONDecodeError, ValueError): - pass - - -def _rescue_validate_action(data: dict) -> list[str]: - """Return list of missing required fields for the action, or [] if valid.""" - action = data.get("action", "") - required = _ACTION_REQUIRED_FIELDS.get(action, []) - return [f for f in required if not data.get(f)] - - -# ── Tool definitions (OpenAI function-calling format) ───────────────────── -# These are internal tools the agent can use freely within a turn to -# gather data before committing to a user-visible action. - -TOOLS = [ - { - "type": "function", - "function": { - "name": "explore", - "description": ( - "Run Python code to inspect data, compute statistics, or verify " - "assumptions. Use print() to see results — stdout is returned. " - "pandas, numpy, duckdb, sklearn, scipy are available." - ), - "parameters": { - "type": "object", - "properties": { - "purpose": { - "type": "string", - "description": "One-sentence description of what this code does and why (shown to user as progress).", - }, - "code": { - "type": "string", - "description": "Python code to execute. Use print() to see output.", - }, - }, - "required": ["purpose", "code"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "inspect_source_data", - "description": ( - "Get a detailed summary of one or more source tables — schema, " - "field-level statistics, and sample rows. Cheaper than explore() " - "for basic data inspection." 
- ), - "parameters": { - "type": "object", - "properties": { - "table_names": { - "type": "array", - "items": {"type": "string"}, - "description": "List of table names from [SOURCE TABLES] to inspect.", - }, - }, - "required": ["table_names"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "search_knowledge", - "description": ( - "Search the user's knowledge base (rules, experiences) " - "for relevant entries. Returns title, category, snippet, and " - "path for each match. Use read_knowledge to get full content." - ), - "parameters": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Search keywords.", - }, - "categories": { - "type": "array", - "items": { - "type": "string", - "enum": ["rules", "experiences"], - }, - "description": "Optional: limit search to specific categories.", - }, - }, - "required": ["query"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "read_knowledge", - "description": ( - "Read the full content of a knowledge entry. Use the category " - "and path from search_knowledge results." - ), - "parameters": { - "type": "object", - "properties": { - "category": { - "type": "string", - "enum": ["rules", "experiences"], - "description": "Knowledge category.", - }, - "path": { - "type": "string", - "description": "Relative path to the knowledge file (from search_knowledge).", - }, - }, - "required": ["category", "path"], - }, - }, - }, -] - - -# ── System prompt ───────────────────────────────────────────────────────── - -SYSTEM_PROMPT = '''\ -You are an autonomous data exploration agent. - -Your goal is to help the user answer their question by creating one or more -data visualizations. You operate in a loop. - -## Tools (internal — for data gathering) - -You have tools you can call to gather data: - -- **explore(code)** — run Python code to inspect data, compute stats, etc. - **Important**: each call runs in a fresh namespace — variables do NOT - persist between calls. 
Combine all related operations (loading, - transforming, printing) into a single explore() call. -- **inspect_source_data(table_names)** — get schema, stats, and sample rows - for source tables (cheaper than explore for basic inspection). -- **search_knowledge(query, categories?)** — search the user's knowledge base - (rules, experiences) for relevant entries. -- **read_knowledge(category, path)** — read the full content of a knowledge entry. - -You analyse data that is **already in the workspace**. If the user's -question requires data that isn't present, do NOT try to find it yourself — -emit a `delegate` action targeting the Data Loading agent and the user -can hand off in one click. - -The initial context already includes sample rows and statistics for each -table. If the data is straightforward, proceed directly to your action -without calling tools. Tool results are returned to you before you -produce your action. Tools are NOT shown to the user. - -## Actions (external — shown to the user) - -After gathering data (or immediately if the data is clear), output -**exactly one action** as a JSON object in your text response. Actions -are shown to the user and end the current turn. - -⚠ **CRITICAL**: `visualize`, `clarify`, `explain`, `summary`, and -`delegate` are **actions**, NOT tools. Never call them via -function/tool calling — they MUST appear as a JSON object in your **text -reply**. Only the items listed in the Tools section above (`explore`, -`inspect_source_data`, `search_knowledge`, `read_knowledge`) may be -invoked as tool calls. - -### `visualize` -```json -{{ - "action": "visualize", - "display_instruction": "<≤12 words. State the question or hypothesis the chart investigates — don't recap the chart spec (x/y/color/split are already visible). Bold a **column** if it anchors the question. 
✗ 'Plotting price over time, split by fuel, to see trends'>", - "input_tables": [""], - "code": "", - "output_variable": "", - "chart": {{ - "chart_type": "", - "encodings": {{"x": "", "y": "", ...}}, - "config": {{}} - }}, - "field_metadata": {{"": "", ...}}, - "field_display_names": {{"": "", ...}} -}} -``` - -### `clarify` -```json -{{ - "action": "clarify", - "questions": [ - {{ - "text": "", - "responseType": "single_choice", - "options": ["", - "followups": [ - "" - ] -}} -``` - -Use `explain` when the user is asking a conceptual / clarifying question -about the data, the schema, the meaning of a field, or any informational -exchange that does **not** require producing a chart right now. Keep the -explanation concise (1–3 sentences). Followups are optional (≤4 items, -≤8 words each) and must be visualization-oriented prompts — clicking one -should lead to a `visualize` action on the next turn. Omit `followups` -entirely if no useful chart-producing follow-ups exist. - -**Column-name emphasis:** in `explain.explanation`, `followups[]`, and -`clarify.questions[].text` / `options[]`, you may wrap a column name in -`**…**` to render it as a highlighted token in the UI. - -### `summary` -```json -{{ - "action": "summary", - "summary": "" -}} -``` - -Use `summary` to end the run after visualization(s) with a one-sentence -closing remark on the key finding. This is the standard close for any -question you answer with charts. - -### `delegate` -```json -{{ - "action": "delegate", - "target": "data_loading" | "report_gen", - "message": "", - "options": [""] -}} -``` - -Use `delegate` to hand off to a peer agent. Each option becomes a one-click -button (the string is both the button label and the seed prompt). Provide -1–2 options; if two, make them meaningfully distinct (e.g. different search -angles, or executive summary vs. deep-dive). - -Valid `target` values: -- **`data_loading`** — the user's question needs data that isn't in the - workspace. 
Options are short search phrases (e.g. `'monthly orders 2024'`). - Prefer `clarify` if the workspace tables might already cover the question. -- **`report_gen`** — the user wants a narrative report or write-up over - the charts already produced. Options restate the report style in one - short sentence. - -## Understanding your context - -{{context_guide}} - -## Decision guidelines - -- **Classify the question first** (silently) to calibrate effort, not as a hard rule: - - *Conceptual / informational* (asking about meaning, schema, what a field represents, why something is the way it is — no chart needed): use `explain`. - - *Concrete* (one specific answer, e.g. "avg price by region", "which sold most"): **1 visualization** → `summary` (one-line takeaway). - - *Progressive* (one question best answered by a small sequence, e.g. "why did revenue drop?", "compare regions"): **2–3 visualizations** → `summary` to tie them together. - - *Open-ended* (explicit exploration, e.g. "explore", "overview", "what's interesting"): **3–5 visualizations** forming a narrative → `summary` to tie them together. - - *Hand-off needed* — use `delegate` as the terminal action when the request is better served by a peer agent: - - *Missing data* (the user's question needs tables not in the workspace): `delegate(target="data_loading")` with a short search phrase as `prompt`. - - *Report request* (e.g. "create a report about X", "write up the findings", "summarize Y as a narrative"): produce any charts the report needs (0–3, judgment-based — if the workspace already has relevant charts you may delegate immediately), then end with `delegate(target="report_gen")`. -- **After each chart**, continue only if the next chart answers a gap *raised* by the previous one — not just another interesting angle. Otherwise close out (`summary`, or `delegate` for hand-off cases) and let the user ask for more. -- If ambiguous, `clarify`. 
-- **Never** repeat a visualization already in the trajectory or in another thread. -- {max_iterations} visualizations is a **hard ceiling**, not a target. - -{agent_exploration_rules} -''' - - -# --------------------------------------------------------------------------- -# Agent -# --------------------------------------------------------------------------- - - -class DataAgent: - """Structured JSON data exploration agent.""" - - def __init__( - self, - client: Client, - workspace, - agent_exploration_rules: str = "", - agent_coding_rules: str = "", - language_instruction: str = "", - max_iterations: int = 5, - max_repair_attempts: int = 2, - identity_id: str | None = None, - ): - self.client = client - self.workspace = workspace - self.agent_exploration_rules = agent_exploration_rules - self.agent_coding_rules = agent_coding_rules - self.language_instruction = language_instruction - self.max_iterations = max_iterations - self.max_repair_attempts = max_repair_attempts - - from data_formulator.agents.reasoning_log import ( - ReasoningLogger, _NullReasoningLogger, - ) - self._session_id = uuid.uuid4().hex[:12] - if identity_id: - try: - self._reasoning_log = ReasoningLogger( - identity_id, "DataAgent", self._session_id, - ) - except Exception: - logger.warning("Failed to initialise ReasoningLogger", exc_info=True) - self._reasoning_log = _NullReasoningLogger() - else: - self._reasoning_log = _NullReasoningLogger() - - self._knowledge_store = None - self._injected_knowledge: list[dict[str, Any]] = [] - self._injected_rules: list[str] = [] - _user_home = getattr(workspace, "user_home", None) - if _user_home: - try: - from data_formulator.knowledge.store import KnowledgeStore - self._knowledge_store = KnowledgeStore(_user_home) - except Exception: - logger.warning("Failed to initialise KnowledgeStore", exc_info=True) - - # ------------------------------------------------------------------ - # Helpers - # 
------------------------------------------------------------------ - - def _explore_ns_dir(self) -> Path: - """Directory for cross-turn namespace serialisation.""" - return self.workspace.confined_scratch.root / "_explore_ns" - - # ------------------------------------------------------------------ - # Public API - # ------------------------------------------------------------------ - - def run( - self, - input_tables: list[dict[str, Any]], - user_question: str, - focused_thread: list[dict[str, Any]] | None = None, - other_threads: list[dict[str, Any]] | None = None, - trajectory: list[dict] | None = None, - completed_step_count: int = 0, - primary_tables: list[str] | None = None, - attached_images: list[str] | None = None, - ) -> Generator[dict[str, Any], None, None]: - """Run the structured exploration loop. - - Yields event dicts with ``type`` in: - ``"action"`` – the agent's chosen action (for UI) - ``"result"`` – a visualization result (data + chart) - ``"explore_result"`` – explore code output - ``"clarify"`` – clarification question (loop pauses) - ``"explain"`` – conversational explanation (loop pauses) - ``"delegate"`` – hand-off to a peer agent (loop terminates) - ``"completion"`` – final summary (loop terminates) - ``"error"`` – error information - """ - rlog = self._reasoning_log - session_start_time = time.time() - total_llm_calls = 0 - completed_steps: list[dict[str, Any]] = [] - iteration = completed_step_count - final_status = "max_iterations" - - try: - rlog.log( - "session_start", - agent="DataAgent", - session_id=self._session_id, - user_question=user_question, - input_tables=[t.get("name", "") for t in input_tables], - model=self.client.model, - rules_injected=[ - r for r in [self.agent_exploration_rules, self.agent_coding_rules] if r - ], - knowledge_injected=[], - ) - - if trajectory is None: - ns_dir = self._explore_ns_dir() - if ns_dir.exists(): - import shutil - shutil.rmtree(ns_dir, ignore_errors=True) - - trajectory = 
self._build_initial_messages( - input_tables, user_question, focused_thread, other_threads, - primary_tables=primary_tables, - attached_images=attached_images, - ) - rlog.log( - "context_built", - system_prompt_tokens=len(trajectory[0].get("content", "")) // 4 if trajectory else 0, - user_msg_tokens=len(str(trajectory[1].get("content", ""))) // 4 if len(trajectory) > 1 else 0, - total_tables=len(input_tables), - primary_tables=primary_tables or [], - knowledge_rules_injected=self._injected_rules, - knowledge_injected=self._injected_knowledge, - ) - - if self._injected_rules or self._injected_knowledge: - yield { - "type": "context_info", - "rules_injected": self._injected_rules, - "knowledge_injected": [ - {"category": k["category"], "title": k["title"]} - for k in self._injected_knowledge - ], - } - - action_retry_budget = 1 # one extra chance when the LLM fails to produce an action - - while iteration < self.max_iterations: - iteration += 1 - - # --- THINK: call LLM with tools, get action --------------- - t_start = time.time() - action = None - action_reason = "ok" - action_error = "" - for event in self._get_next_action(trajectory, input_tables, outer_iteration=iteration): - if event.get("type") == "agent_action": - action = event.get("action_data") - action_reason = event.get("reason", "ok") - action_error = event.get("error_message", "") - total_llm_calls += event.get("llm_calls", 0) - else: - yield event - logger.info("[DataAgent] iteration %d total=%.2fs reason=%s", - iteration, time.time() - t_start, action_reason) - - if action is None: - # ① tool rounds exhausted → pause and let the user decide - if action_reason == "tool_rounds_exhausted": - steps_desc = "\n".join( - f" • {s['display_instruction']}" for s in completed_steps - ) or "(none yet)" - final_status = "clarify_exhausted" - yield { - "type": "clarify", - "iteration": iteration, - "thought": "", - "questions": [ - { - "text": ( - "I've been exploring extensively but haven't reached " - "a 
conclusion yet.\n\nCompleted steps so far:\n" - f"{steps_desc}\n\n" - "How would you like to proceed?" - ), - "text_code": "agent.clarifyExhausted", - "text_params": {"steps": steps_desc}, - "responseType": "single_choice", - "required": True, - "options": [ - { - "label": "Continue exploring", - "label_code": "agent.clarifyOptionContinue", - }, - { - "label": "Simplify the task", - "label_code": "agent.clarifyOptionSimplify", - }, - { - "label": "Summarize what you have so far", - "label_code": "agent.clarifyOptionSummary", - }, - ], - } - ], - "trajectory": self._strip_images(trajectory), - "completed_step_count": len(completed_steps), - } - self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time) - return - - # ② LLM API error (already retried in _call_llm) → fatal - if action_reason == "llm_error": - final_status = "llm_error" - yield self._error_event( - iteration, - action_error or "LLM API error", - message_code="agent.llmApiError", - ) - break - - # ③ json_parse_failed or unknown → retry once with context - if action_retry_budget > 0: - action_retry_budget -= 1 - logger.info("[DataAgent] action=None (reason=%s), retrying " - "(%d retries left)", action_reason, action_retry_budget) - steps_summary = "\n".join( - f" - Step {i + 1}: {s['display_instruction']}" - for i, s in enumerate(completed_steps) - ) or " (no completed steps)" - trajectory.append({ - "role": "user", - "content": ( - "[SYSTEM] Your previous response could not be parsed. " - "Here is what was already completed:\n" - f"{steps_summary}\n\n" - "Please output a JSON action object " - "(visualize / clarify / explain / summary / delegate) " - "to continue." 
- ), - }) - continue - - final_status = "parse_failed" - yield self._error_event( - iteration, - action_error or "Failed to parse agent action from LLM response", - message_code="agent.parseActionFailed", - ) - break - - action_type = action.get("action") - logger.info(f"[DataAgent] Iteration {iteration}: action={action_type}") - - # --- ACT (only user-visible actions reach here) -------- - if action_type == "clarify": - rlog.log("action_execution", action="clarify", status="ok", - iteration=iteration) - final_status = "clarify" - try: - clarify_payload = self._normalize_clarify_action(action) - except ValueError: - final_status = "parse_failed" - yield self._error_event( - iteration, - "Clarify action requires non-empty questions.", - message_code="agent.parseActionFailed", - ) - self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time) - return - yield { - "type": "clarify", - "iteration": iteration, - "thought": action.get("thought", ""), - **clarify_payload, - "trajectory": self._strip_images(trajectory), - "completed_step_count": len(completed_steps), - } - self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time) - return - - elif action_type == "explain": - rlog.log("action_execution", action="explain", status="ok", - iteration=iteration) - final_status = "explain" - try: - explain_payload = self._normalize_explain_action(action) - except ValueError: - final_status = "parse_failed" - yield self._error_event( - iteration, - "Explain action requires a non-empty explanation.", - message_code="agent.parseActionFailed", - ) - self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time) - return - yield { - "type": "explain", - "iteration": iteration, - "thought": action.get("thought", ""), - **explain_payload, - "trajectory": self._strip_images(trajectory), - "completed_step_count": len(completed_steps), - } - self._log_session_end(rlog, final_status, iteration, 
total_llm_calls, session_start_time) - return - - elif action_type == "summary": - rlog.log("action_execution", action="summary", status="ok", - iteration=iteration, total_steps=len(completed_steps)) - final_status = "success" - yield { - "type": "completion", - "iteration": iteration, - "status": "success", - "content": { - "thought": action.get("thought", ""), - "summary": action.get("summary", ""), - "total_steps": len(completed_steps), - }, - } - self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time) - return - - elif action_type == "delegate": - rlog.log("action_execution", action="delegate", status="ok", - iteration=iteration) - final_status = "delegate" - try: - delegate_payload = self._normalize_delegate_action(action) - except ValueError as exc: - final_status = "parse_failed" - yield self._error_event( - iteration, - str(exc) or "delegate action requires non-empty target and options.", - message_code="agent.parseActionFailed", - ) - self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time) - return - yield { - "type": "delegate", - "iteration": iteration, - "thought": action.get("thought", ""), - **delegate_payload, - "trajectory": self._strip_images(trajectory), - "completed_step_count": len(completed_steps), - } - self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time) - return - - elif action_type == "visualize": - code = action.get("code", "") - output_variable = action.get("output_variable", "result_df") - chart_spec = action.get("chart", {}) - field_metadata = action.get("field_metadata", {}) - field_display_names = action.get("field_display_names", {}) - display_instruction = action.get("display_instruction", "") - - yield { - "type": "action", - "iteration": iteration, - "action": "visualize", - "thought": action.get("thought", ""), - "display_instruction": display_instruction, - "input_tables": action.get("input_tables", []), - } - - viz_result = 
self._execute_visualize( - code=code, - output_variable=output_variable, - chart_spec=chart_spec, - field_metadata=field_metadata, - field_display_names=field_display_names, - display_instruction=display_instruction, - input_tables=input_tables, - messages=trajectory, - outer_iteration=iteration, - ) - total_llm_calls += viz_result.get("repair_llm_calls", 0) - - if viz_result["status"] != "ok": - error_msg = viz_result.get("error_message", "Unknown error") - rlog.log("action_execution", action="visualize", status="error", - iteration=iteration, error=error_msg) - observation = f"[OBSERVATION – Step {len(completed_steps) + 1} FAILED]\n\nError: {error_msg}" - trajectory.append({"role": "user", "content": observation}) - yield self._error_event(iteration, error_msg, display_instruction=display_instruction) - continue - - transform_result = viz_result["transform_result"] - sign_result(transform_result) - transformed_data = transform_result["content"] - output_rows = len(transformed_data.get("rows", [])) - chart_type = chart_spec.get("chart_type", "") - rlog.log("action_execution", action="visualize", status="ok", - iteration=iteration, output_rows=output_rows, - chart_type=chart_type) - - completed_steps.append({ - "display_instruction": display_instruction, - "code": transform_result.get("code", ""), - }) - - yield { - "type": "result", - "iteration": iteration, - "status": "success", - "content": { - "question": display_instruction, - "result": transform_result, - }, - } - - observation_msg = self._format_observation( - step_index=len(completed_steps), - display_instruction=display_instruction, - thought=action.get("thought", ""), - code=transform_result.get("code", ""), - data=transformed_data, - chart_image=None, - ) - trajectory.append(observation_msg) - - else: - trajectory.append({ - "role": "user", - "content": ( - f"[ERROR] Unknown action '{action_type}'. " - "Please choose one of: visualize, clarify, explain, summary, delegate." 
- ), - }) - yield self._error_event(iteration, f"Unknown action: {action_type}", message_code="agent.unknownAction") - - # Exhausted max iterations (or break from error) - self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time) - if final_status == "max_iterations": - yield { - "type": "completion", - "iteration": iteration, - "status": "max_iterations", - "content": { - "summary": "Reached the maximum number of exploration steps.", - "summary_code": "agent.maxIterationsSummary", - "total_steps": len(completed_steps), - }, - } - finally: - rlog.close() - - @classmethod - def _sanitize_clarification_options(cls, raw_options: Any) -> list[dict[str, Any]]: - """Normalize clarify/explain option payloads. - - Accepts either bare strings (the new simplified shape) or - ``{label, label_code?}`` dicts (legacy). Output is always a list of - ``{label, label_code?}`` dicts — no ids. Position in the list is the - only stable handle, used by the response payload. - """ - if not isinstance(raw_options, list): - return [] - - options: list[dict[str, Any]] = [] - for raw_option in raw_options[:6]: - if isinstance(raw_option, str): - label = raw_option.strip() - label_code = "" - elif isinstance(raw_option, dict): - label = str(raw_option.get("label", "")).strip() - label_code = str(raw_option.get("label_code", "")).strip() - else: - continue - - if not label and not label_code: - continue - - option: dict[str, Any] = {} - if label: - option["label"] = label - if label_code: - option["label_code"] = label_code - options.append(option) - - return options - - @classmethod - def _sanitize_clarification_questions(cls, raw_questions: Any) -> list[dict[str, Any]]: - if not isinstance(raw_questions, list): - return [] - - questions: list[dict[str, Any]] = [] - for raw_question in raw_questions[:3]: - if not isinstance(raw_question, dict): - continue - - text = str(raw_question.get("text", "")).strip() - text_code = str(raw_question.get("text_code", 
"")).strip() - if not text and not text_code: - continue - - options = cls._sanitize_clarification_options(raw_question.get("options")) - response_type = raw_question.get("responseType") or raw_question.get("response_type") - if response_type not in ("single_choice", "free_text"): - response_type = "single_choice" if options else "free_text" - - question: dict[str, Any] = { - "responseType": response_type, - "required": bool(raw_question.get("required", True)), - } - if text: - question["text"] = text - if text_code: - question["text_code"] = text_code - if isinstance(raw_question.get("text_params"), dict): - question["text_params"] = raw_question["text_params"] - if options: - question["options"] = options - questions.append(question) - - return questions - - @classmethod - def _normalize_clarify_action(cls, action: dict[str, Any]) -> dict[str, Any]: - questions = cls._sanitize_clarification_questions(action.get("questions")) - if not questions: - raise ValueError("clarify action requires non-empty questions[]") - return {"questions": questions} - - @classmethod - def _normalize_explain_action(cls, action: dict[str, Any]) -> dict[str, Any]: - """Normalize an explain action into the same shape as clarify. - - The frontend reuses the clarify pipeline (one question whose ``text`` - is the explanation and whose ``options`` are clickable followups), so - we emit a ``questions[]`` payload here. The followups are optional - visualization-leading prompts; clicking one is equivalent to typing - that prompt as the next user message. 
- """ - explanation = str(action.get("explanation", "")).strip() - if not explanation: - raise ValueError("explain action requires a non-empty 'explanation'") - - options = cls._sanitize_clarification_options(action.get("followups")) - question: dict[str, Any] = { - "text": explanation, - "responseType": "single_choice", - "required": False, - } - if options: - question["options"] = options - return {"questions": [question]} - - @classmethod - def _normalize_delegate_action(cls, action: dict[str, Any]) -> dict[str, Any]: - """Normalize a delegate action. - - The agent emits this when it wants to hand off to a peer agent - (e.g. the Data Loading agent when the workspace lacks needed - data, or the Report Gen agent when the user wants a written - report). The frontend renders each option as a one-click - handoff card. - - Shape: ``{target, message?, options: [str, ...]}`` with 1–2 - options. - """ - target = str(action.get("target", "")).strip() - if target not in _DELEGATE_TARGETS: - raise ValueError( - f"delegate action requires 'target' ∈ {_DELEGATE_TARGETS}, got {target!r}" - ) - - message = str(action.get("message") or "").strip() - - raw_options = action.get("options") - cleaned: list[str] = [] - if isinstance(raw_options, list): - for opt in raw_options: - if isinstance(opt, str): - text = opt.strip() - if text: - cleaned.append(text) - - if not cleaned: - raise ValueError("delegate action requires non-empty 'options[]'") - - # Cap at 2 — keep the user choice cognitively light. 
- cleaned = cleaned[:2] - - payload: dict[str, Any] = {"target": target, "options": cleaned} - if message: - payload["message"] = message - return payload - - # ------------------------------------------------------------------ - # Visualize execution (with repair) - # ------------------------------------------------------------------ - - def _execute_visualize( - self, - code: str, - output_variable: str, - chart_spec: dict, - field_metadata: dict, - field_display_names: dict, - display_instruction: str, - input_tables: list[dict[str, Any]], - messages: list[dict], - outer_iteration: int = 0, - ) -> dict[str, Any]: - """Execute a visualize action with repair retries. - - Returns a dict with at least ``status`` and, on success, - ``transform_result``. Also includes ``repair_llm_calls`` — - the number of LLM calls made during repair attempts so that - the caller can accumulate them into ``total_llm_calls``. - """ - viz_result = self._run_visualize_code( - code=code, - output_variable=output_variable, - chart_spec=chart_spec, - field_metadata=field_metadata, - field_display_names=field_display_names, - display_instruction=display_instruction, - messages=messages, - ) - - rlog = self._reasoning_log - repair_llm_calls = 0 - attempt = 0 - while viz_result["status"] != "ok" and attempt < self.max_repair_attempts: - attempt += 1 - error_msg = viz_result.get("error_message", "Unknown error") - logger.warning(f"[DataAgent] Repair attempt {attempt}/{self.max_repair_attempts}: {error_msg}") - - # Mutate the canonical `messages` list so the dialog snapshot - # captures the repair turn just like any other tool round. - # The agent therefore sees one continuous conversation across - # the original visualize and any repairs, not a forked copy. - messages.append({ - "role": "user", - "content": ( - f"[CODE ERROR]\n\n{error_msg}\n\n" - "Please fix the code and output a new visualize action." 
- ), - }) - repair_action = None - for evt in self._get_next_action( - messages, input_tables, - outer_iteration=outer_iteration, - ): - if evt.get("type") == "agent_action": - repair_action = evt.get("action_data") - repair_llm_calls += evt.get("llm_calls", 0) - if repair_action and repair_action.get("action") == "visualize": - viz_result = self._run_visualize_code( - code=repair_action.get("code", code), - output_variable=repair_action.get("output_variable", output_variable), - chart_spec=repair_action.get("chart", chart_spec), - field_metadata=repair_action.get("field_metadata", field_metadata), - field_display_names=repair_action.get("field_display_names", field_display_names), - display_instruction=display_instruction, - messages=messages, - ) - rlog.log("repair_attempt", attempt=attempt, - original_error=error_msg[:200], - status=viz_result["status"]) - else: - rlog.log("repair_attempt", attempt=attempt, - original_error=error_msg[:200], - status="repair_failed") - break - - viz_result["repair_llm_calls"] = repair_llm_calls - return viz_result - - def _run_explore_code( - self, - code: str, - input_tables: list[dict[str, Any]], - ) -> dict[str, Any]: - """Run explore code in sandbox, capturing stdout. - - When called inside ``_get_next_action``, uses the shared - ``SandboxSession`` so that variables persist across calls. - Falls back to a one-shot subprocess otherwise. 
- """ - # Wrap code: capture stdout - capture_code = ( - "import io as _io, sys as _sys, pandas as _pd\n" - "_old_stdout = _sys.stdout\n" - "_sys.stdout = _captured = _io.StringIO()\n" - "\n" - f"{code}\n" - "\n" - "_sys.stdout = _old_stdout\n" - "_pack = {\n" - " 'stdout': _captured.getvalue(),\n" - "}\n" - ) - - try: - with self.workspace.local_dir() as local_path: - import os as _os - workspace_path = _os.path.abspath(str(local_path)) - allowed_objects = {"_pack": None} - - session = getattr(self, "_explore_session", None) - if session is not None: - raw = session.execute(capture_code, allowed_objects, workspace_path) - else: - from data_formulator.sandbox import create_sandbox - try: - from flask import current_app - sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local') - except (ImportError, RuntimeError): - sandbox_mode = 'local' - sandbox = create_sandbox(sandbox_mode) - raw = sandbox._run_in_warm_subprocess( - capture_code, allowed_objects, workspace_path - ) - - if raw.get("status") == "ok": - allowed = raw.get("allowed_objects") or {} - if not isinstance(allowed, dict): - allowed = {} - pack = allowed.get("_pack", {}) - stdout = pack.get("stdout", "") if isinstance(pack, dict) else "" - if not isinstance(stdout, str): - stdout = str(stdout) - if len(stdout) > 8000: - stdout = stdout[:8000] + "\n... 
(truncated)" - return {"status": "ok", "stdout": stdout} - else: - return { - "status": "error", - "error": raw.get("error_message", raw.get("content", "Unknown error")), - "stdout": "", - } - except Exception as e: - logger.error("[DataAgent] Sandbox execution error", exc_info=e) - return {"status": "error", "error": "Code execution failed", "stdout": ""} - - def _run_visualize_code( - self, - code: str, - output_variable: str, - chart_spec: dict, - field_metadata: dict, - field_display_names: dict, - display_instruction: str, - messages: list[dict] | None = None, - ) -> dict[str, Any]: - """Run visualize code in sandbox and assemble chart.""" - from data_formulator.sandbox import create_sandbox - - try: - from flask import current_app - sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local') - max_display_rows = current_app.config['CLI_ARGS'].get('max_display_rows', 5000) - except (ImportError, RuntimeError): - sandbox_mode = 'local' - max_display_rows = 5000 - - # Patch output_variable if needed - code, was_patched, detected_var = ensure_output_variable_in_code(code, output_variable) - if was_patched: - logger.info(f"[DataAgent] patched output_variable: {output_variable} = {detected_var}") - - sandbox = create_sandbox(sandbox_mode) - - try: - execution_result = sandbox.run_python_code( - code=code, - workspace=self.workspace, - output_variable=output_variable, - ) - - if execution_result['status'] != 'ok': - error_message = execution_result.get('content', 'Unknown error') - return {"status": "error", "error_message": str(error_message)} - - full_df = execution_result['content'] - row_count = len(full_df) - - # Validate that all encoding fields exist in the output DataFrame - chart_encodings = chart_spec.get("encodings", {}) - missing_fields = [ - f"{channel}: '{field}'" - for channel, field in chart_encodings.items() - if field and field not in full_df.columns - ] - if missing_fields: - available = list(full_df.columns) - return { - 
"status": "error", - "error_message": ( - f"Chart encoding fields not found in output DataFrame: " - f"{', '.join(missing_fields)}. " - f"Available columns: {available}" - ), - "error_code": "agent.fieldsNotFound", - "error_params": { - "missing": ", ".join(missing_fields), - "available": str(available), - }, - } - - if row_count == 0: - return { - "status": "error", - "error_message": "Output DataFrame is empty (0 rows). Check filters or data loading.", - "error_code": "agent.emptyDataframe", - } - - output_table_name = self.workspace.get_fresh_name(f"d-{output_variable}") - self.workspace.write_parquet(full_df, output_table_name) - - if row_count > max_display_rows: - query_output = full_df.head(max_display_rows) - else: - query_output = full_df - query_output = query_output.loc[:, ~query_output.columns.duplicated()] - - # Skip chart image generation for agent observation (avoids rendering - # discrepancy between server-side matplotlib and frontend Vega-Lite). - # User-submitted images (attached_images) and focused thread chart - # thumbnails (rendered by the frontend) are still passed through. 
- - # Build refined_goal for frontend compatibility - refined_goal = { - "display_instruction": display_instruction, - "output_variable": output_variable, - "output_fields": list(query_output.columns), - "chart": chart_spec, - "field_metadata": field_metadata, - "field_display_names": field_display_names or {}, - } - - transform_result = { - "status": "ok", - "code": code, - "content": { - "rows": df_to_safe_records(query_output), - "virtual": { - "table_name": output_table_name, - "row_count": row_count, - }, - }, - "refined_goal": refined_goal, - "dialog": self._snapshot_dialog(messages), - "agent": "DataAgent", - } - - return { - "status": "ok", - "transform_result": transform_result, - } - - except Exception as e: - logger.error("[DataAgent] Visualize execution error", exc_info=e) - return {"status": "error", "error_message": "Visualization execution failed"} - - def _create_chart( - self, - data: dict[str, Any], - chart_spec: dict[str, Any], - field_metadata: dict[str, Any] | None = None, - ) -> str | None: - """Create a chart and return a base64 PNG string for observation feedback.""" - chart_type = chart_spec.get("chart_type", "Bar Chart") - chart_encodings = chart_spec.get("encodings", {}) - chart_config = chart_spec.get("config", {}) - - try: - df = pd.DataFrame(data["rows"]) - if df.empty: - return None - - encodings = {} - for channel, field in chart_encodings.items(): - if field and field in df.columns: - field_type = resolve_field_type(df[field], field) - field_type = coerce_field_type(chart_type, channel, field_type) - encodings[channel] = {"field": field, "type": field_type} - - spec = assemble_vegailte_chart( - df, chart_type, encodings, config=chart_config, - semantic_types=field_metadata_to_semantic_types(field_metadata), - ) - return spec_to_base64(spec) if spec else None - except Exception as e: - logger.error(f"[DataAgent] Chart creation error: {e}") - return None - - # ------------------------------------------------------------------ - # 
Message construction - # ------------------------------------------------------------------ - - def _build_system_prompt( - self, - has_primary_tables: bool = False, - has_focused_thread: bool = False, - has_other_threads: bool = False, - has_attached_images: bool = False, - ) -> str: - rules_block = "" - if self.agent_exploration_rules and self.agent_exploration_rules.strip(): - rules_block = ( - "\n## Additional exploration rules\n\n" - + self.agent_exploration_rules.strip() - + "\n\nPlease follow the above rules when exploring data." - ) - - # Build context guide dynamically based on what's actually present - context_lines = [] - if has_primary_tables: - context_lines.append( - "- **[PRIMARY TABLE(S)]**: The table(s) the user is focused on. " - "Prioritize these, but freely use other available tables if needed." - ) - context_lines.append( - "- **[OTHER AVAILABLE TABLES]**: Additional tables in the workspace." - ) - else: - context_lines.append( - "- **[AVAILABLE TABLES]**: All tables in the workspace." - ) - context_lines.append( - " Use `inspect_source_data` to get detailed stats and sample rows. " - "Use `explore` for custom computations." - ) - if has_focused_thread: - context_lines.append( - "- **[FOCUSED THREAD]**: The thread the user is continuing. " - "Build on this — do not repeat visualizations already created here." - ) - if has_other_threads: - context_lines.append( - "- **[OTHER THREADS]**: Brief summaries of other exploration threads in this workspace. " - ) - if has_attached_images: - context_lines.append( - "- **[USER ATTACHMENT(S)]**: Image(s) provided by the user. " - "Refer to these when relevant to the user's question." - ) - context_guide = "\n".join(context_lines) - - prompt = SYSTEM_PROMPT.format( - max_iterations=self.max_iterations, - agent_exploration_rules=rules_block, - context_guide=context_guide, - ) - - # Inject alwaysApply rules RIGHT AFTER the core prompt, BEFORE - # technical reference material (chart guide, coding rules). 
- # This placement ensures the LLM sees user rules early, while - # they are still in the high-attention window. - if self._knowledge_store: - knowledge_rules = self._knowledge_store.load_always_apply_rules() - self._injected_rules = [r["title"] for r in knowledge_rules] - prompt += self._knowledge_store.format_rules_block(knowledge_rules) - else: - self._injected_rules = [] - - # Append technical reference material after user rules - prompt += "\n\n" + CHART_CREATION_GUIDE - if self.agent_coding_rules and self.agent_coding_rules.strip(): - prompt += ( - "\n\n## Agent Coding Rules\n\n" - + self.agent_coding_rules.strip() - ) - - if self.language_instruction: - prompt = prompt + "\n\n" + self.language_instruction - return prompt - - def _build_initial_messages( - self, - input_tables: list[dict[str, Any]], - user_question: str, - focused_thread: list[dict[str, Any]] | None = None, - other_threads: list[dict[str, Any]] | None = None, - primary_tables: list[str] | None = None, - attached_images: list[str] | None = None, - ) -> list[dict]: - """Build the initial messages with 3-tier context. 
- - Tier 1: Source tables (lightweight — column names + types + row count) - Tier 2: Focused thread (detailed — per-step interaction history) - Tier 3: Peripheral threads (minimal — one-line per step) - """ - # Tier 1: Always lightweight schema — agent uses inspect_source_data - # tool for details on tables it needs - table_summaries = self._build_lightweight_table_context(input_tables, primary_tables=primary_tables) - - # Tier 2: Focused thread (detailed) - focused_block = "" - if focused_thread: - focused_block = self._build_focused_thread_context(focused_thread) - - # Tier 3: Peripheral threads (minimal) - peripheral_block = "" - if other_threads: - peripheral_block = self._build_peripheral_thread_context(other_threads) - - # Use [SOURCE TABLES] when no tiering, omit section header when tiered - # (the tiers already have their own headers) - if primary_tables: - user_content = f"{table_summaries}\n\n" - else: - user_content = f"[AVAILABLE TABLES]\n\n{table_summaries}\n\n" - if focused_block: - user_content += f"{focused_block}\n\n" - if peripheral_block: - user_content += f"{peripheral_block}\n\n" - - # Search and inject relevant knowledge (experiences + non-alwaysApply rules) - table_names = [t.get("name", "") for t in input_tables if t.get("name")] - relevant_knowledge = self._search_relevant_knowledge(user_question, table_names) - - # Always include the experience distilled from the active workspace - # (design-docs/24 §3.6) so the session has stable working memory - # across turns regardless of search relevance. 
- session_exp = self._load_active_session_experience() - if session_exp: - existing_paths = { - (item["category"], item["path"]) for item in relevant_knowledge - } - if (session_exp["category"], session_exp["path"]) not in existing_paths: - relevant_knowledge = [session_exp] + relevant_knowledge - - if relevant_knowledge: - knowledge_block = "[RELEVANT KNOWLEDGE]\n" - for item in relevant_knowledge: - label = "rule" if item["category"] == "rules" else "knowledge" - knowledge_block += ( - f"\n### [{label}] {item['title']}\n" - f"{item['snippet']}\n" - ) - user_content += f"{knowledge_block}\n\n" - self._injected_knowledge = [ - {"category": item["category"], "title": item["title"], "path": item["path"]} - for item in relevant_knowledge - ] - else: - self._injected_knowledge = [] - - self._reasoning_log.log( - "knowledge_search", - source="auto_inject", - query=user_question, - table_names=table_names, - results_count=len(relevant_knowledge), - results=[ - {"category": item["category"], "title": item["title"]} - for item in relevant_knowledge - ], - ) - - # Inject alwaysApply rules into user message for better visibility - # (rules in system prompt are often ignored; rules in user message have higher impact) - if self._knowledge_store: - always_apply_rules = self._knowledge_store.load_always_apply_rules() - if always_apply_rules: - rules_text = "\n\n".join([f"### {r['title']}\n{r['body']}" for r in always_apply_rules]) - user_content += f"[USER RULES - MUST FOLLOW]\n\n{rules_text}\n\n" - - user_content += f"[USER QUESTION]\n\n{user_question}" - - # Check if any step in the focused thread has a chart thumbnail - # (the focused leaf's chart image for visual context) - chart_thumbnail = None - if focused_thread: - for step in focused_thread: - if step.get("chart_thumbnail"): - chart_thumbnail = step["chart_thumbnail"] - - # Build system prompt with context-aware guide - system_prompt = self._build_system_prompt( - has_primary_tables=bool(primary_tables), - 
has_focused_thread=bool(focused_thread), - has_other_threads=bool(other_threads), - has_attached_images=bool(attached_images), - ) - - # Determine if we need multimodal content (chart thumbnail or user-attached images) - has_images = (chart_thumbnail and chart_thumbnail.startswith("data:")) or (attached_images and len(attached_images) > 0) - - if has_images: - content_parts: list[dict] = [{"type": "text", "text": user_content}] - if chart_thumbnail and chart_thumbnail.startswith("data:"): - content_parts.append({"type": "text", "text": "\n[CURRENT CHART] (the chart the user is currently viewing):"}) - content_parts.append({"type": "image_url", "image_url": {"url": chart_thumbnail, "detail": "low"}}) - if attached_images: - label = "[USER ATTACHMENT]" if len(attached_images) == 1 else "[USER ATTACHMENTS]" - content_parts.append({"type": "text", "text": f"\n{label} (image(s) provided by the user):"}) - for img in attached_images: - if img.startswith("data:"): - content_parts.append({"type": "image_url", "image_url": {"url": img, "detail": "low"}}) - return [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": content_parts}, - ] - else: - return [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_content}, - ] - - def _build_focused_thread_context( - self, focused_thread: list[dict[str, Any]] - ) -> str: - return build_focused_thread_context(focused_thread) - - def _build_peripheral_thread_context( - self, other_threads: list[dict[str, Any]] - ) -> str: - return build_peripheral_thread_context(other_threads) - - def _build_lightweight_table_context( - self, input_tables: list[dict[str, Any]], primary_tables: list[str] | None = None - ) -> str: - return build_lightweight_table_context( - input_tables, - self.workspace, - primary_tables, - ) - - # ------------------------------------------------------------------ - # LLM interaction (with internal tool-calling loop) - # 
------------------------------------------------------------------ - - def _get_next_action( - self, - trajectory: list[dict], - input_tables: list[dict[str, Any]] | None = None, - outer_iteration: int = 0, - ) -> Generator[dict[str, Any], None, None]: - """Call the LLM with tools, handle tool calls internally, then - parse the structured JSON action from the text response. - - Yields: - - ``{"type": "tool_start", "tool": ..., ...}`` for each tool call - - ``{"type": "tool_result", "tool": ..., ...}`` for each tool result - - ``{"type": "agent_action", "action_data": dict, "reason": ..., - "llm_calls": int}`` as the final yield. - ``action_data`` is *None* on failure; - ``reason`` is one of ``"ok"``, ``"json_parse_failed"``, - ``"llm_error"``, ``"tool_rounds_exhausted"``. - ``llm_calls`` is the number of LLM calls made in this cycle. - """ - max_tool_rounds = 12 - max_json_retries = 1 - json_retries = 0 - messages = trajectory - llm_calls_in_cycle = 0 - - rlog = self._reasoning_log - - from data_formulator.sandbox.local_sandbox import SandboxSession - ns_dir = self._explore_ns_dir() - ws_path = str(self.workspace.confined_scratch.root.parent) - - with SandboxSession() as explore_session: - self._explore_session = explore_session - - if ns_dir.exists(): - ok = SandboxSession.restore_namespace(explore_session, ns_dir, ws_path) - if ok: - logger.info("[DataAgent] Restored explore namespace from %s", ns_dir) - import shutil - shutil.rmtree(ns_dir, ignore_errors=True) - - self._tool_loop_exit_reason = None - yield from self._tool_loop( - messages, max_tool_rounds, max_json_retries, json_retries, - llm_calls_in_cycle, rlog, input_tables, outer_iteration, - ) - - if self._tool_loop_exit_reason == "tool_rounds_exhausted": - saved = explore_session.save_namespace(ns_dir, ws_path) - if saved: - logger.info("[DataAgent] Saved explore namespace to %s", ns_dir) - - self._explore_session = None - - def _tool_loop( - self, - messages, max_tool_rounds, max_json_retries, 
json_retries, - llm_calls_in_cycle, rlog, input_tables, outer_iteration, - ): - """Inner tool-calling loop, extracted so _get_next_action can wrap - it in a SandboxSession context manager.""" - for round_idx in range(max_tool_rounds): - llm_calls_in_cycle += 1 - rlog.log("llm_request", iteration=outer_iteration, - round=round_idx + 1, - messages_count=len(messages), - tools_available=[t["function"]["name"] for t in TOOLS]) - llm_t0 = time.time() - try: - response = self._call_llm(messages) - except Exception as exc: - llm_latency = int((time.time() - llm_t0) * 1000) - rlog.log("llm_response", iteration=outer_iteration, - round=round_idx + 1, - latency_ms=llm_latency, finish_reason="error", - error=type(exc).__name__) - logger.error("[DataAgent] LLM call failed", exc_info=exc) - from data_formulator.security.sanitize import classify_llm_error - yield { - "type": "agent_action", - "action_data": None, - "reason": "llm_error", - "error_message": classify_llm_error(exc), - "llm_calls": llm_calls_in_cycle, - } - return - - llm_latency = int((time.time() - llm_t0) * 1000) - - if not response.choices: - rlog.log("llm_response", iteration=outer_iteration, - round=round_idx + 1, - latency_ms=llm_latency, finish_reason="empty") - yield {"type": "agent_action", "action_data": None, "reason": "llm_error", - "error_message": "LLM returned empty response", - "llm_calls": llm_calls_in_cycle} - return - - choice = response.choices[0] - content = choice.message.content or "" - tool_calls = getattr(choice.message, 'tool_calls', None) - finish_reason = getattr(choice, "finish_reason", "stop") - - if tool_calls: - rlog.log("llm_response", iteration=outer_iteration, - round=round_idx + 1, - latency_ms=llm_latency, finish_reason="tool_calls", - tool_calls=[{"name": tc.function.name} for tc in tool_calls]) - else: - rlog.log("llm_response", iteration=outer_iteration, - round=round_idx + 1, - latency_ms=llm_latency, finish_reason=finish_reason) - - # --- tool calls: execute and loop back 
--- - if tool_calls: - if content.strip(): - yield {"type": "thinking_text", "content": content.strip()} - - assistant_msg: dict[str, Any] = { - "role": "assistant", - "content": content or None, - } - attach_reasoning_content(assistant_msg, choice.message) - assistant_msg["tool_calls"] = [ - { - "id": tc.id, - "type": "function", - "function": { - "name": tc.function.name, - "arguments": tc.function.arguments, - }, - } - for tc in tool_calls - ] - messages.append(assistant_msg) - - for tc in tool_calls: - tool_name = tc.function.name - try: - tool_args = json.loads(tc.function.arguments) - except json.JSONDecodeError: - tool_args = {} - - yield { - "type": "tool_start", - "tool": tool_name, - "purpose": tool_args.get("purpose") if tool_name == "explore" else None, - "code": tool_args.get("code") if tool_name == "explore" else None, - "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None, - "query": tool_args.get("query") if tool_name == "search_knowledge" else None, - } - - tool_t0 = time.time() - tool_status = "ok" - - if tool_name == "explore": - result = self._run_explore_code( - tool_args.get("code", ""), - input_tables or [], - ) - tool_content = result.get("stdout", "") - tool_status = result.get("status", "ok") - if result.get("error"): - tool_content += f"\n\nError: {result['error']}" - yield { - "type": "tool_result", - "tool": tool_name, - "status": tool_status, - "stdout": result.get("stdout", ""), - "error": result.get("error"), - } - elif tool_name == "inspect_source_data": - table_names = tool_args.get("table_names", []) - tool_content = handle_inspect_source_data( - table_names, input_tables or [], self.workspace, - ) - yield { - "type": "tool_result", - "tool": tool_name, - "status": "ok", - "stdout": tool_content, - } - elif tool_name == "search_knowledge": - tool_content = self._handle_search_knowledge(tool_args) - rlog.log("knowledge_search", - query=tool_args.get("query", ""), - 
results_count=tool_content.count("- [") if tool_content else 0) - yield { - "type": "tool_result", - "tool": tool_name, - "status": "ok", - "stdout": tool_content, - } - elif tool_name == "read_knowledge": - tool_content = self._handle_read_knowledge(tool_args) - yield { - "type": "tool_result", - "tool": tool_name, - "status": "ok", - "stdout": tool_content, - } - elif tool_name in ("visualize", "clarify", "explain", "summary", "delegate", "action"): - action_data = dict(tool_args) - if "action" not in action_data: - real_name = tool_name if tool_name != "action" else action_data.get("type", "summary") - action_data["action"] = real_name - - _rescue_unpack_json_strings(action_data) - - missing = _rescue_validate_action(action_data) - if missing: - tool_content = ( - f"ERROR: '{action_data['action']}' is an ACTION, not a tool. " - f"Output it as a JSON object in your text reply. " - f"Also, these required fields are missing: {', '.join(missing)}." - ) - logger.warning("[DataAgent] Action-as-tool with missing fields %s, sending correction", missing) - yield { - "type": "tool_result", - "tool": tool_name, - "status": "error", - "error": f"Missing fields: {', '.join(missing)}", - } - else: - logger.info("[DataAgent] Rescued action '%s' from tool call (weak-model fallback)", action_data.get("action")) - tool_content = "ok" - messages.append({ - "role": "tool", - "tool_call_id": tc.id, - "content": tool_content, - }) - rlog.log("tool_execution", iteration=outer_iteration, - tool=tool_name, - input_summary="rescued_as_action", - output_summary="ok", - latency_ms=0, status="ok") - yield {"type": "agent_action", "action_data": action_data, - "reason": "ok", "llm_calls": llm_calls_in_cycle} - return - else: - tool_content = f"Unknown tool: {tool_name}" - - tool_latency = int((time.time() - tool_t0) * 1000) - output_summary = (tool_content[:200] + "...") if len(tool_content) > 200 else tool_content - rlog.log("tool_execution", iteration=outer_iteration, - tool=tool_name, - 
input_summary=tool_args.get("purpose", "")[:200], - output_summary=output_summary, - latency_ms=tool_latency, status=tool_status) - - messages.append({ - "role": "tool", - "tool_call_id": tc.id, - "content": tool_content, - }) - - logger.info("[DataAgent] Executed %d tool call(s), looping back to LLM", len(tool_calls)) - continue - - # --- no tool calls — parse JSON action from text --- - logger.debug("[DataAgent] Raw LLM response:\n%s", content) - json_blocks = extract_json_objects(content) - if json_blocks: - messages.append({"role": "assistant", "content": content}) - yield {"type": "agent_action", "action_data": json_blocks[0], "reason": "ok", - "llm_calls": llm_calls_in_cycle} - return - - # --- JSON parse failed — focused retry (ask LLM to reformat only) --- - if json_retries < max_json_retries: - json_retries += 1 - logger.warning("[DataAgent] No JSON found (retry %d/%d), asking LLM to reformat", - json_retries, max_json_retries) - retry_assistant_msg: dict[str, Any] = {"role": "assistant", "content": content} - attach_reasoning_content(retry_assistant_msg, choice.message) - messages.append(retry_assistant_msg) - messages.append({ - "role": "user", - "content": ( - "[FORMAT ERROR] Your previous response did not contain a valid JSON action. " - "Please output ONLY a JSON object with one of these actions: " - "visualize, clarify, explain, summary, or delegate. Do NOT repeat your analysis — " - "just reformat your conclusion as JSON." 
- ), - }) - continue - - logger.warning("[DataAgent] JSON parse failed after retries: %s", content[:200]) - yield {"type": "agent_action", "action_data": None, "reason": "json_parse_failed", - "llm_calls": llm_calls_in_cycle} - return - - # --- tool rounds exhausted --- - logger.warning("[DataAgent] Exceeded %d tool rounds without producing an action", max_tool_rounds) - self._tool_loop_exit_reason = "tool_rounds_exhausted" - yield {"type": "agent_action", "action_data": None, "reason": "tool_rounds_exhausted", - "llm_calls": llm_calls_in_cycle} - return - - _MAX_LLM_RETRIES = 3 - - @staticmethod - def _is_transient_error(exc: Exception) -> bool: - msg = str(exc).lower() - if any(kw in msg for kw in ( - "timeout", "timed out", "rate limit", "rate_limit", - "429", "503", "502", "connection", "reset by peer", - )): - return True - name = type(exc).__name__.lower() - return any(kw in name for kw in ("timeout", "ratelimit", "connection")) - - def _call_llm(self, messages: list[dict]): - """Call the LLM with tool definitions (non-streaming). - - Retries up to ``_MAX_LLM_RETRIES`` times on transient errors - (timeout, rate-limit, connection reset) with exponential back-off. 
- """ - last_exc: Exception | None = None - for attempt in range(self._MAX_LLM_RETRIES): - try: - return self._call_llm_once(messages) - except Exception as e: - last_exc = e - if self._is_transient_error(e) and attempt < self._MAX_LLM_RETRIES - 1: - wait = 2 ** attempt - logger.warning( - "[DataAgent] Transient LLM error (attempt %d/%d), " - "retrying in %ds: %s", - attempt + 1, self._MAX_LLM_RETRIES, wait, e, - ) - time.sleep(wait) - continue - raise - raise last_exc # pragma: no cover - - def _call_llm_once(self, messages: list[dict]): - """Single LLM call (no retry).""" - return self.client.get_completion_with_tools( - messages, tools=TOOLS, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model), - ) - - # ------------------------------------------------------------------ - # Observation formatting - # ------------------------------------------------------------------ - - def _format_observation( - self, - step_index: int, - display_instruction: str, - thought: str, - code: str, - data: dict[str, Any], - chart_image: str | None, - ) -> dict: - """Format a rich observation for the trajectory. - - Includes data summary, code, and optionally the chart image - so the agent can make informed decisions about the next step. 
- """ - data_summary = generate_data_summary( - [{"name": data.get("virtual", {}).get("table_name", f"step_{step_index}"), - "rows": data["rows"]}], - workspace=self.workspace, - ) - - text = ( - f"[OBSERVATION – Step {step_index}]\n\n" - f"**Visualization**: {display_instruction}\n\n" - f"**Code**:\n```python\n{code}\n```\n\n" - f"**Transformed Data**:\n{data_summary}" - ) - - if chart_image: - content: list[dict[str, Any]] = [ - {"type": "text", "text": text + "\n\n**Chart**:"}, - ] - if chart_image.startswith("data:") or chart_image.startswith("http"): - content.append({ - "type": "image_url", - "image_url": {"url": chart_image, "detail": "low"}, - }) - return {"role": "user", "content": content} - - return {"role": "user", "content": text} - - # ------------------------------------------------------------------ - # Knowledge helpers - # ------------------------------------------------------------------ - - def _search_relevant_knowledge( - self, - user_question: str, - table_names: list[str], - max_items: int = 5, - ) -> list[dict[str, Any]]: - """Search experiences and non-alwaysApply rules relevant to the current session. - - Uses the user question as the search query and passes table names - separately for tag-overlap boosting. alwaysApply rules are - excluded by KnowledgeStore.search() since they are already - injected via system prompt. - Graceful degradation: returns empty list on failure. - """ - if not self._knowledge_store: - return [] - try: - results = self._knowledge_store.search( - user_question, - categories=["rules", "experiences"], - max_results=max_items, - table_names=table_names[:5], - ) - return results - except Exception: - logger.warning("Failed to search knowledge", exc_info=True) - return [] - - def _load_active_session_experience(self) -> dict[str, Any] | None: - """Return the experience distilled from the active workspace, if any. 
- - The session-scoped distillation flow (design-docs/24) writes one - experience per workspace, stamped with ``source_workspace_id``. - We always inject that file into the agent's context so the agent - has stable working memory for the active session in addition to - whatever the relevance search picked. - """ - if not self._knowledge_store: - return None - try: - from data_formulator.workspace_factory import get_active_workspace_id - ws_id = get_active_workspace_id() - except Exception: - ws_id = None - if not ws_id: - return None - try: - entry = self._knowledge_store.find_experience_by_workspace_id(ws_id) - except Exception: - logger.warning("find_experience_by_workspace_id failed", exc_info=True) - return None - if not entry: - return None - try: - content = self._knowledge_store.read("experiences", entry["path"]) - except Exception: - return None - from data_formulator.knowledge.store import parse_front_matter - _, body = parse_front_matter(content) - snippet = body[:500].strip() - if not snippet: - return None - return { - "category": "experiences", - "title": entry.get("title", entry.get("path", "")), - "tags": entry.get("tags", []), - "path": entry["path"], - "snippet": snippet, - "source": entry.get("source", "distill"), - } - - def _handle_search_knowledge(self, tool_args: dict) -> str: - """Handle the ``search_knowledge`` tool call.""" - if not self._knowledge_store: - return "Knowledge base is not available." - - query = tool_args.get("query", "") - categories = tool_args.get("categories") - try: - results = self._knowledge_store.search(query, categories=categories) - if not results: - return "No matching knowledge entries found." 
- lines = [] - for r in results: - lines.append( - f"- [{r['category']}] **{r['title']}** ({r['path']})\n" - f" {r['snippet'][:200]}" - ) - return "\n".join(lines) - except Exception as exc: - logger.warning("search_knowledge tool error: %s", type(exc).__name__) - return f"Error searching knowledge: {type(exc).__name__}" - - def _handle_read_knowledge(self, tool_args: dict) -> str: - """Handle the ``read_knowledge`` tool call.""" - if not self._knowledge_store: - return "Knowledge base is not available." - - category = tool_args.get("category", "") - path = tool_args.get("path", "") - try: - return self._knowledge_store.read(category, path) - except ValueError as exc: - return f"Invalid path: {exc}" - except FileNotFoundError: - return "Knowledge file not found." - except Exception as exc: - logger.warning("read_knowledge tool error: %s", type(exc).__name__) - return f"Error reading knowledge: {type(exc).__name__}" - - # ------------------------------------------------------------------ - # Helpers - # ------------------------------------------------------------------ - - @staticmethod - def _strip_images(trajectory: list[dict]) -> list[dict]: - """Return a copy of the trajectory with image_url blocks removed.""" - stripped: list[dict] = [] - for msg in trajectory: - content = msg.get("content") - if isinstance(content, list): - text_parts = [p for p in content if p.get("type") == "text"] - if text_parts: - stripped.append({**msg, "content": text_parts}) - else: - stripped.append({**msg, "content": "[image removed]"}) - else: - stripped.append(msg) - return stripped - - @staticmethod - def _log_session_end( - rlog, - status: str, - total_iterations: int, - total_llm_calls: int, - session_start_time: float, - ) -> None: - """Write ``session_end`` to the reasoning log. - - Does **not** close the log — the ``finally`` block in ``run()`` - handles that so the fd is released even on unexpected exceptions. 
- """ - rlog.log( - "session_end", - status=status, - total_iterations=total_iterations, - total_llm_calls=total_llm_calls, - total_latency_ms=int((time.time() - session_start_time) * 1000), - ) - - @staticmethod - def _error_event( - iteration: int, - message: str, - *, - display_instruction: str = "", - message_code: str = "", - message_params: dict | None = None, - ) -> dict[str, Any]: - """Build an ``"error"`` event dict for the streaming response.""" - event: dict[str, Any] = { - "type": "error", - "iteration": iteration, - "message": message, - } - if message_code: - event["message_code"] = message_code - if message_params: - event["message_params"] = message_params - if display_instruction: - event["display_instruction"] = display_instruction - return event - - @staticmethod - def _snapshot_dialog(messages: list[dict] | None) -> list[dict]: - """Snapshot the conversation for the Agent Log dialog. - - Handles plain text, multimodal content, tool_calls on assistant - messages, and tool result messages. 
- """ - if not messages: - return [] - snapshot: list[dict] = [] - for msg in messages: - role = msg.get("role", "") - content = msg.get("content") - - # Flatten multimodal content to text-only - if isinstance(content, list): - content = "\n".join( - p.get("text", "") for p in content if p.get("type") == "text" - ) - - # Assistant messages with tool_calls — show tool call details - if role == "assistant" and msg.get("tool_calls"): - tool_details = [] - for tc in msg["tool_calls"]: - fn = tc.get("function", {}) - name = fn.get("name", "?") - args_str = fn.get("arguments", "{}") - try: - args_obj = json.loads(args_str) - if name == "explore" and "code" in args_obj: - tool_details.append(f"[tool: {name}]\n```python\n{args_obj['code']}\n```") - else: - formatted = json.dumps(args_obj, indent=2, ensure_ascii=False) - tool_details.append(f"[tool: {name}]\n```json\n{formatted}\n```") - except (json.JSONDecodeError, TypeError): - tool_details.append(f"[tool: {name}]\n{args_str}") - text_part = content or "" - combined = (text_part + "\n\n" + "\n\n".join(tool_details)).strip() - snapshot.append({"role": role, "content": combined}) - - # Tool result messages - elif role == "tool": - tool_content = content or "" - if isinstance(tool_content, str) and len(tool_content) > 3000: - tool_content = tool_content[:3000] + "\n... (truncated)" - snapshot.append({"role": "assistant", "content": f"[tool result]\n{tool_content}"}) - - # Regular messages (system, user, assistant without tool_calls) - elif content: - if role != "system" and isinstance(content, str) and len(content) > 4000: - content = content[:4000] + "\n... (truncated)" - snapshot.append({"role": role, "content": content}) - return snapshot diff --git a/py-src/data_formulator/analyst/__init__.py b/py-src/data_formulator/analyst/__init__.py new file mode 100644 index 00000000..13bac642 --- /dev/null +++ b/py-src/data_formulator/analyst/__init__.py @@ -0,0 +1,51 @@ +# Copyright (c) Microsoft Corporation. 
+# Licensed under the MIT License. + +"""Analyst agent — a single user-facing data agent hosting multiple skills. + +This package unifies the former ``DataAgent`` (structured-action visualization +loop) and ``ReportGenAgent`` (streaming report writer) into one agent shell +that loads *skills* on demand. See +``design-docs/35-unified-agent-skills-architecture.md`` for the full design. + +Core ideas: + - **Inspection tools** gather information and are parallel-safe; their results + come back to the agent and are never shown to the user. The shell ships a + small core set (``inspect_source_data``, ``execute_python_script``, ``load_skill``); a + loaded skill may contribute additional tools (e.g. ``inspect_chart``). + - **Actions** are committing surfaces — at most one per turn. Each returns an + observation the shell feeds back as the action's tool-call result, so the + agent reads it and decides its own next move. ``visualize`` / ``delegate`` + are core (always available); skill actions (``write_report``, + ``restyle_chart``, …) are *gated* until their ``SKILL.md`` is loaded. The + run ends when the model commits no action (its final plain text is the + completion). + - A **skill is a passive plugin**, not a mini-agent: it bundles its + ``SKILL.md`` with optional ``tools`` + ``actions`` and the handlers + (``handle_tool`` / ``handle_action``) that perform any compute / rendering. + Its Python is always imported; ``load_skill`` only exposes it to the model. 
+""" + +from data_formulator.analyst.skills import ( + Event, + Skill, + SkillContext, + SkillMeta, + SkillRegistry, + ToolResult, + build_registry, +) +from data_formulator.analyst.agent import AnalystAgent +from data_formulator.analyst.mini_agent import MiniAnalystAgent + +__all__ = [ + "AnalystAgent", + "MiniAnalystAgent", + "Event", + "Skill", + "SkillContext", + "SkillMeta", + "SkillRegistry", + "ToolResult", + "build_registry", +] diff --git a/py-src/data_formulator/analyst/agent.py b/py-src/data_formulator/analyst/agent.py new file mode 100644 index 00000000..078fb610 --- /dev/null +++ b/py-src/data_formulator/analyst/agent.py @@ -0,0 +1,2112 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""AnalystAgent — the unified data analyst agent shell. + +This is the single user-facing data agent that replaces the separate +``DataAgent`` (structured-action visualization loop) and ``ReportGenAgent`` +(streaming report writer). It hosts a set of **core actions** plus a registry +of **skills** that unlock additional **gated actions** on demand. See +``design-docs/35-unified-agent-skills-architecture.md`` and the action turn +model in ``design-docs/36-artifact-turn-model.md``. + +Architecture (a vanilla tool-calling loop, plus the skills layer): + - **Inspection tools** (``execute_python_script``, ``inspect_source_data``, ``load_skill``, + plus skill-private tools) are called via the tool-calling API to gather + information. Parallel-safe, internal, no side effects. + - **Committing actions** (``visualize``, ``delegate``) render a user-visible + surface. Each returns an *observation* string that the shell feeds back as + the action's tool-call result — the same lane an inspection tool result + rides — so the agent reads it and decides its own next move. Always available. + - **Gated actions** (e.g. ``write_report``) are unlocked only after their + skill is loaded via ``load_skill``; their tool is not offered until then. 
+ +The run ends when the model commits **no action** in a turn: its final plain-text +answer *is* the completion (the frontend renders it as the run's summary). There +is no control verdict and no separate "stop" action — the agent simply stops +acting. The shell stays skill-agnostic: it partitions a response into inspection +tools vs committing actions, enforces the one-action-per-turn cardinality guard, +routes the chosen action to the owning skill's ``handle_action(...)``, feeds the +returned observation back, and forwards the channel-tagged events. +""" + +import json +import logging +import re +import time +import uuid +from pathlib import Path +from types import SimpleNamespace +from typing import Any, Generator + +from data_formulator.agent_config import reasoning_effort_for +from data_formulator.agents.agent_utils import ( + accumulate_reasoning_content, + attach_reasoning_content, + ensure_output_variable_in_code, +) +from data_formulator.agents.context import ( + build_focused_thread_context, + build_lightweight_table_context, + build_peripheral_thread_context, + handle_inspect_source_data, +) +from data_formulator.agents.client_utils import Client +from data_formulator.datalake.parquet_utils import df_to_safe_records + +from data_formulator.analyst.skills import ( + Event, + SkillContext, + SkillRegistry, + ToolResult, + build_registry, +) +from data_formulator.analyst.tools import build_tools + +logger = logging.getLogger(__name__) + +_AGENT_ID = "analyst" + +# The always-on baseline skill, auto-loaded at the start of every run. It owns +# the built-in tools (execute_python_script / inspect_source_data) and the always-available +# actions (visualize / delegate) plus the base prompt body (its SKILL.md). The +# shell hardcodes nothing about those actions — legality is derived from +# whichever skills are loaded. +_CORE_SKILL = "core" + +# Banner stamped at the START of a loaded skill's body message. 
# It is the single contract between the emitter (_load_skill_into_context) and
# the resume parser (_rehydrate_loaded_skills): they share this template + regex
# so they cannot drift, and the regex is anchored to the message start so only
# banners *we* emitted match — never the same text pasted by a user or echoed by
# the model.
_SKILL_LOADED_BANNER = "[SKILL LOADED: {name}]"
_SKILL_LOADED_RE = re.compile(r"^\[SKILL LOADED: ([^\]]+)\]")

# ── Action-argument coercion ──────────────────────────────────────────────
# Weaker models sometimes JSON-encode a nested action argument as a string
# (e.g. ``"chart": "{...}"``). Parse those back to objects before dispatch so
# the skill handler sees structured data. Required-field validation lives in the
# registry (``action_required_fields``) and the skill handler — not here.


def _rescue_unpack_json_strings(data: dict) -> None:
    """Decode, in place, known action arguments that arrived as JSON text.

    Only a fixed set of keys is examined. A value is decoded when it is a
    string whose first non-blank character opens a JSON object or array;
    anything else — including text that fails to parse — is left untouched.

    Args:
        data: The action-argument mapping; mutated in place.
    """
    candidate_keys = (
        "chart", "input_tables", "questions", "options", "followups",
        "field_metadata", "field_display_names",
    )
    for name in candidate_keys:
        raw = data.get(name)
        if not isinstance(raw, str):
            continue
        if not raw.strip().startswith(("{", "[")):
            continue
        try:
            parsed = json.loads(raw)
        except (json.JSONDecodeError, ValueError):
            # Not valid JSON after all — keep the original string as-is.
            continue
        data[name] = parsed


# ── Live tool-argument streaming (design-docs/36 §5) ───────────────────────
# A streaming action (only ``write_report`` today) writes its payload as a
# tool-call argument. Providers stream that argument as a growing JSON fragment
# (``delta.tool_calls[].function.arguments`` — Anthropic's ``input_json_delta``).
# This extractor pulls the *decoded* value of one top-level string key out of
# that fragment as it grows, surfacing only the newly-completed suffix each feed
# so the agent can forward it as channel ``text_delta``s.
It is forgiving of a +# partial trailing escape (``\\`` or an incomplete ``\\uXXXX``): it holds those +# bytes back until the next chunk completes them, never emitting half an escape. + + +class _StreamingArgExtractor: + """Incrementally extract the decoded string value of a top-level JSON key + from a growing tool-call ``arguments`` fragment. + + ``feed`` is given the full accumulated arguments so far and returns only the + newly-decoded suffix of the target field's value (``""`` while nothing new + can be safely decoded yet). + """ + + def __init__(self, field: str): + # Matches ``"field"`` then ``:`` then the opening quote of the value. + self._open_re = re.compile(r'"' + re.escape(field) + r'"\s*:\s*"') + self._emitted = 0 + + def feed(self, args_so_far: str) -> str: + decoded = self._decode(args_so_far) + if decoded is None or len(decoded) <= self._emitted: + return "" + new = decoded[self._emitted:] + self._emitted = len(decoded) + return new + + def _decode(self, args: str) -> str | None: + """Return the decoded value-so-far of the field, or ``None`` if the + value has not started or a trailing escape is incomplete.""" + m = self._open_re.search(args) + if not m: + return None + rest = args[m.end():] + out: list[str] = [] + i, n = 0, len(rest) + while i < n: + ch = rest[i] + if ch == "\\": + if i + 1 >= n: + break # dangling escape — wait for the next chunk + out.append(rest[i:i + 2]) + i += 2 + continue + if ch == '"': + break # closing quote — value complete + out.append(ch) + i += 1 + try: + # Re-wrap as a JSON string literal so escapes decode correctly. + return json.loads('"' + "".join(out) + '"') + except (json.JSONDecodeError, ValueError): + return None # e.g. partial ``\\uXXXX`` — wait for more + + + +# The agent's system frame — shell-owned, invariant across skills: identity, the +# tools-vs-actions contract, the skills mechanism, and the action budget / +# stop criteria. 
# This is the agent's own contract, so it lives here as code (not
# as a skill body). ``_build_system_prompt`` fills the ``{...}`` slots via plain
# string substitution (NOT str.format — braces elsewhere stay literal). The
# always-loaded ``core`` skill's SKILL.md (the concrete tools + action schemas)
# is appended after this frame, unformatted, exactly like any other skill body.
#
# Slots: {context_guide}, {skills_block}, {max_iterations},
# {agent_exploration_rules}.
SYSTEM_PROMPT = """\
You are an autonomous data analyst agent.

Your goal is to help the user by exploring their data, producing visualizations,
and — when asked — packaging the findings (e.g. into a written report). You
operate in a loop: gather what you need with inspection tools, take an **action**
when you want to act on the data, read its result, and repeat — then stop by
giving your final answer in plain text.

## Tools vs. actions

Everything you do is a function/tool call, but calls come in two kinds and
keeping them straight is essential:

- **Inspection tools** (internal — for gathering information). Functions like
  `execute_python_script`, `inspect_source_data`, `inspect_chart`, and `load_skill` that
  inspect data or load instructions *before* you act. Their results return to
  you and are **not** shown to the user. They commit nothing and are
  **independent** — none depends on another's result — so call as many as you
  need, across as many rounds as you need, until you have enough to act.
- **Actions** (committing — shown to the user). A discrete operation like
  `visualize`, `ask_user`, `delegate`, and (once the report skill is loaded)
  `write_report`. Each renders a user-visible surface, and its result is
  returned to you just like a tool result so you can react to it.

**Actions are sequential — take exactly one, then wait for its result.** This is
the key difference from inspection tools: those are independent, but each
action's result shapes your next decision — the chart you'd draw next depends on
what this one reveals — so choosing two at once would make the second a blind
guess, decided before you've seen the first's outcome. Do all your inspection
first, then commit the single action that fits.

Treat each action like one turn in a back-and-forth: **you act → its result
answers → you act again.** Even when you're planning a sequence of charts,
surface them one at a time so each reacts to the last. (If you do emit several
actions at once, only the first runs and the rest are discarded — batching only
loses work.)

**To finish, reply with plain text and no action.** Plain text is your
**closing answer** — the run is over and you expect nothing further (the user's
next message starts a fresh turn). Use it whenever you've done what was asked,
including answering a question you fully resolved.

**Whenever you expect the user to reply — a question, a clarification, an
explanation you want them to react to, or a set of choices — use the `ask_user`
action instead.** It renders a question widget and pauses the run for their
reply, so the conversation resumes in the same turn. `ask_user` accepts
free-text questions (no clickable options required), so reach for it for *any*
followup-seeking turn, not only structured choices. Plain text never asks for
input; `ask_user` always does. There is no separate "stop" or "summary" action:
you stop by simply not acting.

The concrete actions available to you — and how to use each well — are
described in the capability sections below.

## Understanding your context

{context_guide}

## Skills (load on demand)

Your baseline capabilities come from the **core** skill, which is **always loaded
automatically** (you'll see it below as `[SKILL: core]`). Beyond that baseline,
extra capabilities are packaged as **extension skills** — each one unlocks an
additional action (and sometimes extra tools), but only after you load it:
1. Call the `load_skill("<name>")` tool — this reads the skill's instructions into
   your context and unlocks its action(s) and any tools it provides.
2. Follow those instructions and call the action it unlocks (its tool only
   appears once the skill is loaded).

Calling an extension skill's action **before** loading the skill will not
execute — you'll be asked to load it first. Extension skills available this run
(load the one whose `when to use` fits):

{skills_block}

## Working within your budget

- You have a budget of **{max_iterations} actions** for this run — a **hard
  ceiling, not a target**. Use as few as the goal requires.
- **Stop as soon as the user's goal is met.** End the run by giving your final
  answer in plain text rather than taking more actions just because you can.
- For concrete/progressive questions, take a follow-up action only when it
  addresses a gap the previous step actually raised. For open-ended
  exploration, the opposite applies: deliberately spend your budget covering
  distinct analytical angles (see the core skill's "Choosing what to do").
- If the request is genuinely ambiguous, ask the user via the `ask_user` action
  rather than guessing.

{agent_exploration_rules}"""
Skill code + # modules themselves live in ``self.registry.skills`` and are always + # available; ``_loaded_skills`` only tracks which skills the model has + # been *exposed* to (tools + actions + guidance) this run. + self._loaded_skills: set[str] = set() + # Free-form payload for skill dispatch (charts, etc.), set per run. + self._run_payload: dict[str, Any] = {} + # Live-streaming bookkeeping (design-docs/36 §5). ``_streamed_channels`` + # maps a committing action's tool_call_id -> the channel its argument was + # already forwarded on during the streaming LLM call; ``_suppress_stream_channel`` + # is set just before dispatching such an action so the router drops the + # skill's duplicate (buffered) emission of the same content. + self._streamed_channels: dict[str, str] = {} + self._suppress_stream_channel: str | None = None + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + def _explore_ns_dir(self) -> Path: + """Directory for cross-turn namespace serialisation.""" + return self.workspace.confined_scratch.root / "_explore_ns" + + def _legal_actions(self) -> frozenset[str]: + """The set of committing actions currently legal to emit. + + Every legal action is owned by a *loaded* skill. ``core`` is always + loaded, so its baseline actions are always legal; a gated skill's + actions become legal once that skill is loaded. 
    def run(
        self,
        input_tables: list[dict[str, Any]],
        user_question: str,
        focused_thread: list[dict[str, Any]] | None = None,
        other_threads: list[dict[str, Any]] | None = None,
        trajectory: list[dict] | None = None,
        completed_step_count: int = 0,
        primary_tables: list[str] | None = None,
        attached_images: list[str] | None = None,
        charts: list[dict[str, Any]] | None = None,
    ) -> Generator[dict[str, Any], None, None]:
        """Run the unified analyst loop.

        Yields event dicts with ``type`` in:
            ``"context_info"`` – rules / knowledge injected into a fresh run
            ``"action"`` – the agent's committed action (for UI)
            ``"result"`` – a visualization result (data + chart)
            ``"tool_start"`` / ``"tool_result"`` – inspection tool activity
            ``"skill_loaded"`` – a skill's gate opened
            ``"delegate"`` – hand-off to a peer agent
            ``"completion"`` – the run's final answer (ends the run)
            ``"error"`` – error information

        Events yielded by ``_get_next_action`` and by the dispatched skill are
        forwarded as-is, so other types (e.g. ``interact``) may also appear.

        The run ends when the model commits no action in a turn: its final
        plain-text answer is emitted as the ``completion`` event.

        Args:
            input_tables: Table descriptors; each entry's ``name`` is logged and
                the list is handed to message building and the LLM step.
            user_question: The user's request for this run.
            focused_thread: Stored in the run payload and passed to
                ``_build_initial_messages``.
            other_threads: Same handling as ``focused_thread``.
            trajectory: ``None`` starts a fresh run (the explore-namespace
                directory is cleared and initial messages are built); a list
                resumes from that message history and re-opens the gate of any
                skill whose banner is still present in it.
            completed_step_count: Seeds both the iteration counter and the
                committed-action count on resume.
            primary_tables: Forwarded to message building and the run payload.
            attached_images: Forwarded to ``_build_initial_messages``.
            charts: Pre-existing charts, stored in the run payload for skills.
        """
        rlog = self._reasoning_log
        session_start_time = time.time()
        total_llm_calls = 0
        completed_steps: list[dict[str, Any]] = []
        iteration = completed_step_count
        final_status = "max_iterations"

        # Reset per-run skill + payload state. ``core`` is auto-loaded: its
        # baseline tools + actions are always available and its SKILL.md body is
        # appended to the system frame (see _build_system_prompt). Gated skills
        # are added to this set as the model loads them. The payload carries
        # everything a dispatched skill handler needs to build its own context
        # (e.g. the report skill rebuilds [AVAILABLE CHARTS] + thread
        # context).
        self._loaded_skills = {_CORE_SKILL}
        # NOTE(review): ``charts or []`` keeps the caller's list object when it
        # is non-empty, and ``register_run_chart`` appends to that list in
        # place — confirm callers expect their list to grow.
        self._run_payload = {
            "input_tables": input_tables,
            "charts": charts or [],
            "focused_thread": focused_thread,
            "other_threads": other_threads,
            "primary_tables": primary_tables,
        }

        try:
            rlog.log(
                "session_start",
                agent="AnalystAgent",
                session_id=self._session_id,
                user_question=user_question,
                input_tables=[t.get("name", "") for t in input_tables],
                model=self.client.model,
                rules_injected=[
                    r for r in [self.agent_exploration_rules, self.agent_coding_rules] if r
                ],
                knowledge_injected=[],
            )

            if trajectory is None:
                # Fresh run: drop any namespace serialised by a previous run.
                ns_dir = self._explore_ns_dir()
                if ns_dir.exists():
                    import shutil
                    shutil.rmtree(ns_dir, ignore_errors=True)

                trajectory = self._build_initial_messages(
                    input_tables, user_question, focused_thread, other_threads,
                    primary_tables=primary_tables,
                    attached_images=attached_images,
                    charts=charts,
                )
                # Token figures are a rough estimate: character count // 4.
                rlog.log(
                    "context_built",
                    system_prompt_tokens=len(trajectory[0].get("content", "")) // 4 if trajectory else 0,
                    user_msg_tokens=len(str(trajectory[1].get("content", ""))) // 4 if len(trajectory) > 1 else 0,
                    total_tables=len(input_tables),
                    primary_tables=primary_tables or [],
                    knowledge_rules_injected=self._injected_rules,
                    knowledge_injected=self._injected_knowledge,
                )

                if self._injected_rules or self._injected_knowledge:
                    yield {
                        "type": "context_info",
                        "rules_injected": self._injected_rules,
                        "knowledge_injected": [
                            {"category": k["category"], "title": k["title"]}
                            for k in self._injected_knowledge
                        ],
                    }
            else:
                # Resume: the trajectory is the single source of truth. A loaded
                # skill is just its ``[SKILL LOADED: <name>]`` body sitting in
                # history (kept for free via prefix caching), so re-open the gate
                # for every skill whose body is still present. This keeps
                # ``_loaded_skills`` in sync with what the model actually sees,
                # avoiding a "body present but gate closed" contradiction.
                self._rehydrate_loaded_skills(trajectory)

            action_budget = self.max_iterations  # hard ceiling on committing actions
            actions_committed = completed_step_count  # resume-aware count
            # Separate, looser bound on loop rounds: rounds that end in an
            # unknown or gated action do not count against the action budget.
            hard_ceiling = iteration + max(self.max_iterations * 3, 12)

            while iteration < hard_ceiling:
                iteration += 1

                # --- THINK: call LLM with tools, get the next action ------
                t_start = time.time()
                action = None
                action_reason = "ok"
                action_error = ""
                final_text = ""
                action_tool_call_id = None
                for event in self._get_next_action(trajectory, input_tables, outer_iteration=iteration):
                    # ``agent_action`` is the internal verdict of the step; every
                    # other event is forwarded to the caller untouched.
                    if event.get("type") == "agent_action":
                        action = event.get("action_data")
                        action_reason = event.get("reason", "ok")
                        action_error = event.get("error_message", "")
                        final_text = event.get("final_text", "")
                        action_tool_call_id = event.get("tool_call_id")
                        total_llm_calls += event.get("llm_calls", 0)
                    else:
                        yield event
                logger.info("[AnalystAgent] iteration %d total=%.2fs reason=%s",
                            iteration, time.time() - t_start, action_reason)

                if action is None:
                    # ── No committing action → the run is over ────────────────
                    # The normal close: the model answered in plain text and
                    # committed nothing. That final text IS the completion (the
                    # frontend renders it as the run's summary). An LLM API error
                    # is fatal; the tool-round backstop also lands here.
                    if action_reason == "llm_error":
                        final_status = "llm_error"
                        yield self._error_event(
                            iteration,
                            action_error or "LLM API error",
                            message_code="agent.llmApiError",
                        )
                        self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
                        return

                    final_status = (
                        "tool_rounds_exhausted"
                        if action_reason == "tool_rounds_exhausted"
                        else "success"
                    )
                    yield {
                        "type": "completion",
                        "iteration": iteration,
                        "status": final_status,
                        "content": {
                            "summary": final_text,
                            "total_steps": len(completed_steps),
                        },
                    }
                    self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
                    return

                action_type = action.get("action")
                logger.info(f"[AnalystAgent] Iteration {iteration}: action={action_type}")

                # --- GATE: every action is owned by a skill; its owner must be
                # loaded. ``core`` is always loaded, so its actions pass
                # straight through.
                owner = self.registry.action_owner(action_type)
                if owner is None:
                    legal = ", ".join(sorted(self._legal_actions()))
                    self._set_action_observation(
                        trajectory, action_tool_call_id,
                        f"[ERROR] Unknown action '{action_type}'. Choose one of: "
                        f"{legal}, or load a skill that unlocks the action you need.",
                    )
                    yield self._error_event(
                        iteration, f"Unknown action: {action_type}",
                        message_code="agent.unknownAction",
                    )
                    continue
                if owner not in self._loaded_skills:
                    # Gate closed — tell the model to load the skill, no execution.
                    self._set_action_observation(
                        trajectory, action_tool_call_id,
                        f"[GATED] The '{action_type}' action requires the "
                        f"'{owner}' skill. Call load_skill(\"{owner}\") first, "
                        "follow its instructions, then emit the action again.",
                    )
                    rlog.log("action_gated", action=action_type, skill=owner,
                             iteration=iteration)
                    continue

                # --- DISPATCH: the owning skill renders the action and RETURNS
                # an observation string; the shell feeds it back as the
                # action's tool-call result (the same lane an inspection tool
                # result rides), then loops so the agent reads it and decides
                # its own next move. There is no control verdict.
                # If this action's argument was streamed live during the LLM call
                # (e.g. write_report), tell the router to drop the skill's
                # duplicate buffered emission of the same content.
                self._suppress_stream_channel = self._streamed_channels.get(
                    action_tool_call_id
                )
                try:
                    observation = yield from self._dispatch_skill_action(
                        owner, action_type, action, trajectory, iteration, completed_steps,
                    )
                finally:
                    # Always clear the flag so it cannot leak into a later dispatch.
                    self._suppress_stream_channel = None
                self._set_action_observation(
                    trajectory, action_tool_call_id, observation,
                )

                if observation is None:
                    # ── Terminal action → the run pauses ──────────────────────
                    # A handler that returns no observation (``interact``) has
                    # nothing for the agent to react to: it already yielded its
                    # own terminal surface (a question widget) and the run waits
                    # for the user. Stop here; their next message starts a fresh
                    # turn. No completion event — the interact event is the close.
                    self._log_session_end(
                        rlog, "success", iteration, total_llm_calls, session_start_time,
                    )
                    return

                actions_committed += 1
                remaining = action_budget - actions_committed
                if remaining <= 0:
                    # Hard action ceiling reached — stop and let the user steer.
                    final_status = "max_iterations"
                    yield {
                        "type": "completion",
                        "iteration": iteration,
                        "status": "max_iterations",
                        "content": {
                            "summary": "Reached the maximum number of actions for this run.",
                            "summary_code": "agent.maxIterationsSummary",
                            "total_steps": len(completed_steps),
                        },
                    }
                    self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
                    return
                if remaining == 1:
                    # Nudge the model that its next action is the last one.
                    trajectory.append({
                        "role": "user",
                        "content": (
                            "[SYSTEM] You have 1 action left in your budget. Make it "
                            "count, or wrap up by giving your final answer in plain "
                            "text (which ends the run)."
                        ),
                    })
                continue

            # Runaway backstop — too many non-committing rounds without finishing.
            final_status = "max_iterations"
            self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
            yield {
                "type": "completion",
                "iteration": iteration,
                "status": "max_iterations",
                "content": {
                    "summary": "Reached the maximum number of exploration steps.",
                    "summary_code": "agent.maxIterationsSummary",
                    "total_steps": len(completed_steps),
                },
            }
        finally:
            # Runs on every exit path, including generator close by the consumer.
            rlog.close()
+ + The match is anchored to the start of the message (see + ``_SKILL_LOADED_RE``): our emitter always stamps the banner at position + 0, so a user-pasted or model-echoed ``[SKILL LOADED: ...]`` sitting + mid-message will not spuriously open a gate. + """ + for message in trajectory: + content = message.get("content") + if not isinstance(content, str): + continue + m = _SKILL_LOADED_RE.match(content) + if m: + name = m.group(1).strip() + if self.registry.has(name): + self._loaded_skills.add(name) + + def _load_skill_into_context( + self, name: str, trajectory: list[dict], + ) -> tuple[bool, str]: + """Load a skill's ``SKILL.md`` body into the trajectory. + + Returns ``(ok, message)``. On success the body is appended as a user + message and ``name`` is recorded in ``_loaded_skills``; the gated + actions it declares become legal. Idempotent — loading twice is a no-op. + + Convenience wrapper around :meth:`_build_skill_body_message` that appends + the body immediately. Prefer the builder directly when loading inside a + tool-call round, where the body must be appended *after* the tool-result + messages (an assistant ``tool_calls`` turn must be immediately followed + by its tool responses — see the readonly loop in ``_tool_loop``). + """ + ok, message, body_msg = self._build_skill_body_message(name) + if ok and body_msg is not None: + trajectory.append(body_msg) + return ok, message + + def _build_skill_body_message( + self, name: str, + ) -> tuple[bool, str, dict | None]: + """Resolve a skill's body into a ``user`` message *without* appending it. + + Returns ``(ok, message, body_msg)``. On success ``name`` is recorded in + ``_loaded_skills`` (so the gated actions become legal immediately) and + ``body_msg`` is the user turn the caller must append to the trajectory; + the caller controls *when* it lands so message ordering stays + provider-valid. Idempotent — loading twice yields ``body_msg=None``. 
+ """ + if not self.registry.has(name): + return False, f"Unknown skill: {name!r}", None + if name in self._loaded_skills: + return True, f"Skill '{name}' already loaded.", None + try: + body = self.registry.load_body(name) + except Exception as e: + logger.warning("[AnalystAgent] Failed to load skill body %s", name, exc_info=True) + return False, f"Failed to load skill {name!r}: {e}", None + + meta = self.registry.metas[name] + unlocks = ", ".join(meta.action_names) if meta.action_names else "(none)" + tool_names = [ + spec.get("function", {}).get("name") + for spec in self.registry.tools_for([name]) + ] + tool_names = [t for t in tool_names if t] + tools_line = ( + f" New tools available: {', '.join(tool_names)}.\n" if tool_names else "" + ) + # Mirror the ``[SKILL: ]`` header the core body gets in + # _build_system_prompt, so every capability bundle reads as one family — + # here ``[SKILL LOADED: ]`` marks one that just became active. The + # banner is built from the shared template so resume-time rehydration + # (_rehydrate_loaded_skills) parses exactly what we emit here. + body_msg = { + "role": "user", + "content": ( + f"{_SKILL_LOADED_BANNER.format(name=name)} You can now use the action(s): {unlocks}.\n" + f"{tools_line}\n" + f"{body}" + ), + } + self._loaded_skills.add(name) + return True, f"Skill '{name}' loaded; unlocked: {unlocks}.", body_msg + + def _dispatch_skill_action( + self, + skill_name: str, + action_type: str, + action: dict[str, Any], + trajectory: list[dict], + iteration: int, + completed_steps: list[dict[str, Any]], + ) -> Generator[Event, None, str | None]: + """Render a skill's action via ``handle_action`` and return its + observation string (or ``None``). 
+ + The skill does the *processing* (validate, run, emit events) and yields + events back; this method *routes* those events to the caller — stamping + ``iteration``, tracking completed visualization steps, and enriching the + delegate event with the resumability fields the frontend needs — then + returns the skill's observation. The shell feeds that observation back as + the action's tool-call result (see ``_set_action_observation``). + + The skill is always instantiated (eager registry build), so this only + fails if a skill declares an action in its ``SKILL.md`` but ships no + executable handler — a config error: the shell yields its own ``error`` + event and returns an observation describing the failure. + """ + rlog = self._reasoning_log + skill = self.registry.get_skill(skill_name) + if skill is None or not hasattr(skill, "handle_action"): + logger.warning( + "[AnalystAgent] Skill %r unlocks action %r but has no handle_action.", + skill_name, action_type, + ) + rlog.log("action_execution", action=action_type, status="no_handler", + iteration=iteration, skill=skill_name) + yield self._error_event( + iteration, + f"Skill '{skill_name}' has no handler for '{action_type}'.", + message_code="agent.skillNoHandler", + ) + return ( + f"[SKILL ERROR] The '{skill_name}' skill cannot render " + f"'{action_type}'. Choose a core action instead." 
+ ) + + ctx = SkillContext( + client=self.client, + workspace=self.workspace, + language_instruction=self.language_instruction, + trajectory=trajectory, + payload={**self._run_payload, "completed_step_count": len(completed_steps)}, + runtime=self, + ) + rlog.log("action_execution", action=action_type, status="ok", + iteration=iteration, skill=skill_name) + gen = skill.handle_action(action_type, action, ctx) + observation = yield from self._route_skill_events( + gen, iteration, trajectory, completed_steps, + ) + return observation + + def _route_skill_events( + self, + gen: Generator[Event, None, str | None], + iteration: int, + trajectory: list[dict], + completed_steps: list[dict[str, Any]], + ) -> Generator[Event, None, str | None]: + """The shell's router: a skill yields events to *here* (never straight + to the frontend), and this is the single place that decides what to + forward upstream — re-yielding each event after enriching it with + shell-owned bookkeeping — then returns the skill's observation string. + + Concretely it: + - stamps ``iteration`` on every event; + - records each ``result`` event as a completed visualization step; + - enriches ``delegate`` / ``interact`` events (both pause the run) with + the stripped trajectory + completed-step count needed to resume. + + It is free to transform or drop events; skills stay decoupled from the + wire protocol and the routing policy. + + Suppression: when the committing action's argument was already streamed + live (``_suppress_stream_channel`` set by ``run``), the skill's later + *buffered* re-emission of the same content — its ``action`` event and the + ``text_delta`` on that channel — is dropped here so the frontend sees the + content exactly once (design-docs/36 §5). + + Recoverable errors: every ``error`` event a skill yields is paired with a + returned observation string (e.g. visualize's "chart fields not found", + a malformed ``ask_user`` payload). 
That observation is fed back to the + agent as the action's tool-call result, so the agent sees the failure and + self-corrects on the next iteration. These are *internal* retry signals, + not user-facing failures, so they are dropped here and never streamed to + the frontend. Only fatal, run-ending errors (LLM API failures) are + emitted directly by ``run`` outside this router and do reach the client. + """ + suppress_channel = self._suppress_stream_channel + try: + ev = next(gen) + while True: + ev.setdefault("iteration", iteration) + etype = ev.get("type") + drop = ( + etype == "error" + or (bool(suppress_channel) and ( + etype == "action" + or (etype == "text_delta" and ev.get("channel") == suppress_channel) + )) + ) + if not drop: + if etype == "result": + content = ev.get("content", {}) or {} + result = content.get("result") or {} + completed_steps.append({ + "display_instruction": content.get("question", ""), + "code": result.get("code", ""), + }) + elif etype in ("delegate", "interact"): + # Both pause the run; the frontend needs the trajectory + + # step count to resume after the user answers / hands off. + ev.setdefault("trajectory", self._strip_images(trajectory)) + ev.setdefault("completed_step_count", len(completed_steps)) + yield ev + ev = gen.send(None) + except StopIteration as stop: + return stop.value # the skill's observation string (or None) + + def _set_action_observation( + self, messages: list[dict], tool_call_id: str | None, observation: str | None, + ) -> None: + """Feed an action's observation back as its tool-call result. + + The committing action was recorded as an assistant tool call answered by + an empty placeholder ``tool`` message (see ``_commit_action``); fill that + placeholder with the skill's observation so the agent reads it exactly + like an inspection tool result. Falls back to appending a user message if + the id is missing (safety). 
def run_visualize_code(self, **kwargs) -> dict[str, Any]:
    """Facade for skills: forward every keyword argument to the private
    ``_run_visualize_code`` and hand back its result unchanged."""
    outcome = self._run_visualize_code(**kwargs)
    return outcome


def register_run_chart(
    self,
    transform_result: dict[str, Any],
    chart_spec: dict[str, Any],
) -> None:
    """Add a chart produced during this run to ``self._run_payload['charts']``.

    Gated skills (e.g. report) dispatched later in the same run can then
    reference the chart by id. The stored entry carries ``chart_id``,
    ``chart_type``, ``encodings``, ``table_ref``, ``code`` and ``chart_data``
    (table name plus at most the first 50 rows).

    Nothing is registered when ``transform_result`` has no ``chart_id``; a
    chart id that is already present is not added twice.
    """
    chart_id = transform_result.get("chart_id")
    if not chart_id:
        return

    payload = transform_result.get("content", {}) or {}
    virtual_info = payload.get("virtual", {}) or {}
    backing_table = virtual_info.get("table_name", "")
    sample_rows = payload.get("rows", []) or []

    registered = self._run_payload.setdefault("charts", [])
    for existing in registered:
        if existing.get("chart_id") == chart_id:
            return  # already known — keep the first registration

    # The spec may name the chart kind under either key.
    kind = chart_spec.get("type") or chart_spec.get("chart_type") or "Unknown"
    entry = {
        "chart_id": chart_id,
        "chart_type": kind,
        "encodings": dict(chart_spec.get("encodings", {}) or {}),
        "table_ref": backing_table,
        "code": transform_result.get("code", ""),
        "chart_data": {"name": backing_table, "rows": sample_rows[:50]},
    }
    registered.append(entry)
+ """ + chart_id = transform_result.get("chart_id") + if not chart_id: + return + content = transform_result.get("content", {}) or {} + table_name = (content.get("virtual", {}) or {}).get("table_name", "") + rows = content.get("rows", []) or [] + charts = self._run_payload.setdefault("charts", []) + if any(c.get("chart_id") == chart_id for c in charts): + return + charts.append({ + "chart_id": chart_id, + "chart_type": chart_spec.get("type") or chart_spec.get("chart_type") or "Unknown", + "encodings": dict(chart_spec.get("encodings", {}) or {}), + "table_ref": table_name, + "code": transform_result.get("code", ""), + "chart_data": {"name": table_name, "rows": rows[:50]}, + }) + + def run_explore_code( + self, code: str, input_tables: list[dict[str, Any]], + ) -> dict[str, Any]: + """Public alias so skills can run explore code via ``ctx.runtime``.""" + return self._run_explore_code(code, input_tables) + + # ------------------------------------------------------------------ + # Sandbox execution substrate + # ------------------------------------------------------------------ + + def _run_explore_code( + self, + code: str, + input_tables: list[dict[str, Any]], + ) -> dict[str, Any]: + """Run explore code in sandbox, capturing stdout.""" + capture_code = ( + "import io as _io, sys as _sys, pandas as _pd\n" + "_old_stdout = _sys.stdout\n" + "_sys.stdout = _captured = _io.StringIO()\n" + "\n" + f"{code}\n" + "\n" + "_sys.stdout = _old_stdout\n" + "_pack = {\n" + " 'stdout': _captured.getvalue(),\n" + "}\n" + ) + + try: + with self.workspace.local_dir() as local_path: + import os as _os + workspace_path = _os.path.abspath(str(local_path)) + allowed_objects = {"_pack": None} + + session = getattr(self, "_explore_session", None) + if session is not None: + raw = session.execute(capture_code, allowed_objects, workspace_path) + else: + from data_formulator.sandbox import create_sandbox + try: + from flask import current_app + sandbox_mode = current_app.config.get('CLI_ARGS', 
def _run_visualize_code(
    self,
    code: str,
    output_variable: str,
    chart_spec: dict,
    field_metadata: dict,
    field_display_names: dict,
    display_instruction: str,
    title: str = "",
    messages: list[dict] | None = None,
) -> dict[str, Any]:
    """Run visualize code in the sandbox and assemble the chart payload.

    Steps: patch the code so ``output_variable`` is assigned, execute it,
    check that every chart encoding names a column of the output DataFrame,
    reject an empty result, persist the full DataFrame to the workspace as
    parquet, and build the ``transform_result`` (rows capped at
    ``max_display_rows``, duplicate columns removed, fresh ``chart_id``).

    Args:
        code: Transformation code to execute.
        output_variable: Name of the variable holding the result DataFrame.
        chart_spec: Chart description; its ``encodings`` map channel -> field.
        field_metadata: Stored verbatim in ``refined_goal``.
        field_display_names: Stored in ``refined_goal`` (``{}`` if falsy).
        display_instruction: Stored in ``refined_goal``.
        title: Chart title stored in ``refined_goal``.
        messages: Conversation snapshotted into the result's ``dialog``.

    Returns:
        ``{"status": "ok", "transform_result": ...}`` on success, otherwise
        ``{"status": "error", "error_message": ...}`` — with ``error_code``
        (and ``error_params``) for missing fields or an empty DataFrame.
    """
    from data_formulator.sandbox import create_sandbox

    try:
        from flask import current_app
        # Read CLI_ARGS once and tolerate its absence for BOTH settings;
        # indexing config['CLI_ARGS'] directly raised an uncaught KeyError.
        cli_args = current_app.config.get('CLI_ARGS', {}) or {}
        sandbox_mode = cli_args.get('sandbox', 'local')
        max_display_rows = cli_args.get('max_display_rows', 5000)
    except (ImportError, RuntimeError):
        # No Flask installed / no application context: use the defaults.
        sandbox_mode = 'local'
        max_display_rows = 5000

    code, was_patched, detected_var = ensure_output_variable_in_code(code, output_variable)
    if was_patched:
        logger.info(
            "[AnalystAgent] patched output_variable: %s = %s",
            output_variable, detected_var,
        )

    sandbox = create_sandbox(sandbox_mode)

    try:
        execution_result = sandbox.run_python_code(
            code=code,
            workspace=self.workspace,
            output_variable=output_variable,
        )

        if execution_result['status'] != 'ok':
            error_message = execution_result.get('content', 'Unknown error')
            return {"status": "error", "error_message": str(error_message)}

        full_df = execution_result['content']
        row_count = len(full_df)

        chart_encodings = chart_spec.get("encodings", {})

        def _missing_encoding(field: Any) -> bool:
            # field is normally a column-name string. Weak models sometimes
            # emit a dict ({"field": "col"}), a list, or other non-string;
            # turn those into a clean, repairable "not found" instead of an
            # unhashable-type crash on the membership test below.
            if not field:
                return False  # empty / None -> optional channel, skip
            if isinstance(field, dict):
                field = field.get("field")
                if not field:
                    return False
            if not isinstance(field, str):
                return True  # list / number / etc. -> invalid single column
            return field not in full_df.columns

        missing_fields = [
            f"{channel}: '{field}'"
            for channel, field in chart_encodings.items()
            if _missing_encoding(field)
        ]
        if missing_fields:
            available = list(full_df.columns)
            return {
                "status": "error",
                "error_message": (
                    f"Chart encoding fields not found in output DataFrame: "
                    f"{', '.join(missing_fields)}. "
                    f"Available columns: {available}"
                ),
                "error_code": "agent.fieldsNotFound",
                "error_params": {
                    "missing": ", ".join(missing_fields),
                    "available": str(available),
                },
            }

        if row_count == 0:
            return {
                "status": "error",
                "error_message": "Output DataFrame is empty (0 rows). Check filters or data loading.",
                "error_code": "agent.emptyDataframe",
            }

        # The full result is persisted; only the display copy is capped.
        output_table_name = self.workspace.get_fresh_name(f"d-{output_variable}")
        self.workspace.write_parquet(full_df, output_table_name)

        if row_count > max_display_rows:
            query_output = full_df.head(max_display_rows)
        else:
            query_output = full_df
        query_output = query_output.loc[:, ~query_output.columns.duplicated()]

        refined_goal = {
            "display_instruction": display_instruction,
            "title": title,
            "output_variable": output_variable,
            "output_fields": list(query_output.columns),
            "chart": chart_spec,
            "field_metadata": field_metadata,
            "field_display_names": field_display_names or {},
        }

        transform_result = {
            "status": "ok",
            # Backend-minted, run-stable chart id. Forwarded to the frontend
            # in the ``result`` event so it adopts this id verbatim — the same
            # id the agent can embed in a same-run report (``chart://``)
            # and pass to ``inspect_chart``. NOT derived from the table name
            # (one table may back many charts).
            "chart_id": f"chart-{uuid.uuid4().hex[:12]}",
            "code": code,
            "content": {
                "rows": df_to_safe_records(query_output),
                "virtual": {
                    "table_name": output_table_name,
                    "row_count": row_count,
                },
            },
            "refined_goal": refined_goal,
            "dialog": self._snapshot_dialog(messages),
            "agent": "AnalystAgent",
        }

        return {
            "status": "ok",
            "transform_result": transform_result,
        }

    except Exception as e:
        logger.error("[AnalystAgent] Visualize execution error", exc_info=e)
        return {"status": "error", "error_message": "Visualization execution failed"}
def _build_system_prompt(
    self,
    has_primary_tables: bool = False,
    has_focused_thread: bool = False,
    has_other_threads: bool = False,
    has_attached_images: bool = False,
    has_charts: bool = False,
) -> str:
    """Assemble the system prompt for a run.

    The ``SYSTEM_PROMPT`` frame has its slots filled (context guide, skill
    catalog, iteration budget, exploration rules); the always-loaded core
    skill body is appended, followed by always-apply knowledge rules, the
    agent coding rules and the language instruction when each is present.

    The ``has_*`` flags select which context-block descriptions appear in
    the guide. As a side effect ``self._injected_rules`` is set to the
    titles of the knowledge rules that were injected (empty when there is
    no knowledge store).
    """
    exploration_rules = self.agent_exploration_rules
    rules_block = ""
    if exploration_rules and exploration_rules.strip():
        rules_block = "".join((
            "\n## Additional exploration rules\n\n",
            exploration_rules.strip(),
            "\n\nPlease follow the above rules when exploring data.",
        ))

    # Table section: the wording depends on whether the user focused a table.
    guide_lines: list[str] = []
    if has_primary_tables:
        guide_lines += [
            "- **[PRIMARY TABLE(S)]**: The table(s) the user is focused on. "
            "Prioritize these, but freely use other available tables if needed.",
            "- **[OTHER AVAILABLE TABLES]**: Additional tables in the workspace.",
        ]
    else:
        guide_lines.append("- **[AVAILABLE TABLES]**: All tables in the workspace.")
    guide_lines.append(
        " Use `inspect_source_data` to get detailed stats and sample rows. "
        "Use `execute_python_script` for custom computations."
    )

    # Optional sections, in the order they appear in the user message.
    optional_sections = (
        (
            has_focused_thread,
            "- **[FOCUSED THREAD]**: The thread the user is continuing. "
            "Build on this — do not repeat visualizations already created here.",
        ),
        (
            has_other_threads,
            "- **[OTHER THREADS]**: Brief summaries of other exploration threads in this workspace. ",
        ),
        (
            has_charts,
            "- **[AVAILABLE CHARTS]**: Charts the user already created (with their "
            "ids, types, and encodings). These already exist — build on them or "
            "reference them; do not re-create an equivalent chart. When asked to "
            "write up / summarize / report on the exploration, load the `report` "
            "skill and embed these by id rather than producing new visualizations.",
        ),
        (
            has_attached_images,
            "- **[USER ATTACHMENT(S)]**: Image(s) provided by the user. "
            "Refer to these when relevant to the user's question.",
        ),
    )
    guide_lines.extend(text for enabled, text in optional_sections if enabled)
    context_guide = "\n".join(guide_lines)

    # The skill catalog is static capability config, so it lives in the
    # system frame rather than in the per-run user message.
    skills_block = self.registry.render_registry_block() or "_(no loadable skills)_"

    # Plain sequential substitution: only these exact slot tokens are
    # replaced, so any other braces in the frame stay literal.
    slot_values = (
        ("{context_guide}", context_guide),
        ("{skills_block}", skills_block),
        ("{max_iterations}", str(self.max_iterations)),
        ("{agent_exploration_rules}", rules_block),
    )
    prompt = SYSTEM_PROMPT
    for slot, value in slot_values:
        prompt = prompt.replace(slot, value)

    # The core skill is the always-active baseline; it is framed with the
    # same ``[SKILL: ]`` header used for on-demand skills.
    core_body = self.registry.load_body(_CORE_SKILL)
    prompt = (
        prompt
        + f"\n\n[SKILL: {_CORE_SKILL}] Always-on baseline — these tools and "
        + f"actions are active for the whole run.\n\n{core_body}"
    )

    store = self._knowledge_store
    if store:
        knowledge_rules = store.load_always_apply_rules()
        self._injected_rules = [rule["title"] for rule in knowledge_rules]
        prompt = prompt + store.format_rules_block(knowledge_rules)
    else:
        self._injected_rules = []

    coding_rules = self.agent_coding_rules
    if coding_rules and coding_rules.strip():
        prompt = prompt + "\n\n## Agent Coding Rules\n\n" + coding_rules.strip()

    if self.language_instruction:
        prompt += "\n\n" + self.language_instruction
    return prompt
It is plain content — no placeholders — and is + # framed with the same ``[SKILL: ]`` header as on-demand skills (see + # _load_skill_into_context) so every capability bundle reads as one family: + # core is the always-active baseline, gated skills announce themselves when + # loaded. + core_body = self.registry.load_body(_CORE_SKILL) + prompt += ( + f"\n\n[SKILL: {_CORE_SKILL}] Always-on baseline — these tools and " + f"actions are active for the whole run.\n\n{core_body}" + ) + + if self._knowledge_store: + knowledge_rules = self._knowledge_store.load_always_apply_rules() + self._injected_rules = [r["title"] for r in knowledge_rules] + prompt += self._knowledge_store.format_rules_block(knowledge_rules) + else: + self._injected_rules = [] + + if self.agent_coding_rules and self.agent_coding_rules.strip(): + prompt += ( + "\n\n## Agent Coding Rules\n\n" + + self.agent_coding_rules.strip() + ) + + if self.language_instruction: + prompt = prompt + "\n\n" + self.language_instruction + return prompt + + def _build_initial_messages( + self, + input_tables: list[dict[str, Any]], + user_question: str, + focused_thread: list[dict[str, Any]] | None = None, + other_threads: list[dict[str, Any]] | None = None, + primary_tables: list[str] | None = None, + attached_images: list[str] | None = None, + charts: list[dict[str, Any]] | None = None, + ) -> list[dict]: + """Build the initial messages with 3-tier context.""" + table_summaries = self._build_lightweight_table_context(input_tables, primary_tables=primary_tables) + + focused_block = "" + if focused_thread: + focused_block = self._build_focused_thread_context(focused_thread) + + peripheral_block = "" + if other_threads: + peripheral_block = self._build_peripheral_thread_context(other_threads) + + if primary_tables: + user_content = f"{table_summaries}\n\n" + else: + user_content = f"[AVAILABLE TABLES]\n\n{table_summaries}\n\n" + if focused_block: + user_content += f"{focused_block}\n\n" + if peripheral_block: + user_content 
def _build_focused_thread_context(
    self, focused_thread: list[dict[str, Any]]
) -> str:
    """Render the focused-thread block by delegating to the module-level
    ``build_focused_thread_context`` helper."""
    rendered = build_focused_thread_context(focused_thread)
    return rendered


def _build_peripheral_thread_context(
    self, other_threads: list[dict[str, Any]]
) -> str:
    """Render the other-threads block by delegating to the module-level
    ``build_peripheral_thread_context`` helper."""
    rendered = build_peripheral_thread_context(other_threads)
    return rendered
other_threads: list[dict[str, Any]] + ) -> str: + return build_peripheral_thread_context(other_threads) + + @staticmethod + def _build_available_charts_context( + charts: list[dict[str, Any]] | None, + ) -> str: + """Render the ``[AVAILABLE CHARTS]`` block from the chart descriptors. + + Mirrors the legacy report agent's listing (id, type, encodings, table + ref) so chart_ids stay stable across the run — the report skill's + ``inspect_chart`` and ``chart://chart_id`` embeds reference these ids. + Returns ``""`` when there are no charts. + """ + if not charts: + return "" + lines = ["[AVAILABLE CHARTS]"] + for c in charts: + chart_id = c.get("chart_id") + if not chart_id: + continue + enc_str = ", ".join( + f"{k}: {v}" for k, v in (c.get("encodings") or {}).items() if v + ) + lines.append( + f" - {chart_id}: {c.get('chart_type', 'Unknown')}" + + (f" ({enc_str})" if enc_str else "") + + f" → table: {c.get('table_ref', '?')}" + ) + return "\n".join(lines) if len(lines) > 1 else "" + + def _build_lightweight_table_context( + self, input_tables: list[dict[str, Any]], primary_tables: list[str] | None = None + ) -> str: + return build_lightweight_table_context( + input_tables, + self.workspace, + primary_tables, + ) + + # ------------------------------------------------------------------ + # LLM interaction (with internal tool-calling loop) + # ------------------------------------------------------------------ + + def _get_next_action( + self, + trajectory: list[dict], + input_tables: list[dict[str, Any]] | None = None, + outer_iteration: int = 0, + ) -> Generator[dict[str, Any], None, None]: + """Call the LLM with tools, run the inspection tool rounds internally, + and surface the single committing action the turn ends with (as an + ``agent_action`` event).""" + max_tool_rounds = 12 + max_json_retries = 1 + json_retries = 0 + messages = trajectory + llm_calls_in_cycle = 0 + + rlog = self._reasoning_log + + from data_formulator.sandbox.local_sandbox import SandboxSession 
+ ns_dir = self._explore_ns_dir() + ws_path = str(self.workspace.confined_scratch.root.parent) + + with SandboxSession() as explore_session: + self._explore_session = explore_session + + if ns_dir.exists(): + ok = SandboxSession.restore_namespace(explore_session, ns_dir, ws_path) + if ok: + logger.info("[AnalystAgent] Restored explore namespace from %s", ns_dir) + import shutil + shutil.rmtree(ns_dir, ignore_errors=True) + + self._tool_loop_exit_reason = None + yield from self._tool_loop( + messages, max_tool_rounds, max_json_retries, json_retries, + llm_calls_in_cycle, rlog, input_tables, outer_iteration, + ) + + if self._tool_loop_exit_reason == "tool_rounds_exhausted": + saved = explore_session.save_namespace(ns_dir, ws_path) + if saved: + logger.info("[AnalystAgent] Saved explore namespace to %s", ns_dir) + + self._explore_session = None + + def _current_tools(self) -> list[dict[str, Any]]: + """The tool set offered this turn: inspection tools (core tools + + load_skill + loaded skills' tools) plus the committing **action** + tools of loaded skills (core's visualize/delegate always; write_report + once the report skill is loaded). The model gathers with inspection tools + and acts with at most one action per turn.""" + extra_tools = self.registry.tools_for(self._loaded_skills) + action_tools = self.registry.action_tools_for(self._loaded_skills) + return build_tools( + self.registry.gated_skill_names(), + extra_tools, + action_tools=action_tools, + ) + + def _loaded_skill_tool_map(self) -> dict[str, Any]: + """Map ``tool_name -> skill instance`` for inspection tools unlocked by + loaded skills. 
def _tool_loop(
    self,
    messages, max_tool_rounds, max_json_retries, json_retries,
    llm_calls_in_cycle, rlog, input_tables, outer_iteration,
):
    """Inner tool-calling loop, wrapped by _get_next_action in a
    SandboxSession context manager.

    Each round streams one LLM call and then takes one of three paths:

    - the response contains a committing action -> ``_commit_action`` records
      it; the loop ends if it committed, otherwise the model retries;
    - the response contains only inspection tools -> each is executed, its
      result appended as a ``tool`` message, and the loop continues;
    - the response has no tool calls -> the text is the final answer and the
      loop ends with ``reason="done"``.

    The loop always ends by yielding one ``agent_action`` event whose
    ``reason`` is ``ok`` (emitted by ``_commit_action``), ``done``,
    ``llm_error`` or ``tool_rounds_exhausted``. ``messages`` is mutated in
    place. ``max_json_retries`` and ``json_retries`` are accepted but not
    used by this loop.
    """
    for round_idx in range(max_tool_rounds):
        llm_calls_in_cycle += 1
        tools = self._current_tools()
        rlog.log("llm_request", iteration=outer_iteration,
                 round=round_idx + 1,
                 messages_count=len(messages),
                 tools_available=[t["function"]["name"] for t in tools])
        llm_t0 = time.time()
        try:
            response = yield from self._stream_llm(messages, tools)
        except Exception as exc:
            llm_latency = int((time.time() - llm_t0) * 1000)
            rlog.log("llm_response", iteration=outer_iteration,
                     round=round_idx + 1,
                     latency_ms=llm_latency, finish_reason="error",
                     error=type(exc).__name__)
            logger.error("[AnalystAgent] LLM call failed", exc_info=exc)
            from data_formulator.security.sanitize import classify_llm_error
            yield {
                "type": "agent_action",
                "action_data": None,
                "reason": "llm_error",
                "error_message": classify_llm_error(exc),
                "llm_calls": llm_calls_in_cycle,
            }
            return

        llm_latency = int((time.time() - llm_t0) * 1000)

        if not response.choices:
            rlog.log("llm_response", iteration=outer_iteration,
                     round=round_idx + 1,
                     latency_ms=llm_latency, finish_reason="empty")
            yield {"type": "agent_action", "action_data": None, "reason": "llm_error",
                   "error_message": "LLM returned empty response",
                   "llm_calls": llm_calls_in_cycle}
            return

        choice = response.choices[0]
        content = choice.message.content or ""
        tool_calls = getattr(choice.message, 'tool_calls', None)
        finish_reason = getattr(choice, "finish_reason", "stop")

        if tool_calls:
            rlog.log("llm_response", iteration=outer_iteration,
                     round=round_idx + 1,
                     latency_ms=llm_latency, finish_reason="tool_calls",
                     tool_calls=[{"name": tc.function.name} for tc in tool_calls])
        else:
            rlog.log("llm_response", iteration=outer_iteration,
                     round=round_idx + 1,
                     latency_ms=llm_latency, finish_reason=finish_reason)

        # --- tool calls: partition into committing actions vs inspection ---
        if tool_calls:
            if content.strip():
                yield {"type": "thinking_text", "content": content.strip()}

            # A committing action is a tool call (visualize / delegate /
            # write_report). Inspection tools (explore /
            # inspect_source_data / inspect_chart / load_skill) gather. A turn
            # ends with exactly ONE action; the harness enforces that here.
            action_names = self.registry.action_names()
            action_calls = [tc for tc in tool_calls
                            if tc.function.name in action_names]
            readonly_calls = [tc for tc in tool_calls
                              if tc.function.name not in action_names]

            # ── Action present → cardinality guard (first-wins) ───────────
            if action_calls:
                committed = yield from self._commit_action(
                    action_calls, readonly_calls, messages, content, choice,
                    rlog, outer_iteration, llm_calls_in_cycle,
                )
                if committed:
                    return
                # Not committed (e.g. missing required fields) → a correction
                # tool-result was appended; loop and let the model retry.
                continue

            # ── Only inspection tools → execute all and loop ───────────────
            assistant_msg: dict[str, Any] = {
                "role": "assistant",
                "content": content or None,
            }
            attach_reasoning_content(assistant_msg, choice.message)
            assistant_msg["tool_calls"] = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments,
                    },
                }
                for tc in readonly_calls
            ]
            messages.append(assistant_msg)

            # Tools unlocked by currently-loaded skills (name -> instance).
            skill_tool_owners = self._loaded_skill_tool_map()
            # Images returned by skill tools are attached as a single
            # follow-up vision message after all tool results this round.
            pending_images: list[str] = []
            # Skill bodies unlocked via load_skill this round. They are
            # `user` turns and MUST land AFTER every tool result — an
            # assistant `tool_calls` turn must be immediately followed by its
            # tool responses (Azure/OpenAI reject any other message in
            # between). So we defer them past the per-tc loop.
            pending_skill_bodies: list[dict] = []

            for tc in readonly_calls:
                tool_name = tc.function.name
                try:
                    tool_args = json.loads(tc.function.arguments)
                except (json.JSONDecodeError, TypeError):
                    # Malformed JSON, or no arguments at all (None).
                    tool_args = {}
                if not isinstance(tool_args, dict):
                    # Valid JSON that is not an object (list / string /
                    # number) would crash every ``.get`` below; treat it as
                    # "no arguments", as ``_commit_action`` does for actions.
                    tool_args = {}

                yield {
                    "type": "tool_start",
                    "tool": tool_name,
                    "purpose": tool_args.get("purpose") if tool_name == "execute_python_script" else None,
                    "code": tool_args.get("code") if tool_name == "execute_python_script" else None,
                    "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None,
                    "skill": tool_args.get("name") if tool_name == "load_skill" else None,
                }

                tool_t0 = time.time()
                tool_status = "ok"

                if tool_name == "execute_python_script":
                    result = self._run_explore_code(
                        tool_args.get("code", ""),
                        input_tables or [],
                    )
                    tool_content = result.get("stdout", "")
                    tool_status = result.get("status", "ok")
                    if result.get("error"):
                        tool_content += f"\n\nError: {result['error']}"
                    yield {
                        "type": "tool_result",
                        "tool": tool_name,
                        "status": tool_status,
                        "stdout": result.get("stdout", ""),
                        "error": result.get("error"),
                    }
                elif tool_name == "inspect_source_data":
                    table_names = tool_args.get("table_names", [])
                    tool_content = handle_inspect_source_data(
                        table_names, input_tables or [], self.workspace,
                    )
                    yield {
                        "type": "tool_result",
                        "tool": tool_name,
                        "status": "ok",
                        "stdout": tool_content,
                    }
                elif tool_name == "load_skill":
                    skill_name = tool_args.get("name", "")
                    ok, message, body_msg = self._build_skill_body_message(skill_name)
                    tool_status = "ok" if ok else "error"
                    tool_content = message
                    # The skill body is a `user` turn that must be appended
                    # AFTER this round's tool results (see pending_skill_bodies);
                    # the tool result here just confirms the load.
                    if ok and body_msg is not None:
                        pending_skill_bodies.append(body_msg)
                    if ok:
                        yield {
                            "type": "skill_loaded",
                            "skill": skill_name,
                            "unlocks": list(
                                self.registry.metas[skill_name].action_names
                            ) if self.registry.has(skill_name) else [],
                        }
                    yield {
                        "type": "tool_result",
                        "tool": tool_name,
                        "status": tool_status,
                        "stdout": message,
                        "error": None if ok else message,
                    }
                elif tool_name in skill_tool_owners:
                    skill = skill_tool_owners[tool_name]
                    skill_ctx = SkillContext(
                        client=self.client,
                        workspace=self.workspace,
                        language_instruction=self.language_instruction,
                        trajectory=messages,
                        payload=dict(self._run_payload),
                    )
                    try:
                        result = skill.handle_tool(tool_name, tool_args, skill_ctx)
                    except Exception as exc:
                        logger.warning("[AnalystAgent] Skill tool %r failed", tool_name, exc_info=exc)
                        result = ToolResult(text=f"Tool '{tool_name}' failed: {exc}")
                        tool_status = "error"
                    tool_content = result.text
                    if result.images:
                        pending_images.extend(result.images)
                    yield {
                        "type": "tool_result",
                        "tool": tool_name,
                        "status": tool_status,
                        "stdout": tool_content,
                    }
                else:
                    # The model named a tool that is not on offer. Report it
                    # as a failure so the log is accurate and the
                    # ``tool_start`` above gets its matching ``tool_result``.
                    tool_content = f"Unknown tool: {tool_name}"
                    tool_status = "error"
                    yield {
                        "type": "tool_result",
                        "tool": tool_name,
                        "status": tool_status,
                        "stdout": "",
                        "error": tool_content,
                    }

                tool_latency = int((time.time() - tool_t0) * 1000)
                output_summary = (tool_content[:200] + "...") if len(tool_content) > 200 else tool_content
                rlog.log("tool_execution", iteration=outer_iteration,
                         tool=tool_name,
                         # ``purpose`` is model-supplied and may be null or a
                         # non-string; never let logging crash on it.
                         input_summary=str(tool_args.get("purpose") or "")[:200],
                         output_summary=output_summary,
                         latency_ms=tool_latency, status=tool_status)

                messages.append({
                    "role": "tool",
                    "tool_call_id": tc.id,
                    "content": tool_content,
                })

            # Attach any skill-tool images as a single follow-up vision turn
            # (tool-result messages can't carry image content on most providers).
            if pending_images:
                image_blocks: list[dict[str, Any]] = [{
                    "type": "text",
                    "text": (
                        "[INSPECTED IMAGE(S)] Rendered images for the tool "
                        "call(s) you just made, in request order:"
                    ),
                }]
                for url in pending_images:
                    image_blocks.append({
                        "type": "image_url",
                        "image_url": {"url": url, "detail": "high"},
                    })
                messages.append({"role": "user", "content": image_blocks})

            # Now that every tool result is in place, land any skill bodies
            # unlocked this round (deferred so the assistant tool_calls turn
            # stays immediately followed by its tool responses).
            for body_msg in pending_skill_bodies:
                messages.append(body_msg)

            logger.info("[AnalystAgent] Executed %d inspection tool call(s), looping back to LLM", len(readonly_calls))
            continue

        # --- no tool calls — the model gave a plain-text answer ----------
        # In this turn model, committing no action is the NORMAL way to end
        # the run: the agent has nothing more to do and answers in prose.
        # That final text is the run's completion (the frontend renders it
        # as the summary). Record it as a plain assistant turn and signal
        # "done" to the outer loop.
        logger.info("[AnalystAgent] No action committed; final text ends the run")
        final_msg: dict[str, Any] = {"role": "assistant", "content": content or None}
        attach_reasoning_content(final_msg, choice.message)
        messages.append(final_msg)
        yield {"type": "agent_action", "action_data": None, "reason": "done",
               "final_text": content.strip(), "llm_calls": llm_calls_in_cycle}
        return

    # --- tool rounds exhausted ---
    logger.warning("[AnalystAgent] Exceeded %d tool rounds without committing an action", max_tool_rounds)
    self._tool_loop_exit_reason = "tool_rounds_exhausted"
    yield {"type": "agent_action", "action_data": None, "reason": "tool_rounds_exhausted",
           "llm_calls": llm_calls_in_cycle}
    return
def _commit_action(
    self,
    action_calls: list,
    readonly_calls: list,
    messages: list[dict],
    content: str,
    choice,
    rlog,
    outer_iteration: int,
    llm_calls_in_cycle: int,
) -> Generator[Event, None, bool]:
    """Enforce one committing action per turn and record the commitment.

    Policy is first-wins: the first entry of ``action_calls`` is kept; every
    other action call and every inspection call from the same response is
    discarded rather than rejecting the turn. The trajectory gets an
    assistant message holding only the kept tool call, so no tool call is
    left without a result.

    Outcomes:
      - Required fields missing: an ``ERROR`` tool result is appended, a
        ``tool_result`` error event is yielded, and ``False`` is returned so
        the caller can loop and let the model retry.
      - Otherwise: an empty placeholder tool result is appended (to be filled
        with the skill's observation later), a ``[SYSTEM]`` note is added if
        anything was discarded, the ``agent_action`` event carrying the
        parsed arguments is yielded, and ``True`` is returned.
    """
    winner = action_calls[0]
    winner_name = winner.function.name
    extra_actions = [call.function.name for call in action_calls[1:]]
    extra_inspections = [call.function.name for call in readonly_calls]

    # Arguments that are not a JSON object are treated as "no arguments".
    try:
        action_data = json.loads(winner.function.arguments)
    except json.JSONDecodeError:
        action_data = {}
    if not isinstance(action_data, dict):
        action_data = {}
    _rescue_unpack_json_strings(action_data)
    action_data["action"] = winner_name

    # Record ONLY the kept tool call so the trajectory stays valid whatever
    # happens next (a CONTINUE action leads to another LLM call).
    commitment: dict[str, Any] = {"role": "assistant", "content": content or None}
    attach_reasoning_content(commitment, choice.message)
    commitment["tool_calls"] = [{
        "id": winner.id,
        "type": "function",
        "function": {
            "name": winner_name,
            "arguments": winner.function.arguments,
        },
    }]
    messages.append(commitment)

    # Completeness check before dispatch, on top of the skill's own validation.
    absent = [
        field
        for field in self.registry.action_required_fields(winner_name)
        if not action_data.get(field)
    ]
    if absent:
        absent_list = ", ".join(absent)
        correction = (
            f"The '{winner_name}' action is missing required field(s): "
            f"{absent_list}. Call it again with those fields filled in."
        )
        messages.append({
            "role": "tool",
            "tool_call_id": winner.id,
            "content": f"ERROR: {correction}",
        })
        rlog.log("tool_execution", iteration=outer_iteration, tool=winner_name,
                 input_summary="action_missing_fields",
                 output_summary=absent_list, latency_ms=0, status="error")
        logger.warning("[AnalystAgent] Action '%s' missing fields %s, requesting retry",
                       winner_name, absent)
        yield {"type": "tool_result", "tool": winner_name, "status": "error",
               "error": f"Missing fields: {absent_list}"}
        return False

    # Placeholder result keeps the trajectory well-formed during dispatch;
    # the run loop later overwrites it with the skill's observation.
    messages.append({
        "role": "tool",
        "tool_call_id": winner.id,
        "content": "",
    })

    # Tell the model what was discarded so it learns the one-action rule.
    if extra_actions or extra_inspections:
        ignored_parts: list[str] = []
        if extra_actions:
            ignored_parts.append(
                f"additional action call(s) ({', '.join(extra_actions)})"
            )
        if extra_inspections:
            ignored_parts.append(
                f"inspection call(s) ({', '.join(extra_inspections)}) made alongside it"
            )
        note = (
            f"[SYSTEM] A turn commits exactly one action. Kept "
            f"'{winner_name}'; ignored {' and '.join(ignored_parts)}. Do any "
            "inspection in its own round before the action, and "
            "emit only one action per turn."
        )
        messages.append({"role": "user", "content": note})
        logger.info(
            "[AnalystAgent] Cardinality guard: kept '%s', dropped actions=%s readonly=%s",
            winner_name, extra_actions, extra_inspections,
        )

    rlog.log("tool_execution", iteration=outer_iteration, tool=winner_name,
             input_summary="action_committed", output_summary="ok",
             latency_ms=0, status="ok")
    yield {"type": "agent_action", "action_data": action_data, "reason": "ok",
           "tool_call_id": winner.id, "llm_calls": llm_calls_in_cycle}
    return True
+ ), + }) + logger.info( + "[AnalystAgent] Cardinality guard: kept '%s', dropped actions=%s readonly=%s", + chosen_name, dropped_actions, dropped_readonly, + ) + + rlog.log("tool_execution", iteration=outer_iteration, tool=chosen_name, + input_summary="action_committed", output_summary="ok", + latency_ms=0, status="ok") + yield {"type": "agent_action", "action_data": action_data, "reason": "ok", + "tool_call_id": chosen.id, "llm_calls": llm_calls_in_cycle} + return True + + _MAX_LLM_RETRIES = 3 + + @staticmethod + def _is_transient_error(exc: Exception) -> bool: + msg = str(exc).lower() + if any(kw in msg for kw in ( + "timeout", "timed out", "rate limit", "rate_limit", + "429", "503", "502", "connection", "reset by peer", + )): + return True + name = type(exc).__name__.lower() + return any(kw in name for kw in ("timeout", "ratelimit", "connection")) + + def _open_stream(self, messages: list[dict], tools: list[dict]): + """Open a *streaming* LLM call with tool definitions, retrying on + transient errors *before* any tokens are consumed. + + ``stream=True`` is what makes live report streaming possible: the loop's + LLM call always streams, and the agent forwards a streaming action's + argument as it arrives (design-docs/36 §5). ``parallel_tool_calls=False`` + forces one tool call per response — the structural backstop for the + one-action-per-turn rule: actions are sequential (each result shapes the + next), so the model must never batch them. It also serializes inspection + tools — a minor extra round-trip — an acceptable trade for never silently + dropping batched actions. Providers that don't support the flag drop it + (``drop_params=True``); the first-wins cardinality guard remains as a + belt-and-suspenders net. 
def _stream_llm(
    self, messages: list[dict], tools: list[dict],
) -> Generator[Event, None, Any]:
    """Stream the LLM call, forwarding any *streaming* action's argument live,
    and return a reconstructed non-streaming-shaped response for the loop.

    The agent owns this generic forwarding envelope (design-docs/36 §5): it
    accumulates content / reasoning / tool-call deltas exactly as a buffered
    call would, but when a tool call's name is a streaming action (per
    ``registry.action_stream_spec``) it emits the action's ``action`` event
    once and then forwards the growing ``stream_field`` argument as
    ``text_delta``s on the skill's declared channel as the tokens arrive.
    The reconstructed response carries the *full* assembled tool calls, so
    the downstream partition / commit / dispatch path is byte-for-byte the
    same as the old buffered call — the only difference is that the report's
    text reached the frontend live. The skill's later (buffered) re-emission
    of the same content is suppressed by the router (see ``run`` /
    ``_route_skill_events``); on a provider without tool-arg streaming nothing
    is forwarded here and the buffered path delivers it instead.

    Args:
        messages: The conversation passed through to :meth:`_open_stream`.
        tools: The tool definitions passed through to :meth:`_open_stream`.

    Yields:
        Whatever :meth:`_forward_stream_delta` yields for each tool-call
        argument delta (``action`` / ``text_delta`` events for streaming
        actions; nothing for other tool calls).

    Returns:
        A ``SimpleNamespace`` with a single-element ``choices`` list whose
        entry has ``message`` (``content``, ``tool_calls``,
        ``reasoning_content``) and ``finish_reason`` attributes.
    """
    # Each LLM call starts a fresh streamed-channel map; only the round that
    # actually commits a streaming action leaves an entry for the run loop.
    self._streamed_channels = {}

    stream = self._open_stream(messages, tools)

    content_parts: list[str] = []
    reasoning_acc: str | None = None
    # Used as-is when no chunk ever reports a finish reason.
    finish_reason = "stop"
    # idx -> {"id", "name", "arguments"}
    tool_calls_acc: dict[int, dict[str, Any]] = {}
    # Per-slot streaming state {"active", "channel", "extractor", "announced"};
    # _forward_stream_delta fills this in, keyed by id(slot) rather than idx.
    streamers: dict[int, dict[str, Any]] = {}

    for chunk in stream:
        # Skip chunks that carry no choices or no delta.
        if not getattr(chunk, "choices", None):
            continue
        choice0 = chunk.choices[0]
        delta = getattr(choice0, "delta", None)
        if delta is None:
            continue
        # The last non-empty finish_reason seen wins.
        if getattr(choice0, "finish_reason", None):
            finish_reason = choice0.finish_reason

        reasoning_acc = accumulate_reasoning_content(reasoning_acc, delta)

        content = getattr(delta, "content", None)
        if content:
            content_parts.append(content)

        for tcd in getattr(delta, "tool_calls", None) or []:
            # A missing or None index is treated as slot 0.
            idx = getattr(tcd, "index", 0) or 0
            slot = tool_calls_acc.setdefault(
                idx, {"id": None, "name": "", "arguments": ""},
            )
            if getattr(tcd, "id", None):
                slot["id"] = tcd.id
            fn = getattr(tcd, "function", None)
            if fn is not None:
                if getattr(fn, "name", None):
                    slot["name"] = fn.name
                arg_delta = getattr(fn, "arguments", None)
                if arg_delta:
                    # Argument text arrives in fragments; append, then let the
                    # forwarder surface whatever became decodable.
                    slot["arguments"] += arg_delta
                    yield from self._forward_stream_delta(slot, streamers)

    # Reconstruct a non-streaming-shaped response for the loop.
    tool_call_objs: list[Any] = []
    for i in sorted(tool_calls_acc):
        tc = tool_calls_acc[i]
        tool_call_objs.append(SimpleNamespace(
            # Synthesize an id when the stream never supplied one.
            id=tc["id"] or f"call_{i}",
            type="function",
            function=SimpleNamespace(name=tc["name"], arguments=tc["arguments"]),
        ))
    message = SimpleNamespace(
        # Empty content / no tool calls are reported as None, not ""/[].
        content="".join(content_parts) or None,
        tool_calls=tool_call_objs or None,
        reasoning_content=reasoning_acc,
    )
    choice = SimpleNamespace(message=message, finish_reason=finish_reason)
    return SimpleNamespace(choices=[choice])
+ yield {"type": "action", "action": name} + st["announced"] = True + + new_text = st["extractor"].feed(slot["arguments"]) + if new_text: + yield {"type": "text_delta", "channel": st["channel"], "content": new_text} + tcid = slot.get("id") + if tcid: + self._streamed_channels[tcid] = st["channel"] + + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + @staticmethod + def _strip_images(trajectory: list[dict]) -> list[dict]: + """Return a copy of the trajectory with image_url blocks removed.""" + stripped: list[dict] = [] + for msg in trajectory: + content = msg.get("content") + if isinstance(content, list): + text_parts = [p for p in content if p.get("type") == "text"] + if text_parts: + stripped.append({**msg, "content": text_parts}) + else: + stripped.append({**msg, "content": "[image removed]"}) + else: + stripped.append(msg) + return stripped + + @staticmethod + def _log_session_end( + rlog, + status: str, + total_iterations: int, + total_llm_calls: int, + session_start_time: float, + ) -> None: + """Write ``session_end`` to the reasoning log (does not close it).""" + rlog.log( + "session_end", + status=status, + total_iterations=total_iterations, + total_llm_calls=total_llm_calls, + total_latency_ms=int((time.time() - session_start_time) * 1000), + ) + + @staticmethod + def _error_event( + iteration: int, + message: str, + *, + display_instruction: str = "", + message_code: str = "", + message_params: dict | None = None, + ) -> dict[str, Any]: + """Build an ``"error"`` event dict for the streaming response.""" + event: dict[str, Any] = { + "type": "error", + "iteration": iteration, + "message": message, + } + if message_code: + event["message_code"] = message_code + if message_params: + event["message_params"] = message_params + if display_instruction: + event["display_instruction"] = display_instruction + return event + + @staticmethod + def 
_snapshot_dialog(messages: list[dict] | None) -> list[dict]: + """Snapshot the conversation for the Agent Log dialog.""" + if not messages: + return [] + snapshot: list[dict] = [] + for msg in messages: + role = msg.get("role", "") + content = msg.get("content") + + if isinstance(content, list): + content = "\n".join( + p.get("text", "") for p in content if p.get("type") == "text" + ) + + if role == "assistant" and msg.get("tool_calls"): + tool_details = [] + for tc in msg["tool_calls"]: + fn = tc.get("function", {}) + name = fn.get("name", "?") + args_str = fn.get("arguments", "{}") + try: + args_obj = json.loads(args_str) + if name == "execute_python_script" and "code" in args_obj: + tool_details.append(f"[tool: {name}]\n```python\n{args_obj['code']}\n```") + else: + formatted = json.dumps(args_obj, indent=2, ensure_ascii=False) + tool_details.append(f"[tool: {name}]\n```json\n{formatted}\n```") + except (json.JSONDecodeError, TypeError): + tool_details.append(f"[tool: {name}]\n{args_str}") + text_part = content or "" + combined = (text_part + "\n\n" + "\n\n".join(tool_details)).strip() + snapshot.append({"role": role, "content": combined}) + + elif role == "tool": + tool_content = content or "" + if isinstance(tool_content, str) and len(tool_content) > 3000: + tool_content = tool_content[:3000] + "\n... (truncated)" + snapshot.append({"role": "assistant", "content": f"[tool result]\n{tool_content}"}) + + elif content: + if role != "system" and isinstance(content, str) and len(content) > 4000: + content = content[:4000] + "\n... (truncated)" + snapshot.append({"role": role, "content": content}) + return snapshot diff --git a/py-src/data_formulator/analyst/mini_agent.py b/py-src/data_formulator/analyst/mini_agent.py new file mode 100644 index 00000000..99088c3c --- /dev/null +++ b/py-src/data_formulator/analyst/mini_agent.py @@ -0,0 +1,797 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +"""A single-decision, no-loop variant of :class:`AnalystAgent` for small models. + +:class:`MiniAnalystAgent` is the most stripped-down member of the analyst family. +Where :class:`~data_formulator.analyst.agent.AnalystAgent` runs a +multi-step *inspect -> act -> observe* loop that can commit several actions, the +mini agent makes exactly **one analytic decision per run** and stops. Given the +data context it returns ONE of two things: + +* **visualize** — a small data-transformation script plus a chart spec (this is + the default; almost every question should produce a chart), or +* **explain** — a short free-text answer (only when the user is clearly *not* + asking for a chart, e.g. a yes/no or factual question). + +Before deciding, the agent may look at the data once: the model MAY run a single +``execute_python_script`` inspection (e.g. to check a join or a column's exact +values), then must produce its visualize/explain. If the committed chart then +fails, each in-place repair attempt may likewise run one inspection to diagnose +the failure before re-emitting the chart. Inspection is one call per decision and +the repair budget is bounded, so the run stays finite rather than an open loop +(see ``loops/model-evaluation`` Section 9). + +The chart-type set is deliberately **reduced** to a handful of common types, and +the prompt is tightly scoped, so small open-weight models reliably emit a +well-formed ``visualize`` action. To keep small models usable without drifting +into multi-chart territory, a *committed* visualize whose code/encodings fail is +**repaired in place** (the model is shown the error and asked to fix the SAME +chart) up to ``max_repair_attempts`` times — this completes the single +visualization, it does not start a new analysis turn. 
+ +Reuse: the visualize execution + ``result`` event are produced by the **same** +core-skill dispatch the base agent uses (:meth:`_dispatch_skill_action`), so a +mini result is byte-for-byte the shape every consumer already understands. The +plain-text transport (``_call_model`` / ``_parse_action`` / +``_run_inspection_tool``) carries actions as content JSON so models with weak or +absent function-calling still work; only the prompt and the single-decision +control flow are new. +""" + +from __future__ import annotations + +import json +import time +from typing import Any, Generator + +from data_formulator.agent_config import reasoning_effort_for +from data_formulator.agents.client_utils import ( + _extract_json_objects, + _match_tool_from_obj, +) +from data_formulator.analyst.agent import ( + AnalystAgent, + _AGENT_ID, + _CORE_SKILL, + _rescue_unpack_json_strings, + handle_inspect_source_data, + logger, +) +from data_formulator.analyst.skills import SkillContext + +# Keys a model may use to carry the explanation text in an ``explain`` action. +_EXPLAIN_TEXT_KEYS = ("text", "explanation", "answer", "summary", "content", "message") + +# Keys a model may use to carry its private reasoning alongside the action JSON; +# surfaced as a thinking_text event (mirrors how the native loop surfaces the +# assistant content that accompanies a tool call). +_THOUGHT_KEYS = ("thought", "thoughts", "reasoning", "thinking", "rationale") + + +# Shown when a run finishes without anything user-visible: the model returned an +# empty reply, or it burned its protocol budget (e.g. a small model that kept +# asking to inspect) without ever committing a chart. A mini run must never end +# silently — the frontend drops an empty summary, so we surface this instead so +# the user can retry or switch to a more capable model rather than seeing nothing. +_NO_OUTPUT_FALLBACK = ( + "I couldn't produce a chart or a clear answer for this request. 
Try " + "rephrasing it, or switch to a more capable model for mini mode." +) + + +# The reduced chart-type set. Every name here is a valid Data Formulator +# ``chart_type`` that the eval renderer and the visualize skill both understand; +# the list is kept short on purpose so a small model picks a sensible type +# instead of guessing among twenty. +_MINI_CHART_TYPES = ( + "Bar Chart", + "Grouped Bar Chart", + "Line Chart", + "Scatter Plot", + "Histogram", + "Pie Chart", + "Heatmap", +) + +_MINI_CHART_REFERENCE = """\ +- Bar Chart (x, y, color) - compare ONE number across categories. Category on x, number on y. Set color to colour/stack by a second category. +- Grouped Bar Chart (x, y, group) - side-by-side bars split by a second category; put that second category on `group`. +- Line Chart (x, y, color) - a trend over an ordered or time x-axis; color draws one line per series. +- Scatter Plot (x, y, color, size) - relationship between two numeric fields. +- Histogram (x) - distribution of ONE numeric field; put the raw field on x, do NOT pre-bin it. +- Pie Chart (color, size) - parts of a whole with <=7 slices; slice category on `color`, its value on `size`. +- Heatmap (x, y, color) - a 2D grid; x and y are the two categories, color is the numeric cell value.""" + + +# A pseudo-tool advertised so the JSON matcher recognises an ``explain`` action. +# ``explain`` is not a registered skill action (it never reaches the skill +# dispatch); the mini loop intercepts it and ends the run with its text. +_EXPLAIN_TOOL = { + "type": "function", + "function": { + "name": "explain", + "description": "Answer the user in plain text when no chart is needed.", + "parameters": { + "type": "object", + "properties": {"text": {"type": "string"}}, + "required": ["text"], + }, + }, +} + + +# The complete, self-contained system prompt for the mini agent. Slots +# ({chart_types}, {inspect_note}) are filled by str.replace (NOT str.format) so +# the literal JSON braces below stay intact. 
+_MINI_PROMPT_TEMPLATE = """\ +You are a data visualization agent. The user asks a question about their data and +you answer it by producing ONE chart in a single step. + +## Your data +The tables are already loaded. The user message lists them under [AVAILABLE TABLES] +(or [PRIMARY TABLE(S)]) with their columns and a few sample rows, and ends with +[USER QUESTION]. In your Python, read a table by its EXACT file name shown there, +e.g. pd.read_csv('orders.csv') or pd.read_parquet('sales.parquet'). Never invent +files or columns that are not listed. + +## What you output: exactly ONE JSON object +Your ENTIRE reply is ONE JSON object and nothing else - no prose, no markdown +fences. It is one of two kinds: + +1. VISUALIZE - use this for almost every question: +{"thought": "", "tool": "visualize", "arguments": {"code": "", "output_variable": "", "chart": {"chart_type": "", "encodings": {"x": "", "y": ""}, "config": {}}, "title": "", "input_tables": [""]}} + +2. EXPLAIN - only when the user is NOT asking for a chart (a yes/no or factual question): +{"thought": "", "tool": "explain", "arguments": {"text": ""}} + +When in doubt, VISUALIZE. + +## Writing the visualize code +- A standalone Python script: imports at the top, NO function wrapper. +- Read the source tables by their exact file names, then aggregate / filter / sort / + reshape so the DataFrame is exactly what the chart needs, and assign it to your + output_variable. +- output_variable MUST be a pandas DataFrame (a table with named columns), NEVER a + Series or a single number. Two common mistakes and their fixes: + * groupby -> pass as_index=False, e.g. + df.groupby('city', as_index=False)['sales'].sum() + * value_counts() returns a Series -> call .reset_index(), e.g. + df['city'].value_counts().reset_index(name='count') # columns: city, count +- Every column named in `encodings` MUST be an actual column of your output + DataFrame (check the names match exactly, including the ones you create). 
+- Allowed libraries: pandas, numpy, duckdb, math, datetime, statistics, collections, + re, sklearn, scipy. NOT allowed: matplotlib, plotly, seaborn, os, sys, requests. +- Strings must be valid JSON: write newlines in the code as \\n and quotes as \\". + +### Chart types (chart_type must be one of these EXACT names) +{chart_types} +{inspect_note} +## Worked example +[USER QUESTION] Top 5 products by revenue. +Your entire reply (one object, nothing else): +{"thought": "sum revenue per product, take the top 5, bar chart", "tool": "visualize", "arguments": {"code": "import pandas as pd\\norders = pd.read_csv('orders.csv')\\nagg = orders.groupby('product', as_index=False)['revenue'].sum()\\ntop_products = agg.sort_values('revenue', ascending=False).head(5)", "output_variable": "top_products", "chart": {"chart_type": "Bar Chart", "encodings": {"x": "product", "y": "revenue"}, "config": {}}, "title": "Top 5 Products By Revenue", "input_tables": ["orders"]}} + +## Worked example (counting rows -> a DataFrame, not a Series) +[USER QUESTION] How many orders are in each status? +Your entire reply (one object, nothing else): +{"thought": "count rows per status with value_counts, reset_index to a real table", "tool": "visualize", "arguments": {"code": "import pandas as pd\\norders = pd.read_csv('orders.csv')\\ncounts = orders['status'].value_counts().reset_index(name='count')", "output_variable": "counts", "chart": {"chart_type": "Bar Chart", "encodings": {"x": "status", "y": "count"}, "config": {}}, "title": "Orders By Status", "input_tables": ["orders"]}} + +## Rules +- Reply with EXACTLY ONE JSON object. Do not wrap it in markdown, do not add text + before or after it. +- Always assign the final DataFrame to the exact output_variable name you chose. +- Use only file names and columns that appear in the user message. +""" + +_INSPECT_NOTE = """\ + +## (Optional) look at the data first +If the sample rows do not tell you enough (e.g. 
you need the exact category values, +a column's range, or how two tables join), you MAY first run ONE inspection: +{"thought": "", "tool": "execute_python_script", "arguments": {"code": ""}} +It returns its stdout to you only. After it runs you MUST reply with your visualize +(or explain) object. Use this at most once; if the samples already tell you enough, +skip it and go straight to visualize. +""" + + +class MiniAnalystAgent(AnalystAgent): + """A single-decision analyst: one ``visualize`` (or ``explain``) per run. + + Unlike :class:`AnalystAgent` it does **not** loop: :meth:`run` makes one + analytic decision and stops. It carries its own plain-text transport seams + (``_call_model`` / ``_parse_action`` / ``_run_inspection_tool``) so models + with weak or absent function-calling still work, and dispatches the committed + ``visualize`` through the base core skill, so the emitted ``result`` / + ``completion`` events are identical to the loop-based agent. Before committing, + the model may run a single ``execute_python_script`` inspection; if the chart + then fails, a bounded auto-revision loop lets it inspect again and fix the + SAME chart (capped by ``max_repair_attempts``), so the run stays finite. + """ + + # Auto-revision floor: small/local models often need a few tries — inspect + # the data, read the error, fix the code — before a chart succeeds, so a + # single blind retry isn't enough. A higher caller-provided value is kept. + _AUTO_REVISION_ATTEMPTS = 3 + + def __init__(self, *args: Any, **kwargs: Any) -> None: + # One committing action per run; the base machinery is never asked to + # take a second analytic step. + kwargs.setdefault("max_iterations", 1) + super().__init__(*args, **kwargs) + # Give the in-place repair loop room to revise (inspect -> fix -> retry) + # instead of giving up after one attempt. 
def _mini_tools(self, allow_inspect: bool) -> list[dict[str, Any]]:
    """Build the tool list the mini agent recognises for one turn.

    Starts from the base agent's current tools and keeps only ``visualize``
    plus, when ``allow_inspect`` is true, ``execute_python_script``. The
    ``explain`` pseudo-tool is always appended last.

    Args:
        allow_inspect: Whether the inspection tool may be offered this turn.

    Returns:
        A new list of tool definitions.
    """
    allowed_names = (
        {"visualize", "execute_python_script"} if allow_inspect else {"visualize"}
    )
    selected: list[dict[str, Any]] = []
    for tool in AnalystAgent._current_tools(self):
        # A tool entry without a "function" mapping simply has no name.
        function_spec = tool.get("function") or {}
        if function_spec.get("name") in allowed_names:
            selected.append(tool)
    selected.append(_EXPLAIN_TOOL)
    return selected
Actions travel as content JSON so + # models with weak or absent function-calling still work; every tool is run + # by the SAME base handlers as the looping agent. + # ------------------------------------------------------------------ + + def _catalog_reminder(self, tools: list[dict[str, Any]]) -> str: + """A short per-turn reminder of the protocol + the names currently + available (reflects skills loaded so far, e.g. ``write_report`` after the + report skill loads). Keeps weak models on-protocol without re-deriving + the full schemas, which already live in the prompt/skill bodies.""" + action_names = self.registry.action_names() + names = [(t.get("function") or {}).get("name") for t in tools] + inspect = [n for n in names if n and n not in action_names] + act = [n for n in names if n and n in action_names] + return ( + "[ACTION PROTOCOL] Reply with ONE JSON object " + '{"thought":..,"tool":,"arguments":{..}} to call a tool/action, ' + "or plain text (no JSON) to finish. " + f"Inspection tools: {', '.join(inspect) or '(none)'}. " + f"Actions: {', '.join(act) or '(none)'}." 
+ ) + + def _call_model(self, messages: list[dict]): + """Buffered completion with NO tools, retrying transient errors before + any output is consumed (mirrors the base :meth:`_open_stream` contract).""" + last_exc: Exception | None = None + for attempt in range(self._MAX_LLM_RETRIES): + try: + return self.client.get_completion( + messages, stream=False, + reasoning_effort=reasoning_effort_for( + _AGENT_ID, self.client.model), + ) + except Exception as e: # noqa: BLE001 - retried or re-raised below + last_exc = e + if self._is_transient_error(e) and attempt < self._MAX_LLM_RETRIES - 1: + wait = 2 ** attempt + logger.warning( + "[MiniAnalystAgent] Transient LLM error (attempt " + "%d/%d), retrying in %ds: %s", + attempt + 1, self._MAX_LLM_RETRIES, wait, e, + ) + time.sleep(wait) + continue + raise + raise last_exc # pragma: no cover - loop always returns or raises + + @staticmethod + def _parse_action( + content: str | None, tools: list[dict[str, Any]], + ) -> tuple[str, str, dict[str, Any]] | None: + """Parse the first JSON object in ``content`` that names a known tool. + + Returns ``(thought, tool_name, arguments)`` or ``None`` when no JSON + object matches a tool - in which case ``content`` is the run's final + plain-text answer. The same matcher used by the Ollama salvage resolves + the documented ``{"tool","arguments"}`` shape as well as the nested / + bare-argument shapes weaker models fall into. 
+ """ + if not isinstance(content, str) or "{" not in content: + return None + for blob in _extract_json_objects(content): + try: + obj = json.loads(blob) + except (ValueError, TypeError): + continue + if not isinstance(obj, dict): + continue + matched = _match_tool_from_obj(obj, tools) + if matched is None: + continue + name, args = matched + thought = "" + for k in _THOUGHT_KEYS: + v = obj.get(k) + if isinstance(v, str) and v.strip(): + thought = v.strip() + break + return thought, name, (args if isinstance(args, dict) else {}) + return None + + def _run_inspection_tool( + self, + tool_name: str, + tool_args: dict[str, Any], + input_tables: list[dict[str, Any]] | None, + outer_iteration: int, + rlog, + ) -> Generator[dict, None, tuple[str, dict | None]]: + """Execute one inspection tool with the SAME handlers as the base loop, + yielding the same ``tool_start`` / ``tool_result`` / ``skill_loaded`` + events. Returns ``(observation_text, skill_body_msg_or_None)``.""" + yield { + "type": "tool_start", + "tool": tool_name, + "purpose": tool_args.get("purpose") if tool_name == "execute_python_script" else None, + "code": tool_args.get("code") if tool_name == "execute_python_script" else None, + "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None, + "skill": tool_args.get("name") if tool_name == "load_skill" else None, + } + + tool_t0 = time.time() + tool_status = "ok" + body_msg: dict | None = None + + if tool_name == "execute_python_script": + result = self._run_explore_code( + tool_args.get("code", ""), input_tables or []) + tool_content = result.get("stdout", "") + tool_status = result.get("status", "ok") + if result.get("error"): + tool_content += f"\n\nError: {result['error']}" + yield {"type": "tool_result", "tool": tool_name, "status": tool_status, + "stdout": result.get("stdout", ""), "error": result.get("error")} + elif tool_name == "inspect_source_data": + tool_content = handle_inspect_source_data( + 
tool_args.get("table_names", []), input_tables or [], self.workspace) + yield {"type": "tool_result", "tool": tool_name, "status": "ok", + "stdout": tool_content} + elif tool_name == "load_skill": + skill_name = tool_args.get("name", "") + ok, message, body_msg = self._build_skill_body_message(skill_name) + tool_status = "ok" if ok else "error" + tool_content = message + if ok: + yield {"type": "skill_loaded", "skill": skill_name, + "unlocks": list(self.registry.metas[skill_name].action_names) + if self.registry.has(skill_name) else []} + yield {"type": "tool_result", "tool": tool_name, "status": tool_status, + "stdout": message, "error": None if ok else message} + elif tool_name in self._loaded_skill_tool_map(): + skill = self._loaded_skill_tool_map()[tool_name] + skill_ctx = SkillContext( + client=self.client, workspace=self.workspace, + language_instruction=self.language_instruction, + trajectory=[], payload=dict(self._run_payload)) + try: + result = skill.handle_tool(tool_name, tool_args, skill_ctx) + tool_content = result.text + except Exception as exc: # noqa: BLE001 + logger.warning("[MiniAnalystAgent] Skill tool %r failed", tool_name, exc_info=exc) + tool_content = f"Tool '{tool_name}' failed: {exc}" + tool_status = "error" + yield {"type": "tool_result", "tool": tool_name, "status": tool_status, + "stdout": tool_content} + else: + tool_content = ( + f"Unknown tool: {tool_name}. Use only the tools/actions listed in " + "the protocol, or reply in plain text to finish." 
def run(
    self,
    input_tables: list[dict[str, Any]],
    user_question: str,
    focused_thread: list[dict[str, Any]] | None = None,
    other_threads: list[dict[str, Any]] | None = None,
    trajectory: list[dict] | None = None,
    completed_step_count: int = 0,
    primary_tables: list[str] | None = None,
    attached_images: list[str] | None = None,
    charts: list[dict[str, Any]] | None = None,
) -> Generator[dict[str, Any], None, None]:
    """Make a single analytic decision and stop.

    Yields the same event types as :meth:`AnalystAgent.run` (``thinking_text``,
    ``tool_start`` / ``tool_result`` for the optional inspection, ``action`` /
    ``result`` for the chart, ``error``, and a terminal ``completion``), but
    commits at most one ``visualize`` (repaired in place on failure) or ends
    with one ``explain``.

    Args:
        input_tables: Tables available to the run; their ``name`` values are
            recorded in the session-start log entry.
        user_question: The user's request.
        focused_thread: Passed through to the initial-message builder and
            stored in the run payload.
        other_threads: Passed through likewise.
        trajectory: An existing message list to continue from. When ``None``
            the exploration namespace directory is cleared and fresh initial
            messages are built.
        completed_step_count: Steps already completed; the reported
            ``iteration`` is this value plus one.
        primary_tables: Passed through to the initial-message builder and
            stored in the run payload.
        attached_images: Passed through to the initial-message builder.
        charts: Existing charts; stored in the run payload (``[]`` if None).

    Yields:
        Event dicts, always ending with one ``completion`` event on the paths
        that return normally.

    Notes:
        The reasoning log is closed in a ``finally`` so it is released even if
        the run raises or the consumer stops iterating early. For the same
        reason ``self._explore_session`` is cleared in a ``finally``: without
        it, an exception (or ``GeneratorExit``) raised while deciding would
        leave the agent holding a sandbox session that the ``with`` block has
        already closed.
    """
    rlog = self._reasoning_log
    session_start = time.time()

    self._loaded_skills = {_CORE_SKILL}
    self._run_payload = {
        "input_tables": input_tables,
        "charts": charts or [],
        "focused_thread": focused_thread,
        "other_threads": other_threads,
        "primary_tables": primary_tables,
    }
    completed_steps: list[dict[str, Any]] = []
    iteration = completed_step_count + 1

    try:
        rlog.log(
            "session_start",
            agent="MiniAnalystAgent",
            session_id=self._session_id,
            user_question=user_question,
            input_tables=[t.get("name", "") for t in input_tables],
            model=self.client.model,
            rules_injected=[],
            knowledge_injected=[],
        )

        if trajectory is None:
            # Fresh run: drop any leftover exploration namespace first.
            ns_dir = self._explore_ns_dir()
            if ns_dir.exists():
                import shutil
                shutil.rmtree(ns_dir, ignore_errors=True)
            messages = self._build_initial_messages(
                input_tables, user_question, focused_thread, other_threads,
                primary_tables=primary_tables,
                attached_images=attached_images, charts=charts,
            )
        else:
            messages = trajectory

        # A live sandbox session backs the optional inspection so its
        # namespace persists across the (at most one) inspection call.
        from data_formulator.sandbox.local_sandbox import SandboxSession
        with SandboxSession() as explore_session:
            self._explore_session = explore_session
            try:
                kind, payload = yield from self._decide(
                    messages, input_tables, iteration,
                    allow_inspect=True,
                )
            finally:
                # Never keep a reference to a session that is about to be
                # (or already is) closed, whichever way we leave the block.
                self._explore_session = None

        if kind == "explain":
            summary = payload.strip() if isinstance(payload, str) else ""
            yield {
                "type": "completion",
                "iteration": iteration,
                "status": "success",
                "content": {"summary": summary or _NO_OUTPUT_FALLBACK,
                            "total_steps": 0},
            }
            self._log_session_end(rlog, "success", iteration, 0, session_start)
            return

        if kind == "visualize":
            produced, viz_error = yield from self._visualize_with_repair(
                payload, messages, input_tables, iteration, completed_steps)
            status = "success" if produced else "completed_no_viz"
            if not produced:
                # A failed chart would otherwise end the run silently: the
                # skill's error events are internal retry signals the shell
                # router drops. Surface the failure (with the reason, when we
                # have it) so the user sees why nothing rendered.
                detail = f" ({viz_error})" if viz_error else ""
                yield self._error_event(
                    iteration,
                    "I couldn't build a working chart for this request"
                    f"{detail}. Try rephrasing it, or switch to a more capable "
                    "model for mini mode.",
                    message_code="agent.miniNoChart",
                    message_params={"error": detail},
                )
            yield {
                "type": "completion",
                "iteration": iteration,
                "status": status,
                "content": {"summary": "", "total_steps": len(completed_steps)},
            }
            self._log_session_end(rlog, status, iteration, 0, session_start)
            return

        # kind == "none": an LLM error or an exhausted protocol; payload is
        # the status string.
        if payload == "llm_error":
            # The error event is this path's user-visible feedback.
            yield self._error_event(
                iteration, "LLM API error", message_code="agent.llmApiError")
            summary = ""
        else:
            # Exhausted the protocol without committing (e.g. a small model
            # that kept asking to inspect): surface a message so the run is
            # never silent.
            summary = _NO_OUTPUT_FALLBACK
        yield {
            "type": "completion",
            "iteration": iteration,
            "status": payload,
            "content": {"summary": summary, "total_steps": 0},
        }
        self._log_session_end(rlog, payload, iteration, 0, session_start)
        return
    finally:
        rlog.close()
+ advertised = self._mini_tools(can_inspect) + recognize = self._mini_tools(allow_inspect=True) + rlog.log("llm_request", iteration=iteration, + messages_count=len(messages), + tools_available=[t["function"]["name"] for t in advertised], + transport="json_protocol_mini") + call_messages = list(messages) + [ + {"role": "system", "content": self._catalog_reminder(advertised)}, + ] + t0 = time.time() + try: + response = self._call_model(call_messages) + except Exception as exc: # noqa: BLE001 + rlog.log("llm_response", iteration=iteration, + latency_ms=int((time.time() - t0) * 1000), + finish_reason="error", error=type(exc).__name__) + logger.error("[MiniAnalystAgent] LLM call failed", exc_info=exc) + return ("none", "llm_error") + + latency = int((time.time() - t0) * 1000) + if not getattr(response, "choices", None): + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="empty") + return ("none", "llm_error") + + content = (response.choices[0].message.content or "") + parsed = self._parse_action(content, recognize) + + # --- plain text -> the explain answer --------------------------- + if parsed is None: + stripped = content.strip() + # An empty reply is a failure, not a deliberate answer: nudge + # once for a real answer rather than ending the run with nothing. + if not stripped and corrections_left > 0: + corrections_left -= 1 + messages.append({"role": "assistant", "content": content or None}) + messages.append({"role": "user", "content": ( + "[OBSERVATION] Your reply was empty. 
Emit your visualize " + "JSON object now, or an explain object with your answer.")}) + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="empty_reply") + continue + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="final_text") + messages.append({"role": "assistant", "content": content or None}) + return ("explain", stripped) + + thought, name, args = parsed + messages.append({"role": "assistant", "content": content}) + if thought: + yield {"type": "thinking_text", "content": thought} + + # --- explain action --------------------------------------------- + if name == "explain": + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="explain") + text = "" + for k in _EXPLAIN_TEXT_KEYS: + v = args.get(k) + if isinstance(v, str) and v.strip(): + text = v.strip() + break + return ("explain", text or thought or content.strip()) + + # --- visualize action ------------------------------------------- + if name == "visualize": + _rescue_unpack_json_strings(args) + missing = [f for f in ("code", "output_variable", "chart") + if not args.get(f)] + if missing and corrections_left > 0: + corrections_left -= 1 + messages.append({"role": "user", "content": ( + "[OBSERVATION] ERROR: your visualize is missing required " + f"field(s): {', '.join(missing)}. 
Emit the visualize JSON " + "again with those filled in.")}) + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="missing_fields") + continue + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="visualize") + return ("visualize", args) + + # --- the one optional inspection -------------------------------- + if name in ("execute_python_script", "inspect_source_data"): + if can_inspect: + inspections_left -= 1 + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="inspect", tool=name) + tool_content, body_msg = yield from self._run_inspection_tool( + name, args, input_tables, iteration, rlog) + messages.append({"role": "user", "content": ( + f"[OBSERVATION] {tool_content}\n\nNow emit your visualize " + "JSON object (or an explain object).")}) + if body_msg is not None: + messages.append(body_msg) + continue + # Inspection asked for but not available (budget spent, or the + # no-tool variation): nudge straight to the answer. + if corrections_left > 0: + corrections_left -= 1 + messages.append({"role": "user", "content": ( + "[OBSERVATION] Inspection is not available now; emit your " + "visualize JSON object directly (or an explain object).")}) + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="inspect_denied") + continue + return ("none", "tool_rounds_exhausted") + + # --- anything else -> one corrective nudge ---------------------- + if corrections_left > 0: + corrections_left -= 1 + messages.append({"role": "user", "content": ( + f"[OBSERVATION] ERROR: '{name}' is not available. 
Reply with a " + "single visualize JSON object (or an explain object).")}) + rlog.log("llm_response", iteration=iteration, + latency_ms=latency, finish_reason="unknown_tool") + continue + + return ("none", "tool_rounds_exhausted") + + return ("none", "tool_rounds_exhausted") + + # ------------------------------------------------------------------ + # Visualize: dispatch through the core skill, repair the SAME chart on failure + # ------------------------------------------------------------------ + + @staticmethod + def _extract_viz_error(observation: str | None) -> str | None: + """Pull a one-line error summary out of a failed-visualize observation. + + The visualize skill reports a failure as + ``"[OBSERVATION – Step N FAILED]\\n\\nError: "`` and the shell router + drops the matching ``error`` event (an internal retry signal), so this + observation string is the only place the reason survives. Returns the + first non-empty line of ```` (truncated) or ``None`` when there's + nothing useful to show.""" + if not observation: + return None + text = observation.split("Error:", 1)[1] if "Error:" in observation else observation + first_line = next((ln.strip() for ln in text.splitlines() if ln.strip()), "") + return first_line[:200] or None + + def _visualize_with_repair( + self, + args: dict[str, Any], + messages: list[dict], + input_tables: list[dict[str, Any]] | None, + iteration: int, + completed_steps: list[dict[str, Any]], + ) -> Generator[dict, None, tuple[bool, str | None]]: + """Execute the committed ``visualize`` via the base core-skill dispatch, + re-yielding its ``action`` / ``result`` events. If the code or encodings + fail, show the model the error and let it fix the SAME chart, up to + ``max_repair_attempts`` times; each retry may run one inspection first to + diagnose the failure. 
Returns ``(True, None)`` once a chart is produced, + or ``(False, last_error)`` if every attempt failed — the skill's ``error`` + events are dropped by the shell router, so ``last_error`` carries the + reason out for the run to surface.""" + repairs_left = max(0, int(self.max_repair_attempts)) + last_error: str | None = None + + while True: + action = dict(args) + action["action"] = "visualize" + + gen = self._dispatch_skill_action( + _CORE_SKILL, "visualize", action, messages, iteration, completed_steps) + produced = False + observation: str | None = None + try: + while True: + event = next(gen) + if event.get("type") == "result": + produced = True + yield event + except StopIteration as stop: + observation = stop.value + + # Keep history coherent (pure-text transport) so a repair turn reads + # the failure exactly like an inspection result. + self._set_action_observation(messages, None, observation) + + if produced: + return True, None + # The skill's error EVENT was dropped by the router; the observation + # string is the only carrier of why the chart failed. + last_error = self._extract_viz_error(observation) or last_error + if repairs_left <= 0: + return False, last_error + + repairs_left -= 1 + messages.append({"role": "user", "content": ( + "[SYSTEM] The visualize above FAILED. Read the error in the " + "observation and fix the SAME chart. If the error looks like the " + "data isn't what you assumed (a missing column, a wrong dtype, or " + "values that need parsing/splitting), FIRST run ONE " + "execute_python_script inspection to print the real columns and a " + "few values, then emit ONE corrected visualize JSON object. 
If the " + "fix is obvious, emit the corrected visualize directly.")}) + kind, new_args = yield from self._decide( + messages, input_tables, iteration, allow_inspect=True) + if kind != "visualize": + return False, last_error + args = new_args diff --git a/py-src/data_formulator/analyst/skills/__init__.py b/py-src/data_formulator/analyst/skills/__init__.py new file mode 100644 index 00000000..b6b6bac5 --- /dev/null +++ b/py-src/data_formulator/analyst/skills/__init__.py @@ -0,0 +1,382 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Skill registry — discovery and eager instantiation of analyst skills. + +Each skill lives in its own sub-package under this directory and ships a +``SKILL.md`` with YAML frontmatter (``name`` / ``description`` / +``when_to_use`` / ``always_on`` / ``actions``). At startup the registry scans +those frontmatter blocks to build a cheap, always-resident index (tier-1 +progressive disclosure) **and** imports each skill's Python code module so the +skill instance is always available to the agent. + +The distinction is deliberate: a skill's code is always imported and callable; +what ``load_skill(name)`` does is flip a *switch* that exposes the skill's +tools, opens its action gate, and injects its ``SKILL.md`` body into context — +i.e. it controls exposure to the model, not availability of the code. + +Convention for a skill code module: ``skills//skill.py`` exposing a +``get_skill() -> Skill`` factory. A skill that ships only a ``SKILL.md`` (pure +guidance, no code) is still discoverable — it simply has no tools or handlers. 
+""" + +from __future__ import annotations + +import importlib +import json +import logging +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +from data_formulator.analyst.skills.base import ( + Event, + Skill, + SkillContext, + SkillMeta, + ToolResult, +) + +logger = logging.getLogger(__name__) + +SKILLS_DIR = Path(__file__).parent +SKILL_DOC_NAME = "SKILL.md" +TOOLS_FILE_NAME = "tools.json" + +_FM_PATTERN = re.compile(r"^---\s*\n(.*?)\n---\s*\n?", re.DOTALL) + + +def _parse_front_matter(content: str) -> tuple[dict[str, Any], str]: + """Return ``(frontmatter_dict, body)``. Degrades gracefully to ``({}, content)``.""" + m = _FM_PATTERN.match(content) + if not m: + return {}, content + try: + import yaml # local import — only needed when parsing + + meta = yaml.safe_load(m.group(1)) + if not isinstance(meta, dict): + return {}, content + except Exception: + return {}, content + return meta, content[m.end():] + + +def _coerce_name_list(raw: Any) -> tuple[str, ...]: + """Normalize a frontmatter name list (``tools``/``actions``) to a tuple.""" + if isinstance(raw, str): + return (raw.strip(),) if raw.strip() else () + if isinstance(raw, (list, tuple)): + return tuple(str(a).strip() for a in raw if str(a).strip()) + return () + + +def _meta_from_frontmatter(raw: dict[str, Any], fallback_name: str) -> SkillMeta: + return SkillMeta( + name=str(raw.get("name") or fallback_name), + description=str(raw.get("description") or ""), + when_to_use=str(raw.get("when_to_use") or ""), + always_on=bool(raw.get("always_on", False)), + tool_names=_coerce_name_list(raw.get("tools")), + action_names=_coerce_name_list(raw.get("actions")), + ) + + +@dataclass +class SkillRegistry: + """Index of discovered skills, keyed by skill name. 
+ + Holds three declarative things per skill, all resolved at build time: + the cheap frontmatter (``SkillMeta``), the eagerly-instantiated code module + (the *processor*: ``handle_tool`` / ``handle_action``), and the skill's + ``tools.json`` schemas (``tool_specs``). The doc *body* is read lazily. + """ + + metas: dict[str, SkillMeta] = field(default_factory=dict) + # Eagerly-instantiated skill code modules, keyed by name. A name present in + # ``metas`` but absent here is a guidance-only skill (SKILL.md, no code). + skills: dict[str, Skill] = field(default_factory=dict) + # Declarative tool/action schemas per skill, keyed by name. Each value is a + # flat list of standard OpenAI function-tool specs (``{"type":"function", + # "function":{name,description,parameters}}``) covering BOTH the skill's + # inspection tools and its committing actions; the split is decided by the + # frontmatter ``tools:`` / ``actions:`` lists (a spec whose name is in + # ``actions`` is a committing action, in ``tools`` an inspection tool). + tool_specs: dict[str, list[dict[str, Any]]] = field(default_factory=dict) + _doc_paths: dict[str, Path] = field(default_factory=dict) + + def _specs_split(self, name: str) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + """Partition a skill's ``tool_specs`` into ``(inspection_tools, actions)`` + using its frontmatter ``tools:`` / ``actions:`` lists as the authority. + + A spec whose function name is declared in ``actions:`` is a committing + action; everything else is an inspection tool. The ``tools:`` list is the + symmetric companion declaration: any spec not named in *either* list is + flagged as drift (it lives in ``tools.json`` but is undeclared in + ``SKILL.md``) and treated as an inspection tool. 
+ """ + meta = self.metas.get(name) + action_set = set(meta.action_names) if meta else set() + tool_set = set(meta.tool_names) if meta else set() + tools: list[dict[str, Any]] = [] + actions: list[dict[str, Any]] = [] + for spec in self.tool_specs.get(name, ()): # may be empty + fn = spec.get("function", {}).get("name") + if fn in action_set: + actions.append(spec) + else: + if fn not in tool_set: + logger.warning( + "[skills] %s: tools.json declares %r but SKILL.md " + "frontmatter lists it in neither tools: nor actions: " + "— treating as an inspection tool.", + name, fn, + ) + tools.append(spec) + return tools, actions + + def names(self) -> list[str]: + return sorted(self.metas) + + def list_metas(self) -> list[SkillMeta]: + return [self.metas[n] for n in self.names()] + + def has(self, name: str) -> bool: + return name in self.metas + + def gated_skill_names(self) -> list[str]: + """Skills that load on demand (not ``always_on``).""" + return [n for n in self.names() if not self.metas[n].always_on] + + def action_owner(self, action: str) -> str | None: + """Return the skill name that unlocks ``action``, or ``None`` if no + gated skill declares it (i.e. it is a core action).""" + for name in self.names(): + if action in self.metas[name].action_names: + return name + return None + + def render_registry_block(self) -> str: + """Tier-1 progressive-disclosure listing for the base prompt. + + One line per gated skill: name, the actions it unlocks, and a short + ``when_to_use``/``description``. Bodies are pulled on demand via + ``load_skill``; only this cheap index stays resident. + """ + lines: list[str] = [] + for name in self.gated_skill_names(): + meta = self.metas[name] + blurb = (meta.when_to_use or meta.description or "").strip().replace("\n", " ") + unlocks = ", ".join(meta.action_names) if meta.action_names else "(no actions)" + lines.append(f"- **{name}** — unlocks `{unlocks}`. 
{blurb}") + return "\n".join(lines) + + def load_body(self, name: str) -> str: + """Return the ``SKILL.md`` body (frontmatter stripped) for ``name``.""" + path = self._doc_paths.get(name) + if not path or not path.exists(): + raise KeyError(f"Unknown skill: {name!r}") + _, body = _parse_front_matter(path.read_text(encoding="utf-8")) + return body.strip() + + def get_skill(self, name: str) -> Skill | None: + """Return the (eagerly-instantiated) skill code module, or ``None`` for + an unknown or guidance-only skill.""" + return self.skills.get(name) + + def tools_for(self, names) -> list[dict[str, Any]]: + """Merge the inspection tool specs contributed by the named (loaded) skills.""" + out: list[dict[str, Any]] = [] + for name in names: + out.extend(self._specs_split(name)[0]) + return out + + # ------------------------------------------------------------------ + # Actions (design-docs/36): the committing tool calls a turn may end with. + # A skill's ``tools.json`` lists tools and actions together as standard + # function specs; the frontmatter ``actions:`` list says which are committing + # actions. The agent offers their tool specs and dispatches the chosen one. + # (Inspection tools gather; a committing action ends the turn.) + # ------------------------------------------------------------------ + + def action_tools_for(self, names) -> list[dict[str, Any]]: + """Render the committing-action tool specs unlocked by the named (loaded) + skills. + + These are offered alongside the inspection tools each round; the agent + partitions the model's response by which tool names are committing + actions vs inspection tools. + """ + out: list[dict[str, Any]] = [] + for name in names: + out.extend(self._specs_split(name)[1]) + return out + + def action_required_fields(self, name: str) -> tuple[str, ...]: + """Return the required argument names for the action ``name`` (empty if + unknown), read from the action schema's ``parameters.required``. 
Used for + a cheap pre-dispatch completeness check.""" + for skill_name in self.names(): + for spec in self._specs_split(skill_name)[1]: + if spec.get("function", {}).get("name") == name: + params = spec.get("function", {}).get("parameters") or {} + return tuple(params.get("required") or ()) + return () + + def action_names(self) -> set[str]: + """All committing-action names declared by any skill's frontmatter + ``actions:`` — the universe of committing tool names, used to partition a + response's tool calls into inspection tools vs committing actions.""" + out: set[str] = set() + for meta in self.metas.values(): + out.update(meta.action_names) + return out + + def action_stream_spec(self, action: str) -> tuple[str, str] | None: + """Return ``(stream_field, stream_channel)`` for a *streaming* action, or + ``None`` for a buffered one. + + Streaming is a property of the **loop**, not the schema (design-docs/36 + §5): a skill declares which of its actions stream by exposing a + ``streaming_actions = {action: (field, channel)}`` mapping on its code + module (behaviour lives in code, not the JSON sent to the model). The + agent reads this to know whether to forward the action's argument live + on its declared channel as the model writes it. Today only the report + skill's ``write_report`` streams (its ``report`` field on the ``report`` + channel).""" + for name in self.names(): + skill = self.skills.get(name) + spec = getattr(skill, "streaming_actions", None) + if spec and action in spec: + field, channel = spec[action] + return (str(field), str(channel)) + return None + + +def _instantiate_skill(name: str) -> Skill | None: + """Import ``skills//skill.py`` and call ``get_skill()``. + + Returns ``None`` (not an error) for a guidance-only skill with no code + module, and logs a warning for a malformed one. 
def _load_tool_specs(skill_dir: Path) -> list[dict[str, Any]]:
    """Read the tool/action schemas a skill declares in its ``tools.json``.

    The file lives in ``skill_dir`` and is expected to hold a flat JSON list
    of function-tool spec objects. Returns an empty list when the file is
    absent, cannot be read or parsed (a warning is logged), or its top-level
    value is not a list. Non-dict entries inside the list are discarded.
    """
    specs_path = skill_dir / TOOLS_FILE_NAME
    if not specs_path.exists():
        return []
    try:
        parsed = json.loads(specs_path.read_text(encoding="utf-8"))
    except Exception:
        # Best-effort: a broken tools.json must not stop registry building.
        logger.warning("Failed to parse %s", specs_path, exc_info=True)
        return []
    if not isinstance(parsed, list):
        return []
    return [entry for entry in parsed if isinstance(entry, dict)]
= meta + registry._doc_paths[meta.name] = doc + instance = _instantiate_skill(meta.name) + if instance is not None: + registry.skills[meta.name] = instance + registry.tool_specs[meta.name] = _load_tool_specs(child) + _warn_on_name_collisions(registry) + return registry + + +def _warn_on_name_collisions(registry: SkillRegistry) -> None: + """Warn (don't raise) when skills declare clashing action or tool names. + + Two flat namespaces share one function-calling surface: a committing action + resolves to a single owner (first declarer wins) and inspection tools are + merged into one name-unique list — and since a committing action is *also* a + tool call, its name must not clash with an inspection tool name either. A + clash means one skill silently shadows another. Today the built-in skills + don't collide, so this is a guard for when users drop in new skills — it + surfaces the problem loudly at startup instead of letting it fail + mysteriously mid-run. + """ + action_sources: dict[str, list[str]] = {} + tool_sources: dict[str, list[str]] = {} + for name in registry.names(): + tools, actions = registry._specs_split(name) + for action in registry.metas[name].action_names: + action_sources.setdefault(action, []).append(name) + # Inspection tools and committing actions share one tool namespace. + for spec in (*tools, *actions): + tool_name = spec.get("function", {}).get("name") + if tool_name: + tool_sources.setdefault(tool_name, []).append(name) + + for action, owners in action_sources.items(): + if len(owners) > 1: + logger.warning( + "Action name collision: %r is declared by multiple skills (%s). " + "Only %r will own it; the rest are shadowed. Rename the action in " + "the conflicting SKILL.md frontmatter.", + action, ", ".join(owners), owners[0], + ) + for tool_name, owners in tool_sources.items(): + if len(owners) > 1: + logger.warning( + "Tool name collision: %r is provided by multiple skills (%s). 
" + "Function-calling tool names (inspection tools and committing " + "actions share one namespace) must be globally unique, so one " + "will shadow the others. Give each a distinct (e.g. " + "skill-prefixed) name.", + tool_name, ", ".join(owners), + ) + + + +__all__ = [ + # Re-exported skill substrate (defined in skills/base.py) + "Event", + "Skill", + "SkillContext", + "SkillMeta", + "ToolResult", + # Registry + "SkillRegistry", + "build_registry", + "SKILLS_DIR", +] diff --git a/py-src/data_formulator/analyst/skills/base.py b/py-src/data_formulator/analyst/skills/base.py new file mode 100644 index 00000000..c542f5f2 --- /dev/null +++ b/py-src/data_formulator/analyst/skills/base.py @@ -0,0 +1,185 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Skill protocol and shared types for the analyst agent. + +A *skill* is a passive plugin the single analyst agent can switch on. It never +runs its own agent loop; instead it contributes: + 1. a ``SKILL.md`` doc (frontmatter + how-to body) — progressive disclosure, + 2. zero or more **tools** the model may call once the skill is loaded, + 3. zero or more **gated actions** it unlocks, and + 4. **handlers** (``handle_tool`` / ``handle_action``) that perform any + compute / rendering and yield channel-tagged events. + +The shell stays skill-agnostic: it merges a loaded skill's tools into the +model's tool list, opens the gate for its actions, routes tool calls to +``handle_tool`` and emitted actions to ``handle_action``, and forwards whatever +events come back. "Loading" a skill controls only *exposure to the model* — the +skill's Python is always imported and callable. + +Two output channels, never crossed: + * **frontend** — a handler *yields* ``Event``s. A skill never yields straight + to the user; it yields to the **agent**, whose router (see the shell's + ``_route_skill_events``) is the single place that forwards / stamps / + enriches / could drop each event before it reaches the stream. 
@dataclass(frozen=True)
class SkillMeta:
    """A skill's frontmatter — the cheap, always-resident registry entry.

    Mirrors Anthropic Agent Skills tier-1 disclosure: only ``name`` and
    ``description`` (plus an optional ``when_to_use``) are kept resident in the
    base prompt so the model knows *when* to reach for the skill; the body is
    loaded on demand via the ``load_skill`` tool.

    Instances are immutable (``frozen=True``); the name lists are tuples.
    """

    # Skill identifier.
    name: str
    # Short summary of what the skill provides.
    description: str
    # Optional hint about when the model should reach for the skill.
    when_to_use: str = ""
    # ``always_on`` skills (e.g. visualization) are pre-loaded and their actions
    # are never gated. Everything else loads dynamically.
    always_on: bool = False
    # The inspection **tool** names this skill exposes (data gathering, no turn
    # commit). Declared in the ``SKILL.md`` frontmatter (``tools: [inspect_chart]``)
    # so the frontmatter is the complete, symmetric surface declaration; the
    # matching JSON schemas live in ``tools.json``.
    tool_names: tuple[str, ...] = ()
    # The gated **action** names this skill unlocks once loaded. Declared in the
    # ``SKILL.md`` frontmatter (``actions: [write_report]``) so the shell can
    # build its legal-action set from tier-1 metadata alone — without importing
    # the skill's code module.
    action_names: tuple[str, ...] = ()
@runtime_checkable
class Skill(Protocol):
    """A passive plugin the agent shell exposes once its skill is *loaded*.

    A skill never runs its own agent loop. It is a pure **processor**: two
    handlers (``handle_tool`` and ``handle_action``) that perform any compute /
    rendering. Its *declarative* surface — metadata (``SKILL.md`` frontmatter →
    ``SkillMeta``) and the inspection tool / committing action *schemas*
    (``tools.json``) — lives in data files the registry loads, not on the
    class. The frontmatter ``tools:`` / ``actions:`` lists decide which schemas
    are inspection tools vs committing actions.

    The Python module is always imported and instantiated at registry build
    time; "loading" a skill only controls *exposure to the model*, never the
    availability of the code.

    Because the protocol is ``runtime_checkable``, an ``isinstance`` check only
    verifies that both methods exist, not that their signatures match.
    """

    def handle_tool(
        self,
        name: str,
        args: dict[str, Any],
        ctx: SkillContext,
    ) -> ToolResult:
        """Execute an inspection tool the model called.

        ``name`` is one of this skill's frontmatter ``tools:`` names; ``args``
        is the parsed tool arguments. Parallel-safe; returns text (and optional
        images) for the model to read. Unlike ``handle_action`` this is a plain
        call, not a generator: it yields no events.
        """
        ...

    def handle_action(
        self,
        action: str,
        spec: dict[str, Any],
        ctx: SkillContext,
    ) -> Generator[Event, None, str | None]:
        """Dispatch a committing **action** the model emitted as a tool call:
        validate the arguments, run any compute / rendering, and yield
        channel-tagged events as it goes (result / delegate / text_delta / …).
        It then **returns** an ``observation`` string (or ``None``): LLM-facing
        feedback the shell appends to the trajectory as the action's tool-call
        result, exactly like an inspection tool's output.

        There is no control verdict. The agent reads the observation and decides
        its own next move — commit another action, or stop by giving its final
        answer (a turn with no action ends the run). A recoverable failure is
        just an observation describing what went wrong; the agent re-decides.
        Returning ``None`` means there is no observation to continue from; the
        core skill does this for its one terminal action, which pauses the run
        for the user's reply.

        Yielded events go to the **shell**, not straight to the frontend: the
        shell's router forwards them (stamping ``iteration``, tracking steps)
        and is free to transform or drop any of them. User-facing output
        therefore lives only in these yields; the returned observation is
        LLM-facing trajectory text and is never shown to the user.

        ``action`` is one of the skill's frontmatter ``actions:`` names; ``spec``
        is the parsed action tool-call arguments. Implement as a generator that
        ``return``s the observation; the shell captures it via ``yield from`` /
        ``StopIteration``.
        """
        ...
+ +## Tools (for data gathering) + +- **execute_python_script(code)** — run a general-purpose Python script to + inspect data, compute stats, transform tables, or verify assumptions. Its + stdout is returned to you (use `print()`); the script is for *your* analysis + and its output is never shown to the user. pandas, numpy, duckdb, sklearn, + scipy are available. **Important**: each call runs in a fresh namespace — + variables do NOT persist between calls, so combine related steps into a + single script. +- **inspect_source_data(table_names)** — get schema, stats, and sample rows for + source tables (cheaper than `execute_python_script` for basic inspection). +- **load_skill(name)** — load a skill's instructions into context so you can use + the action it unlocks (see the Skills section of your system instructions). + +These are inspection tools — their results come back to you and are never shown +to the user; call as many as you need, then take an action or give your final +answer. + +You analyse data that is **already in the workspace**. If the user's question +requires data that isn't present, do NOT try to find it yourself — use the +`delegate` action targeting the Data Loading agent. + +The initial context already includes sample rows and statistics for each table. +If the data is straightforward, go straight to the action without calling +tools. Tool results are returned to you before you act. + +## Actions + +Call an action as a tool call when you want to act on the data. Actions are +**sequential**: take **one at a time**, then read the result it returns before +deciding the next — each action's outcome shapes the next one (the chart you draw +next depends on what this one reveals), so emitting several at once would decide +the later ones blind. After each result you choose what to do — take another +action, or stop. **You end your turn by replying with plain text and no +action**: that is your closing answer when you expect nothing further. 
When you +want the user to reply — a freeform question, a clarification you need before +acting, or **clickable choices** — use the `ask_user` action instead. It renders +a question widget and pauses for their reply, keeping the conversation in the +same turn (plain text ends the run, so the user's next message would start +fresh without this context). + +**Be extremely concise.** Your plain-text replies — the closing answer that ends +the run and any per-step commentary — are shown verbatim to the user and double +as the artifact summary. Keep the closing answer to **one short sentence (≤20 +words)**: state the finding, not the process. Never narrate what you're about to +do or recap the chart's axes; let the charts and report speak for themselves. + +### `visualize` — chart a transform + +Run code that produces a DataFrame and render it as a chart. You then observe the +result and decide your next move. + +- `display_instruction` — ≤12 words; the question/hypothesis the chart + investigates (don't recap x/y/color — those are visible). Wrap a **column** in + `**…**` if it anchors the question. +- `title` — short descriptive chart heading (5–10 words, title case): the + subject, the dimensions compared, and the scope. Do NOT include the chart + type. This is shown as the chart's title. +- `code` — Python producing a DataFrame assigned to `output_variable`. +- `output_variable` — snake_case name the code assigns. +- `chart` — `{chart_type, encodings:{x,y,…}, config:{}}` (chart_type from the + chart type reference). +- `input_tables` — table names from [SOURCE TABLES] the code reads. +- `field_metadata` — field → SemanticType; `field_display_names` — field → + human-readable label. + +### `ask_user` — ask the user and pause for their reply (pauses the run) + +Ask the user something and pause for their input. 
Reach for this on **any** turn +where you want a reply — a freeform question, a clarification you need before +acting, or an explanation you want them to react to. Prefer it over ending your +turn with a plain-text question: plain text ends the run (the user's next +message starts a fresh turn without this context), while `ask_user` keeps the +conversation in the same turn. + +- `questions` — 1–3 items. Each is either a question that awaits an answer + (clarification) or a statement the user need not answer (explanation). A + question with no required answer and no options renders as a plain + explanation; offer chart-producing follow-ups as its `options`. +- each question: `text` (wrap a **column** in `**…**`), `responseType` + (`single_choice` when offering `options`, else `free_text`), `required` + (`true` for a clarification the run depends on, `false` for an explanation / + optional follow-up), and `options` (plain-text choices, **at most 3** — just + the most likely answers; the user can always type a freeform reply, so don't + enumerate every case). + +This is **terminal**: the run pauses after it and resumes when the user replies. + +### `delegate` — hand off to a peer agent + +Hand off to a peer agent when the question needs work outside your scope. + +- `target` — `"data_loading"` (the user's question needs data not in the + workspace). +- `options` — 1–2 seed prompts for the target agent; each becomes a one-click + button (label == seed prompt). If two, make them meaningfully distinct (e.g. + `'monthly orders 2024'`). +- `message` — a short note to the user that you're handing off. + +Only delegate if the workspace tables genuinely can't cover the question. + +## Choosing what to do + +Classify the question first (silently) to pick the right move and calibrate +effort: + +- *Conceptual / informational* (meaning, schema, what a field represents — no + chart needed): **answer directly in plain text** (no action). 
+- *Ambiguous* (you genuinely can't tell what's being asked): ask the user + rather than guessing — use the `ask_user` action (freeform or with clickable + choices) so their reply resumes the same turn. +- *Concrete* (one specific answer): **1 visualization**, then give your final + answer in plain text. +- *Progressive* (a small sequence, e.g. "why did revenue drop?"): **2–3 + visualizations**, then a closing plain-text answer tying them together. +- *Open-ended* (explicit exploration): **3–5 visualizations**, each a distinct + analytical angle (not variations on one axis), forming a narrative, then a + closing plain-text answer. +- *Missing data* (needs tables not in the workspace): + `delegate(target="data_loading")`. +- *Report / write-up request* (e.g. "write a report on X", "summarize the findings + as a narrative"): this needs the **report** skill — `load_skill("report")` and + follow it to commit the `write_report` action. **Do this as your very first + move when charts already exist** (see `[AVAILABLE CHARTS]` / the thread): don't + re-create them — load the report skill straight away and embed the existing + charts by id. Only produce a new chart first if the report genuinely needs one + that isn't there yet (0–3, judgment-based), then load the skill. + +For concrete/progressive questions, add the next chart only if it answers a gap +*raised* by the previous one. For open-ended exploration, do the reverse: each +chart should open a **new** analytical angle (temporal, spatial, distributional, +relational, comparative) rather than refine the last one — aim to use your full +budget on distinct perspectives. **Never** repeat a visualization already in the +trajectory or in another thread. + +## Chart Creation Guide + +The following reference material applies when you call the `visualize` tool. + +### A. 
Code Execution Rules + +**About the execution environment:** +- You can use BOTH DuckDB SQL and pandas operations in the same script +- The script will run in the workspace data directory (all data files are in the current directory) +- Each table in [CONTEXT] has a **file path** (e.g., `student_exam.parquet`, `sales.csv`). Use EXACTLY that path to load data: + - `.parquet`: `pd.read_parquet('file.parquet')` or DuckDB `read_parquet('file.parquet')` + - `.csv`: `pd.read_csv('file.csv')` or DuckDB `read_csv_auto('file.csv')` + - `.json`: `pd.read_json('file.json')` + - `.xlsx`/`.xls`: `pd.read_excel('file.xlsx')` + - `.txt`: `pd.read_csv('file.txt', sep='\t')` +- **IMPORTANT:** Use the exact filename from the context — do NOT change the file extension or assume all files are parquet. +- **Allowed libraries:** pandas, numpy, duckdb, math, datetime, json, statistics, collections, re, sklearn, scipy, random, itertools, functools, operator, time +- **Not allowed:** matplotlib, plotly, seaborn, requests, subprocess, os, sys, io, or any other library not listed above. +- File system access (open, write) and network access are also forbidden. + +**When to use DuckDB vs pandas:** +- **Prefer plain pandas** for most tasks — it's simpler and more readable. +- Only use DuckDB when the dataset is very large and you need efficient SQL aggregations, filtering, joins, or window functions. +- You can combine both: DuckDB for initial loading/filtering on large files, then pandas for complex operations. + +**Code structure:** standalone script (no function wrapper), imports at top. **CRITICAL:** The final result DataFrame MUST be assigned to the exact variable name you specified in `"output_variable"` — the system uses this name to extract the result. For example, if your output_variable is `sales_by_region`, the script must contain `sales_by_region = ...`. 
+ +**DuckDB notes:** +- Escape single quotes with '' (not \') +- No Unicode escapes (\u0400); use character ranges directly: [а-яА-Я] +- Cast date columns explicitly: `CAST(col AS DATE)`, `CAST(col AS TIMESTAMP)` +- For complex datetime operations, load data first then use pandas datetime functions +- Critical identifier quoting rule: + * If a table/column name contains non-ASCII characters (e.g., Chinese, Japanese, Korean, Cyrillic, etc.), spaces, or punctuation, + you MUST wrap it in double quotes, e.g. SELECT "金额" FROM "客户表". + * Never output placeholder identifiers like your_table_name, your_column, your_condition. + +**Datetime handling:** +- `date` columns contain date-only values (YYYY-MM-DD). `datetime` columns contain date+time (ISO 8601). +- `time` columns contain time-only values (HH:mm:ss). `duration` columns are time intervals. +- Year → number. Year-month / year-month-day → string ("2020-01" / "2020-01-01"). +- Hour alone → number. Hour:min or h:m:s → string. Never return raw datetime objects. + +### B. Chart Type Reference + +The `chart_type` value in the `visualize` action MUST be one of the names listed +in the first column below (exact spelling, including capitalization). When a row +lists multiple names, pick whichever fits the "when to use" hint best. + +| chart_type | encodings | config | when to use | +|---|---|---|---| +| Scatter Plot | x, y, color, size, facet | opacity (0.1–1.0) | Relationships between two quantitative fields | +| Regression | x, y, color, size, facet | regressionMethod ("linear","log","exp","pow","quad","poly"), polyOrder (2–10) | Trend line over scatter; one line per color group | +| Bar Chart / Lollipop Chart / Waterfall Chart | x, y, color, facet | — | Bar: default categorical comparison. Lollipop: cleaner for ranked lists / sparse categories. 
Waterfall: cumulative gain/loss, each bar starts where the previous ended | +| Grouped Bar Chart | x, y, group, facet | — | Side-by-side bars across a second categorical dimension | +| Histogram / Density Plot | x, color, facet | — | Distribution of one quantitative field. Histogram: discrete bins, auto-binned. Density Plot: smooth KDE curve | +| Boxplot | x, y, color, facet | — | Distribution summary (median/quartiles/outliers) by category | +| Ranged Dot Plot | x, y, color, facet | — | Min–max range or two-point comparison per category | +| Line Chart | x, y, color, strokeDash, facet | interpolate ("linear","monotone","step") | Trends over an ordered (usually temporal) x-axis | +| Area Chart | x, y, color, facet | — | Magnitude over ordered x; auto-stacks when color is set | +| Pie Chart | size, color, facet | innerRadius (0–100; 0=pie, >0=donut) | Part-of-whole with ≤7 categories. Wedge value goes on **size**, not **theta** | +| Radar Chart | x, y, color, facet | — | Multi-metric profile/comparison; x = metric name, y = value, color = entity (long-form data) | +| Heatmap | x, y, color, facet | colorScheme — sequential ("viridis","blues","reds","oranges","greens") or diverging ("blueorange","redblue") | Matrix / 2D density; color encodes the quantitative cell value | +| Bar Table | x, y, color, facet | — | Ranked horizontal table with inline bars; one row per category. y = category, x = value | +| KPI Card | metric, value, goal | — | "Big number" dashboard tile(s); one row per tile. 
`value` must be pre-aggregated; `goal` is optional | +| Candlestick Chart | x, open, high, low, close, facet | — | OHLC financial data | +| World Map | longitude, latitude, color, size | projection ("mercator","equalEarth","naturalEarth1","orthographic"), projectionCenter ([lon,lat]) | Geographic points/regions on a world projection | +| US Map | longitude, latitude, color, size | — (fixed albersUsa) | US-only points/regions (albersUsa projection) | + +**Critical chart rules:** +- **Scatter Plot**: use config opacity (0.1–1.0) for dense data instead of encoding opacity. +- **Regression**: trend line is automatic — do NOT compute regression coefficients/predictions in Python. Use `color` to get separate trend lines per group. +- **Bar Chart**: x=categorical, y=quantitative (vertical bars). Swap x↔y for horizontal bars. Same-x rows are auto-stacked when `color` is set. +- **Grouped Bar Chart**: use the `group` channel (not `color`) for side-by-side bars. +- **Histogram**: do NOT pre-bin in Python — pass the raw quantitative field on `x` and the chart bins automatically. Pre-aggregating gives wrong bin widths. +- **Line Chart**: use `strokeDash` to differentiate line styles (e.g. actual vs forecast). +- **Pie Chart**: use the `size` channel (not `theta`) for wedge values. Avoid when >7–8 categories. +- **Radar Chart**: data must be long-form — one row per (entity, metric, value). If your data is wide-form (one column per metric), melt it first in the Python step. +- **Heatmap**: pick `colorScheme` by the meaning of the values. Use a **sequential** scheme (viridis/blues/reds/oranges/greens) for single-direction magnitudes (counts, rates, prices, scores — higher is simply more). Use a **diverging** scheme (blueorange/redblue) ONLY when the values have a meaningful center to read away from (e.g. profit/loss around 0, change vs. a baseline, temperature around freezing). +- **Bar Table**: y is the category column to rank; x is the quantitative value driving bar length. 
Don't sort in Python — the template sorts. +- **KPI Card**: channels are `metric`, `value`, `goal` (not x/y). One DataFrame row = one tile. The `value` column must already contain the final number to display (aggregate upstream in the Python step). +- **Candlestick Chart**: requires `open`, `high`, `low`, `close` columns. +- **World Map / US Map**: channel names are `longitude` / `latitude`, not `x` / `y`. +- **facet**: available for nearly all chart types; use a low-cardinality categorical field. +- All fields in `encodings` must be columns of the DataFrame assigned to `output_variable`. Typically use 2–3 channels (x, y, color/size). + +### C. Semantic Type Reference + +Choose the most specific type that fits. Only annotate fields used in chart encodings. + +| Category | Types | +|---|---| +| Temporal | DateTime, Date, Time, Timestamp, Year, Quarter, Month, Week, Day, Hour, YearMonth, YearQuarter, YearWeek, Decade, Duration | +| Monetary measures | Amount, Price | +| Physical measures | Quantity, Temperature | +| Proportion | Percentage | +| Signed/diverging | Profit, PercentageChange, Sentiment, Correlation | +| Generic measures | Count, Number | +| Discrete numeric | Rank, Score | +| Identifier | ID | +| Geographic | Latitude, Longitude, Country, State, City, Region, Address, ZipCode | +| Entity names | Category, Name | +| Coded categorical | Status, Boolean, Direction | +| Binned ranges | Range | +| Fallback | Unknown | + +Key guidelines: +- Use **Amount** for summed monetary totals, **Price** for per-unit prices, **Profit** for values that can be negative. +- Use **Temperature** (not Quantity) for temperature — it has special diverging behavior. +- Use **Year** (not Number) for columns like "year" with values 2020, 2021. + +### D. Statistical Analysis Guide + +- **Regression**: use chart_type "Regression" — the trend line is automatic, do NOT compute regression values in Python code.
Configure method via `{"regressionMethod": "linear"}` (options: "linear", "log", "exp", "pow", "quad", "poly"; for poly add `{"polyOrder": 3}`). +- **Forecasting**: compute predicted future values in Python. Use Line Chart with strokeDash to distinguish actual vs forecast, and color for series grouping. +- **Clustering**: compute cluster assignments in Python. Output [x, y, cluster_id]. Use Scatter Plot with color → cluster_id. diff --git a/py-src/data_formulator/analyst/skills/core/__init__.py b/py-src/data_formulator/analyst/skills/core/__init__.py new file mode 100644 index 00000000..e546479a --- /dev/null +++ b/py-src/data_formulator/analyst/skills/core/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""core skill — always-on baseline tools + actions for the analyst. + +``SKILL.md`` holds the base prompt body (the shell formats it into the system +message); ``skill.py`` exposes ``get_skill()`` (the executable handler). +""" diff --git a/py-src/data_formulator/analyst/skills/core/skill.py b/py-src/data_formulator/analyst/skills/core/skill.py new file mode 100644 index 00000000..cc816936 --- /dev/null +++ b/py-src/data_formulator/analyst/skills/core/skill.py @@ -0,0 +1,399 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""core skill — the analyst's always-on baseline capabilities. + +Every other skill is optional and gated; ``core`` is ``always_on`` and loaded +automatically at the start of each run, so the agent is never truly empty. It +contributes the built-in data-inspection **tools** (``execute_python_script`` / +``inspect_source_data`` — ``load_skill`` is assembled by the shell because its +enum is dynamic) and the always-available **actions** — the committing tool +calls the agent acts with (``visualize`` / ``ask_user`` / ``delegate``; see +``design-docs/36``). ``ask_user`` is named ``interact`` internally.
+ +Each handler does *processing* (validate the action arguments, run/normalize, +emit events) and **returns an observation string** that the shell appends to the +trajectory as the action's tool-call result — exactly like an inspection tool. +There is no control verdict: the agent reads the observation and decides its own +next move (commit another action, or stop by giving its final answer — a turn +with no action ends the run). The one exception is ``interact``: it puts a +question widget to the user, which the agent cannot observe, so it **returns +``None``** — the shell reads that as "no observation to continue from" and ends +the run, pausing for the user's reply. Heavy execution substrate (sandbox-backed +``run_visualize_code`` / ``run_explore_code``) lives on the shell and is reached +via ``ctx.runtime``. +""" + +from __future__ import annotations + +import logging +from typing import Any, Generator + +from data_formulator.agents.agent_utils import generate_data_summary +from data_formulator.agents.context import handle_inspect_source_data +from data_formulator.security.code_signing import sign_result + +from data_formulator.analyst.skills.base import ( + Event, + SkillContext, + ToolResult, +) + +logger = logging.getLogger(__name__) + +# Valid targets for a ``delegate`` action. Report generation is NOT a delegate +# target — it is the ``write_report`` action unlocked by the report skill. +_DELEGATE_TARGETS: tuple[str, ...] = ("data_loading",) + + +class CoreSkill: + """The core skill processor: the ``explore`` / ``inspect_source_data`` tool + handlers and the ``visualize`` / ``interact`` / ``delegate`` action handlers. 
+ + Tool/action *schemas* live in ``core/tools.json`` and the skill's metadata + in ``SKILL.md`` frontmatter (``load_skill`` is assembled by the shell because + its enum is dynamic); this class is purely behaviour — it validates an + action's arguments and returns an observation string that the shell feeds + back as the action's tool-call result (or ``None`` for ``interact``, the one + terminal action that ends the run by pausing for the user). There is no + control verdict. + """ + + # ------------------------------------------------------------------ + # Tools + # ------------------------------------------------------------------ + + def handle_tool( + self, + name: str, + args: dict[str, Any], + ctx: SkillContext, + ) -> ToolResult: + """Execute a core inspection tool by delegating to the shell runtime. + + (In practice the shell's tool loop intercepts these inline — they need + loop-level sandbox state — but implementing them here keeps the skill + self-consistent and lets the shell route them generically if it stops + special-casing.) 
+ """ + input_tables = (ctx.payload or {}).get("input_tables") or [] + if name == "execute_python_script": + result = ctx.runtime.run_explore_code(args.get("code", ""), input_tables) + text = result.get("stdout", "") + if result.get("error"): + text += f"\n\nError: {result['error']}" + return ToolResult(text=text) + if name == "inspect_source_data": + text = handle_inspect_source_data( + args.get("table_names", []), input_tables, ctx.workspace, + ) + return ToolResult(text=text) + return ToolResult(text=f"core has no tool '{name}'.") + + # ------------------------------------------------------------------ + # Actions — dispatch (each committing tool call routes to one handler) + # ------------------------------------------------------------------ + + def handle_action( + self, + action: str, + spec: dict[str, Any], + ctx: SkillContext, + ) -> Generator[Event, None, str | None]: + if action == "visualize": + return (yield from self._handle_visualize(spec, ctx)) + if action == "ask_user": + return (yield from self._handle_interact(spec, ctx)) + if action == "delegate": + return (yield from self._handle_delegate(spec, ctx)) + yield { + "type": "error", + "message": f"core cannot handle action '{action}'.", + "message_code": "agent.unknownAction", + } + return f"core cannot handle action '{action}'." 
+ + # ------------------------------------------------------------------ + # visualize + # ------------------------------------------------------------------ + + def _handle_visualize( + self, action: dict[str, Any], ctx: SkillContext, + ) -> Generator[Event, None, str | None]: + code = action.get("code", "") + output_variable = action.get("output_variable", "result_df") + chart_spec = action.get("chart", {}) + field_metadata = action.get("field_metadata", {}) + field_display_names = action.get("field_display_names", {}) + display_instruction = action.get("display_instruction", "") + title = action.get("title", "") + step_index = int((ctx.payload or {}).get("completed_step_count", 0)) + 1 + + yield { + "type": "action", + "action": "visualize", + "display_instruction": display_instruction, + "input_tables": action.get("input_tables", []), + } + + viz_result = ctx.runtime.run_visualize_code( + code=code, + output_variable=output_variable, + chart_spec=chart_spec, + field_metadata=field_metadata, + field_display_names=field_display_names, + display_instruction=display_instruction, + title=title, + messages=ctx.trajectory, + ) + + if viz_result["status"] != "ok": + error_msg = viz_result.get("error_message", "Unknown error") + observation = ( + f"[OBSERVATION – Step {step_index} FAILED]\n\nError: {error_msg}" + ) + yield { + "type": "error", + "message": error_msg, + "display_instruction": display_instruction, + } + # Recoverable: hand the error back and let the agent re-decide. + return observation + + transform_result = viz_result["transform_result"] + sign_result(transform_result) + transformed_data = transform_result["content"] + + # Register the chart so a same-run report (and inspect_chart) can + # reference it by its forwarded, run-stable id. 
+ ctx.runtime.register_run_chart(transform_result, chart_spec) + + yield { + "type": "result", + "status": "success", + "content": { + "question": display_instruction, + "result": transform_result, + }, + } + + observation = self._format_observation( + step_index=step_index, + display_instruction=display_instruction, + code=transform_result.get("code", ""), + data=transformed_data, + chart_id=transform_result.get("chart_id"), + workspace=ctx.workspace, + ) + return observation + + # ------------------------------------------------------------------ + # interact — put question(s) to the user and pause (terminal) + # ------------------------------------------------------------------ + + def _handle_interact( + self, action: dict[str, Any], ctx: SkillContext, + ) -> Generator[Event, None, str | None]: + """Render a structured question/explanation widget and end the run. + + ``interact`` is the one *terminal* action: the agent cannot observe its + own question, so there is nothing to feed back. On a valid payload it + yields the widget event and **returns ``None``** — the shell reads that + as "no observation to continue from" and stops the loop, waiting for the + user's reply (which starts a fresh turn). A malformed payload is instead + recoverable: it returns an error string so the agent can retry. + """ + try: + payload = self._normalize_interact_action(action) + except ValueError: + msg = "ask_user action requires non-empty questions." 
+ yield { + "type": "error", + "message": msg, + "message_code": "agent.parseActionFailed", + } + return msg + yield { + "type": "interact", + "thought": action.get("thought", ""), + **payload, + } + return None + + # ------------------------------------------------------------------ + # delegate — hand off to a peer agent + # ------------------------------------------------------------------ + + def _handle_delegate( + self, action: dict[str, Any], ctx: SkillContext, + ) -> Generator[Event, None, str | None]: + try: + payload = self._normalize_delegate_action(action) + except ValueError as exc: + msg = str(exc) or "delegate action requires target and options." + yield { + "type": "error", + "message": msg, + "message_code": "agent.parseActionFailed", + } + return msg + yield { + "type": "delegate", + "thought": action.get("thought", ""), + **payload, + } + return ( + f"[DELEGATED to {payload['target']}] Handed off to the " + f"'{payload['target']}' agent; this run is complete." + ) + + # ------------------------------------------------------------------ + # Observation formatting + # ------------------------------------------------------------------ + + @staticmethod + def _format_observation( + step_index: int, + display_instruction: str, + code: str, + data: dict[str, Any], + workspace: Any, + chart_id: str | None = None, + ) -> str: + """Build the trajectory observation for a successful visualize step.""" + data_summary = generate_data_summary( + [{ + "name": data.get("virtual", {}).get("table_name", f"step_{step_index}"), + "rows": data["rows"], + }], + workspace=workspace, + ) + chart_ref = "" + if chart_id: + chart_ref = ( + f"\n\n**Chart id**: `{chart_id}` — to embed this chart in a report, " + f"write `![caption](chart://{chart_id})`; to read it again, pass this " + f"id to `inspect_chart`." 
+ ) + return ( + f"[OBSERVATION – Step {step_index}]\n\n" + f"**Visualization**: {display_instruction}\n\n" + f"**Code**:\n```python\n{code}\n```\n\n" + f"**Transformed Data**:\n{data_summary}" + f"{chart_ref}" + ) + + # ------------------------------------------------------------------ + # Action-argument normalizers (moved verbatim from the shell) + # ------------------------------------------------------------------ + + @classmethod + def _sanitize_clarification_options(cls, raw_options: Any) -> list[dict[str, Any]]: + if not isinstance(raw_options, list): + return [] + options: list[dict[str, Any]] = [] + for raw_option in raw_options[:3]: + if isinstance(raw_option, str): + label = raw_option.strip() + label_code = "" + elif isinstance(raw_option, dict): + label = str(raw_option.get("label", "")).strip() + label_code = str(raw_option.get("label_code", "")).strip() + else: + continue + if not label and not label_code: + continue + option: dict[str, Any] = {} + if label: + option["label"] = label + if label_code: + option["label_code"] = label_code + options.append(option) + return options + + @classmethod + def _sanitize_clarification_questions(cls, raw_questions: Any) -> list[dict[str, Any]]: + if not isinstance(raw_questions, list): + return [] + questions: list[dict[str, Any]] = [] + for raw_question in raw_questions[:3]: + if not isinstance(raw_question, dict): + continue + text = str(raw_question.get("text", "")).strip() + text_code = str(raw_question.get("text_code", "")).strip() + if not text and not text_code: + continue + options = cls._sanitize_clarification_options(raw_question.get("options")) + response_type = raw_question.get("responseType") or raw_question.get("response_type") + if response_type not in ("single_choice", "free_text"): + response_type = "single_choice" if options else "free_text" + question: dict[str, Any] = { + "responseType": response_type, + "required": bool(raw_question.get("required", True)), + } + if text: + question["text"] = 
@classmethod
def _normalize_interact_action(cls, action: dict[str, Any]) -> dict[str, Any]:
    """Normalize the ``interact`` action to ``{questions: [...]}``.

    Subsumes the clarify + explain shapes:
      * the native shape carries ``questions: [{text, options?, required?,
        responseType?}, ...]`` — clarifications (required answers / options)
        and explanations (a statement the user need not answer) side by side;
      * for back-compat a bare ``explanation`` string is also accepted (plus
        an optional ``followups`` list rendered as that question's options).
        It is appended after the native questions as one non-required
        question: ``single_choice`` when follow-up options exist,
        ``free_text`` otherwise — the same rule the question sanitizer
        applies when no valid ``responseType`` is given.

    Args:
        action: The raw action payload emitted by the model.

    Returns:
        ``{"questions": [...]}`` with at least one question.

    Raises:
        ValueError: If neither ``questions`` nor ``explanation`` yields a
            usable question.
    """
    questions = cls._sanitize_clarification_questions(action.get("questions"))

    # Treat an explicit ``null`` as "no explanation"; ``str(None)`` would
    # otherwise produce a bogus question whose text is "None".
    raw_explanation = action.get("explanation")
    explanation = "" if raw_explanation is None else str(raw_explanation).strip()
    if explanation:
        followups = cls._sanitize_clarification_options(action.get("followups"))
        explain_q: dict[str, Any] = {
            "text": explanation,
            # A choice question with nothing to choose from is contradictory;
            # without follow-ups this is a plain free-text explanation.
            "responseType": "single_choice" if followups else "free_text",
            "required": False,
        }
        if followups:
            explain_q["options"] = followups
        questions.append(explain_q)

    if not questions:
        raise ValueError("ask_user action requires non-empty questions[]")
    return {"questions": questions}
ValueError("delegate action requires non-empty 'options[]'") + payload: dict[str, Any] = {"target": target, "options": cleaned[:2]} + if message: + payload["message"] = message + return payload + + +def get_skill() -> CoreSkill: + """Factory used by the registry's eager instantiation.""" + return CoreSkill() diff --git a/py-src/data_formulator/analyst/skills/core/tools.json b/py-src/data_formulator/analyst/skills/core/tools.json new file mode 100644 index 00000000..bcb3826d --- /dev/null +++ b/py-src/data_formulator/analyst/skills/core/tools.json @@ -0,0 +1,163 @@ +[ + { + "type": "function", + "function": { + "name": "execute_python_script", + "description": "Execute a general-purpose Python script in the sandbox. Here you use it to inspect data, compute statistics, transform tables, or verify assumptions before you act — write results to stdout with print() and that output is returned to you (it is NOT shown to the user). The script is for your own analysis, not for producing the final visualization. pandas, numpy, duckdb, sklearn, scipy are available.", + "parameters": { + "type": "object", + "properties": { + "purpose": { + "type": "string", + "description": "One-sentence description of what this script does and why (shown to user as progress)." + }, + "code": { + "type": "string", + "description": "Python script to execute. Use print() to surface output." + } + }, + "required": ["purpose", "code"] + } + } + }, + { + "type": "function", + "function": { + "name": "inspect_source_data", + "description": "Get a detailed summary of one or more source tables — schema, field-level statistics, and sample rows. Cheaper than execute_python_script for basic data inspection.", + "parameters": { + "type": "object", + "properties": { + "table_names": { + "type": "array", + "items": { "type": "string" }, + "description": "List of table names from [SOURCE TABLES] to inspect." 
+ } + }, + "required": ["table_names"] + } + } + }, + { + "type": "function", + "function": { + "name": "visualize", + "description": "Commit a data transform + chart: run code producing a DataFrame and render it. The agent observes the result and continues.", + "parameters": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "A short, descriptive title for the chart (5-10 words, title case). Summarize what the chart shows — the subject, the dimensions compared, and the scope. Do NOT include the chart type. Shown as the chart's heading." + }, + "display_instruction": { + "type": "string", + "description": "≤12 words. State the question or hypothesis the chart investigates — don't recap the chart spec (x/y/color/split are already visible). Wrap a **column** in ** ** if it anchors the question." + }, + "input_tables": { + "type": "array", + "items": { "type": "string" }, + "description": "Table names from [SOURCE TABLES] that the code reads." + }, + "code": { + "type": "string", + "description": "Python code producing a DataFrame assigned to output_variable." + }, + "output_variable": { + "type": "string", + "description": "snake_case name of the DataFrame variable the code assigns." + }, + "chart": { + "type": "object", + "description": "Chart spec: {chart_type, encodings:{x,y,...}, config:{}}. chart_type from the chart type reference." + }, + "field_metadata": { + "type": "object", + "description": "Map of field name -> SemanticType for the output columns." + }, + "field_display_names": { + "type": "object", + "description": "Map of field name -> human-readable display name for chart axes and table headers." + } + }, + "required": ["code", "output_variable", "chart"] + } + } + }, + { + "type": "function", + "function": { + "name": "ask_user", + "description": "Ask the user something and pause for their reply — the run resumes in the same turn with their answer in context. 
Use this for ANY turn where you want the user to respond: a freeform question, a clarification you need before acting, or an explanation you want them to react to. Freeform is fine (no clickable options required). Prefer this over ending your turn with a plain-text question: plain text ends the run and the user's next message starts a fresh turn without this context, whereas ask_user keeps the conversation going. Reserve plain text (no action) for your final answer when you expect nothing further.", + "parameters": { + "type": "object", + "properties": { + "thought": { + "type": "string", + "description": "Brief rationale (not shown to the user)." + }, + "questions": { + "type": "array", + "description": "One or more things to put to the user. Each is either a question that awaits an answer (clarification) or a statement the user need not answer (explanation). A question with no required answer and no options renders as a plain explanation. Ask at most 3.", + "items": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "The question or explanation. For an explanation, keep it to 1–3 grounded sentences. Wrap a **column** in ** ** to highlight it." + }, + "responseType": { + "type": "string", + "enum": ["single_choice", "free_text"], + "description": "single_choice when options are offered; free_text when the user types a custom answer." + }, + "required": { + "type": "boolean", + "description": "false for an explanation / optional follow-up; true for a clarification the run depends on." + }, + "options": { + "type": "array", + "items": { "type": "string" }, + "description": "Plain-text choices, at most 3. Keep them to the few most likely answers — the user can always type a freeform reply, so don't try to enumerate every case. For a clarification these are answers; for an explanation these are short chart-producing follow-up prompts the user might click next (≤8 words each, phrased as the user would say them)." 
+ } + }, + "required": ["text"] + } + } + }, + "required": ["questions"] + } + } + }, + { + "type": "function", + "function": { + "name": "delegate", + "description": "Hand off to a peer agent (e.g. data loading) when the question needs work outside this agent's scope, then end the run.", + "parameters": { + "type": "object", + "properties": { + "thought": { + "type": "string", + "description": "Brief rationale (not shown to the user)." + }, + "target": { + "type": "string", + "enum": ["data_loading"], + "description": "The peer agent to hand off to." + }, + "message": { + "type": "string", + "description": "Short note to the user that you're handing off, e.g. 'I'll hand this to the data loading agent — pick a search:'." + }, + "options": { + "type": "array", + "items": { "type": "string" }, + "description": "1–2 seed prompts for the target agent. Each becomes a one-click button (label == seed prompt); if two, make them meaningfully distinct." + } + }, + "required": ["target", "options"] + } + } + } +] diff --git a/py-src/data_formulator/analyst/skills/report/SKILL.md b/py-src/data_formulator/analyst/skills/report/SKILL.md new file mode 100644 index 00000000..1397fcd5 --- /dev/null +++ b/py-src/data_formulator/analyst/skills/report/SKILL.md @@ -0,0 +1,118 @@ +--- +name: report +description: >- + Turn an exploration (threads, findings, charts) into a single Markdown + report — note, blog post, executive summary, KPI dashboard, slide brief, or + multi-section analytical report, with embedded charts. +when_to_use: >- + The user asks to write up / summarize / report on what they explored, or + wants a shareable narrative document built from the charts and findings in + the data thread. Not for producing a single new chart (use visualize). +always_on: false +tools: + - inspect_chart +actions: + - write_report +--- + +# Skill: Report writing + +You are a data journalist / analyst who creates insightful, well-organized +reports based on data explorations. 
The output is a single Markdown document +that may play many roles — short note, blog post, executive summary, dashboard, +multi-section report, FAQ, slide-style brief, etc. Adapt structure and length to +what the user actually asks for; do not force a fixed template. + +## Emitting the report (the `write_report` action) + +First inspect whatever charts and data you need (see below), then write the +entire report and commit it by **calling the `write_report` tool** — it is the +committing action that ends this turn. Its `report` argument carries the +**full Markdown** of the finished report: + +- `report` — the complete report in Markdown: headings, prose, tables, and + embedded charts via `![caption](chart://chart_id)`. + +Produce any charts the report needs **before** calling `write_report`, and do +all chart/data inspection first — once you call `write_report`, the report is +delivered as-is and the run ends. + +## Context available to you +- **[PRIMARY TABLE(S)]** / **[OTHER AVAILABLE TABLES]**: Lightweight schema of datasets. +- **[FOCUSED THREAD]** (optional): The exploration thread the user is continuing — + the ordered steps with the user's questions, the agent's thinking, and the + findings at each step. This is the spine of the story you are telling. +- **[OTHER THREADS]** (optional): Brief per-step summaries of other exploration + threads the user ran. These are additional findings worth weaving in. +- **[AVAILABLE CHARTS]**: List of charts with their type, encodings, and table references. + +## Ground the report in the exploration +The thread context is your most important input. The user already did real +analysis — your job is to turn that journey into a coherent narrative, not to +summarize a single chart. Before writing: +- Read the FOCUSED THREAD and OTHER THREADS to understand the full set of + questions asked and findings reached. 
+- Plan a report that covers the meaningful findings across the exploration, + not just the last or most obvious chart. + +## Inspecting charts and data +You have two inspection tools available the whole time: `inspect_chart` and +`inspect_source_data`. Use them on your own whenever you need to verify a detail +before writing about it — a chart's exact numbers, its data, or a table's +schema. `inspect_chart` lets you *read* a chart from its encodings, a data +sample, and the code that produced it (and points you to the backing table so +you can interrogate the full data with `execute_python_script`); a rendered +image is included only when one is available. Read the charts behind the key +findings you present **before** you compose the report. + +## Write the report +Write the complete report in Markdown and pass it as the `report` argument of the +`write_report` tool. Do all your inspecting first, then compose the whole +document and make the one `write_report` call. + +### Embedding charts (REQUIRED FORMAT — do not change this) +To embed a chart image, use markdown image syntax with a `chart://` URL: + ![Caption describing the chart](chart://chart_id) + +Example: `![Monthly trade balance trend](chart://chart-123)` + +The chart_id must match one from [AVAILABLE CHARTS]. Place each chart embed on +its own line (it renders as a block). You can embed the same chart at most +once. Captions are short — one line describing what the chart shows. 
+ +### Tables +For data tables, write standard markdown tables directly: +| date | value | +| --- | --- | +| 2020-01 | -43.5 | + +### Style & structure — adapt to the user's request +The user may ask for any of: +- a short note or social-style summary (a few sentences, one or two charts), +- a blog post / narrative report (intro → findings → takeaway), +- an executive summary (key numbers up top, then context), +- a KPI dashboard / multi-section overview (headings per topic, multiple charts + arranged with short commentary between them), +- a slide-style brief (compact sections with bullet points and embedded charts), +- a deeper analytical report with sub-sections, methodology notes, and caveats. + +Pick the structure that fits the request and the available material. Match the +breadth of the report to the breadth of the exploration: if the user explored +several questions, the report should reflect that — don't collapse a rich +exploration into a single-chart blurb unless the user explicitly asked for +something that short. Reasonable defaults if the user is vague: +- Start with a `# Title` that reflects the topic. +- Group related findings under `##` (and `###` if useful) headings, typically + one section per key finding / thread. +- Around each embedded chart, briefly explain what it shows and the key insight. +- Use bullets / short paragraphs / tables where they help; don't pad. +- Close with a brief takeaway or summary section if the report is more than a + few paragraphs. For very short outputs (notes, single-chart blurbs), a closing + summary is optional. + +### Guardrails +- Write in Markdown. Keep prose tight; let the data and charts carry the weight. +- Stay faithful to the data — do not invent numbers, comparisons, or causation + that the data does not actually support. +- It is fine to flag uncertainty ("based on the sample shown…") when appropriate. +- Embed every chart you discuss; don't reference a chart in prose without showing it. 
diff --git a/py-src/data_formulator/analyst/skills/report/__init__.py b/py-src/data_formulator/analyst/skills/report/__init__.py new file mode 100644 index 00000000..18189ba1 --- /dev/null +++ b/py-src/data_formulator/analyst/skills/report/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""report skill — streams a Markdown report from an exploration. + +``SKILL.md`` holds the instructions/action contract; ``skill.py`` exposes +``get_skill()`` (the executable handler, ported from ``agent_report_gen.py``). +""" diff --git a/py-src/data_formulator/analyst/skills/report/skill.py b/py-src/data_formulator/analyst/skills/report/skill.py new file mode 100644 index 00000000..0b99c1d2 --- /dev/null +++ b/py-src/data_formulator/analyst/skills/report/skill.py @@ -0,0 +1,211 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""report skill — turns an exploration into a Markdown report. + +The analyst shell decides to write a report (the ``write_report`` **action**), +then dispatches here. The model assembles the report in the **main agent loop**: +it loads this skill, inspects whatever charts/data it needs via the +skill-private ``inspect_chart`` tool (plus the always-on ``inspect_source_data``), +and then emits ``write_report`` — a committing tool call carrying the **full +Markdown** in its ``report`` argument. + +``write_report`` is the one *streaming* action (``stream_field="report"`` on +the ``report`` channel — declared via ``streaming_actions`` below). When the +model writes the report as that argument, the **agent loop** forwards it live as +incremental ``report``-channel ``text_delta``s as the tokens arrive +(design-docs/36 §5: the agent owns the generic forwarding envelope, the skill +stays declarative). This handler is then the buffered *fallback*: if the report +was not streamed (e.g. 
# ── Leaked-tool-syntax stripping (defense in depth) ───────────────────────

# Special tokens of the form ``<|…|>``.
_LEAK_SPECIAL_TOKEN = re.compile(r"<\|[^|>]*\|>")
# A ``to=functions.<name>`` header (optionally preceded by ``commentary``)
# plus everything up to the first ``}`` that closes a following ``{``.
_LEAK_TOOLCALL = re.compile(
    r"(?:\bcommentary\b\s*)?\bto\s*=\s*functions\.[A-Za-z0-9_]+"
    r"[\s\S]*?\{[\s\S]*?\}",
)


def _strip_leaked_tool_syntax(text: str) -> str:
    """Return *text* with leaked tool-call syntax removed.

    Tool-call headers (together with their trailing JSON args) are deleted
    first, then any remaining ``<|…|>`` special tokens. Text that contains
    neither pattern is returned unchanged.
    """
    cleaned = text
    for leak_pattern in (_LEAK_TOOLCALL, _LEAK_SPECIAL_TOKEN):
        cleaned = leak_pattern.sub("", cleaned)
    return cleaned
def handle_tool(
    self,
    name: str,
    args: dict[str, Any],
    ctx: SkillContext,
) -> ToolResult:
    """Dispatch a tool call addressed to the report skill.

    ``inspect_chart`` is the only tool served here: the charts are read from
    ``ctx.payload["charts"]`` (an absent payload or key counts as no charts)
    and the requested ids from ``args["chart_ids"]``. Any other tool name
    yields a ``ToolResult`` whose text says the skill has no such tool.
    """
    if name == "inspect_chart":
        payload = ctx.payload or {}
        available_charts: list[dict[str, Any]] = payload.get("charts") or []
        requested_ids = args.get("chart_ids", [])
        summary = self._handle_inspect_chart(requested_ids, available_charts)
        return ToolResult(text=summary)
    return ToolResult(text=f"report has no tool '{name}'.")
def _handle_inspect_chart(
    self,
    chart_ids: list[str],
    charts: list[dict[str, Any]],
) -> str:
    """Inspect charts by *reading their data*, not by rendering them.

    For every requested id this builds a text section from the chart's
    type, its non-empty encodings, the code that produced it (when present)
    and the first five rows of its data, plus a pointer to the backing table
    (when the chart data carries a ``name``) so the full data can be queried
    with ``execute_python_script``.

    Args:
        chart_ids: Ids to inspect. Model-produced arguments are not
            trusted: a single bare string is treated as one id and any
            other non-list value (e.g. ``None``) as "no ids".
        charts: Available chart dicts; entries without a ``chart_id`` are
            simply never matched.

    Returns:
        The per-chart sections joined by blank lines; an unknown id
        contributes a ``Chart <id>: not found`` line. Empty string when no
        ids were requested.
    """
    # A bare string would otherwise be iterated character by character and
    # ``None`` would raise; coerce both to a well-formed id list.
    if isinstance(chart_ids, str):
        chart_ids = [chart_ids]
    elif not isinstance(chart_ids, (list, tuple)):
        chart_ids = []

    results = []
    for chart_id in chart_ids:
        # ``.get`` so that one malformed chart entry (no ``chart_id``)
        # cannot abort the whole inspection with a KeyError.
        chart = next(
            (
                c
                for c in charts or []
                if isinstance(c, dict) and c.get("chart_id") == chart_id
            ),
            None,
        )
        if not chart:
            results.append(f"Chart {chart_id}: not found")
            continue

        parts = [f"Chart: {chart_id}"]
        parts.append(f" Type: {chart.get('chart_type', 'Unknown')}")

        encodings = chart.get("encodings", {})
        if encodings:
            # Channels with no assigned field are omitted.
            enc_str = ", ".join(f"{k}: {v}" for k, v in encodings.items() if v)
            parts.append(f" Encodings: {enc_str}")

        if chart.get("code"):
            parts.append(f" Code:\n```python\n{chart['code']}\n```")

        chart_data = chart.get("chart_data")
        if isinstance(chart_data, dict) and chart_data.get("rows"):
            df = pd.DataFrame(chart_data["rows"])
            parts.append(f" Data ({len(df)} rows, {len(df.columns)} cols):")
            # Column labels are not guaranteed to be strings.
            parts.append(f" Columns: {', '.join(map(str, df.columns))}")
            parts.append(f" Sample:\n{df.head(5).to_string()}")
            if chart_data.get("name"):
                parts.append(
                    f" To analyze the full chart data, run execute_python_script "
                    f"against table '{chart_data['name']}'."
                )

        parts.append(" [Read the chart from its encodings + data above]")

        results.append("\n".join(parts))

    return "\n\n".join(results)
A rendered PNG is included only when one is available.", + "parameters": { + "type": "object", + "properties": { + "chart_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "List of chart IDs from [AVAILABLE CHARTS] to inspect." + } + }, + "required": ["chart_ids"] + } + } + }, + { + "type": "function", + "function": { + "name": "write_report", + "description": "Deliver a Markdown report and end the run. `report` is the full report text (embed charts with ![caption](chart://chart_id)).", + "parameters": { + "type": "object", + "properties": { + "report": { + "type": "string", + "description": "The full Markdown report text. Embed charts with ![caption](chart://chart_id) referencing IDs from [AVAILABLE CHARTS]." + } + }, + "required": ["report"] + } + } + } +] diff --git a/py-src/data_formulator/analyst/tools.py b/py-src/data_formulator/analyst/tools.py new file mode 100644 index 00000000..5713105d --- /dev/null +++ b/py-src/data_formulator/analyst/tools.py @@ -0,0 +1,151 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Inspection tools for the analyst agent. + +Tools are parallel-safe, internal, side-effect-free capabilities the agent may +call freely within a turn to gather information before committing to a single +user-visible action. See ``design-docs/35`` §4.1. + + - ``execute_python_script`` — run a general-purpose Python script in the + sandbox to inspect/compute (stdout returned). + - ``inspect_source_data`` — schema + stats + sample rows for source tables. + - ``load_skill`` — pull a skill's ``SKILL.md`` body into context, unlocking + its gated actions (progressive disclosure; reading a doc is read-only). + +``inspect_chart`` is a skill-private tool used by report-style skills and is +contributed by those skills rather than living in the always-on tool set. 
# Function-calling schema for the sandboxed script tool. ``purpose`` is the
# progress line shown to the user; the script's stdout is returned to the
# agent only.
EXECUTE_PYTHON_SCRIPT_TOOL: dict[str, Any] = {
    "type": "function",
    "function": {
        "name": "execute_python_script",
        "description": (
            "Execute a general-purpose Python script in the sandbox. Here you "
            "use it to inspect data, compute statistics, or verify assumptions "
            "before you act — print() to stdout, which is returned to you and is "
            "not shown to the user. pandas, numpy, duckdb, sklearn, scipy are available."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "purpose": {
                    "type": "string",
                    "description": "One-sentence description of what this script does and why (shown to user as progress).",
                },
                "code": {
                    "type": "string",
                    "description": "Python script to execute. Use print() to surface output.",
                },
            },
            "required": ["purpose", "code"],
        },
    },
}

# Function-calling schema for the table-summary tool. The description must
# name tools that actually exist on the surface the model sees: the script
# tool is ``execute_python_script`` (there is no ``explore`` tool).
INSPECT_SOURCE_DATA_TOOL: dict[str, Any] = {
    "type": "function",
    "function": {
        "name": "inspect_source_data",
        "description": (
            "Get a detailed summary of one or more source tables — schema, "
            "field-level statistics, and sample rows. Cheaper than "
            "execute_python_script for basic data inspection."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "table_names": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "List of table names from [SOURCE TABLES] to inspect.",
                },
            },
            "required": ["table_names"],
        },
    },
}
def build_tools(
    skill_names: list[str],
    extra_tools: list[dict[str, Any]] | None = None,
    action_tools: list[dict[str, Any]] | None = None,
) -> list[dict[str, Any]]:
    """Assemble the tool list exposed to the LLM for a turn.

    The result is ordered in three groups on one function-calling surface:

    1. ``extra_tools`` — the inspection tools (e.g. ``execute_python_script``,
       ``inspect_source_data`` and any loaded skill's own tools), in the
       order given.
    2. ``load_skill`` — added only when ``skill_names`` is non-empty, with
       its ``name`` argument restricted to those skills.
    3. ``action_tools`` — the committing actions, in the order given.

    Tools are de-duplicated by function name, keeping the first occurrence;
    a tool without a function name is never treated as a duplicate.

    Args:
        skill_names: Names of the loadable skills.
        extra_tools: Inspection tool schemas, or ``None`` for none.
        action_tools: Committing action schemas, or ``None`` for none.

    Returns:
        The ordered, de-duplicated list of tool schemas.
    """
    ordered: list[dict[str, Any]] = [*(extra_tools or [])]
    if skill_names:
        ordered.append(build_load_skill_tool(skill_names))
    ordered += action_tools or []

    names_taken: set[str] = set()
    unique: list[dict[str, Any]] = []
    for candidate in ordered:
        fn_name = candidate.get("function", {}).get("name", "")
        is_repeat = bool(fn_name) and fn_name in names_taken
        if not is_repeat:
            names_taken.add(fn_name)
            unique.append(candidate)
    return unique
+ if _loader_auth_mode(self._loader_class) == "none": + loader = self._loader_class() + self._loaders[identity] = loader + return loader # Try auto-reconnect from vault loader = self._try_auto_reconnect(identity) if loader is not None: diff --git a/py-src/data_formulator/data_loader/local_folder_data_loader.py b/py-src/data_formulator/data_loader/local_folder_data_loader.py index 240771b0..2c41bc4f 100644 --- a/py-src/data_formulator/data_loader/local_folder_data_loader.py +++ b/py-src/data_formulator/data_loader/local_folder_data_loader.py @@ -250,7 +250,13 @@ def fetch_data_as_arrow( if ext == ".parquet": table = pq.read_table(str(resolved)) elif ext in (".csv", ".tsv"): - table = pa_csv.read_csv(str(resolved)) + # ``.tsv`` is tab-separated; pyarrow's read_csv defaults to a comma + # delimiter, so without this a TSV collapses into a single column + # (e.g. "id\trate" stays one field). Keep comma for ``.csv``. + parse_options = ( + pa_csv.ParseOptions(delimiter="\t") if ext == ".tsv" else None + ) + table = pa_csv.read_csv(str(resolved), parse_options=parse_options) elif ext in (".json", ".jsonl"): import pyarrow.json as pa_json table = pa_json.read_json(str(resolved)) diff --git a/py-src/data_formulator/data_loader/sample_datasets_loader.py b/py-src/data_formulator/data_loader/sample_datasets_loader.py index 6c3267cf..d74b6fa4 100644 --- a/py-src/data_formulator/data_loader/sample_datasets_loader.py +++ b/py-src/data_formulator/data_loader/sample_datasets_loader.py @@ -25,7 +25,10 @@ import pyarrow as pa from data_formulator.data_loader.external_data_loader import ExternalDataLoader -from data_formulator.datalake.parquet_utils import df_to_safe_records +from data_formulator.datalake.parquet_utils import ( + df_to_safe_records, + sanitize_dataframe_for_arrow, +) logger = logging.getLogger(__name__) @@ -66,6 +69,17 @@ def auth_mode() -> str: # credentials UI, and are always reported as ``connected: true``. 
return "none" + @staticmethod + def auth_config() -> dict: + # Mirror :meth:`auth_mode` for the modern auth interface. The base + # class defaults ``auth_config`` to ``{"mode": "credentials"}`` + # independently of ``auth_mode``, and ``_loader_auth_mode`` prefers + # ``auth_config``. Without this override the no-auth loader would be + # mis-classified as credential-based, breaking catalog/preview/import + # (which require a connection) whenever no loader was eagerly cached + # — e.g. in ephemeral / ``--disable-data-connectors`` deployments. + return {"mode": "none"} + @staticmethod def catalog_hierarchy() -> list[dict[str, str]]: return [ @@ -231,7 +245,13 @@ def fetch_data_as_arrow( logger.info("Returning %d / %d rows from sample dataset: %s", len(df), self._last_total_rows, source_table) - return pa.Table.from_pandas(df, preserve_index=False) + # Public sample JSON/CSV files frequently contain mixed-type object + # columns (e.g. movies.json's ``Title`` holds both strings and + # numeric values), which makes ``pa.Table.from_pandas`` raise + # ArrowTypeError. Coerce such columns to a consistent type first. + return pa.Table.from_pandas( + sanitize_dataframe_for_arrow(df), preserve_index=False + ) # ------------------------------------------------------------------ # Internal: cached full-dataset fetch diff --git a/py-src/data_formulator/datalake/workspace_manager.py b/py-src/data_formulator/datalake/workspace_manager.py index 679452ca..23e37176 100644 --- a/py-src/data_formulator/datalake/workspace_manager.py +++ b/py-src/data_formulator/datalake/workspace_manager.py @@ -169,6 +169,10 @@ def list_workspaces(self) -> list[dict]: workspace. If a workspace directory lacks this file (legacy), it is auto-repaired via :meth:`_ensure_meta`. + Every workspace directory is listed, including empty + "Untitled Session" entries from data-loading chats. Users + manage (rename/delete) these themselves via the sidebar. 
+ Returns list of {"id": str, "display_name": str, "updated_at": str}. """ workspaces = [] @@ -184,13 +188,16 @@ def list_workspaces(self) -> list[dict]: except Exception: continue + tc = meta.get("tableCount") + cc = meta.get("chartCount") + workspaces.append({ "id": child.name, "display_name": meta.get("displayName", child.name), "created_at": meta.get("createdAt") or meta.get("updatedAt"), "updated_at": meta.get("updatedAt"), - "table_count": meta.get("tableCount"), - "chart_count": meta.get("chartCount"), + "table_count": tc, + "chart_count": cc, }) workspaces.sort(key=lambda w: w.get("updated_at") or "", reverse=True) diff --git a/py-src/data_formulator/knowledge/store.py b/py-src/data_formulator/knowledge/store.py index 0b290093..08463437 100644 --- a/py-src/data_formulator/knowledge/store.py +++ b/py-src/data_formulator/knowledge/store.py @@ -1,10 +1,10 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Knowledge store — manages user knowledge files (rules, experiences). +"""Knowledge store — manages user knowledge files (rules, workflows). Each user has a ``knowledge/`` directory under their home with two -sub-directories: ``rules`` and ``experiences``. Every knowledge entry is a +sub-directories: ``rules`` and ``workflows``. Every knowledge entry is a Markdown file with YAML front matter. All file I/O is routed through :class:`ConfinedDir` for path safety. 
@@ -12,7 +12,7 @@ Directory depth constraints: - ``rules``: flat — only files directly under ``rules/`` (1 path part) -- ``experiences``: one level of sub-directories (up to 2 path parts) +- ``workflows``: one level of sub-directories (up to 2 path parts) """ from __future__ import annotations @@ -27,19 +27,27 @@ logger = logging.getLogger(__name__) -VALID_CATEGORIES = frozenset({"rules", "experiences"}) +VALID_CATEGORIES = frozenset({"rules", "workflows"}) _MAX_DEPTH = { "rules": 1, - "experiences": 2, # one sub-dir: "category/file.md" + "workflows": 2, # one sub-dir: "category/file.md" } KNOWLEDGE_LIMITS: dict[str, int] = { "rule_description_max": 100, "rules": 350, - "experiences": 2000, + # Soft length guidance for distilled workflows: the target the distill + # agent aims for, NOT a hard cap. Workflows may exceed it when an + # analysis genuinely needs the room (e.g. multiple abstraction levels). + # Writes are only rejected past WORKFLOW_HARD_MAX below. + "workflows": 6000, } +# Absolute safety ceiling for a workflow body. Guards against runaway LLM +# output while still letting rich, multi-section workflows through. 
+WORKFLOW_HARD_MAX: int = 24000 + # --------------------------------------------------------------------------- # Tokenization helpers for improved search scoring # --------------------------------------------------------------------------- @@ -151,14 +159,13 @@ class KnowledgeItemMeta: """ __slots__ = ( - "title", "tags", "source", "created", "description", "always_apply", + "title", "source", "created", "description", "always_apply", "source_workspace_id", "source_workspace_name", ) def __init__( self, title: str, - tags: list[str], source: str, created: str, description: str, @@ -167,7 +174,6 @@ def __init__( source_workspace_name: str = "", ): self.title = title - self.tags = tags self.source = source self.created = created self.description = description @@ -181,14 +187,6 @@ def from_raw(cls, meta: dict[str, Any], fallback_stem: str = "") -> "KnowledgeIt title = meta.get("title", fallback_stem) title = str(title) if title is not None else fallback_stem - raw_tags = meta.get("tags", []) - if isinstance(raw_tags, list): - tags = [str(t) for t in raw_tags] - elif raw_tags is None: - tags = [] - else: - tags = [str(raw_tags)] - source = str(meta.get("source", "manual") or "manual") created = str(meta.get("created", "") or "") description = str(meta.get("description", "") or "") @@ -198,7 +196,6 @@ def from_raw(cls, meta: dict[str, Any], fallback_stem: str = "") -> "KnowledgeIt return cls( title=title, - tags=tags, source=source, created=created, description=description, @@ -246,26 +243,64 @@ class KnowledgeStore: store = KnowledgeStore(user_home) items = store.list_all("rules") - content = store.read("experiences", "data-cleaning/handle-missing.md") + content = store.read("workflows", "data-cleaning/handle-missing.md") store.write("rules", "date-format.md", md_content) store.delete("rules", "date-format.md") - results = store.search("ROI", categories=["rules", "experiences"]) + results = store.search("ROI", categories=["rules", "workflows"]) """ def __init__(self, 
user_home: Path | str) -> None: user_home = Path(user_home) self._root = ConfinedDir(user_home / "knowledge", mkdir=True) + self._migrate_experiences_to_workflows() self._jails: dict[str, ConfinedDir] = { "rules": ConfinedDir(self._root.root / "rules", mkdir=True), - "experiences": ConfinedDir(self._root.root / "experiences", mkdir=True), + "workflows": ConfinedDir(self._root.root / "workflows", mkdir=True), } self._migrate_flat() # -- migration --------------------------------------------------------- + def _migrate_experiences_to_workflows(self) -> None: + """Move legacy ``experiences/`` files into ``workflows/`` (one-time). + + The feature was renamed from "experiences" to "workflows"; existing + users have files under ``knowledge/experiences/``. Move them so the + rename is transparent. + """ + old_root = self._root.root / "experiences" + if not old_root.is_dir(): + return + new_root = self._root.root / "workflows" + new_root.mkdir(parents=True, exist_ok=True) + for md_file in list(old_root.rglob("*.md")): + rel = md_file.relative_to(old_root) + dest = new_root / rel + dest.parent.mkdir(parents=True, exist_ok=True) + if dest.exists(): + stem = rel.stem + suffix_n = 1 + while dest.exists(): + dest = dest.parent / f"{stem}-{suffix_n}.md" + suffix_n += 1 + try: + md_file.rename(dest) + logger.info("Migrated experiences/%s → workflows/%s", rel, dest.name) + except Exception: + logger.warning("Failed to migrate experience file %s", md_file, exc_info=True) + # Remove the now-empty legacy tree (best effort) + try: + for sub in sorted(old_root.rglob("*"), reverse=True): + if sub.is_dir() and not any(sub.iterdir()): + sub.rmdir() + if not any(old_root.iterdir()): + old_root.rmdir() + except Exception: + logger.warning("Failed to clean up legacy experiences dir", exc_info=True) + def _migrate_flat(self) -> None: - """Move any experiences/subdir/file.md → experiences/file.md (one-time migration).""" - exp_root = self._jails["experiences"].root + """Move any 
workflows/subdir/file.md → workflows/file.md (one-time migration).""" + exp_root = self._jails["workflows"].root for md_file in list(exp_root.rglob("*.md")): rel = md_file.relative_to(exp_root) if len(rel.parts) <= 1: @@ -285,9 +320,9 @@ def _migrate_flat(self) -> None: parent = md_file.parent if parent != exp_root and not any(parent.iterdir()): parent.rmdir() - logger.info("Migrated knowledge experience %s → %s", rel, dest.name) + logger.info("Migrated knowledge workflow %s → %s", rel, dest.name) except Exception: - logger.warning("Failed to migrate experience file %s", md_file, exc_info=True) + logger.warning("Failed to migrate workflow file %s", md_file, exc_info=True) # -- path validation --------------------------------------------------- @@ -326,7 +361,7 @@ def _jail(self, category: str) -> ConfinedDir: def list_all(self, category: str) -> list[dict[str, Any]]: """List all knowledge entries in *category*. - Returns a list of dicts with ``title``, ``tags``, ``path``, + Returns a list of dicts with ``title``, ``path``, ``source``, and ``created`` parsed from front matter. For rules, also includes ``description`` and ``alwaysApply``. """ @@ -345,7 +380,6 @@ def list_all(self, category: str) -> list[dict[str, Any]]: rel = str(md_file.relative_to(jail.root)).replace("\\", "/") item: dict[str, Any] = { "title": km.title, - "tags": km.tags, "path": rel, "source": km.source, "created": km.created, @@ -353,9 +387,9 @@ def list_all(self, category: str) -> list[dict[str, Any]]: if category == "rules": item["description"] = km.description item["alwaysApply"] = km.always_apply - if category == "experiences": + if category == "workflows": # Surface session-distillation provenance so the frontend can - # find an existing session experience by workspace id + # find an existing session workflow by workspace id # without re-reading every file. See design-docs/24. 
if km.source_workspace_id: item["sourceWorkspaceId"] = km.source_workspace_id @@ -394,7 +428,15 @@ def write(self, category: str, path: str, content: str) -> Path: body_limit = KNOWLEDGE_LIMITS.get(category) if body_limit is not None: body_len = len(body.strip()) - if body_len > body_limit: + if category == "workflows": + # Soft guidance: the body_limit is a target the distill agent + # aims for, not a hard cap. Only reject far past the ceiling. + if body_len > WORKFLOW_HARD_MAX: + raise ValueError( + f"workflows body exceeds {WORKFLOW_HARD_MAX} characters " + f"(got {body_len})" + ) + elif body_len > body_limit: raise ValueError( f"{category} body exceeds {body_limit} characters " f"(got {body_len})" @@ -407,12 +449,12 @@ def delete(self, category: str, path: str) -> None: self.validate_path(category, path) self._jail(category).unlink(path) - # -- session experience helpers ---------------------------------------- + # -- session workflow helpers ---------------------------------------- - def find_experience_by_workspace_id( + def find_workflow_by_workspace_id( self, workspace_id: str, ) -> dict[str, Any] | None: - """Return the experience entry whose front matter records this workspace id. + """Return the workflow entry whose front matter records this workspace id. 
Used by the session-scoped distillation flow (design-docs/24) to upsert: when re-distilling the same session, overwrite the same @@ -421,11 +463,11 @@ def find_experience_by_workspace_id( if not workspace_id or not workspace_id.strip(): return None try: - for item in self.list_all("experiences"): + for item in self.list_all("workflows"): if item.get("sourceWorkspaceId") == workspace_id: return item except Exception: - logger.warning("find_experience_by_workspace_id failed", exc_info=True) + logger.warning("find_workflow_by_workspace_id failed", exc_info=True) return None # -- alwaysApply rules helper ------------------------------------------ @@ -511,12 +553,13 @@ def search( """Search across knowledge categories. Tokenizes *query* into keywords and scores each entry using - multi-field weighted matching (title > tags > filename > body). - Whole-string exact matches and table-name / tag overlaps receive + multi-field weighted matching (title > filename > body). + Whole-string exact matches and table-name overlaps receive additional bonuses. Non-manual sources are slightly discounted. *table_names* (optional) are table names from the current session; - when a table name appears in an entry's tags the entry is boosted. + when a table name appears in an entry's title or body the entry is + boosted. 
""" if not query or not query.strip(): return [] @@ -542,7 +585,7 @@ def search( continue score = self._match_score( - q, km.title, km.tags, md_file.stem, body[:200], + q, km.title, md_file.stem, body[:200], source=km.source, table_names=table_names, ) if score <= 0: @@ -552,7 +595,6 @@ def search( scored.append((score, { "category": cat, "title": km.title, - "tags": km.tags, "path": rel, "snippet": body[:500].strip(), "source": km.source, @@ -565,7 +607,6 @@ def search( def _match_score( query: str, title: str, - tags: list[str], stem: str, body_prefix: str, *, @@ -589,13 +630,10 @@ def _match_score( title_l = title.lower() stem_l = stem.lower() body_l = body_prefix.lower() - tags_l = [t.lower() for t in tags] for token in tokens: if token in title_l: score += 100 / n - if any(token in tl for tl in tags_l): - score += 50 / n if token in stem_l: score += 30 / n if token in body_l: @@ -604,14 +642,14 @@ def _match_score( # Whole-string bonus (handles short queries like "ROI") if q and q in title.lower(): score += 50 - if q and any(q in t.lower() for t in tags): - score += 50 - # Table-name → tag overlap bonus + # Table-name overlap bonus (title / body) if table_names: - tags_l_set = {t.lower() for t in tags} + title_l = title.lower() + body_l = body_prefix.lower() for tn in table_names: - if any(tn.lower() in tl for tl in tags_l_set): + tnl = tn.lower() + if tnl in title_l or tnl in body_l: score += 30 # Non-manual source slight discount diff --git a/py-src/data_formulator/routes/agents.py b/py-src/data_formulator/routes/agents.py index cdbf8808..ab77f6e5 100644 --- a/py-src/data_formulator/routes/agents.py +++ b/py-src/data_formulator/routes/agents.py @@ -18,9 +18,6 @@ import html import pandas as pd -from data_formulator.agents.agent_data_transform import DataTransformationAgent -from data_formulator.agents.agent_data_rec import DataRecAgent - from data_formulator.agents.agent_sort_data import SortDataAgent from data_formulator.agents.agent_simple import 
SimpleAgents from data_formulator.auth.identity import get_identity_id @@ -31,14 +28,12 @@ from data_formulator.agents.agent_data_load import DataLoadAgent from data_formulator.agents.agent_data_loading_chat import DataLoadingAgent from data_formulator.agents.agent_code_explanation import CodeExplanationAgent -from data_formulator.agents.agent_chart_insight import ChartInsightAgent -from data_formulator.agents.agent_interactive_explore import InteractiveExploreAgent -from data_formulator.agents.agent_report_gen import ReportGenAgent from data_formulator.agents.client_utils import Client from data_formulator.model_registry import model_registry from data_formulator.knowledge.store import KnowledgeStore -from data_formulator.agents.data_agent import DataAgent +from data_formulator.analyst.agent import AnalystAgent +from data_formulator.analyst.mini_agent import MiniAnalystAgent from data_formulator.agents.agent_language import build_language_instruction from data_formulator.security.sanitize import classify_llm_error, sanitize_error_message from data_formulator.error_handler import json_ok, stream_preflight_error, classify_and_wrap_llm_error @@ -74,29 +69,6 @@ def _get_knowledge_store(identity_id: str) -> KnowledgeStore | None: agent_bp = Blueprint('agent', __name__, url_prefix='/api/agent') -def _try_parse_explore_line(raw_line: str) -> str | None: - """Parse a single line from the exploration agent into an NDJSON line. - - The LLM is prompted to output one JSON object per line. Older prompts - used an SSE-style ``data: `` prefix which we strip for compatibility. - Non-JSON lines (thinking text, blank lines) are silently dropped. 
- """ - line = raw_line.strip() - if not line: - return None - if line.startswith("data:"): - line = line[5:].lstrip() - if not line.startswith("{"): - return None - try: - obj = json.loads(line) - if "type" not in obj: - obj = {"type": "question", **obj} - return json.dumps(obj, ensure_ascii=False) + "\n" - except (json.JSONDecodeError, ValueError): - return None - - def _with_warnings(gen): """Wrap an NDJSON generator to flush accumulated stream warnings. @@ -317,124 +289,20 @@ def sort_data_request(): logger.error("Error in sort-data", exc_info=e) raise classify_and_wrap_llm_error(e) from e -@agent_bp.route('/derive-data', methods=['GET', 'POST']) -def derive_data(): - if not request.is_json: - raise AppError(ErrorCode.INVALID_REQUEST, "Invalid request format") - - logger.info("# derive-data request") - content = request.get_json() - - client = get_client(content['model']) - - input_tables = content["input_tables"] - - instruction = content["extra_prompt"] - - max_repair_attempts = content["max_repair_attempts"] if "max_repair_attempts" in content else 1 - agent_coding_rules = content.get("agent_coding_rules", "") - current_visualization = content.get("current_visualization", None) - expected_visualization = content.get("expected_visualization", None) - - if "additional_messages" in content: - prev_messages = content["additional_messages"] - else: - prev_messages = [] +@agent_bp.route('/analyst-streaming', methods=['GET', 'POST']) +def analyst_streaming(): + """Unified AnalystAgent streaming endpoint (design-docs/35 + /36). 
- logger.debug("== input tables ===>") - for table in input_tables: - logger.debug(f"===> Table: {table['name']} (first 5 rows)") - logger.debug(table['rows'][:5]) - - logger.debug("== user spec ===") - logger.debug(instruction) - - mode = "transform" if current_visualization or expected_visualization else "recommendation" - primary_tables = content.get("primary_tables", None) - - try: - identity_id = get_identity_id() - workspace = get_workspace(identity_id) - max_display_rows = current_app.config['CLI_ARGS']['max_display_rows'] - - language_instruction = get_language_instruction(mode="compact") - - model_info = { - "model": content['model'].get("model", ""), - "endpoint": content['model'].get("endpoint", ""), - "api_base": content['model'].get("api_base", ""), - } - - knowledge_store = _get_knowledge_store(identity_id) - - if mode == "recommendation": - agent = DataRecAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules, language_instruction=language_instruction, max_display_rows=max_display_rows, model_info=model_info, knowledge_store=knowledge_store) - results = agent.run(input_tables, instruction, n=1, prev_messages=prev_messages, primary_tables=primary_tables) - else: - agent = DataTransformationAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules, language_instruction=language_instruction, max_display_rows=max_display_rows, model_info=model_info, knowledge_store=knowledge_store) - results = agent.run(input_tables, instruction, prev_messages, - current_visualization=current_visualization, expected_visualization=expected_visualization) - - repair_attempts = 0 - while ( - isinstance(results, list) - and len(results) > 0 - and results[0].get('status') in ('error', 'other error') - and repair_attempts < max_repair_attempts - ): - error_message = results[0].get('content', 'Unknown error') - logger.warning(f"[derive-data] Code generation failed (attempt {repair_attempts + 1}/{max_repair_attempts}), mode={mode}. 
Error: {error_message}") - new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur." - - prev_dialog = results[0].get('dialog', []) - - try: - if mode == "transform": - results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1) - if mode == "recommendation": - results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1) - except Exception as followup_exc: - logger.exception("derive_data followup failed") - results = [{ - "status": "error", - "content": classify_llm_error(followup_exc), - "code": "", - "dialog": [], - }] - break - - repair_attempts += 1 - logger.warning(f"[derive-data] Repair attempt {repair_attempts}/{max_repair_attempts} result: {results[0].get('status', 'unknown')}") - - if repair_attempts > 0: - logger.warning(f"[derive-data] Finished repair loop after {repair_attempts} attempt(s). Final status: {results[0].get('status', 'unknown')}") - - for r in results: - if r.get("status") in ("error", "other error") and r.get("content"): - r["content"] = sanitize_error_message(r["content"]) - sign_result(r) - - return json_ok({"results": results}) - except Exception as e: - logger.error("Error in derive-data", exc_info=e) - raise classify_and_wrap_llm_error(e) from e + The single ``AnalystAgent`` subsumes both data exploration and report + writing: it gathers with inspection tools, commits one action per turn + (``visualize`` / ``ask_user`` / ``delegate`` / ``write_report``), and streams + the report live on the ``report`` channel (same ``text_delta`` event the + frontend already routes). -@agent_bp.route('/data-agent-streaming', methods=['GET', 'POST']) -def data_agent_streaming(): - """Streaming tool-calling data exploration agent endpoint. 
- - The agent streams events as newline-delimited JSON: - text_delta – streamed text from the agent (narration) - tool_start – agent is about to call a tool (explore/visualize/clarify) - tool_result – tool execution result (visualize results match DataRecAgent format) - clarify – clarification question (loop pauses) - done – turn complete - error – error information - - To resume after a clarification, the client sends: - - trajectory: the trajectory list returned in the clarify event - - user_question: the user's reply (selections + freeform), already - assembled by the frontend (the same string shown in the timeline) + Streams newline-delimited JSON. Terminal events: ``completion`` (the run + finished or hit its budget), ``interact`` (a question widget pauses the run), + and ``error``. To resume after ``interact`` the client sends ``trajectory`` + (from the event) plus ``user_question`` (the assembled reply). """ from data_formulator.error_handler import stream_error_event @@ -454,20 +322,24 @@ def data_agent_streaming(): user_question = content.get("user_question", "") max_iterations = content.get("max_iterations", 5) max_repair_attempts = content.get("max_repair_attempts", 1) + # "mini" swaps in the single-decision MiniAnalystAgent (one visualize/explain + # per run) for small/local models; anything else uses the standard agent. 
+ agent_mode = content.get("agent_mode", "standard") agent_exploration_rules = content.get("agent_exploration_rules", "") agent_coding_rules = content.get("agent_coding_rules", "") focused_thread = content.get("focused_thread", None) other_threads = content.get("other_threads", None) primary_tables = content.get("primary_tables", None) attached_images = content.get("attached_images", None) + charts = content.get("charts", None) resume_trajectory = content.get("trajectory", None) completed_step_count = content.get("completed_step_count", 0) if resume_trajectory is not None and not str(user_question or "").strip(): - return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "user_question is required to resume after clarification")) + return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "user_question is required to resume after interaction")) logger.setLevel(logging.INFO) - logger.info("# data-agent-streaming request") + logger.info(f"# analyst-streaming request (agent_mode={agent_mode})") logger.debug("== input tables ===>") for table in input_tables: logger.debug(f"===> Table: {table['name']}") @@ -479,30 +351,42 @@ def data_agent_streaming(): def generate(): try: - agent = DataAgent( - client=client, - workspace=workspace, - agent_exploration_rules=agent_exploration_rules, - agent_coding_rules=agent_coding_rules, - language_instruction=language_instruction, - max_iterations=max_iterations, - max_repair_attempts=max_repair_attempts, - identity_id=identity_id, - ) + if agent_mode == "mini": + # Single-decision agent; it forces max_iterations=1 internally and + # may run one optional data inspection before answering. 
+ agent = MiniAnalystAgent( + client=client, + workspace=workspace, + agent_exploration_rules=agent_exploration_rules, + agent_coding_rules=agent_coding_rules, + language_instruction=language_instruction, + max_repair_attempts=max_repair_attempts, + identity_id=identity_id, + ) + else: + agent = AnalystAgent( + client=client, + workspace=workspace, + agent_exploration_rules=agent_exploration_rules, + agent_coding_rules=agent_coding_rules, + language_instruction=language_instruction, + max_iterations=max_iterations, + max_repair_attempts=max_repair_attempts, + identity_id=identity_id, + ) trajectory = None if resume_trajectory: # Append the user's reply (already assembled by the frontend - # from option clicks + any typed instructions) as a normal - # user message. The LLM correlates numbered selections back - # to the questions in the immediately preceding assistant - # message. + # from option clicks + any typed instructions) as a normal user + # message; the LLM correlates the selections back to the + # questions in the immediately preceding assistant turn. 
trajectory = list(resume_trajectory) trajectory.append({ "role": "user", "content": user_question, }) - logger.debug("== resuming after clarification ===>") + logger.debug("== resuming after interaction ===>") for event in agent.run( input_tables=input_tables, @@ -513,14 +397,15 @@ def generate(): completed_step_count=completed_step_count, primary_tables=primary_tables, attached_images=attached_images, + charts=charts, ): yield json.dumps(event, ensure_ascii=False) + '\n' - if event.get("type") in ("completion", "clarify", "explain"): + if event.get("type") in ("completion", "interact"): break except Exception as e: - logger.error("Error in data-agent-streaming", exc_info=e) + logger.error("Error in analyst-streaming", exc_info=e) yield stream_error_event(classify_and_wrap_llm_error(e)) logger.setLevel(logging.WARNING) @@ -531,92 +416,6 @@ def generate(): ) -@agent_bp.route('/refine-data', methods=['GET', 'POST']) -def refine_data(): - if not request.is_json: - raise AppError(ErrorCode.INVALID_REQUEST, "Invalid request format") - - logger.info("# refine-data request") - content = request.get_json() - - client = get_client(content['model']) - - input_tables = content["input_tables"] - dialog = content["dialog"] - - new_instruction = content["new_instruction"] - latest_data_sample = content["latest_data_sample"] - max_repair_attempts = content.get("max_repair_attempts", 1) - agent_coding_rules = content.get("agent_coding_rules", "") - current_visualization = content.get("current_visualization", None) - expected_visualization = content.get("expected_visualization", None) - - logger.debug("== input tables ===>") - for table in input_tables: - logger.debug(f"===> Table: {table['name']} (first 5 rows)") - logger.debug(table['rows'][:5]) - - logger.debug("== user spec ===>") - logger.debug(new_instruction) - - try: - identity_id = get_identity_id() - workspace = get_workspace(identity_id) - max_display_rows = current_app.config['CLI_ARGS']['max_display_rows'] - - 
language_instruction = get_language_instruction(mode="compact") - - model_info = { - "model": content['model'].get("model", ""), - "endpoint": content['model'].get("endpoint", ""), - "api_base": content['model'].get("api_base", ""), - } - - knowledge_store = _get_knowledge_store(identity_id) - agent = DataTransformationAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules, language_instruction=language_instruction, max_display_rows=max_display_rows, model_info=model_info, knowledge_store=knowledge_store) - results = agent.followup(input_tables, dialog, latest_data_sample, new_instruction, n=1, - current_visualization=current_visualization, expected_visualization=expected_visualization) - - repair_attempts = 0 - while ( - isinstance(results, list) - and len(results) > 0 - and results[0].get('status') in ('error', 'other error') - and repair_attempts < max_repair_attempts - ): - error_message = results[0].get('content', 'Unknown error') - logger.info(f"[refine-data] Code generation failed (attempt {repair_attempts + 1}/{max_repair_attempts}). Error: {error_message}") - new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur." - prev_dialog = results[0].get('dialog', []) - - try: - results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1) - except Exception as followup_exc: - logger.exception("refine_data followup failed") - results = [{ - "status": "error", - "content": classify_llm_error(followup_exc), - "code": "", - "dialog": [], - }] - break - - repair_attempts += 1 - logger.info(f"[refine-data] Repair attempt {repair_attempts}/{max_repair_attempts} result: {results[0].get('status', 'unknown')}") - - if repair_attempts > 0: - logger.info(f"[refine-data] Finished repair loop after {repair_attempts} attempt(s). 
Final status: {results[0].get('status', 'unknown')}") - - for r in results: - if r.get("status") in ("error", "other error") and r.get("content"): - r["content"] = sanitize_error_message(r["content"]) - sign_result(r) - - return json_ok({"results": results}) - except Exception as e: - logger.error("Error in refine-data", exc_info=e) - raise classify_and_wrap_llm_error(e) from e - @agent_bp.route('/code-expl', methods=['GET', 'POST']) def request_code_expl(): if not request.is_json: @@ -649,191 +448,6 @@ def request_code_expl(): logger.error("Error in code-expl", exc_info=e) raise classify_and_wrap_llm_error(e) from e -@agent_bp.route('/chart-insight', methods=['GET', 'POST']) -def request_chart_insight(): - from data_formulator.error_handler import classify_and_wrap_llm_error - from data_formulator.errors import AppError, ErrorCode - - if not request.is_json: - raise AppError(ErrorCode.INVALID_REQUEST, "Invalid request format") - - logger.info("# chart insight request") - content = request.get_json() - - chart_image = content.get("chart_image", "") - chart_type = content.get("chart_type", "") - field_names = content.get("field_names", []) - input_tables = content.get("input_tables", []) - - if not chart_image: - raise AppError(ErrorCode.VALIDATION_ERROR, "Chart image not available. 
Please retry.") - - model_config = content.get("model") - if not model_config: - raise AppError(ErrorCode.INVALID_REQUEST, "Model configuration is required") - - client = get_client(model_config) - identity_id = get_identity_id() - workspace = get_workspace(identity_id) - - try: - knowledge_store = _get_knowledge_store(identity_id) - agent = ChartInsightAgent(client=client, workspace=workspace, - language_instruction=get_language_instruction(), - knowledge_store=knowledge_store) - candidates = agent.run(chart_image, chart_type, field_names, input_tables) - - if not candidates or len(candidates) == 0: - logger.warning("[chart-insight] failed request_id=%s reason=no_candidates", - getattr(flask.g, 'request_id', '')) - raise AppError(ErrorCode.AGENT_ERROR, "Unable to generate chart insight") - - result = candidates[0] - if result.get('status') != 'ok': - reason = result.get('content', result.get('status', 'unknown')) - logger.warning("[chart-insight] failed request_id=%s reason=candidate_error detail=%s", - getattr(flask.g, 'request_id', ''), reason) - raise AppError(ErrorCode.AGENT_ERROR, "Unable to generate chart insight") - - logger.info("[chart-insight] done request_id=%s takeaway_count=%d", - getattr(flask.g, 'request_id', ''), - len(result.get('takeaways', []))) - return json_ok({"title": result.get("title", ""), - "takeaways": result.get("takeaways", [])}) - - except AppError: - raise - except Exception as e: - logger.error("Error in chart-insight", exc_info=e) - raise classify_and_wrap_llm_error(e) from e - -@agent_bp.route('/get-recommendation-questions', methods=['GET', 'POST']) -def get_recommendation_questions(): - from data_formulator.error_handler import stream_error_event - - if not request.is_json: - return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "Invalid request format")) - - logger.info("# get recommendation questions request") - content = request.get_json() - - client = get_client(content['model']) - input_tables = 
content.get("input_tables", []) - identity_id = get_identity_id() - workspace = get_workspace(identity_id) - - agent_exploration_rules = content.get("agent_exploration_rules", "") - start_question = content.get("start_question", None) - current_chart = content.get("current_chart", None) - focused_thread = content.get("focused_thread", None) - other_threads = content.get("other_threads", None) - primary_tables = content.get("primary_tables", None) - exploration_thread = content.get("exploration_thread", None) - current_data_sample = content.get("current_data_sample", None) - - knowledge_store = _get_knowledge_store(identity_id) - - def generate(): - agent = InteractiveExploreAgent(client=client, workspace=workspace, - agent_exploration_rules=agent_exploration_rules, - language_instruction=get_language_instruction(), - knowledge_store=knowledge_store) - try: - text_buf = "" - for chunk in agent.run( - input_tables, - start_question=start_question, - focused_thread=focused_thread, - other_threads=other_threads, - primary_tables=primary_tables, - current_chart=current_chart, - exploration_thread=exploration_thread, - current_data_sample=current_data_sample, - ): - if isinstance(chunk, dict): - # Flush pending text before emitting structured event - while "\n" in text_buf: - line, text_buf = text_buf.split("\n", 1) - ndjson_line = _try_parse_explore_line(line) - if ndjson_line: - yield ndjson_line - if "type" not in chunk: - chunk = {"type": "question", **chunk} - yield json.dumps(chunk, ensure_ascii=False) + "\n" - continue - text_buf += chunk - while "\n" in text_buf: - line, text_buf = text_buf.split("\n", 1) - ndjson_line = _try_parse_explore_line(line) - if ndjson_line: - yield ndjson_line - if text_buf.strip(): - ndjson_line = _try_parse_explore_line(text_buf) - if ndjson_line: - yield ndjson_line - except Exception as e: - logger.exception("get-recommendation-questions failed") - yield stream_error_event(classify_and_wrap_llm_error(e)) - - return Response( - 
stream_with_context(_with_warnings(generate())), - mimetype='application/x-ndjson', - ) - - -@agent_bp.route('/generate-report-chat', methods=['POST']) -def generate_report_chat(): - """Chat-driven report generation via @report-agent. - - Accepts lightweight context + user prompt. The agent inspects - charts/data on demand via tool calls and streams the report with - embed_chart / embed_table events. - """ - from data_formulator.error_handler import stream_error_event - - if not request.is_json: - return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "Invalid request format")) - - logger.info("# generate report chat request") - content = request.get_json() - - client = get_client(content['model']) - identity_id = get_identity_id() - workspace = get_workspace(identity_id) - - input_tables = content.get("input_tables", []) - charts = content.get("charts", []) - user_prompt = content.get("user_prompt", "Create a report summarizing the exploration.") - focused_thread = content.get("focused_thread", None) - other_threads = content.get("other_threads", None) - primary_tables = content.get("primary_tables", None) - - def generate(): - agent = ReportGenAgent( - client=client, - workspace=workspace, - language_instruction=get_language_instruction(), - ) - try: - for event in agent.run( - input_tables, - charts, - user_prompt=user_prompt, - focused_thread=focused_thread, - other_threads=other_threads, - primary_tables=primary_tables, - ): - yield json.dumps(event, ensure_ascii=False) + '\n' - except Exception as e: - logger.exception("generate-report-chat failed") - yield stream_error_event(classify_and_wrap_llm_error(e)) - - return Response( - stream_with_context(_with_warnings(generate())), - mimetype='application/x-ndjson', - ) - - @agent_bp.route('/refresh-derived-data', methods=['POST']) def refresh_derived_data(): """ diff --git a/py-src/data_formulator/routes/knowledge.py b/py-src/data_formulator/routes/knowledge.py index 901024c1..1a458ba9 100644 --- 
a/py-src/data_formulator/routes/knowledge.py +++ b/py-src/data_formulator/routes/knowledge.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Knowledge management API — CRUD + search + experience distillation. +"""Knowledge management API — CRUD + search + workflow distillation. All endpoints use ``POST`` with JSON body. Access is scoped to the current user via ``get_identity_id()`` and confined via ``ConfinedDir``. @@ -155,44 +155,44 @@ def knowledge_search(): return json_ok({"results": results}) -# ── distill experience ──────────────────────────────────────────────────── +# ── distill workflow ──────────────────────────────────────────────────── -@knowledge_bp.route("/distill-experience", methods=["POST"]) -def distill_experience(): - """Distill user-visible analysis context into a reusable experience. +@knowledge_bp.route("/distill-workflow", methods=["POST"]) +def distill_workflow(): + """Distill user-visible analysis context into a reusable workflow. Session-scoped payload (design-docs/24): - ``experience_context`` carries a list of ``threads`` (one per leaf + ``workflow_context`` carries a list of ``threads`` (one per leaf derived table the user has on screen), each with its own chronological ``events`` array. ``workspace_id`` + ``workspace_name`` bind the resulting file to the active session so re-distilling upserts the same file. - Required body fields: ``experience_context`` and ``model``. + Required body fields: ``workflow_context`` and ``model``. Optional: ``user_instruction`` (natural-language focus hint for the LLM), - ``category_hint`` (sub-directory under experiences/). + ``category_hint`` (sub-directory under workflows/). 
""" data = request.get_json(silent=True) or {} - experience_context = data.get("experience_context") - if not isinstance(experience_context, dict): - raise AppError(ErrorCode.INVALID_REQUEST, "'experience_context' is required") + workflow_context = data.get("workflow_context") + if not isinstance(workflow_context, dict): + raise AppError(ErrorCode.INVALID_REQUEST, "'workflow_context' is required") - threads = experience_context.get("threads") + threads = workflow_context.get("threads") if not isinstance(threads, list) or not threads: raise AppError( ErrorCode.INVALID_REQUEST, - "'experience_context.threads' is required and must be a non-empty list", + "'workflow_context.threads' is required and must be a non-empty list", ) - workspace_id_raw = experience_context.get("workspace_id", "") + workspace_id_raw = workflow_context.get("workspace_id", "") workspace_id = workspace_id_raw.strip() if isinstance(workspace_id_raw, str) else "" - workspace_name_raw = experience_context.get("workspace_name", "") + workspace_name_raw = workflow_context.get("workspace_name", "") workspace_name = workspace_name_raw.strip() if isinstance(workspace_name_raw, str) else "" if not workspace_id or not workspace_name: raise AppError( ErrorCode.INVALID_REQUEST, - "'experience_context.workspace_id' and 'workspace_name' are required", + "'workflow_context.workspace_id' and 'workspace_name' are required", ) model_config = data.get("model") @@ -215,53 +215,55 @@ def distill_experience(): # Build client and run distillation from data_formulator.routes.agents import get_client, _get_ui_lang - from data_formulator.agents.agent_experience_distill import ExperienceDistillAgent + from data_formulator.agents.agent_workflow_distill import WorkflowDistillAgent client = get_client(model_config) - agent = ExperienceDistillAgent( + agent = WorkflowDistillAgent( client=client, language_code=_get_ui_lang(), timeout_seconds=timeout_seconds, ) try: - md_content = agent.run(experience_context, 
user_instruction=user_instruction) + md_content = agent.run(workflow_context, user_instruction=user_instruction) except Exception as exc: - logger.warning("Experience distillation LLM call failed: %s", type(exc).__name__) + logger.warning("Workflow distillation LLM call failed: %s", type(exc).__name__) from data_formulator.error_handler import classify_and_wrap_llm_error raise classify_and_wrap_llm_error(exc) from exc - # Save to knowledge/experiences/ + # Save to knowledge/workflows/ store = KnowledgeStore(user_home) - # Bind the file to the workspace, override title to - # "Experience from : ", and upsert below. - md_content = _apply_session_front_matter(md_content, workspace_id, workspace_name) + # Bind the file to the workspace, set the title to the agent-generated + # descriptive subtitle, and upsert below. + md_content, title_core, filename_hint = _apply_session_front_matter( + md_content, workspace_id, workspace_name, + ) - filename = _experience_filename(workspace_name) + filename = _workflow_filename(filename_hint or title_core or workspace_name) rel_path = f"{category_hint}/{filename}" if category_hint else filename - # Upsert: if a previous experience exists for this workspace at a + # Upsert: if a previous workflow exists for this workspace at a # different path (e.g. user renamed the workspace), delete it after a # successful write so we keep one file per session. 
- existing = store.find_experience_by_workspace_id(workspace_id) + existing = store.find_workflow_by_workspace_id(workspace_id) try: - store.write("experiences", rel_path, md_content) + store.write("workflows", rel_path, md_content) except ValueError as exc: raise AppError(ErrorCode.INVALID_REQUEST, str(exc)) from exc if existing and existing.get("path") and existing["path"] != rel_path: try: - store.delete("experiences", existing["path"]) + store.delete("workflows", existing["path"]) except Exception: logger.warning( - "Failed to delete stale session experience at %s", + "Failed to delete stale session workflow at %s", existing.get("path"), exc_info=True, ) - return json_ok({"path": rel_path, "category": "experiences"}) + return json_ok({"path": rel_path, "category": "workflows"}) # ── helpers for session-scoped distillation ─────────────────────────────── @@ -269,16 +271,21 @@ def distill_experience(): def _apply_session_front_matter( content: str, workspace_id: str, workspace_name: str, -) -> str: - """Override / inject session-binding fields in the experience front matter. - - - Composes the visible ``title`` as ``Experience from <workspace_name>: <subtitle>`` - using the LLM-emitted ``subtitle`` (preferred) or pre-existing - ``title``. The original ``subtitle`` field is removed from the - front matter once consumed. +) -> tuple[str, str, str]: + """Override / inject session-binding fields in the workflow front matter. + + - Sets the visible ``title`` to the agent-emitted descriptive + ``subtitle`` (preferred) or the pre-existing ``title``, with any + legacy ``Workflow from <workspace_name>: `` prefix stripped. The ``subtitle`` + field is removed from the front matter once consumed. + - Consumes the agent-emitted short ``filename`` hint (removed from the + front matter) and returns it so the caller can name the file without + using the long descriptive title. - Stamps ``source_workspace_id`` and ``source_workspace_name`` so the file can be looked up on subsequent distillations.
- Forces ``source: distill`` (idempotent if already set). + + Returns ``(content_with_front_matter, title_core, filename_hint)``. """ from data_formulator.knowledge.store import parse_front_matter @@ -287,27 +294,31 @@ def _apply_session_front_matter( meta = {} subtitle = str(meta.pop("subtitle", "") or "").strip() + filename_hint = str(meta.pop("filename", "") or "").strip() existing_title = str(meta.get("title", "") or "").strip() - # Strip any "Experience from <workspace_name>: " prefix from a prior pass so - # update-mode runs don't double-prefix when the LLM echoes the title. - title_core = subtitle or _strip_experience_prefix(existing_title) + # Strip any legacy "Workflow from <workspace_name>: " (or "Experience from") + # prefix so update-mode runs don't carry it forward. + title_core = subtitle or _strip_workflow_prefix(existing_title) if not title_core: title_core = workspace_name - new_title = f"Experience from {workspace_name}: {title_core}" - meta["title"] = new_title + meta["title"] = title_core meta["source"] = "distill" meta["source_workspace_id"] = workspace_id meta["source_workspace_name"] = workspace_name - return _serialize_front_matter(meta, body) + return _serialize_front_matter(meta, body), title_core, filename_hint + +_EXP_PREFIX_RE = re.compile(r"^\s*(?:Workflow|Experience) from .+?:\s*", re.IGNORECASE) -_EXP_PREFIX_RE = re.compile(r"^\s*Experience from .+?:\s*", re.IGNORECASE) +# Path separators, Windows-reserved chars and control chars that must never +# appear in a filename derived from untrusted LLM output. +_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]+') -def _strip_experience_prefix(title: str) -> str: +def _strip_workflow_prefix(title: str) -> str: return _EXP_PREFIX_RE.sub("", title).strip() @@ -323,16 +334,22 @@ def _serialize_front_matter(meta: dict, body: str) -> str: return f"---\n{yaml_text}\n---\n\n{body_text}" -def _experience_filename(workspace_name: str) -> str: - """Derive a deterministic filename from the workspace name.
+def _workflow_filename(title: str) -> str: + """Slugify an LLM-supplied name into a clean, safe ``.md`` filename. - Re-distilling the same session always lands on the same file. - Falls back to a literal slug when sanitisation rejects the name. + Re-distilling a session upserts by ``source_workspace_id`` (see caller), + so the file is replaced even when the name changes. ``safe_data_filename`` + enforces the security boundary (basename only, no ``.``/``..``); the slug + step just keeps separators and reserved chars out so the name is clean and + portable. Unicode (e.g. CJK) is preserved. """ from data_formulator.datalake.parquet_utils import safe_data_filename - slug = workspace_name.strip().replace(" ", "-").lower()[:80] or "session-experience" + cleaned = _UNSAFE_FILENAME_CHARS.sub("-", title) + cleaned = re.sub(r"\s+", "-", cleaned.strip()) + cleaned = re.sub(r"-{2,}", "-", cleaned) + slug = cleaned.strip(".-").lower()[:80] or "session-workflow" try: return safe_data_filename(f"{slug}.md") except ValueError: - return "session-experience.md" + return "session-workflow.md" diff --git a/src/api/knowledgeApi.ts b/src/api/knowledgeApi.ts index 7c149f3a..a722c00f 100644 --- a/src/api/knowledgeApi.ts +++ b/src/api/knowledgeApi.ts @@ -2,7 +2,7 @@ // Licensed under the MIT License. /** - * Knowledge API client — CRUD, search, and experience distillation. + * Knowledge API client — CRUD, search, and workflow distillation. * * All endpoints use POST with JSON body. Requests go through * {@link fetchWithIdentity} for identity headers and 401 retry. 
@@ -14,11 +14,10 @@ import { apiRequest } from '../app/apiClient'; // ── Types ──────────────────────────────────────────────────────────────── -export type KnowledgeCategory = 'rules' | 'experiences'; +export type KnowledgeCategory = 'rules' | 'workflows'; export interface KnowledgeItem { title: string; - tags: string[]; path: string; source: string; created: string; @@ -27,25 +26,24 @@ export interface KnowledgeItem { /** Rules only: if true the rule is always injected into the agent prompt. */ alwaysApply?: boolean; /** - * Experiences only: workspace id this experience was distilled from. + * Workflows only: workspace id this workflow was distilled from. * Set by the session-scoped distillation flow (design-docs/24); used - * by the KnowledgePanel to find the existing session experience. + * by the KnowledgePanel to find the existing session workflow. */ sourceWorkspaceId?: string; - /** Experiences only: workspace display name at distillation time. */ + /** Workflows only: workspace display name at distillation time. */ sourceWorkspaceName?: string; } export interface KnowledgeLimits { rule_description_max: number; rules: number; - experiences: number; + workflows: number; } export interface KnowledgeSearchResult { category: KnowledgeCategory; title: string; - tags: string[]; path: string; snippet: string; source: string; @@ -122,7 +120,7 @@ export async function searchKnowledge( return data.results ?? []; } -export interface DistillExperienceResult { +export interface DistillWorkflowResult { path: string; category: string; } @@ -134,7 +132,7 @@ export interface DistillExperienceResult { * a deterministic filename + title. `threads` carries one chronological * `events` list per leaf table on screen. 
*/ -export interface SessionExperienceContext { +export interface SessionWorkflowContext { context_id?: string; workspace_id: string; workspace_name: string; @@ -146,18 +144,18 @@ export interface SessionExperienceContext { payload_notes?: string[]; } -export async function distillSessionExperience( - sessionContext: SessionExperienceContext, +export async function distillSessionWorkflow( + sessionContext: SessionWorkflowContext, model: Record, instruction?: string, timeoutSeconds?: number, signal?: AbortSignal, -): Promise { - const { data } = await apiRequest<{ path: string; category: string }>('/api/knowledge/distill-experience', { +): Promise { + const { data } = await apiRequest<{ path: string; category: string }>('/api/knowledge/distill-workflow', { method: 'POST', headers: JSON_HEADERS, body: JSON.stringify({ - experience_context: sessionContext, + workflow_context: sessionContext, model, user_instruction: instruction, timeout_seconds: timeoutSeconds, diff --git a/src/app/App.tsx b/src/app/App.tsx index 17898f0c..aa30ba5e 100644 --- a/src/app/App.tsx +++ b/src/app/App.tsx @@ -51,8 +51,6 @@ import { ListItemText, CircularProgress, LinearProgress, - Switch, - FormControlLabel, } from '@mui/material'; @@ -709,7 +707,7 @@ const ConfigDialog: React.FC = () => { || isNaN(maxStretchFactor) || maxStretchFactor < 1 || maxStretchFactor > 5 || isNaN(frontendRowLimit) || frontendRowLimit < 100 || frontendRowLimit > rowLimitMax} onClick={() => { - dispatch(dfActions.setConfig({formulateTimeoutSeconds, defaultChartWidth, defaultChartHeight, maxStretchFactor, frontendRowLimit, paletteKey})); + dispatch(dfActions.setConfig({formulateTimeoutSeconds, defaultChartWidth, defaultChartHeight, maxStretchFactor, frontendRowLimit, paletteKey, miniMode: config.miniMode ?? 
false})); setOpen(false); }} > @@ -826,7 +824,7 @@ const AppShell: FC = () => { {tables.length === 0 && !activeWorkspace && ( - + {t('appBar.microsoftResearch')} )} @@ -1253,7 +1251,7 @@ export const AppFC: FC = function AppFC(appProps) { {configLoaded && authChecked ? ( ) : ( - + )} {migrationBrowserId && ( (); diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx index a3fe5add..fdc24d72 100644 --- a/src/app/dfSlice.tsx +++ b/src/app/dfSlice.tsx @@ -2,7 +2,7 @@ // Licensed under the MIT License. import { createAsyncThunk, createSlice, PayloadAction, createSelector } from '@reduxjs/toolkit' -import { Channel, Chart, ChartTemplate, DataCleanBlock, DataSourceConfig, EncodingItem, EncodingMap, FieldItem, Trigger, computeInsightKey, ChartInsight, ChartStyleVariant, DraftNode, InteractionEntry, DeriveStatus, ChatMessage, PendingTableLoad, PendingClarification } from '../components/ComponentType' +import { Channel, Chart, ChartTemplate, DataCleanBlock, DataSourceConfig, EncodingItem, EncodingMap, FieldItem, Trigger, ChartStyleVariant, DraftNode, InteractionEntry, DeriveStatus, ChatMessage, PendingTableLoad, PendingClarification } from '../components/ComponentType' import { enableMapSet } from 'immer'; import { DictTable } from "../components/ComponentType"; import { Message } from '../views/MessageSnackbar'; @@ -12,7 +12,6 @@ import { getDataTable } from '../views/ChartUtils'; import { getTriggers, getUrls, computeContentHash } from './utils'; import { apiRequest } from './apiClient'; import { deleteTablesFromWorkspace } from './workspaceService'; -import { getChartPngDataUrl } from './chartCache'; import i18n from '../i18n'; import { Type } from '../data/types'; import { createTableFromFromObjectArray, inferTypeFromValueArray, refineTemporalType } from '../data/utils'; @@ -118,6 +117,7 @@ export interface ClientConfig { maxStretchFactor: number; // max per-axis stretch multiplier for chart sizing (default 2.0) frontendRowLimit: number; // max rows to keep in browser 
when loading locally (non-virtual) paletteKey: string; // active color palette key from tokens.ts + miniMode: boolean; // when true, run the single-turn MiniAnalystAgent (for small/local models) } export interface GeneratedReport { @@ -131,6 +131,25 @@ export interface GeneratedReport { contentSnapshotHash?: string; prompt?: string; status?: 'generating' | 'completed' | 'error'; + // The run's closing answer (the agent's summary of what the report covers). + // Owned by the report — not borrowed onto a table's interaction log — so it + // is rendered and deleted together with the report (no cross-collection + // tagging). `summaryThought` is the agent's reasoning behind that summary. + summary?: string; + summaryThought?: string; + generatingPhase?: 'inspecting' | 'writing'; // transient: which phase the agent is in while generating + // transient: accumulated inspect steps, flipped to done on completion. + // `charts` carries lightweight descriptors (chartType for the icon + a + // display name) so the editor can render a chart-type icon next to a title + // or field list. Kept serializable (no React nodes) for redux-persist. + inspectionSteps?: { + label: string; + doneLabel?: string; // past-tense label shown once the step completes + done: boolean; + charts?: { chartType: string; name: string }[]; + startedAt?: number; // epoch ms when the tool call started + durationMs?: number; // wall time once the step is done + }[]; } export interface DataFormulatorState { @@ -167,7 +186,6 @@ export interface DataFormulatorState { viewMode: 'editor' | 'report'; chartSynthesisInProgress: string[]; - chartInsightInProgress: string[]; /** * Thumbnail PNG data URLs keyed by chart id. Stored in a separate slice @@ -210,6 +228,17 @@ export interface DataFormulatorState { * Transient — not persisted. */ dataLoadingChatResetCounter: number; + /** + * Pending submission queued for the data-loading chat. 
Set by any + * surface that wants to hand a prompt off to the chat (the menu + * agent input box, suggestion auto-run, external dialog callers). + * `DataLoadingChat` consumes it on render: it clears the slot and + * sends the carried payload as a fresh user message. Using a single + * redux slot (instead of props + a reset counter) eliminates the + * cross-tick race where the parent's pre-clear would otherwise + * cancel the auto-send for the new prompt. Transient — not persisted. + */ + dataLoadingChatPending: { text: string; images: string[]; attachments: string[] } | null; /** * Pending hand-off from the Data Agent to a peer agent. Set by the * Data Agent's `delegate` action card; consumed by `DataFormulator` @@ -234,6 +263,9 @@ export interface DataFormulatorState { /** Whether the data source sidebar is expanded (true) or collapsed to rail (false) */ dataSourceSidebarOpen: boolean; + /** Which data source sidebar tab is active. Persisted so it survives session refresh. */ + dataSourceSidebarTab: 'sources' | 'sessions' | 'knowledge'; + /** * One-shot signal asking the sidebar to focus a specific connector * (open the sidebar, switch to sources tab, expand + scroll-into-view @@ -267,7 +299,6 @@ const initialState: DataFormulatorState = { viewMode: 'editor', chartSynthesisInProgress: [], - chartInsightInProgress: [], chartThumbnails: {}, displayRowsTick: 0, @@ -289,6 +320,7 @@ const initialState: DataFormulatorState = { maxStretchFactor: 2.0, frontendRowLimit: DEFAULT_ROW_LIMIT, paletteKey: 'fluent', + miniMode: false, }, dataLoaderConnectParams: {}, @@ -299,6 +331,7 @@ const initialState: DataFormulatorState = { dataLoadingChatMessages: [], dataLoadingChatInProgress: false, dataLoadingChatResetCounter: 0, + dataLoadingChatPending: null, agentHandoffRequest: null, generatedReports: [], @@ -310,6 +343,8 @@ const initialState: DataFormulatorState = { dataSourceSidebarOpen: false, + dataSourceSidebarTab: 'sources', + focusedConnectorId: undefined, } @@ -328,6 
+363,27 @@ const collectAllCharts = (state: DataFormulatorState): Chart[] => { return [...state.charts, ...triggerCharts]; }; +// Category-B encoding-action overrides (e.g. heatmap color scheme) are stored in +// chart.config keyed by the action key, and composed onto the encoding by the +// Flint compiler at assemble time (applyEncodingOverrides). When the user +// re-binds, clears, or swaps a channel that an override declares as a +// `dependency`, the stored value is stale, so we drop it here. This reset is +// host-side policy only; Flint never resets — it just composes +// "override + current encoding". The action's declared dependencies live in the +// template's EncodingActionDef. +const resetDependentEncodingOverrides = (chart: Chart, ...changedChannels: Channel[]) => { + if (!chart.config) return; + const actions = getChartTemplate(chart.chartType)?.encodingActions; + if (!actions || actions.length === 0) return; + for (const action of actions) { + const deps = action.dependencies; + if (!deps) continue; + if (changedChannels.some(ch => deps.includes(ch)) && chart.config[action.key] !== undefined) { + delete chart.config[action.key]; + } + } +}; + let getUnrefedDerivedTableIds = (state: DataFormulatorState) => { // find tables directly referred by charts let allCharts = collectAllCharts(state); @@ -556,97 +612,6 @@ export const fetchCodeExpl = createAsyncThunk( } ); -export const fetchChartInsight = createAsyncThunk( - "dataFormulatorSlice/fetchChartInsight", - async (args: { chartId: string; tableId: string }, { getState }) => { - console.log(">>> call agent to generate chart insight <<<"); - - const state = getState() as DataFormulatorState; - const chart = collectAllCharts(state).find(c => c.id === args.chartId); - if (!chart) throw new Error(`Chart not found: ${args.chartId}`); - - // Wait for chart image to be available in cache (replaces fixed 1.5s delay at call site) - const chartImage = await waitForChartImage(args.chartId); - if (!chartImage) { - 
throw new DOMException('Chart image not ready after waiting', 'ChartImageNotReady'); - } - - // Strip the data:image/png;base64, prefix for the backend - const base64Prefix = 'data:image/png;base64,'; - const imagePayload = chartImage.startsWith(base64Prefix) - ? chartImage.substring(base64Prefix.length) - : chartImage; - - // Collect field names from the encoding map - const fieldNames = Object.values(chart.encodingMap) - .map(enc => enc.fieldID) - .filter((id): id is string => !!id) - .map(id => { - const field = state.conceptShelfItems.find(f => f.id === id); - return field?.name || id; - }); - - // Collect input table info (include source tables for derived tables) - const table = state.tables.find(t => t.id === args.tableId); - const tableIds = table?.derive?.source ? [...table.derive.source, table.id] : [table?.id].filter(Boolean); - const inputTables = [...new Set(tableIds)] - .map(tId => state.tables.find(t => t.id === tId)) - .filter((t): t is DictTable => !!t) - .map(t => ({ - name: t.id, - rows: t.rows, - })); - - // Use unified timeout from user config - const timeoutSeconds = state.config.formulateTimeoutSeconds; - const controller = new AbortController(); - const timeoutId = setTimeout(() => { - controller.abort(new DOMException( - `Chart insight timed out after ${timeoutSeconds}s`, - 'TimeoutError', - )); - }, timeoutSeconds * 1000); - - try { - const { data } = await apiRequest(getUrls().CHART_INSIGHT_URL, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - chart_image: imagePayload, - chart_type: chart.chartType, - field_names: fieldNames, - input_tables: inputTables, - model: dfSelectors.getActiveModel(state), - }), - signal: controller.signal, - }); - - return { title: data.title, takeaways: data.takeaways, - chartId: args.chartId, insightKey: computeInsightKey(chart) }; - } finally { - clearTimeout(timeoutId); - } - } -); - -/** - * Wait for a chart image to appear in chartCache. 
- * Polls at short intervals up to a maximum timeout. - */ -async function waitForChartImage( - chartId: string, - timeoutMs: number = 8000, - intervalMs: number = 250, -): Promise { - const start = Date.now(); - while (Date.now() - start < timeoutMs) { - const image = await getChartPngDataUrl(chartId); - if (image) return image; - await new Promise(r => setTimeout(r, intervalMs)); - } - return undefined; -} - /** Fast fetch: returns the list of server-configured models instantly (no * connectivity check). The UI renders them immediately with a "testing" * spinner so the admin can see every configured model right away. */ @@ -710,7 +675,6 @@ export const dataFormulatorSlice = createSlice({ state.viewMode = 'editor'; state.chartSynthesisInProgress = []; - state.chartInsightInProgress = []; // Preserve serverConfig — it reflects the actual server state, not user state @@ -720,6 +684,7 @@ export const dataFormulatorSlice = createSlice({ state.dataLoadingChatMessages = []; state.dataLoadingChatInProgress = false; state.dataLoadingChatResetCounter = (state.dataLoadingChatResetCounter ?? 0) + 1; + state.dataLoadingChatPending = null; state.generatedReports = []; @@ -749,12 +714,16 @@ export const dataFormulatorSlice = createSlice({ viewMode: state.viewMode, dataLoaderConnectParams: state.dataLoaderConnectParams, dataSourceSidebarOpen: state.dataSourceSidebarOpen, + dataSourceSidebarTab: state.dataSourceSidebarTab, activeWorkspace: action.payload, }; }, setDataSourceSidebarOpen: (state, action: PayloadAction) => { state.dataSourceSidebarOpen = action.payload; }, + setDataSourceSidebarTab: (state, action: PayloadAction<'sources' | 'sessions' | 'knowledge'>) => { + state.dataSourceSidebarTab = action.payload; + }, /** * Ask the data-source sidebar to focus a specific connector.
// Zoom level applied by the resizer. Stored on the Chart (not in config,
// which is for template-defined properties) so it persists with the chart
// across focus changes and session save/load.
//
// A factor of exactly 1 is stored as `undefined` (the field is simply
// absent). Non-finite or non-positive factors are ignored so a bad payload
// can never be persisted onto the chart.
updateChartScaleFactor: (state, action: PayloadAction<{chartId: string, scaleFactor: number}>) => {
    const { chartId, scaleFactor } = action.payload;
    if (!Number.isFinite(scaleFactor) || scaleFactor <= 0) return;
    // Strict equality, matching the other chart-lookup reducers in this slice.
    const chart = collectAllCharts(state).find(c => c.id === chartId);
    if (!chart) return;
    chart.scaleFactor = scaleFactor === 1 ? undefined : scaleFactor;
},

// --- Style variants (see design-docs/28-chart-style-refinement-agent.md) ---

// Replace a variant's spec in place — used by the "refresh stale variant"
// flow (overlay in VisualizationView). The variant id stays the same so
// the chip doesn't visibly disappear and re-appear.
//
// `rationale` / `encodingFingerprint` are only overwritten when provided.
// Passing `configUI` (even an empty array) means the agent re-authored the
// controls: they are replaced and every stored value is reset.
updateStyleVariant: (state, action: PayloadAction<{chartId: string, variantId: string, vlSpec: any, rationale?: string, encodingFingerprint?: string, configUI?: ChartStyleVariant['configUI']}>) => {
    const { chartId, variantId, vlSpec, rationale, encodingFingerprint, configUI } = action.payload;
    const chart = collectAllCharts(state).find(c => c.id === chartId);
    const v = chart?.styleVariants?.find(v => v.id === variantId);
    if (!v) return;
    v.vlSpec = vlSpec;
    if (rationale !== undefined) v.rationale = rationale;
    if (encodingFingerprint !== undefined) v.encodingFingerprint = encodingFingerprint;
    // The agent re-authored the controls; replace them and reset values
    // so stale keys don't linger. An empty list is normalized to undefined.
    if (configUI !== undefined) {
        v.configUI = configUI && configUI.length > 0 ? configUI : undefined;
        v.configValues = undefined;
    }
},

// Set the value of a single generative-UI control on a style variant.
// value === undefined removes the override (falls back to the control's
// defaultValue at render time).
//
// Keys that alias the object's prototype machinery are refused: writing
// `configValues['__proto__']` would re-parent the values object instead of
// storing an override.
updateVariantConfigValue: (state, action: PayloadAction<{chartId: string, variantId: string, key: string, value: any}>) => {
    const { chartId, variantId, key, value } = action.payload;
    if (key === '__proto__' || key === 'constructor' || key === 'prototype') return;
    const chart = collectAllCharts(state).find(c => c.id === chartId);
    const v = chart?.styleVariants?.find(v => v.id === variantId);
    if (!v) return;
    if (value === undefined) {
        if (v.configValues) delete v.configValues[key];
    } else {
        if (!v.configValues) v.configValues = {};
        v.configValues[key] = value;
    }
},
if (changed && chart.activeVariantId) chart.activeVariantId = undefined; @@ -1379,6 +1381,8 @@ export const dataFormulatorSlice = createSlice({ chart.encodingMap[channel1] = { fieldID: enc2.fieldID, aggregate: enc2.aggregate, sortBy: enc2.sortBy, sortOrder: enc2.sortOrder }; chart.encodingMap[channel2] = { fieldID: enc1.fieldID, aggregate: enc1.aggregate, sortBy: enc1.sortBy, sortOrder: enc1.sortOrder }; + // Both channels' bindings changed — drop dependent overrides. + resetDependentEncodingOverrides(chart, channel1, channel2); // Auto-revert to default when the encoding changes (see above). if (chart.activeVariantId) chart.activeVariantId = undefined; } @@ -1665,6 +1669,20 @@ export const dataFormulatorSlice = createSlice({ state.dataLoadingChatMessages = []; state.dataLoadingChatInProgress = false; state.dataLoadingChatResetCounter = (state.dataLoadingChatResetCounter ?? 0) + 1; + // Note: `dataLoadingChatPending` is intentionally left + // alone. Callers that want "fresh slate + auto-send the + // new prompt" dispatch `clearChatMessages` followed by + // `setDataLoadingChatPending` in the same tick — clearing + // pending here would race with that ordering. 
// Queue a prompt (text plus image / attachment lists) for the data-loading
// chat. The payload object is stored as-is.
setDataLoadingChatPending: (
    state,
    action: PayloadAction<{ text: string; images: string[]; attachments: string[] }>,
) => {
    const pendingPrompt = action.payload;
    state.dataLoadingChatPending = pendingPrompt;
},

// Drop any queued data-loading chat prompt.
clearDataLoadingChatPending: (state) => {
    state.dataLoadingChatPending = null;
},

// Update a generated report's content and, optionally, its title, status,
// anchor table and closing summary. Unknown report ids are ignored.
updateGeneratedReportContent: (state, action: PayloadAction<{ id: string; content: string; status?: GeneratedReport['status']; title?: string; triggerTableId?: string; summary?: string; summaryThought?: string }>) => {
    const payload = action.payload;
    const report = state.generatedReports.find(r => r.id === payload.id);
    if (!report) return;

    report.content = payload.content;
    if (payload.title) report.title = payload.title;
    if (payload.status) report.status = payload.status;

    // The run's closing answer is owned by the report (rendered and
    // deleted with it), not appended to a table's interaction log.
    if (payload.summary !== undefined) report.summary = payload.summary;
    if (payload.summaryThought !== undefined) report.summaryThought = payload.summaryThought;

    // Re-anchor the report to the latest table produced during the run so
    // it renders against the newest thread item (like charts).
    if (payload.triggerTableId) report.triggerTableId = payload.triggerTableId;

    // Non-empty content means report text is streaming: show "writing".
    if (payload.content) report.generatingPhase = 'writing';

    // A terminal status clears the transient progress state.
    const finished = payload.status === 'completed' || payload.status === 'error';
    if (finished) {
        report.generatingPhase = undefined;
        report.inspectionSteps = undefined;
    }

    report.updatedAt = Date.now();
},

// Record a progress event for a report that is being generated.
//   kind 'start' (with a label) appends a new pending inspection step;
//   kind 'end' marks the oldest still-pending step as done and, when the
//   step has a start time, records its duration.
// Every event for a known report sets the phase to 'inspecting'.
updateGeneratedReportProgress: (state, action: PayloadAction<{ id: string; kind: 'start' | 'end'; label?: string; doneLabel?: string; charts?: { chartType: string; name: string }[] }>) => {
    const { id, kind, label, doneLabel, charts } = action.payload;
    const report = state.generatedReports.find(r => r.id === id);
    if (!report) return;

    report.generatingPhase = 'inspecting';
    const steps = report.inspectionSteps ?? [];

    switch (kind) {
        case 'start':
            if (label) {
                steps.push({ label, doneLabel, done: false, charts, startedAt: Date.now() });
            }
            break;
        case 'end': {
            // FIFO: resolve the first unfinished step, matching the order in
            // which the backend emits start/end, so concurrent tool calls
            // each resolve independently rather than adding a new message.
            const oldestOpen = steps.find(s => !s.done);
            if (oldestOpen) {
                oldestOpen.done = true;
                if (oldestOpen.startedAt) {
                    oldestOpen.durationMs = Date.now() - oldestOpen.startedAt;
                }
            }
            break;
        }
    }

    report.inspectionSteps = steps;
},
incoming.cleanInProgress = false; incoming.dataLoadingChatInProgress = false; incoming.sessionLoading = false; @@ -1973,58 +2025,6 @@ export const dataFormulatorSlice = createSlice({ }); } }) - .addCase(fetchChartInsight.pending, (state, action) => { - let chartId = action.meta.arg.chartId; - if (!state.chartInsightInProgress.includes(chartId)) { - state.chartInsightInProgress.push(chartId); - } - }) - .addCase(fetchChartInsight.fulfilled, (state, action) => { - let { chartId, insightKey, title, takeaways } = action.payload; - let chart = collectAllCharts(state).find(c => c.id === chartId); - if (chart && (title || (takeaways && takeaways.length > 0))) { - chart.insight = { title, takeaways: takeaways || [], key: insightKey }; - } - state.chartInsightInProgress = state.chartInsightInProgress.filter(id => id !== chartId); - console.log("fetched chart insight", action.payload); - }) - .addCase(fetchChartInsight.rejected, (state, action) => { - const chartId = action.meta.arg.chartId; - state.chartInsightInProgress = state.chartInsightInProgress.filter(id => id !== chartId); - - const errorName = action.error?.name; - - if (errorName === 'AbortError') { - // User cancelled — no feedback needed - return; - } - - if (errorName === 'TimeoutError') { - state.messages.push({ - timestamp: Date.now(), type: 'warning', - component: 'chart insight', - value: i18n.t('messages.chartInsightTimedOut', { - seconds: state.config.formulateTimeoutSeconds, - }), - }); - return; - } - - if (errorName === 'ChartImageNotReady') { - state.messages.push({ - timestamp: Date.now(), type: 'warning', - component: 'chart insight', - value: i18n.t('messages.chartInsightImageNotReady'), - }); - return; - } - - state.messages.push({ - timestamp: Date.now(), type: 'warning', - component: 'chart insight', - value: action.error?.message || i18n.t('messages.chartInsightFailed'), - }); - }) }, }) diff --git a/src/app/restyle.ts b/src/app/restyle.ts index 4e6d4f8c..a760bb79 100644 --- 
a/src/app/restyle.ts +++ b/src/app/restyle.ts @@ -11,7 +11,7 @@ * See design-docs/28-chart-style-refinement-agent.md. */ -import { Chart, ChartStyleVariant, FieldItem, DictTable, computeEncodingFingerprint } from '../components/ComponentType'; +import { Chart, ChartStyleVariant, VariantConfigControl, FieldItem, DictTable, computeEncodingFingerprint } from '../components/ComponentType'; import { assembleVegaChart, getUrls } from './utils'; import { apiRequest } from './apiClient'; import { checkChartAvailability } from '../views/ChartUtils'; @@ -95,7 +95,7 @@ export function buildSpecForRestyle( spec = JSON.parse(JSON.stringify(basedOnVariant.vlSpec)); } else { spec = JSON.parse(JSON.stringify(fullSpec)); - delete spec._computedConfig; + delete spec._options; } delete spec.data; return { spec, basedOnVariantId: basedOnVariant?.id, embeddedData }; @@ -122,7 +122,7 @@ export function buildDataContext( } export type RestyleResult = - | { kind: 'spec'; vlSpec: any; rationale?: string; label?: string } + | { kind: 'spec'; vlSpec: any; rationale?: string; label?: string; configUI?: VariantConfigControl[] } | { kind: 'out_of_scope'; rationale?: string }; /** @@ -172,6 +172,7 @@ export async function callRestyleAgent(args: { vlSpec: newSpec, rationale: typeof data.rationale === 'string' ? data.rationale : undefined, label: typeof data.label === 'string' ? data.label : undefined, + configUI: sanitizeConfigUI(data.configUI), }; } @@ -183,6 +184,7 @@ export function makeVariant(args: { rationale?: string; label: string; basedOnVariantId?: string; + configUI?: VariantConfigControl[]; }): ChartStyleVariant { return { id: `v-${Date.now()}`, @@ -193,5 +195,128 @@ export function makeVariant(args: { encodingFingerprint: computeEncodingFingerprint(args.chart), createdAt: Date.now(), rationale: args.rationale, + configUI: args.configUI && args.configUI.length > 0 ? 
// ---------------------------------------------------------------------------
// Variant generative-UI controls (path-based, no code execution)
// ---------------------------------------------------------------------------

// Object keys that must never be used as a path segment — writing to these can
// pollute Object.prototype and is a classic prototype-pollution sink. The same
// set is used to reject control keys, because a control key is later used as a
// property name on the variant's `configValues` object.
const FORBIDDEN_PATH_SEGMENTS = new Set<string>(['__proto__', 'prototype', 'constructor']);

/**
 * Strictly parse a numeric control parameter.
 *
 * Only real numbers and non-blank numeric strings are accepted. Everything
 * else (null, undefined, '', booleans, arrays, objects) yields NaN, so a
 * missing parameter is never silently read as 0 the way `Number(null)` would.
 */
function toFiniteNumber(v: unknown): number {
    if (typeof v === 'number') return Number.isFinite(v) ? v : NaN;
    if (typeof v === 'string' && v.trim() !== '') {
        const n = Number(v);
        return Number.isFinite(n) ? n : NaN;
    }
    return NaN;
}

/**
 * Validate and normalize the `configUI` array returned by the restyle agent.
 * Drops anything malformed (bad path, missing params, prototype-polluting
 * segments or keys, duplicate keys) so a bad LLM payload can't produce broken
 * or unsafe controls.
 *
 * @param raw Untrusted value from the agent response.
 * @returns The usable controls, or undefined when there are none.
 */
export function sanitizeConfigUI(raw: any): VariantConfigControl[] | undefined {
    if (!Array.isArray(raw)) return undefined;
    const out: VariantConfigControl[] = [];
    const seenKeys = new Set<string>();
    for (const c of raw) {
        if (!c || typeof c !== 'object') continue;
        const key = typeof c.key === 'string' ? c.key.trim() : '';
        const label = typeof c.label === 'string' ? c.label.trim() : '';
        if (!key || !label || seenKeys.has(key)) continue;
        // The key becomes a property name on `configValues`; refuse names
        // that would address the prototype instead of an own property.
        if (FORBIDDEN_PATH_SEGMENTS.has(key)) continue;

        // Path must be a non-empty array of non-negative integers or
        // non-empty strings, with no prototype-polluting segments.
        if (!Array.isArray(c.path) || c.path.length === 0) continue;
        const path: (string | number)[] = [];
        let pathOk = true;
        for (const seg of c.path) {
            if (typeof seg === 'number' && Number.isInteger(seg) && seg >= 0) {
                path.push(seg);
            } else if (typeof seg === 'string' && seg.length > 0 && !FORBIDDEN_PATH_SEGMENTS.has(seg)) {
                path.push(seg);
            } else {
                pathOk = false;
                break;
            }
        }
        if (!pathOk) continue;

        if (c.type === 'binary') {
            out.push({ key, label, path, type: 'binary', defaultValue: !!c.defaultValue });
        } else if (c.type === 'continuous') {
            const min = toFiniteNumber(c.min);
            const max = toFiniteNumber(c.max);
            if (Number.isNaN(min) || Number.isNaN(max) || max <= min) continue;
            const rawStep = toFiniteNumber(c.step);
            const step = rawStep > 0 ? rawStep : undefined; // NaN > 0 is false
            const rawDefault = toFiniteNumber(c.defaultValue);
            // Fall back to `min` when absent; clamp so the control never
            // starts outside its own range.
            const dv = Number.isNaN(rawDefault) ? min : Math.min(max, Math.max(min, rawDefault));
            out.push({ key, label, path, type: 'continuous', min, max, step, defaultValue: dv });
        } else if (c.type === 'discrete') {
            if (!Array.isArray(c.options) || c.options.length === 0) continue;
            const options = c.options
                .filter((o: any) => o && typeof o === 'object' && typeof o.label === 'string')
                .map((o: any) => ({ value: o.value, label: o.label }));
            if (options.length === 0) continue;
            const dv = c.defaultValue !== undefined ? c.defaultValue : options[0].value;
            out.push({ key, label, path, type: 'discrete', options, defaultValue: dv });
        } else {
            continue;
        }
        seenKeys.add(key);
    }
    return out.length > 0 ? out : undefined;
}

/**
 * Write `value` into `obj` at `path`, creating intermediate objects/arrays as
 * needed. Pure data operation — no code execution.
 *
 * An intermediate that is missing, null or a primitive is overwritten with a
 * fresh container: an array when the next segment is a number, otherwise a
 * plain object. Existing object/array intermediates are descended into.
 *
 * @returns true on success; false when `obj` is not an object, `path` is
 *          empty, or any segment is prototype-polluting (in which case
 *          containers for earlier segments may already have been created).
 */
function setAtPath(obj: any, path: (string | number)[], value: any): boolean {
    if (!obj || typeof obj !== 'object' || path.length === 0) return false;
    let node = obj;
    for (let i = 0; i < path.length - 1; i++) {
        const seg = path[i];
        if (typeof seg === 'string' && FORBIDDEN_PATH_SEGMENTS.has(seg)) return false;
        let next = node[seg as any];
        if (next === null || typeof next !== 'object') {
            // Create the right container based on the next segment's type.
            next = typeof path[i + 1] === 'number' ? [] : {};
            node[seg as any] = next;
        }
        node = next;
    }
    const last = path[path.length - 1];
    if (typeof last === 'string' && FORBIDDEN_PATH_SEGMENTS.has(last)) return false;
    node[last as any] = value;
    return true;
}

/**
 * Apply a variant's generative-UI controls to its Vega-Lite spec.
 *
 * For each control we write the current value (from `configValues`, falling
 * back to `defaultValue`) into the spec at the control's `path`. The value may
 * be a scalar or a whole object. This is a pure, declarative transform — no
 * model-authored code runs.
 *
 * Only OWN properties of `configValues` count as overrides. The `in` operator
 * would also match inherited members such as `toString`, turning a control
 * with that key into a function-valued spec entry.
 *
 * @returns A new spec with the controls applied; the input is never mutated.
 *          When there are no controls the original `spec` is returned as-is.
 */
export function applyVariantConfigUI(
    spec: any,
    configUI: VariantConfigControl[] | undefined,
    configValues: Record<string, any> | undefined,
): any {
    if (!configUI || configUI.length === 0) return spec;
    let working: any;
    try { working = structuredClone(spec); } catch { working = JSON.parse(JSON.stringify(spec)); }
    for (const control of configUI) {
        const hasOverride = !!configValues
            && Object.prototype.hasOwnProperty.call(configValues, control.key);
        const value = hasOverride ? configValues![control.key] : control.defaultValue;
        try {
            setAtPath(working, control.path, value);
        } catch (err) {
            console.warn(`[variant-config] control "${control.key}" failed to apply`, err);
        }
    }
    return working;
}
/**
 * Persist a derived / agent-generated table's full rows to IndexedDB for
 * **ephemeral mode**, returning a copy that keeps only a sample + a `virtual`
 * marker in Redux.
 *
 * Behavior:
 *  - A table with no rows is returned unchanged and nothing is saved.
 *  - Rows are saved under `table.virtual.tableId` when present, else `table.id`.
 *  - The returned copy keeps at most the first 1000 rows; `virtual.rowCount`
 *    is the larger of the existing `virtual.rowCount` and the number of rows
 *    passed in.
 *  - On save failure a warning is logged and the original table is returned
 *    unchanged, so the session keeps working with the full rows in Redux.
 *
 * Callers must invoke this only when in ephemeral mode (they own that check).
 *
 * @param workspaceId Workspace key passed to the IndexedDB table-data store.
 * @param table       Table whose `rows` should be persisted.
 * @returns The sampled copy on success, or the original `table` otherwise.
 */
export async function persistEphemeralDerivedTable(workspaceId: string, table: DictTable): Promise<DictTable> {
    // Upper bound on rows retained in Redux once the full data is persisted.
    const MAX_SAMPLE_ROWS = 1000;

    if (table.rows.length === 0) {
        return table;
    }

    const tableId = table.virtual?.tableId || table.id;
    const fullRows = table.rows;
    const fullRowCount = Math.max(table.virtual?.rowCount ?? 0, fullRows.length);

    try {
        await tableDataDB.save(workspaceId, tableId, fullRows);
    } catch (e) {
        console.warn('[persistEphemeralDerivedTable] IndexedDB save failed; keeping full rows in Redux:', e);
        return table;
    }

    const sampleSize = Math.min(MAX_SAMPLE_ROWS, fullRows.length);
    return {
        ...table,
        rows: fullRows.slice(0, sampleSize),
        virtual: { tableId, rowCount: fullRowCount },
    };
}
/**
 * Shared `sx` for floating icon-button pills on canvas overlays (chart
 * visualization toolbar, report action buttons).
 *
 * Resting state: paper-colored fill, 1px divider-colored border, a soft drop
 * shadow and secondary text color. Hover: action-hover fill with the primary
 * color. Spread this and override `color` / `&:hover` for destructive or
 * active variants.
 */
export const floatingPillSx: SxProps = {
    color: 'text.secondary',
    backgroundColor: 'background.paper',
    border: `1px solid ${borderColor.divider}`,
    boxShadow: '0 1px 4px rgba(0,0,0,0.12)',
    transition: transition.normal,
    '&:hover': {
        color: 'primary.main',
        backgroundColor: 'action.hover',
    },
};
- -import { useSelector, useDispatch } from 'react-redux'; -import { useTranslation } from 'react-i18next'; -import { DataFormulatorState, dfActions, dfSelectors, fetchCodeExpl, fetchChartInsight, fetchFieldSemanticType } from './dfSlice'; -import { AppDispatch } from './store'; -import { Chart, FieldItem, Trigger, createDictTable, DictTable } from '../components/ComponentType'; -import { getUrls, getTriggers, translateBackend } from './utils'; -import { apiRequest, streamRequest } from './apiClient'; -import { getErrorMessage } from './errorCodes'; - -export type IdeaItem = { - text: string; - goal: string; - tag: 'deep-dive' | 'pivot' | 'broaden' | 'cross-data' | 'statistical' | string; -}; - -export interface StreamIdeasOptions { - actionTableIds: string[]; - currentTable: DictTable; - onIdeas: (ideas: IdeaItem[]) => void; - onThinkingBuffer: (buffer: string) => void; - onLoadingChange: (loading: boolean) => void; - /** Backend progress phase updates (e.g. "building_context", "generating") */ - onProgress?: (phase: string) => void; - /** Chart image (PNG data URL) for current visualization context */ - currentChartImage?: string | null; - /** Sample rows from the current table */ - currentDataSample?: any[]; - /** Optional start question for idea generation */ - startQuestion?: string; -} - -export interface FormulateDataOptions { - instruction: string; - mode: 'formulate' | 'ideate'; - actionTableIds: string[]; - currentTable: DictTable; - overrideTableId?: string; - currentVisualization?: any; - expectedVisualization?: any; - /** The chart spec to embed in the trigger for the derived table */ - triggerChart: Chart; - /** - * Component-specific chart creation callback. - * Called with the candidate table, refined goal, and resolved concepts. - * Should dispatch chart creation actions and return the focused chart ID (or undefined). 
- */ - createChart: (params: { - candidateTable: DictTable; - refinedGoal: any; - currentConcepts: FieldItem[]; - }) => string | undefined; - /** Called before the request is made */ - onStarted?: () => void; - /** Called on successful formulation */ - onSuccess?: (params: { displayInstruction: string; candidateTable: DictTable; focusedChartId?: string }) => void; - /** Called on error */ - onError?: (error: any) => void; - /** Called after the request completes (success or error) */ - onFinally?: () => void; -} - -function generateTableId(tables: DictTable[]): string { - let tableSuffix = Number.parseInt((Date.now() - Math.floor(Math.random() * 10000)).toString().slice(-6)); - let tableId = `table-${tableSuffix}`; - while (tables.find(t => t.id === tableId) !== undefined) { - tableSuffix += 1; - tableId = `table-${tableSuffix}`; - } - return tableId; -} - -/** - * Shared hook for data formulation and idea streaming. - * Used by both EncodingShelfCard (chart-aware formulation) and ChartRecBox (NL-driven formulation). - */ -export function useFormulateData() { - const dispatch = useDispatch(); - const { t } = useTranslation(); - const tables = useSelector((state: DataFormulatorState) => state.tables); - const config = useSelector((state: DataFormulatorState) => state.config); - const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems); - const charts = useSelector(dfSelectors.getAllCharts); - const activeModel = useSelector(dfSelectors.getActiveModel); - - /** - * Resolve the actual chart that's rendered for a derived table. The - * `trigger.chart` saved on the table is just an "Auto" stub generated - * during the agent run — the chart the user actually sees lives in the - * Redux `charts` slice. Mirrors the lookup in `SimpleChartRecBox`. 
- */ - function resolveChartForTable(tableId: string) { - return charts.find(c => c.tableRef === tableId && c.source === 'trigger') - || charts.find(c => c.tableRef === tableId); - } - - /** Map a chart's encodingMap to `{ channel: fieldName }` (skips empties). */ - function chartEncodingsByName(chart: Chart | undefined): Record { - if (!chart?.encodingMap) return {}; - return Object.fromEntries( - Object.entries(chart.encodingMap) - .filter(([, v]: [string, any]) => v?.fieldID) - .map(([k, v]: [string, any]) => { - const field = conceptShelfItems.find(f => f.id === v.fieldID); - return [k, field?.name || v.fieldID]; - }) - ); - } - - /** - * Build a rich focused thread from the current table's derivation chain. - * Each step includes: user question, display instruction, chart type + encodings, - * created table metadata, and agent summary. - */ - function buildFocusedThread(currentTable: DictTable): any[] { - if (!currentTable.derive || currentTable.anchored) return []; - const triggers = getTriggers(currentTable, tables); - return triggers.map(trigger => { - const resultTable = tables.find(t2 => t2.id === trigger.resultTableId); - const interaction = trigger.interaction || []; - const userPrompt = interaction.find(e => e.role === 'prompt')?.content; - const instruction = interaction.find(e => e.role === 'instruction'); - const summary = interaction.find(e => e.role === 'summary'); - // Resolve the actual rendered chart (not the trigger's "Auto" stub) - // so chart_type + encodings reflect what the user is looking at. - const resolvedChart = resolveChartForTable(trigger.resultTableId); - return { - user_question: userPrompt || instruction?.content || '', - display_instruction: instruction?.displayContent || instruction?.content || '', - agent_thinking: instruction?.plan, - agent_summary: summary?.content, - table_name: resultTable?.virtual?.tableId || trigger.resultTableId, - columns: resultTable?.names || [], - row_count: resultTable?.virtual?.rowCount ?? 
resultTable?.rows?.length ?? 0, - chart_type: resolvedChart?.chartType || '', - encodings: chartEncodingsByName(resolvedChart), - }; - }); - } - - /** - * Build a legacy exploration thread (flat table list) for backward compatibility. - */ - function buildExplorationThread(currentTable: DictTable): any[] { - if (!currentTable.derive || currentTable.anchored) return []; - const triggers = getTriggers(currentTable, tables); - return triggers.map(trigger => ({ - name: trigger.resultTableId, - rows: tables.find(t2 => t2.id === trigger.resultTableId)?.rows, - description: `Derive from ${tables.find(t2 => t2.id === trigger.resultTableId)?.derive?.source}`, - })); - } - - /** - * Build peripheral thread summaries — leaf tables in the workspace that - * are NOT part of the focused chain. Mirrors the data agent's Tier 3 - * context (`SimpleChartRecBox.exploreFromChat`): all leaves except the - * focused one, with per-step `display → chart_type (encodings)` lines - * using resolved field names. - */ - function buildOtherThreads(currentTable: DictTable): any[] { - // Collect all table IDs in the focused thread - const focusedIds = new Set(); - if (currentTable.derive && !currentTable.anchored) { - const triggers = getTriggers(currentTable, tables); - for (const t of triggers) { - focusedIds.add(t.resultTableId); - } - } - focusedIds.add(currentTable.id); - - // Find every leaf table (no children, or all children anchored) that - // is derived from somewhere and NOT part of the focused chain. 
- const otherThreads: any[] = []; - for (const table of tables) { - if (focusedIds.has(table.id)) continue; - if (!table.derive) continue; - const children = tables.filter(c => c.derive?.trigger?.tableId === table.id); - const isLeaf = children.length === 0 || children.every(c => c.anchored); - if (!isLeaf) continue; - - const triggers = getTriggers(table, tables); - if (triggers.length === 0) continue; - - const STEP_FINDING_CHAR_LIMIT = 200; - const steps = triggers.map(trigger => { - const instr = trigger.interaction?.find(e => e.role === 'instruction'); - const label = instr?.displayContent || instr?.content || trigger.resultTableId; - // Use the actual rendered chart, not the trigger's "Auto" stub. - const chart = resolveChartForTable(trigger.resultTableId); - const chartType = chart?.chartType && chart.chartType !== 'Auto' ? chart.chartType : ''; - const encStr = Object.entries(chartEncodingsByName(chart)) - .map(([k, v]) => `${k}: ${v}`) - .join(', '); - // Per-step agent commentary: the `summary` entry the visualize - // action emits after running this step. - let finding = trigger.interaction?.find( - e => e.role === 'summary', - )?.content?.trim() || ''; - if (finding.length > STEP_FINDING_CHAR_LIMIT) { - finding = finding.slice(0, STEP_FINDING_CHAR_LIMIT - 1).trimEnd() + '…'; - } - const head = `${label}${chartType ? ` → ${chartType}` : ''}${encStr ? ` (${encStr})` : ''}`; - return finding ? `${head} — finding: ${finding}` : head; - }); - - const sourceTableId = triggers[0].tableId; - const sourceTable = tables.find(t => t.id === sourceTableId); - otherThreads.push({ - source_table: sourceTable?.virtual?.tableId || sourceTableId, - leaf_table: table.virtual?.tableId || table.id, - step_count: triggers.length, - steps, - }); - } - return otherThreads; - } - - /** - * Stream ideas/recommendations from the exploration agent via SSE. 
- */ - async function streamIdeas(options: StreamIdeasOptions): Promise { - const { - actionTableIds, currentTable, - onIdeas, onThinkingBuffer, onLoadingChange, onProgress, - currentChartImage, currentDataSample, - startQuestion, - } = options; - - onLoadingChange(true); - onThinkingBuffer(""); - onIdeas([]); - - let timeoutId: ReturnType | undefined; - let timedOut = false; - try { - const focusedThread = buildFocusedThread(currentTable); - const otherThreads = buildOtherThreads(currentTable); - const actionTables = actionTableIds.map(id => tables.find(t => t.id === id) as DictTable); - - const messageBody = JSON.stringify({ - model: activeModel, - input_tables: actionTables.map(t => ({ - name: t.virtual?.tableId || t.id.replace(/\.[^/.]+$/, ""), - })), - primary_tables: (() => { - if (currentTable.derive && !currentTable.anchored) { - return (currentTable.derive.source as string[]).map(id => { - const t = tables.find(tbl => tbl.id === id); - return t?.virtual?.tableId || id.replace(/\.[^/.]+$/, ""); - }); - } - return [currentTable.virtual?.tableId || currentTable.id.replace(/\.[^/.]+$/, "")]; - })(), - ...(focusedThread.length > 0 ? { focused_thread: focusedThread } : {}), - ...(otherThreads.length > 0 ? { other_threads: otherThreads } : {}), - ...(currentChartImage ? { current_chart: currentChartImage } : {}), - ...(startQuestion ? { start_question: startQuestion } : {}), - }); - - const engine = getUrls().GET_RECOMMENDATION_QUESTIONS; - const controller = new AbortController(); - timeoutId = setTimeout(() => { timedOut = true; controller.abort(); }, config.formulateTimeoutSeconds * 1000); - - const questions: IdeaItem[] = []; - for await (const event of streamRequest(engine, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: messageBody, - }, controller.signal)) { - if (event.type === 'error') { - throw new Error(event.error ? 
getErrorMessage(event.error) : t('messages.error')); - } - if (event.type === 'warning') { - dispatch(dfActions.addMessages({ - timestamp: Date.now(), type: 'warning', - component: 'exploration', - value: (event as any).warning?.message ?? 'Warning from server', - })); - continue; - } - if (event.type === 'progress') { - onProgress?.((event as any).phase); - continue; - } - if (event.type === 'question' && (event as any).text) { - questions.push({ - text: (event as any).text, - goal: (event as any).goal, - tag: (event as any).tag || 'deep-dive', - }); - onIdeas([...questions]); - continue; - } - if ((event as any).text) { - onThinkingBuffer((event as any).text); - } - } - clearTimeout(timeoutId); - timeoutId = undefined; - - if (questions.length === 0) { - throw new Error('No valid results returned from agent'); - } - } catch (error) { - if (error instanceof DOMException && error.name === 'AbortError') { - if (timedOut) { - dispatch(dfActions.addMessages({ - timestamp: Date.now(), type: 'warning', - component: 'exploration', - value: t('messages.agent.suggestionsTimedOut', { seconds: config.formulateTimeoutSeconds }), - })); - } - } else { - dispatch(dfActions.addMessages({ - timestamp: Date.now(), - type: "error", - component: "chart builder", - value: error instanceof Error ? error.message : t('messages.agent.unexpectedError'), - detail: error instanceof Error ? error.message : 'Unknown error', - })); - } - } finally { - if (timeoutId) clearTimeout(timeoutId); - onLoadingChange(false); - } - } - - /** - * Formulate data: send instruction to derive/refine endpoint and process the result. - * Handles request building, dialog continuation, table/concept creation, and error handling. - * Chart creation is delegated to the caller via the createChart callback. 
- */ - async function formulateData(options: FormulateDataOptions): Promise { - const { - instruction, mode, actionTableIds, currentTable, - overrideTableId, currentVisualization, expectedVisualization, - triggerChart, createChart, - onStarted, onSuccess, onError, onFinally, - } = options; - - if (actionTableIds.length === 0) return; - - onStarted?.(); - - const actionTables = actionTableIds.map(id => tables.find(t => t.id === id) as DictTable); - - // Build input_tables payload (shared across all request variants) - const inputTablesPayload = actionTables.map(t => ({ - name: t.virtual?.tableId || t.id.replace(/\.[^/.]+$/, ""), - rows: t.rows, - })); - - // Determine primary table names for agent context prioritization - // For derived tables, all source tables are primary; for source tables, just the current one - const primaryTableNames = (() => { - if (currentTable.derive && !currentTable.anchored) { - return (currentTable.derive.source as string[]).map(id => { - const t = tables.find(tbl => tbl.id === id); - return t?.virtual?.tableId || id.replace(/\.[^/.]+$/, ""); - }); - } - return [currentTable.virtual?.tableId || currentTable.id.replace(/\.[^/.]+$/, "")]; - })(); - - // Build base request body - let messageBody: any = { - mode, - input_tables: inputTablesPayload, - primary_tables: primaryTableNames, - extra_prompt: instruction, - model: activeModel, - ...(currentVisualization ? { current_visualization: currentVisualization } : {}), - ...(expectedVisualization ? 
{ expected_visualization: expectedVisualization } : {}), - }; - let engine = getUrls().DERIVE_DATA; - - // Handle dialog continuation / refinement - if (currentTable.derive?.dialog && !currentTable.anchored) { - const sourceTableIds = currentTable.derive.source; - const tableIdsChanged = !sourceTableIds.every((id: string) => actionTableIds.includes(id)) || - !actionTableIds.every(id => sourceTableIds.includes(id)); - - if (mode === 'ideate' || tableIdsChanged) { - // Start fresh with prior dialog as additional context - messageBody.additional_messages = currentTable.derive.dialog; - engine = getUrls().DERIVE_DATA; - } else { - // Refine: continue existing dialog - messageBody = { - mode, - input_tables: inputTablesPayload, - dialog: currentTable.derive.dialog, - latest_data_sample: currentTable.rows.slice(0, 10), - new_instruction: instruction, - model: activeModel, - ...(currentVisualization ? { current_visualization: currentVisualization } : {}), - ...(expectedVisualization ? { expected_visualization: expectedVisualization } : {}), - }; - engine = getUrls().REFINE_DATA; - } - } - - const controller = new AbortController(); - let timedOut = false; - const timeoutId = setTimeout(() => { timedOut = true; controller.abort(); }, config.formulateTimeoutSeconds * 1000); - - apiRequest(engine, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(messageBody), - signal: controller.signal, - }) - .then(({ data }) => { - if (!data.results || data.results.length === 0) { - dispatch(dfActions.addMessages({ - "timestamp": Date.now(), - "component": "chart builder", - "type": "error", - "value": "No result is returned from the data formulation agent. 
Please try again.", - })); - onError?.(new Error("No results returned")); - return; - } - - const candidates = data["results"].filter((item: any) => item["status"] === "ok"); - - if (candidates.length === 0) { - const firstResult = data.results[0]; - dispatch(dfActions.addMessages({ - "timestamp": Date.now(), - "type": "error", - "component": "chart builder", - "value": "Data formulation failed, please try again.", - "code": firstResult.code, - "detail": translateBackend(firstResult.content, firstResult.content_code), - "diagnostics": firstResult.diagnostics, - })); - onError?.(new Error("All candidates failed")); - return; - } - - // Process the best candidate - const candidate = candidates[0]; - const code = candidate["code"]; - const codeSignature = candidate["code_signature"]; // HMAC signature from server - const rows = candidate["content"]["rows"]; - const dialog = candidate["dialog"]; - const refinedGoal = candidate['refined_goal']; - const displayInstruction = refinedGoal["display_instruction"]; - - // Determine table ID - let candidateTableId: string; - if (overrideTableId) { - candidateTableId = overrideTableId; - } else if (candidate["content"]["virtual"]) { - candidateTableId = candidate["content"]["virtual"]["table_name"]; - } else { - candidateTableId = generateTableId(tables); - } - - // Create trigger - // Resolve input table names from agent's response - const agentInputTables: string[] = refinedGoal['input_tables'] || []; - const resolvedSourceIds = agentInputTables.length > 0 - ? actionTableIds.filter(id => { - const t = tables.find(tbl => tbl.id === id); - if (!t) return false; - const name = t.virtual?.tableId || t.id.replace(/\.[^/.]+$/, ""); - return agentInputTables.some((n: string) => n.replace(/\.[^/.]+$/, "") === name); - }) - : actionTableIds; - const resolvedSourceNames = (resolvedSourceIds.length > 0 ? 
resolvedSourceIds : actionTableIds).map(id => { - const t = tables.find(tbl => tbl.id === id); - return t?.displayId || t?.virtual?.tableId || id.replace(/\.[^/.]+$/, ""); - }); - const trigger: Trigger = { - tableId: currentTable.id, - resultTableId: candidateTableId, - chart: triggerChart, - interaction: [{ - from: 'user' as const, - to: 'datarec-agent' as const, - role: 'instruction' as const, - content: instruction, - displayContent: displayInstruction, - inputTableNames: resolvedSourceNames, - timestamp: Date.now(), - }], - }; - - // Create candidate table with derive info - const candidateTable = createDictTable(candidateTableId, rows, { - code, - codeSignature, - outputVariable: refinedGoal['output_variable'] || 'result_df', - source: resolvedSourceIds.length > 0 ? resolvedSourceIds : actionTableIds, - dialog, - trigger, - }); - - if (candidate["content"]["virtual"]) { - candidateTable.virtual = { - tableId: candidate["content"]["virtual"]["table_name"], - rowCount: candidate["content"]["virtual"]["row_count"], - }; - } - - // Bootstrap metadata from agent field_metadata (temporary until fetchFieldSemanticType completes) - const fieldMetadata = refinedGoal['field_metadata']; - if (fieldMetadata && typeof fieldMetadata === 'object') { - for (const [fieldName, meta] of Object.entries(fieldMetadata)) { - if (!candidateTable.metadata[fieldName]) continue; - if (typeof meta === 'string') { - // Plain string format: { "field": "SemanticType" } - candidateTable.metadata[fieldName].semanticType = meta; - } else if (typeof meta === 'object' && meta !== null) { - // Dict format: { "field": { "semantic_type": "...", "unit": "...", ... 
} } - const m = meta as Record; - if (m['semantic_type']) { - candidateTable.metadata[fieldName].semanticType = m['semantic_type']; - } - if (m['unit']) { - candidateTable.metadata[fieldName].unit = m['unit']; - } - if (m['intrinsic_domain']) { - candidateTable.metadata[fieldName].intrinsicDomain = m['intrinsic_domain']; - } - } - } - } - - const fieldDisplayNames = refinedGoal['field_display_names']; - if (fieldDisplayNames && typeof fieldDisplayNames === 'object') { - for (const [fieldName, displayName] of Object.entries(fieldDisplayNames)) { - if (candidateTable.metadata[fieldName] && typeof displayName === 'string') { - candidateTable.metadata[fieldName].displayName = displayName; - } - } - } - - // Insert or override table - if (overrideTableId) { - dispatch(dfActions.overrideDerivedTables(candidateTable)); - } else { - dispatch(dfActions.insertDerivedTables(candidateTable)); - } - - // Add missing concepts - const names = candidateTable.names; - const missingNames = names.filter((name: string) => !conceptShelfItems.some(field => field.name === name)); - const conceptsToAdd = missingNames.map((name: string) => ({ - id: `concept-${name}-${Date.now()}`, - name, - source: "custom", - tableRef: "custom", - } as FieldItem)); - - dispatch(dfActions.addConceptItems(conceptsToAdd)); - dispatch(fetchFieldSemanticType(candidateTable)); - dispatch(fetchCodeExpl(candidateTable)); - - // Compute current concepts for chart creation - const currentConcepts = [...conceptShelfItems.filter(c => names.includes(c.name)), ...conceptsToAdd]; - - // Delegate chart creation to the caller - const focusedChartId = createChart({ candidateTable, refinedGoal, currentConcepts }); - - if (focusedChartId) { - dispatch(fetchChartInsight({ chartId: focusedChartId, tableId: candidateTable.id }) as any); - } - - onSuccess?.({ displayInstruction, candidateTable, focusedChartId }); - }) - .catch((error) => { - if (error.name === 'AbortError') { - if (timedOut) { - dispatch(dfActions.addMessages({ 
- timestamp: Date.now(), - component: "chart builder", - type: "warning", - value: t('messages.agent.formulationTimedOut', { seconds: config.formulateTimeoutSeconds }), - })); - } - } else { - console.error(error); - dispatch(dfActions.addMessages({ - timestamp: Date.now(), - component: "chart builder", - type: "error", - value: t('messages.agent.unexpectedError'), - detail: error.message, - })); - } - onError?.(error); - }) - .finally(() => { - clearTimeout(timeoutId); - onFinally?.(); - }); - } - - return { streamIdeas, formulateData }; -} diff --git a/src/app/useKnowledgeStore.ts b/src/app/useKnowledgeStore.ts index 0a6ea65d..6adeb60c 100644 --- a/src/app/useKnowledgeStore.ts +++ b/src/app/useKnowledgeStore.ts @@ -5,7 +5,7 @@ * Knowledge state management — React hooks for knowledge CRUD & search. * * Uses plain React state (not Redux) because knowledge data is server-side - * and only needed by the KnowledgePanel and save-as-experience flows. + * and only needed by the KnowledgePanel and save-as-workflow flows. * Errors are dispatched to the global MessageSnackbar via dfActions.addMessages. 
*/ @@ -40,16 +40,16 @@ export function useKnowledgeStore() { const { t } = useTranslation(); const [rules, setRules] = useState({ ...EMPTY_CATEGORY }); - const [experiences, setExperiences] = useState({ ...EMPTY_CATEGORY }); + const [workflows, setWorkflows] = useState({ ...EMPTY_CATEGORY }); const [searchResults, setSearchResults] = useState([]); const [searching, setSearching] = useState(false); - const DEFAULT_LIMITS: KnowledgeLimits = { rule_description_max: 100, rules: 350, experiences: 2000 }; + const DEFAULT_LIMITS: KnowledgeLimits = { rule_description_max: 100, rules: 350, workflows: 2000 }; const [limits, setLimits] = useState(DEFAULT_LIMITS); - const stateMap = { rules, experiences }; - const setterMap = useRef({ rules: setRules, experiences: setExperiences }); + const stateMap = { rules, workflows }; + const setterMap = useRef({ rules: setRules, workflows: setWorkflows }); const fetchList = useCallback(async (category: KnowledgeCategory) => { const setter = setterMap.current[category]; @@ -71,7 +71,7 @@ export function useKnowledgeStore() { const fetchAll = useCallback(async () => { await Promise.all([ fetchList('rules'), - fetchList('experiences'), + fetchList('workflows'), fetchKnowledgeLimits().then(setLimits).catch(() => { /* best-effort */ }), ]); }, [fetchList]); @@ -184,7 +184,7 @@ export function useKnowledgeStore() { return { rules, - experiences, + workflows, stateMap, limits, searchResults, diff --git a/src/app/utils.tsx b/src/app/utils.tsx index 7688bd80..9be61eb3 100644 --- a/src/app/utils.tsx +++ b/src/app/utils.tsx @@ -28,12 +28,9 @@ export function getUrls() { SCRATCH_BASE_URL: `/api/agent/workspace/scratch`, CODE_EXPL_URL: `/api/agent/code-expl`, - CHART_INSIGHT_URL: `/api/agent/chart-insight`, SERVER_PROCESS_DATA_ON_LOAD: `/api/agent/process-data-on-load`, - DERIVE_DATA: `/api/agent/derive-data`, - REFINE_DATA: `/api/agent/refine-data`, - DATA_AGENT_STREAMING: `/api/agent/data-agent-streaming`, + ANALYST_STREAMING: 
`/api/agent/analyst-streaming`, // these functions involves database UPLOAD_DB_FILE: `/api/tables/upload-db-file`, @@ -51,7 +48,6 @@ export function getUrls() { EXPORT_TABLE_CSV: `/api/tables/export-table-csv`, GET_RECOMMENDATION_QUESTIONS: `/api/agent/get-recommendation-questions`, - GENERATE_REPORT_CHAT: `/api/agent/generate-report-chat`, // Workspace display name (auto-naming) WORKSPACE_NAME: `/api/agent/workspace-name`, diff --git a/src/app/workspaceService.ts b/src/app/workspaceService.ts index 01eaf00d..68088157 100644 --- a/src/app/workspaceService.ts +++ b/src/app/workspaceService.ts @@ -165,7 +165,7 @@ export async function exportWorkspace(id: string): Promise { const EXCLUDED = new Set([ 'models', 'selectedModelId', 'testedModels', 'dataLoaderConnectParams', 'identity', 'serverConfig', - 'chartSynthesisInProgress', 'chartInsightInProgress', + 'chartSynthesisInProgress', 'cleanInProgress', 'sessionLoading', 'sessionLoadingLabel', ]); const serializable: Record = {}; diff --git a/src/components/AnvilLoader.tsx b/src/components/AnvilLoader.tsx index 49471259..8e9c5f9e 100644 --- a/src/components/AnvilLoader.tsx +++ b/src/components/AnvilLoader.tsx @@ -2,7 +2,7 @@ // Licensed under the MIT License. import React, { useEffect, useState } from 'react'; -import { Box, Typography } from '@mui/material'; +import { Box, SxProps, Theme, Typography } from '@mui/material'; import { keyframes } from '@mui/system'; const ROWS = 3; @@ -69,28 +69,40 @@ function BinaryGrid() { ); } -export function AnvilLoader() { +export interface AnvilLoaderProps { + /** Override container height. Defaults to `'100vh'` (full-screen). */ + height?: string | number; + /** Optional text shown below the binary grid. When omitted no text is rendered. */ + label?: React.ReactNode; + /** Extra sx applied to the outermost container. */ + sx?: SxProps; +} + +export function AnvilLoader({ height = '100vh', label, sx }: AnvilLoaderProps) { return ( - - loading data formulator... 
- + {label !== undefined && ( + + {label} + + )} ); } diff --git a/src/components/ChartTemplates.tsx b/src/components/ChartTemplates.tsx index ac5c8d93..5385136a 100644 --- a/src/components/ChartTemplates.tsx +++ b/src/components/ChartTemplates.tsx @@ -99,25 +99,11 @@ export const CHART_ICONS: Record = { // Build CHART_TEMPLATES by adding icons to library template defs // --------------------------------------------------------------------------- -/** Global properties injected into any template that supports column/row faceting. */ -const FACET_AXIS_PROPERTIES = [ - { - key: 'independentYAxis', label: 'Independent Y-Axis', type: 'binary' as const, - visibleWhen: { channels: ['column', 'row'] }, - }, -]; - -function addIcons(defs: { chart: string; channels?: string[]; properties?: any[] }[]): ChartTemplate[] { - return defs.map(def => { - const hasFacetChannels = def.channels?.some(ch => ch === 'column' || ch === 'row'); - const extraProps = hasFacetChannels ? FACET_AXIS_PROPERTIES : []; - const mergedProperties = [...(def.properties || []).filter((p: any) => p.key !== 'independentYAxis'), ...extraProps]; - return { - ...def, - properties: mergedProperties, - icon: CHART_ICONS[def.chart] || , - }; - }) as ChartTemplate[]; +function addIcons(defs: { chart: string }[]): ChartTemplate[] { + return defs.map(def => ({ + ...def, + icon: CHART_ICONS[def.chart] || , + })) as ChartTemplate[]; } export const CHART_TEMPLATES: { [key: string]: ChartTemplate[] } = Object.fromEntries( diff --git a/src/components/ComponentType.tsx b/src/components/ComponentType.tsx index 3746db42..330befb4 100644 --- a/src/components/ComponentType.tsx +++ b/src/components/ComponentType.tsx @@ -335,12 +335,6 @@ export function createDictTable( } } -export interface ChartInsight { - title: string; - takeaways: string[]; - key: string; // "chartType|sortedFieldIds" — used to detect staleness -} - /** * A user-authored "skin" of a chart: a Vega-Lite spec edited via the * style/restyle agent. 
Variants share the chart's encoding and data — they @@ -359,8 +353,45 @@ export interface ChartStyleVariant { encodingFingerprint: string, // see computeEncodingFingerprint(); used to detect staleness createdAt: number, rationale?: string, // optional one-line explanation from the agent + // Generative UI: a few simple knobs the restyle agent attaches to the + // variant so the user can keep tweaking the agent-authored spec without + // re-prompting. While a variant is active these replace the chart-template + // config. See VariantConfigControl and applyVariantConfigUI in app/restyle.ts. + configUI?: VariantConfigControl[], + // Current value for each control, keyed by control.key. Missing key → use + // the control's defaultValue. + configValues?: Record, } +/** + * A single generative-UI control authored by the restyle agent for a style + * variant. Mirrors the shape of ChartPropertyDef (so it can reuse the same + * renderers) but instead of arbitrary code it carries a `path`: the location + * inside the Vega-Lite spec to write the chosen value to. + * + * Applying a control is a pure, declarative "set value at path" operation + * (see applyVariantConfigUI / setAtPath). There is NO code execution — the + * agent only chooses which knob, where it writes, and the allowed values. + * The written value may be a scalar OR a whole object (e.g. a full mark/axis + * sub-spec), which keeps the door open for richer restyle edits while staying + * safe. + */ +export type VariantConfigControl = { + key: string; + label: string; + /** + * Path into the vlSpec where the chosen value is written, as an array of + * object keys / array indices, e.g. ["mark","opacity"] or + * ["encoding","x","axis","labelAngle"]. Intermediate objects are created + * as needed. Prototype-polluting segments are rejected at apply time. 
+ */ + path: (string | number)[]; +} & ( + | { type: 'continuous'; min: number; max: number; step?: number; defaultValue: number } + | { type: 'discrete'; options: { value: any; label: string }[]; defaultValue: any } + | { type: 'binary'; defaultValue: boolean } +); + export type Chart = { id: string, chartType: string, @@ -368,13 +399,15 @@ export type Chart = { tableRef: string, source: "user" | "trigger", config?: Record, // additional chart properties defined by the chart template - insight?: ChartInsight, // AI-generated insight about the visualization + title?: string, // AI-generated chart title (from the analyst's visualize action) + titleKey?: string, // "chartType|sortedFieldIds" snapshot when title was set; used to detect staleness styleVariants?: ChartStyleVariant[], // user-authored style refinements (see ChartStyleVariant) activeVariantId?: string, // id of the variant currently rendered in the focused canvas; undefined = default + scaleFactor?: number, // zoom level applied by the resizer; undefined = 1 (no zoom) unread?: boolean, // true for agent-generated charts the user hasn't focused yet; cleared on focus } -/** Compute a string key for insight invalidation: chartType|sortedFieldIds */ +/** Compute a string key for title-staleness invalidation: chartType|sortedFieldIds */ export function computeInsightKey(chart: Chart): string { const fieldIds = Object.values(chart.encodingMap) .map(enc => enc.fieldID) @@ -409,6 +442,7 @@ export let duplicateChart = (chart: Chart) : Chart => { tableRef: chart.tableRef, source: chart.source, config: chart.config ? JSON.parse(JSON.stringify(chart.config)) : undefined, + scaleFactor: chart.scaleFactor, // styleVariants are intentionally NOT copied: they are user-authored // refinements tied to the chart they were created on. A duplicate is a // fresh canvas. (See design-docs/28-chart-style-refinement-agent.md.) 
diff --git a/src/components/LoadPlanCard.tsx b/src/components/LoadPlanCard.tsx index 9b60effd..b91e54ab 100644 --- a/src/components/LoadPlanCard.tsx +++ b/src/components/LoadPlanCard.tsx @@ -16,8 +16,14 @@ import type { LoadPlan, LoadPlanCandidate, PendingTableLoad } from './ComponentT interface LoadPlanCardProps { plan: LoadPlan; - onConfirm: (selected: LoadPlanCandidate[]) => void; + onConfirm: (selected: LoadPlanCandidate[], opts?: { newWorkspace?: boolean }) => void; confirmed?: boolean; + /** When true, a workspace with existing data is already open, so the + * destination of the load is ambiguous. We then offer two explicit + * actions: add to the current workspace, or load into a fresh one. + * When false (empty/new workspace), a single "Load selected" button + * loads directly with no ambiguity. */ + canLoadInNewWorkspace?: boolean; } // Plans this small auto-expand each row's preview on first render so the @@ -48,7 +54,7 @@ const formatFilterValue = (value: any) => { return Array.isArray(value) ? value.join(', ') : String(value); }; -export const LoadPlanCard: React.FC = ({ plan, onConfirm, confirmed }) => { +export const LoadPlanCard: React.FC = ({ plan, onConfirm, confirmed, canLoadInNewWorkspace }) => { const theme = useTheme(); const { t } = useTranslation(); const [selection, setSelection] = useState>( @@ -143,12 +149,12 @@ export const LoadPlanCard: React.FC = ({ plan, onConfirm, con fetchPreview(candidate, idx); }; - const handleConfirm = async () => { + const handleConfirm = async (newWorkspace = false) => { const selected = plan.candidates.filter((c, i) => selection[i] && !c.resolutionError); if (selected.length === 0) return; setLoading(true); try { - await onConfirm(selected); + await onConfirm(selected, { newWorkspace }); } finally { setLoading(false); } @@ -257,12 +263,47 @@ export const LoadPlanCard: React.FC = ({ plan, onConfirm, con defaultValue: '✓ Loaded', })} + ) : canLoadInNewWorkspace ? 
( + // A workspace with data is already open — make the load + // destination explicit rather than silently appending. + <> + + + ) : ( - ); } \ No newline at end of file diff --git a/src/views/DataFormulator.tsx b/src/views/DataFormulator.tsx index 2c21c15b..7b4ed2d9 100644 --- a/src/views/DataFormulator.tsx +++ b/src/views/DataFormulator.tsx @@ -40,6 +40,7 @@ import { DndProvider } from 'react-dnd' import { HTML5Backend } from 'react-dnd-html5-backend' import { toolName } from '../app/App'; import { DataThread } from './DataThread'; +import { threadPaneWidth } from './threadLayout'; import dfLogo from '../assets/df-logo.png'; import exampleImageTable from "../assets/example-image-table.png"; @@ -301,24 +302,37 @@ export const DataFormulatorFC = ({ }) => { // State for unified data upload dialog const [uploadDialogOpen, setUploadDialogOpen] = useState(false); const [uploadDialogInitialTab, setUploadDialogInitialTab] = useState('menu'); - const [uploadDialogInitialChatPrompt, setUploadDialogInitialChatPrompt] = useState(undefined); - const [uploadDialogInitialChatImages, setUploadDialogInitialChatImages] = useState(undefined); // Loading state for sessions (from Redux, shared with App.tsx) const sessionLoading = useSelector((state: DataFormulatorState) => state.sessionLoading); const sessionLoadingLabel = useSelector((state: DataFormulatorState) => state.sessionLoadingLabel); - const openUploadDialog = (tab: UploadTabType, initialChatPrompt?: string, initialChatImages?: string[]) => { + const openUploadDialog = (tab: UploadTabType) => { // If no workspace is active, generate an ID (backend creates folder lazily on first data op) if (!activeWorkspace) { dispatch(dfActions.setActiveWorkspace({ id: generateSessionId(), displayName: 'Untitled Session' })); } setUploadDialogInitialTab(tab); - setUploadDialogInitialChatPrompt(initialChatPrompt); - setUploadDialogInitialChatImages(initialChatImages); setUploadDialogOpen(true); }; + // Seed the Data Loading chat through 
the single redux `pending` slot, + // then navigate to the extract tab. This is the one channel that + // carries text, images, AND file attachments as first-class fields — + // replacing the older `initialChatPrompt/Images` props that silently + // dropped file attachments (they had no dedicated field and only + // survived if their name was baked into the prompt text). + const startDataLoadingChat = (text: string, images: string[] = [], attachments: string[] = []) => { + if (text.trim().length > 0 || images.length > 0 || attachments.length > 0) { + // Fresh query replaces any prior conversation. + if (dataLoadingChatMessages.length > 0) { + dispatch(dfActions.clearChatMessages()); + } + dispatch(dfActions.setDataLoadingChatPending({ text, images, attachments })); + } + openUploadDialog('extract'); + }; + // Honor cross-component requests to hand off to the Data Loading // chat seeded with a prompt (e.g. Data Agent's `delegate` card with // target='data_loading'). Hand-offs targeting other agents (e.g. @@ -326,7 +340,7 @@ export const DataFormulatorFC = ({ }) => { const agentHandoffRequest = useSelector((state: DataFormulatorState) => state.agentHandoffRequest); useEffect(() => { if (agentHandoffRequest && agentHandoffRequest.target === 'data_loading') { - openUploadDialog('extract', agentHandoffRequest.prompt, agentHandoffRequest.images); + startDataLoadingChat(agentHandoffRequest.prompt, agentHandoffRequest.images ?? [], []); dispatch(dfActions.clearAgentHandoffRequest()); } // openUploadDialog is stable enough for this purpose; we only react @@ -430,12 +444,9 @@ export const DataFormulatorFC = ({ }) => { //boxShadow: '0 0 5px rgba(0,0,0,0.1)', } - // Discrete column snapping for DataThread - const CARD_WIDTH = 220; - const CARD_GAP = 12; - const COLUMN_WIDTH = CARD_WIDTH + CARD_GAP; - const PANE_PADDING = 48; - const columnSize = (n: number) => n * COLUMN_WIDTH + PANE_PADDING; + // Discrete column snapping for DataThread. 
+ // Column geometry is defined once in ./threadLayout and shared with + // DataThread so the pane snap points line up with the rendered columns. const allotmentRef = useRef(null); const containerRef = useRef(null); @@ -446,13 +457,13 @@ export const DataFormulatorFC = ({ }) => { let bestCols = 1; let bestDist = Infinity; for (let n = 1; n <= 3; n++) { - const dist = Math.abs(raw - columnSize(n)); + const dist = Math.abs(raw - threadPaneWidth(n)); if (dist < bestDist) { bestDist = dist; bestCols = n; } } - const snapped = columnSize(bestCols); + const snapped = threadPaneWidth(bestCols); if (Math.abs(raw - snapped) > 2) { const totalWidth = sizes.reduce((a, b) => a + b, 0); allotmentRef.current.resize([snapped, totalWidth - snapped]); @@ -532,10 +543,10 @@ export const DataFormulatorFC = ({ }) => { let newSize: number | null = null; if (prev <= 1 && threadCount > 1) { // Case 1: was 1 thread, now 2+ → expand to 2 columns - newSize = columnSize(2); + newSize = threadPaneWidth(2); } else if (prev > 1 && threadCount <= 1) { // Case 2: was 2+ threads, now 1 → shrink to 1 column - newSize = columnSize(1); + newSize = threadPaneWidth(1); } // Case 3: was 2+ threads and still 2+ → don't change (respect user's manual setting) @@ -558,7 +569,7 @@ export const DataFormulatorFC = ({ }) => { const fixedSplitPane = ( openUploadDialog((tab ?? 'add-connection') as UploadTabType)} + onOpenUploadDialog={(tab) => openUploadDialog((tab ?? 'menu') as UploadTabType)} connectorRefreshKey={connectorRefreshKey} /> { position: 'relative'}}> {tables.length > 0 ? 
( - + { openUploadDialog(`connector:${conn.id}` as UploadTabType); } }} - onStartChat={(prompt, images) => openUploadDialog('extract', prompt, images)} + onStartChat={(prompt, images, attachments) => startDataLoadingChat(prompt, images, attachments)} hasPriorConversation={dataLoadingChatMessages.length > 0} onResumeChat={() => openUploadDialog('extract')} serverConfig={serverConfig} @@ -778,6 +791,7 @@ export const DataFormulatorFC = ({ }) => { value={wsSort} onChange={(e) => setWsSort(e.target.value as typeof wsSort)} disableUnderline + inputProps={{ 'aria-label': t('workspace.sortSessions') }} IconComponent={(props) => ( )} @@ -923,7 +937,7 @@ export const DataFormulatorFC = ({ }) => { {tables.length > 0 ? fixedSplitPane : ( openUploadDialog((tab ?? 'add-connection') as UploadTabType)} + onOpenUploadDialog={(tab) => openUploadDialog((tab ?? 'menu') as UploadTabType)} connectorRefreshKey={connectorRefreshKey} /> {dataUploadRequestBox} @@ -933,16 +947,9 @@ export const DataFormulatorFC = ({ }) => { open={uploadDialogOpen} onClose={() => { setUploadDialogOpen(false); - // Clear one-shot seed values so the next dialog - // open (e.g. via the upload button) doesn't - // re-fire the agent with a stale prompt/image. - setUploadDialogInitialChatPrompt(undefined); - setUploadDialogInitialChatImages(undefined); refreshPageConnectors(); }} initialTab={uploadDialogInitialTab} - initialChatPrompt={uploadDialogInitialChatPrompt} - initialChatImages={uploadDialogInitialChatImages} onConnectorsChanged={handleConnectorsChanged} /> {/* Loading overlay for session loading */} diff --git a/src/views/DataFrameTable.tsx b/src/views/DataFrameTable.tsx index dd32ecc9..afb03bbd 100644 --- a/src/views/DataFrameTable.tsx +++ b/src/views/DataFrameTable.tsx @@ -127,17 +127,24 @@ export const DataFrameTable: React.FC = ({ )} {displayCols.map((col, i) => { const desc = col !== '\u2026' ? 
columnDescriptions?.[col] : undefined; + if (desc) { + return ( + + + {col} + + + ); + } return ( - - - {col} - - + + {col} + ); })} diff --git a/src/views/DataLoadingChat.tsx b/src/views/DataLoadingChat.tsx index 379e29fb..16255865 100644 --- a/src/views/DataLoadingChat.tsx +++ b/src/views/DataLoadingChat.tsx @@ -38,6 +38,18 @@ import { loadTable } from '../app/tableThunks'; import { LoadPlanCard, PendingLoadsCard } from '../components/LoadPlanCard'; import { TablePreviewRow, TablePreviewData } from '../components/TablePreviewRow'; import { AgentChatInput } from './AgentChatInput'; +import { generateUUID } from '../app/identity'; + +// --------------------------------------------------------------------------- +// Helper: fresh workspace session id (mirrors DataSourceSidebar's scheme) +// --------------------------------------------------------------------------- + +const newWorkspaceSessionId = (): string => { + const now = new Date(); + const date = `${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, '0')}${String(now.getDate()).padStart(2, '0')}`; + const time = `${String(now.getHours()).padStart(2, '0')}${String(now.getMinutes()).padStart(2, '0')}${String(now.getSeconds()).padStart(2, '0')}`; + return `session_${date}_${time}_${generateUUID().slice(0, 4)}`; +}; // --------------------------------------------------------------------------- // Helper: generate table name @@ -60,7 +72,11 @@ const getUniqueTableName = (baseName: string, existingNames: Set): strin // Modern monospace font stack for code blocks const CODE_FONT = '"SF Mono", "Cascadia Code", "Fira Code", Menlo, Consolas, "Liberation Mono", monospace'; -const MarkdownContent: React.FC<{ content: string }> = ({ content }) => { +// Memoized so typing in the chat input (which re-renders the parent +// `DataLoadingChat` on every keystroke) doesn't re-parse every assistant +// message through react-markdown. 
`content` is a stable string per +// committed message, so the default shallow equality is sufficient. +const MarkdownContent = React.memo(({ content }: { content: string }) => { return ( = ({ content }) => { ); -}; +}); // --------------------------------------------------------------------------- // Inline table preview — compact notebook-style @@ -317,10 +333,16 @@ const CodeBlockView: React.FC<{ block: CodeExecution }> = ({ block }) => { // Single chat message bubble // --------------------------------------------------------------------------- -const ChatBubble: React.FC<{ +// Memoized so typing in the chat input doesn't re-render every prior +// bubble (each one renders MarkdownContent + potentially code blocks / +// table previews, which is expensive on long threads). The parent +// stabilises `existingNames` via useMemo so memo equality holds across +// keystrokes. +const ChatBubble = React.memo<{ message: ChatMessage; existingNames: Set; -}> = ({ message, existingNames }) => { + onTableLoaded?: () => void; +}>(({ message, existingNames, onTableLoaded }) => { const theme = useTheme(); const { t } = useTranslation(); const dispatch = useDispatch(); @@ -340,6 +362,9 @@ const ChatBubble: React.FC<{ if (table) { dispatch(loadTable({ table: { ...table, source: { type: 'extract' as const } } })); dispatch(dfActions.confirmTableLoad({ messageId: message.id, tableName: pending.name })); + // Loading data is a deliberate commit — return the + // user to the canvas (the dialog closes via this hook). + onTableLoaded?.(); } } } catch (err) { @@ -435,7 +460,17 @@ const ChatBubble: React.FC<{ c.selected === false)} - onConfirm={async (selected: LoadPlanCandidate[]) => { + canLoadInNewWorkspace={existingNames.size > 0} + onConfirm={async (selected: LoadPlanCandidate[], opts?: { newWorkspace?: boolean }) => { + // When data already exists, the user may choose to + // start a fresh workspace instead of appending. 
We + // reset *before* loading so the X-Workspace-Id + // header (read live from the store at fetch time) + // targets the new session. + if (opts?.newWorkspace) { + const displayName = selected[0]?.displayName || 'Untitled Session'; + dispatch(dfActions.resetForNewWorkspace({ id: newWorkspaceSessionId(), displayName })); + } for (const item of selected) { const sourceTableName = item.sourceTableName || item.displayName; const table = { @@ -468,6 +503,11 @@ const ChatBubble: React.FC<{ })); } dispatch(dfActions.markLoadPlanConfirmed({ messageId: message.id })); + if (selected.length > 0) { + // Return the user to the canvas after a + // deliberate batch load. + onTableLoaded?.(); + } }} /> )} @@ -493,7 +533,7 @@ const ChatBubble: React.FC<{ ); -}; +}); // --------------------------------------------------------------------------- // Tool call label mapping @@ -517,7 +557,10 @@ interface ToolStep { label: string; } -const StreamingIndicator: React.FC<{ content: string; toolSteps: ToolStep[] }> = ({ content, toolSteps }) => { +// Memoized so an unrelated parent re-render (e.g. typing) doesn't +// reflow the shimmer animation. Props are state values that only change +// during an active stream. +const StreamingIndicator = React.memo<{ content: string; toolSteps: ToolStep[] }>(({ content, toolSteps }) => { const theme = useTheme(); return ( @@ -579,55 +622,56 @@ const StreamingIndicator: React.FC<{ content: string; toolSteps: ToolStep[] }> = )} ); -}; +}); // --------------------------------------------------------------------------- // Main chat component // --------------------------------------------------------------------------- -export interface DataLoadingChatProps { - /** - * Optional initial text to pre-fill the chat input when the component - * mounts (or when the value changes). Used by external entry points - * (e.g. landing page quick-chat box) that want to hand off a prompt - * to the agent. 
- */ - initialPrompt?: string; - /** - * Optional images (data URLs) to seed alongside `initialPrompt` — - * used when an external surface (e.g. landing-page agent box) has - * already collected pasted/attached images and is handing them off. - */ - initialImages?: string[]; - /** - * If true, automatically send the `initialPrompt` once on mount/change. - * Otherwise the prompt is only pre-filled and the user presses Enter. - */ - autoSendInitialPrompt?: boolean; +interface DataLoadingChatProps { + /** Called after a table is successfully loaded into the app. The + * upload dialog wires this to its close handler so loading data + * returns the user to the canvas. */ + onTableLoaded?: () => void; } -export const DataLoadingChat: React.FC = ({ - initialPrompt, - initialImages, - autoSendInitialPrompt, -}) => { +export const DataLoadingChat: React.FC = ({ onTableLoaded }) => { const theme = useTheme(); const { t } = useTranslation(); const dispatch = useDispatch(); + // Keep the latest callback in a ref so the stable `handleTableLoaded` + // identity below doesn't bust `ChatBubble`'s memoization even when the + // parent passes a fresh closure each render. + const onTableLoadedRef = useRef(onTableLoaded); + onTableLoadedRef.current = onTableLoaded; + const handleTableLoaded = useCallback(() => { + onTableLoadedRef.current?.(); + }, []); + const chatMessages = useSelector((state: DataFormulatorState) => state.dataLoadingChatMessages); const chatInProgress = useSelector((state: DataFormulatorState) => state.dataLoadingChatInProgress); - // External reset signal — bumped by `clearChatMessages` (manual reset - // button, new menu-level query, full session reset). When it changes - // we abort any in-flight stream, drop partial UI state, and re-seed - // from props if the parent provided a new prompt/images. Without - // this, an in-flight stream's eventual dispatches would leak into - // the freshly-cleared thread. 
+ // External reset signal — bumped by `clearChatMessages` (manual + // reset button, fresh menu submission, full session reset). Used + // here only to abort an in-flight stream and invalidate any + // late-arriving dispatches from that stream via `sessionRef`. const chatResetCounter = useSelector((state: DataFormulatorState) => state.dataLoadingChatResetCounter ?? 0); + // Pending submission queued by an external surface (menu agent + // box, suggestion auto-run, external dialog caller). When set, we + // consume it in a useEffect: clear the slot first, then send the + // carried payload as a fresh user message via `sendMessage`. + // Single redux signal = no prop race. + const pendingSubmission = useSelector((state: DataFormulatorState) => state.dataLoadingChatPending); const existingTables = useSelector((state: DataFormulatorState) => state.tables); const activeModel = useSelector(dfSelectors.getActiveModel); const frontendRowLimit = useSelector((state: DataFormulatorState) => state.config?.frontendRowLimit ?? 2_000_000); - const existingNames = new Set(existingTables.map(tbl => tbl.id)); + // Stable reference across renders that don't actually change the + // table list — without this, every keystroke in the chat input + // would rebuild the Set and bust `ChatBubble`'s memo equality. + const existingNames = React.useMemo( + () => new Set(existingTables.map(tbl => tbl.id)), + [existingTables], + ); const [prompt, setPrompt] = useState(''); const [userImages, setUserImages] = useState([]); @@ -654,95 +698,44 @@ export const DataLoadingChat: React.FC = ({ // Auto-focus input useEffect(() => { inputRef.current?.focus(); }, []); - // ---- External initial prompt handling ------------------------------- - // Pre-fill the input (and optionally auto-send) when `initialPrompt` - // is provided. Used by external surfaces (e.g. landing-page quick chat - // box) to hand off text to the agent. 
Auto-send only fires for a - // fresh conversation — we never auto-resend on remount mid-chat. - const hasExistingMessages = chatMessages.length > 0; - const [pendingAutoSend, setPendingAutoSend] = useState(false); + // ---- Reset handling ------------------------------------------------- + // On external reset (counter bump from `clearChatMessages`): abort + // any in-flight stream, invalidate the current session token, and + // clear local input/streaming UI state. We deliberately do NOT + // re-seed anything here — a reset means "clean slate"; any new + // submission arrives separately via `pendingSubmission`. useEffect(() => { - // Detect external reset: abort, invalidate in-flight session, - // and clear all local UI state before re-seeding. Including - // `chatResetCounter` in the dep list also guarantees that an - // identical-prompt re-submission (same `initialPrompt` string) - // still triggers a fresh auto-send — otherwise the deps would - // be unchanged and the effect would skip. - const isReset = chatResetCounter !== lastResetRef.current; - if (isReset) { - lastResetRef.current = chatResetCounter; - sessionRef.current += 1; - abortControllerRef.current?.abort(); - abortControllerRef.current = null; - setStreamingContent(''); - setStreamingToolSteps([]); - setPrompt(''); - setUserImages([]); - setUserAttachments([]); - setPendingAutoSend(false); - } - - // Extract `[Uploaded: name]` mentions from the seeded prompt and - // surface them as chips. The mention template is locale-aware, - // so we build the regex from the current i18n value rather than - // hard-coding the English form. 
- const mentionTemplate = t('dataLoading.uploaded', { name: '__DF_NAME__' }); - const mentionPattern = mentionTemplate - .replace(/[.*+?^${}()|[\]\\]/g, '\\$&') - .replace('__DF_NAME__', '(.+?)'); - const mentionRegex = new RegExp(mentionPattern, 'g'); - let seededPrompt = initialPrompt || ''; - const extractedNames: string[] = []; - if (seededPrompt) { - let match: RegExpExecArray | null; - while ((match = mentionRegex.exec(seededPrompt)) !== null) { - extractedNames.push(match[1]); - } - if (extractedNames.length > 0) { - seededPrompt = seededPrompt - .replace(new RegExp(`\\n?${mentionPattern}`, 'g'), '') - .trim(); - } - } - - const hasText = seededPrompt.trim().length > 0; - const hasImages = !!initialImages && initialImages.length > 0; - const hasAttachments = extractedNames.length > 0; - // Skip re-seeding the input on a user-initiated reset — the - // reset is meant to restore a clean slate, not re-populate the - // input with the prompt the user just cleared. - if (!isReset) { - if (hasText) setPrompt(seededPrompt); - if (hasAttachments) setUserAttachments(extractedNames); - if (hasImages) { - // Always replace, never append. The prop is a "seed" — each - // change represents a fresh handoff from the parent, not an - // additive update. Appending caused the same image to stack - // up every time the parent re-rendered with a new array ref. - setUserImages([...initialImages!]); - } - } - // Auto-send only on a genuinely fresh open (no prior messages, - // and not a user-initiated reset). Resetting means the user wants - // a clean slate — re-running the seeded prompt against their will - // would defeat the purpose of the reset button. 
- if (autoSendInitialPrompt && !isReset && (hasText || hasImages || hasAttachments) && !hasExistingMessages) { - setPendingAutoSend(true); - } - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [initialPrompt, initialImages, autoSendInitialPrompt, chatResetCounter]); + if (chatResetCounter === lastResetRef.current) return; + lastResetRef.current = chatResetCounter; + sessionRef.current += 1; + abortControllerRef.current?.abort(); + abortControllerRef.current = null; + setStreamingContent(''); + setStreamingToolSteps([]); + setPrompt(''); + setUserImages([]); + setUserAttachments([]); + }, [chatResetCounter]); const stopGeneration = () => { abortControllerRef.current?.abort(); }; // ---- Send message ---- - const sendMessage = useCallback(() => { - const text = prompt.trim(); - if (!text && userImages.length === 0 && userAttachments.length === 0) return; + // Accepts an optional explicit payload so callers (suggestion + // auto-run, pending-submission consume) can submit the exact + // values they just chose without waiting for React state to flush. + // Reading via the `prompt`/`userImages`/`userAttachments` closures + // alone would be racy with batching and could submit the previous + // round's values on a fresh handoff. + const sendMessage = useCallback((explicit?: { text: string; images: string[]; attachments: string[] }) => { + const text = (explicit?.text ?? prompt).trim(); + const imgs = explicit?.images ?? userImages; + const atts = explicit?.attachments ?? 
userAttachments; + if (!text && imgs.length === 0 && atts.length === 0) return; if (chatInProgress) return; - const imageAttachments: ChatAttachment[] = userImages.map((url, i) => ({ + const imageAttachments: ChatAttachment[] = imgs.map((url, i) => ({ type: 'image' as const, name: `image-${i + 1}`, url, })); - const fileAttachments: ChatAttachment[] = userAttachments.map(name => ({ + const fileAttachments: ChatAttachment[] = atts.map(name => ({ type: 'file' as const, name, })); const attachments: ChatAttachment[] = [...imageAttachments, ...fileAttachments]; @@ -751,7 +744,7 @@ export const DataLoadingChat: React.FC = ({ // chips (rendered from `attachments`). The agent payload below // re-injects `[Uploaded: name]` mentions so the backend still // sees the file references inline. - const displayText = text || (userImages.length > 0 ? t('dataLoading.defaultImageMessage') : ''); + const displayText = text || (imgs.length > 0 ? t('dataLoading.defaultImageMessage') : ''); const userMsg: ChatMessage = { id: `msg-${Date.now()}-user`, role: 'user', @@ -967,25 +960,48 @@ export const DataLoadingChat: React.FC = ({ } } })(); - }, [prompt, userImages, chatInProgress, chatMessages, activeModel, existingTables, dispatch, streamingContent, t]); + }, [prompt, userImages, userAttachments, chatInProgress, chatMessages, activeModel, existingTables, dispatch, streamingContent, t]); - // Auto-send the initial prompt once it has been applied to state. + // Consume a queued submission from any external surface (menu + // agent input, suggestion auto-run, or a cross-component handoff + // routed through `startDataLoadingChat`). Single redux signal, + // single consumer — no prop race. + // + // Idempotency note: under React.StrictMode (dev), effects are + // intentionally double-invoked on mount with the *same* closure, + // so the `clearDataLoadingChatPending` dispatch in the first run + // isn't visible to the second run. 
`lastConsumedRef` tracks the + // exact payload object we've already sent, so the second + // invocation short-circuits before calling `sendMessage` again. + const lastConsumedRef = useRef(null); useEffect(() => { - if (!pendingAutoSend) return; + if (!pendingSubmission) return; + if (pendingSubmission === lastConsumedRef.current) return; if (chatInProgress) return; - if (prompt.trim().length === 0 && userImages.length === 0) return; - setPendingAutoSend(false); - sendMessage(); - }, [pendingAutoSend, prompt, userImages, chatInProgress, sendMessage]); + lastConsumedRef.current = pendingSubmission; + const payload = pendingSubmission; + dispatch(dfActions.clearDataLoadingChatPending()); + sendMessage(payload); + }, [pendingSubmission, chatInProgress, sendMessage, dispatch]); // Reuse the shared sample-task list so this in-session panel stays in // sync with the upload-dialog entry point (`UnifiedDataUploadDialog`). + // Auto-run is wired through the redux pending slot so the click — + // even on a chat with prior history — atomically clears the thread + // and queues the new submission. 
const focusSuggestions = React.useMemo(() => buildDataLoadingSuggestions({ t, setInput: setPrompt, setImages: setUserImages, setAttachments: setUserAttachments, - }), [t]); + requestAutoSend: (payload) => { + if (chatMessages.length > 0) { + dispatch(dfActions.clearChatMessages()); + } + dispatch(dfActions.setDataLoadingChatPending(payload)); + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + }), [t, dispatch]); const isEmpty = chatMessages.length === 0 && !streamingContent; @@ -1047,7 +1063,7 @@ export const DataLoadingChat: React.FC = ({ ) : ( <> {chatMessages.map((msg) => ( - + ))} {streamingContent !== '' && } {chatInProgress && !streamingContent && } @@ -1065,7 +1081,7 @@ export const DataLoadingChat: React.FC = ({ onChange={setPrompt} images={userImages} onImagesChange={setUserImages} - onSend={sendMessage} + onSend={() => sendMessage()} onStop={stopGeneration} inProgress={chatInProgress} placeholder={t('dataLoading.placeholder')} @@ -1076,8 +1092,13 @@ export const DataLoadingChat: React.FC = ({ formData.append('file', file); apiRequest(getUrls().SCRATCH_UPLOAD_URL, { method: 'POST', body: formData, - }).then(() => { - setUserAttachments(prev => [...prev, file.name]); + }).then(({ data }) => { + // The backend hash-suffixes the filename + // (e.g. `name_a1b2c3d4.xlsx`). Store the + // server-assigned name so the `[Uploaded:]` + // mention points to the real scratch file. 
+ const scratchName = (data?.path || `scratch/${file.name}`).replace(/^scratch\//, ''); + setUserAttachments(prev => [...prev, scratchName]); }).catch(err => console.error('Upload failed:', err)); }} attachments={userAttachments} diff --git a/src/views/DataSourceSidebar.tsx b/src/views/DataSourceSidebar.tsx index a8c4715d..f8d18759 100644 --- a/src/views/DataSourceSidebar.tsx +++ b/src/views/DataSourceSidebar.tsx @@ -42,7 +42,6 @@ import { VirtualizedCatalogTree } from '../components/VirtualizedCatalogTree'; import StorageIcon from '@mui/icons-material/Storage'; import AddIcon from '@mui/icons-material/Add'; -import FileUploadOutlinedIcon from '@mui/icons-material/FileUploadOutlined'; import FolderOpenIcon from '@mui/icons-material/FolderOpen'; import FolderOutlinedIcon from '@mui/icons-material/FolderOutlined'; import UploadFileIcon from '@mui/icons-material/UploadFile'; @@ -51,9 +50,6 @@ import ChevronLeftIcon from '@mui/icons-material/ChevronLeft'; import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import ChevronRightIcon from '@mui/icons-material/ChevronRight'; import RefreshIcon from '@mui/icons-material/Refresh'; -import ContentPasteOutlinedIcon from '@mui/icons-material/ContentPasteOutlined'; -import SmartToyOutlinedIcon from '@mui/icons-material/SmartToyOutlined'; -import LinkOutlinedIcon from '@mui/icons-material/LinkOutlined'; import LinkOffOutlinedIcon from '@mui/icons-material/LinkOffOutlined'; import DeleteOutlineIcon from '@mui/icons-material/DeleteOutline'; import EditOutlinedIcon from '@mui/icons-material/EditOutlined'; @@ -159,9 +155,15 @@ export const DataSourceSidebar: React.FC<{ // built-in sample_datasets connector is shown there, giving users // something useful to explore immediately. The upgrade message only // appears when they try to add a new connector or link a folder. 
- const [initialTab, setInitialTab] = useState<'upload' | 'sources' | 'sessions' | 'knowledge'>('sources'); + // Stored in Redux so the active tab survives a session refresh. + // Fall back to 'sources' for older persisted state that predates this field. + const initialTab = useSelector((state: DataFormulatorState) => state.dataSourceSidebarTab ?? 'sources'); + const setInitialTab = useCallback( + (tab: 'sources' | 'sessions' | 'knowledge') => dispatch(dfActions.setDataSourceSidebarTab(tab)), + [dispatch], + ); - // External callers (e.g. SaveExperienceButton on success) can ask the + // External callers (e.g. workflow distill on success) can ask the // sidebar to open and switch to a specific tab. useEffect(() => { const handler = (e: Event) => { @@ -277,6 +279,18 @@ export const DataSourceSidebar: React.FC<{ pt: 1, gap: 0.5, }}> + {/* Primary action — adding data is the main task. Styled like + the view-switcher icons but kept in primary color as a + subtle cue; opens the upload dialog (landing menu). */} + + onOpenUploadDialog?.()} sx={{ + color: 'primary.main', + borderRadius: 1, + '&:hover': { bgcolor: 'action.hover' }, + }}> + + + { setInitialTab('sessions'); if (!isOpen) toggle(); else if (initialTab !== 'sessions') setInitialTab('sessions'); else toggle(); }} sx={{ color: isOpen && initialTab === 'sessions' ? 'primary.main' : 'text.secondary', @@ -295,15 +309,6 @@ export const DataSourceSidebar: React.FC<{ - - { setInitialTab('upload'); if (!isOpen) toggle(); else if (initialTab !== 'upload') setInitialTab('upload'); else toggle(); }} sx={{ - color: isOpen && initialTab === 'upload' ? 'primary.main' : 'text.secondary', - bgcolor: isOpen && initialTab === 'upload' ? 'action.selected' : 'transparent', - borderRadius: 1, - }}> - - - { setInitialTab('knowledge'); if (!isOpen) toggle(); else if (initialTab !== 'knowledge') setInitialTab('knowledge'); else toggle(); }} sx={{ color: isOpen && initialTab === 'knowledge' ? 
'primary.main' : 'text.secondary', @@ -323,7 +328,6 @@ export const DataSourceSidebar: React.FC<{ panelWidth={panelWidth} onOpenUploadDialog={onOpenUploadDialog} onCollapse={toggle} - initialTab={initialTab} connectorRefreshKey={connectorRefreshKey} disableConnectors={disableConnectors} /> @@ -347,10 +351,9 @@ const DataSourceSidebarPanel: React.FC<{ panelWidth: number; onOpenUploadDialog?: (tab?: string) => void; onCollapse: () => void; - initialTab?: 'upload' | 'sources' | 'sessions' | 'knowledge'; connectorRefreshKey?: number; disableConnectors?: boolean; -}> = ({ panelWidth, onOpenUploadDialog, onCollapse, initialTab = 'sources', connectorRefreshKey = 0, disableConnectors = false }) => { +}> = ({ panelWidth, onOpenUploadDialog, onCollapse, connectorRefreshKey = 0, disableConnectors = false }) => { const { t } = useTranslation(); const dispatch = useDispatch(); @@ -418,13 +421,15 @@ const DataSourceSidebarPanel: React.FC<{ const [searchCatalogCache, setSearchCatalogCache] = useState>({}); const [searchingCatalog, setSearchingCatalog] = useState>({}); - // Sidebar tab: 'sources' or 'sessions' or 'knowledge' - const [activeTab, setActiveTab] = useState<'upload' | 'sources' | 'sessions' | 'knowledge'>(initialTab); - - // Sync tab when rail icon switches it - useEffect(() => { - setActiveTab(initialTab); - }, [initialTab]); + // Sidebar tab: 'sources' or 'sessions' or 'knowledge'. + // Stored in Redux so the active tab survives a session refresh; the + // rail icons in `DataSourceSidebar` read and write the same Redux value. + // Fall back to 'sources' for older persisted state that predates this field. + const activeTab = useSelector((state: DataFormulatorState) => state.dataSourceSidebarTab ??
'sources'); + const setActiveTab = useCallback( + (tab: 'sources' | 'sessions' | 'knowledge') => dispatch(dfActions.setDataSourceSidebarTab(tab)), + [dispatch], + ); // ── Sessions ───────────────────────────────────────────────────────────── @@ -1292,39 +1297,6 @@ const DataSourceSidebarPanel: React.FC<{ overflow: 'hidden', }}> - {/* ── Upload Data tab ── */} - {activeTab === 'upload' && ( - - - - {t('sidebar.uploadData', { defaultValue: 'Upload Data' })} - - - - - - - - - {[ - { icon: , label: t('upload.uploadFile', { defaultValue: 'Upload file' }), tab: 'upload' }, - { icon: , label: t('upload.pasteData', { defaultValue: 'Paste data' }), tab: 'paste' }, - { icon: , label: t('upload.extractData', { defaultValue: 'Data Assistant' }), tab: 'extract' }, - { icon: , label: t('upload.loadFromUrl', { defaultValue: 'Load from URL' }), tab: 'url' }, - ].map((item, i) => ( - onOpenUploadDialog?.(item.tab)} - sx={{ display: 'flex', alignItems: 'center', gap: 0.75, px: 1.5, py: 0.75, cursor: 'pointer', color: 'text.primary', '&:hover': { bgcolor: 'action.hover' }, userSelect: 'none' }} - > - {item.icon} - {item.label} - - ))} - - - )} - {/* ── Data Connectors tab ── Sample datasets remain available even when external connectors are disabled; the Add Connector / Link Folder @@ -1403,13 +1375,14 @@ const DataSourceSidebarPanel: React.FC<{ size="small" onClick={runCatalogSearch} disabled={anyCatalogSearchLoading} + aria-label={t('sidebar.runCatalogSearch')} sx={{ p: 0.25 }} > {anyCatalogSearchLoading ? 
: } - + @@ -1726,6 +1699,7 @@ const DataSourceSidebarPanel: React.FC<{ value={sessionSort} onChange={(e) => setSessionSort(e.target.value as SessionSortKey)} disableUnderline + inputProps={{ 'aria-label': t('sidebar.sortSessions') }} IconComponent={(props) => ( )} diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx index ae0cc9f1..fff265d2 100644 --- a/src/views/DataThread.tsx +++ b/src/views/DataThread.tsx @@ -83,8 +83,8 @@ import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline'; import SmartToyOutlinedIcon from '@mui/icons-material/SmartToyOutlined'; import { AgentToyIcon } from './AgentToyIcon'; -import AutoAwesomeIcon from '@mui/icons-material/AutoAwesome'; import ArticleIcon from '@mui/icons-material/Article'; +import AutoAwesomeIcon from '@mui/icons-material/AutoAwesome'; import TerminalIcon from '@mui/icons-material/Terminal'; import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline'; import WarningAmberIcon from '@mui/icons-material/WarningAmber'; @@ -97,6 +97,7 @@ import { ViewBorderStyle, ComponentBorderStyle, transition, radius, borderColor import { SimpleChartRecBox } from './SimpleChartRecBox'; import { InteractionEntryCard, ResolvedConversationCard, getEntryGutterIcon, getDefaultGutterIcon, PlanStepsView } from './InteractionEntryCard'; +import { CARD_WIDTH, CARD_GAP, PANEL_PADDING, fittableThreadColumns } from './threadLayout'; /** Pick the icon component for a step line based on known prefixes. */ // Re-exported from InteractionEntryCard — kept here for backward compat with gutter icon logic @@ -149,13 +150,13 @@ const LiveStatus: React.FC<{ startTime?: number; resetKey?: string }> = ({ start * ThinkingBanner — rather than right-flushed in a separate column. * The timer resets whenever the active step changes so it shows the time * spent on the **current** action, not the cumulative wait. 
*/ -export const ThinkingStepsBanner = (steps: string[], sx?: SxProps, startTime?: number) => { +export const ThinkingStepsBanner = (steps: string[], sx?: SxProps, startTime?: number, active: boolean = true) => { const activeStep = steps.length > 0 ? steps[steps.length - 1] : ''; return ( : undefined} /> @@ -1177,7 +1178,7 @@ let SingleThreadGroupView: FC<{ }); // Build a flat sequence of timeline items: [trigger, table, charts, trigger, table, charts, ...] - type TimelineItem = { key: string; element: React.ReactNode; type: 'used-table' | 'trigger' | 'table' | 'chart' | 'leaf-trigger' | 'leaf-table' | 'report' | 'merge'; highlighted: boolean; tableId?: string; chartType?: string; isRunning?: boolean; isClarifying?: boolean; isCompleted?: boolean; interactionEntry?: InteractionEntry; reportId?: string; stepLabel?: string; gutterIcon?: React.ReactNode }; + type TimelineItem = { key: string; element: React.ReactNode; type: 'used-table' | 'trigger' | 'table' | 'chart' | 'leaf-trigger' | 'leaf-table' | 'artifact' | 'merge'; highlighted: boolean; tableId?: string; chartType?: string; isRunning?: boolean; isClarifying?: boolean; isCompleted?: boolean; interactionEntry?: InteractionEntry; reportId?: string; stepLabel?: string; gutterIcon?: React.ReactNode }; let timelineItems: TimelineItem[] = []; // Each running/clarifying draft should produce at most ONE banner per @@ -1332,17 +1333,6 @@ let SingleThreadGroupView: FC<{ const mergeIds = derivedTable?.derive?.source as string[] | undefined; if (entry.role === 'instruction' && mergeNames && mergeNames.length > 0 && mergeIds && mergeIds.length > 0) { const nextKey = sourceSetKey(mergeIds); - // eslint-disable-next-line no-console - console.log('[merge-node check]', { - tableId, - parentTableId: parentTable?.id, - initialSourceIds, - prevSourceKey, - mergeIds, - mergeNames, - nextKey, - fires: nextKey !== prevSourceKey, - }); if (nextKey !== prevSourceKey) { const mergeColor = highlighted ? 
theme.palette.primary.main : theme.palette.text.secondary; timelineItems.push({ @@ -1423,7 +1413,7 @@ let SingleThreadGroupView: FC<{ type: triggerType, highlighted, isRunning, - element: ThinkingStepsBanner(planLines, { px: 1, py: 0.5 }, isRunning ? lastUserTs : undefined), + element: ThinkingStepsBanner(planLines, { px: 1, py: 0.5 }, isRunning ? lastUserTs : undefined, isRunning), }); return; } @@ -1447,7 +1437,7 @@ let SingleThreadGroupView: FC<{ type: triggerType, highlighted, isRunning: false, - element: ThinkingStepsBanner(priorLines, { px: 1, py: 0.5 }), + element: ThinkingStepsBanner(priorLines, { px: 1, py: 0.5 }, undefined, false), }); } } @@ -1475,24 +1465,51 @@ let SingleThreadGroupView: FC<{ } if (runningDraft) renderedDraftIds.add(runningDraft.id); const draftInteraction = runningDraft?.derive?.trigger?.interaction; + // Once a report is streaming for this table, the generating report + // card (with its own spinner + "composing…" text) is the live + // indicator — so we drop the thinking banner entirely to avoid a + // second running state. We still render the prompt entries. + const generatingReports = (reportsByTriggerTable.get(tableId) || []) + .filter(r => r.status === 'generating'); + const hasGeneratingReport = generatingReports.length > 0; if (draftInteraction && draftInteraction.length > 0) { - renderSplitByClarity( - draftInteraction, - runningDraft?.derive?.runningPlan, - true, - 'agent-running-entry', - ); - } else { + if (hasGeneratingReport) { + // Just the prompt/clarity entries — no thinking banner. 
+ pushInteractionEntries(draftInteraction, tableId, triggerType, highlighted, 'agent-running-entry'); + } else { + renderSplitByClarity( + draftInteraction, + runningDraft?.derive?.runningPlan, + true, + 'agent-running-entry', + ); + } + } else if (!hasGeneratingReport) { const runningAction = runningAgentTableIds.get(tableId); - const message = runningAction?.description || t('dataThread.working'); + // `description` is the running plan: steps joined by STEP_SEP + // ('\x1E'), which renders invisibly. Split it back into discrete + // steps and render through the per-step banner (icons + ✓), the + // same way the interaction-present path does — otherwise the + // steps collapse into one run-on blob. + const planLines = (runningAction?.description || '') + .split('\x1E').map(s => s.trim()).filter(Boolean); timelineItems.push({ key: `agent-running-${tableId}`, type: 'chart', highlighted, isRunning: true, - element: ThinkingBanner(message, { px: 1, py: 0.5 }, true, true), + element: planLines.length > 0 + ? ThinkingStepsBanner(planLines, { px: 1, py: 0.5 }) + : ThinkingBanner(t('dataThread.working'), { px: 1, py: 0.5 }, true, true), }); } + // Live generating report card: rendered here (after the prompt, + // inside the running draft block) so it appears below the prompt + // while the report streams in — never above it. Completed reports + // render in the artifact slot via pushReportItems. + for (const report of generatingReports) { + timelineItems.push(buildReportTimelineItem(report, highlighted)); + } } else if (clarifyAgentTableIds.has(tableId)) { const clarifyDraft = draftNodes.find(d => d.derive?.status === 'clarifying' && d.derive.trigger.tableId === tableId); if (clarifyDraft && renderedDraftIds.has(clarifyDraft.id)) { @@ -1555,80 +1572,92 @@ let SingleThreadGroupView: FC<{ }); } }; - - // Push report cards triggered from the given table - const pushReportItems = (tableId: string, highlighted: boolean) => { + // Build a single report's timeline item. 
Shared by pushReportItems + // (completed reports, in the artifact slot) and pushAgentDraftItems (the + // live generating card, rendered inside the running draft block so it sits + // below the prompt + thinking steps rather than above them). + const buildReportTimelineItem = (report: GeneratedReport, highlighted: boolean) => { + const isFocused = focusedId?.type === 'report' && focusedId.reportId === report.id; + const rowHL = highlighted || isFocused; + const isGenerating = report.status === 'generating'; + const gutterIcon = isGenerating + ? + : ; + const card = ( + dispatch(dfActions.setFocused({ type: 'report', reportId: report.id }))} + > + + + + {report.title || t('report.untitled')} + + {isGenerating && ( + + {t('report.composing')} + + )} + + + { e.stopPropagation(); dispatch(dfActions.deleteGeneratedReport(report.id)); }} + > + + + + + + ); + return { + key: `report-${report.id}`, type: 'artifact' as const, highlighted: rowHL, + reportId: report.id, gutterIcon, element: card, + }; + }; + // Push report artifacts triggered from the given table. A report is an + // *output card* of the run (like a chart) that OWNS its closing summary: + // the card renders, then the report's own summary renders right below it + // (from `report.summary`, not a table-anchored interaction entry), so the + // report and its summary live and die together. + // + // Only COMPLETED (non-generating) reports render here. A still-generating + // report is rendered live inside the running draft block (see + // pushAgentDraftItems) so it appears below the prompt, not above it. + const pushReportItems = ( + tableId: string, + highlighted: boolean, + triggerType: 'trigger' | 'leaf-trigger', + ) => { const reports = reportsByTriggerTable.get(tableId); if (!reports) return; for (const report of reports) { - const isFocused = focusedId?.type === 'report' && focusedId.reportId === report.id; - const isGenerating = report.status === 'generating'; - const selectedClassName = isFocused ? 
'selected-report-card' : ''; - timelineItems.push({ - key: `report-${report.id}`, - type: 'report', - reportId: report.id, - highlighted: highlighted || isFocused, - element: ( - { - dispatch(dfActions.setFocused({ type: 'report', reportId: report.id })); - }} - > - - - - {report.title || t('report.untitled')} - - {isGenerating && ( - - {t('report.composing')} - - )} - - - { - e.stopPropagation(); - dispatch(dfActions.deleteGeneratedReport(report.id)); - }} - > - - - - - - ), - }); + if (report.status === 'generating') continue; + timelineItems.push(buildReportTimelineItem(report, highlighted)); + if (report.summary) { + const summaryEntry: InteractionEntry = { + from: 'data-agent', to: 'user', role: 'summary', + plan: report.summaryThought, + content: report.summary, + timestamp: report.updatedAt, + }; + pushInteractionEntries( + [summaryEntry], tableId, triggerType, highlighted, + `report-summary-${report.id}`, + ); + } } }; @@ -1696,10 +1725,14 @@ let SingleThreadGroupView: FC<{ // Add table card and its charts pushTableAndChartItems(tableId, tableElementList[i], 'table', isHighlighted); - // Add report cards anchored to charts of this table - pushReportItems(tableId, isHighlighted); + // Add report cards anchored to this table. Reports are output cards of + // the run (like charts), so they sit with the other outputs, BEFORE the + // run's closing summary. + pushReportItems(tableId, isHighlighted, 'trigger'); - // After-table entries (e.g. summary) + // After-table entries (e.g. summary). The run's closing summary is the + // final word and must follow the LAST artifact (table, chart, or + // report), so it is pushed after pushReportItems. 
const afterTable = afterTableMap.get(tableId); if (afterTable && afterTable.length > 0) { pushInteractionEntries(afterTable, tableId, 'trigger', isHighlighted, 'interaction-after'); @@ -1733,10 +1766,14 @@ let SingleThreadGroupView: FC<{ pushTableAndChartItems(lt.id, _buildTableCard(lt.id), 'leaf-table', isHL); - // Add report cards anchored to charts of this leaf table - pushReportItems(lt.id, isHL); + // Add report cards anchored to this leaf table. Reports are output cards + // of the run (like charts), so they sit with the other outputs, BEFORE + // the run's closing summary. + pushReportItems(lt.id, isHL, 'leaf-trigger'); - // After-table entries (e.g. summary) + // After-table entries (e.g. summary). The run's closing summary is the + // final word and must follow the LAST artifact (table, chart, or + // report), so it is pushed after pushReportItems. const leafAfterEntries = leafAfterTableMap.get(lt.id); if (leafAfterEntries && leafAfterEntries.length > 0) { pushInteractionEntries(leafAfterEntries, lt.id, 'leaf-trigger', isHL, 'leaf-after'); @@ -1751,6 +1788,9 @@ let SingleThreadGroupView: FC<{ const TIMELINE_GAP = '4px'; // gap between timeline and card content const DOT_SIZE = 6; const CARD_PY = '6px'; // vertical padding for each timeline row + // Mirror the left timeline gutter on the right so cards sit visually + // centred in their column instead of hugging the right edge. + const CARD_CONTENT_PR = `${TIMELINE_WIDTH}px`; // CSS `border-style: dashed` stretches dashes to fit each element's // height, so stacked segments end up with mismatched dash lengths. A @@ -1791,13 +1831,10 @@ let SingleThreadGroupView: FC<{ ? theme.palette.primary.main : 'rgba(0,0,0,0.15)'; - // For report items, show an article icon or spinner if generating - if (item.type === 'report') { - const report = item.reportId ? 
generatedReports.find(r => r.id === item.reportId) : undefined; - if (report?.status === 'generating') { - return ; - } - return ; + // Artifact output rows (reports today, future skill outputs) carry + // their own precomputed gutter dot from the artifact factory. + if (item.type === 'artifact') { + return item.gutterIcon ?? ; } // For running agent items, show a spinner instead of a dot @@ -1907,7 +1944,7 @@ let SingleThreadGroupView: FC<{ {isLast && hasContinuationBelow && } {isLast && !hasContinuationBelow && } - + {item.element} @@ -1983,7 +2020,7 @@ let SingleThreadGroupView: FC<{ {isLast && hasContinuationBelow && } {isLast && !hasContinuationBelow && } - + {item.element} @@ -2006,7 +2043,7 @@ let SingleThreadGroupView: FC<{ {isLast && hasContinuationBelow && } {isLast && !hasContinuationBelow && } - + {item.element} @@ -2054,7 +2091,7 @@ let SingleThreadGroupView: FC<{ )} {isLast && !hasContinuationBelow && } - {item.element} @@ -3117,13 +3154,9 @@ export const DataThread: FC<{sx?: SxProps}> = function ({ sx }) { // only one column fits, splitting a long thread into segments adds visual // overhead (continuation headers + ghost parents) without any layout // benefit, since the segments would just stack in the same single column. - const CARD_GAP = 12; // padding + spacing between cards in a column - const PANEL_PADDING = 16; - const CARD_WIDTH = 220; - const COLUMN_WIDTH = CARD_WIDTH + CARD_GAP; - // n columns need: n*CARD_WIDTH + (n-1)*CARD_GAP + PANEL_PADDING - // Solving for n: n <= (containerWidth - PANEL_PADDING + CARD_GAP) / COLUMN_WIDTH - const fittableColumns = Math.max(1, Math.min(3, Math.floor((containerWidth - PANEL_PADDING + CARD_GAP) / COLUMN_WIDTH))); + // Column geometry (CARD_WIDTH / CARD_GAP / PANEL_PADDING) is defined once + // in ./threadLayout and shared with DataFormulator's pane snapping. 
+ const fittableColumns = fittableThreadColumns(containerWidth); // Adaptively split long derivation chains so the resulting segments fill // the available columns evenly. See `computeSplitExtraLeaves` for the diff --git a/src/views/DataView.tsx b/src/views/DataView.tsx index aa1263d0..f6c4a79f 100644 --- a/src/views/DataView.tsx +++ b/src/views/DataView.tsx @@ -2,11 +2,15 @@ // Licensed under the MIT License. import React, { FC, useEffect, useMemo, useCallback } from 'react'; +import ReactDOM from 'react-dom'; import _ from 'lodash'; -import { Typography, Box, Link, Breadcrumbs, useTheme, Fade } from '@mui/material'; +import { Typography, Box, Link, Breadcrumbs, useTheme, Fade, IconButton, Tooltip } from '@mui/material'; import { alpha } from '@mui/material/styles'; +import { useTranslation } from 'react-i18next'; +import OpenInFullIcon from '@mui/icons-material/OpenInFull'; +import CloseFullscreenIcon from '@mui/icons-material/CloseFullscreen'; import '../scss/DataView.scss'; @@ -16,11 +20,19 @@ import { useDispatch, useSelector } from 'react-redux'; import { Type } from '../data/types'; import { SelectableDataGrid } from './SelectableDataGrid'; import { formatCellValue, getColumnAlign } from './ViewUtils'; +import { borderColor } from '../app/tokens'; export interface FreeDataViewProps { + // When true, render a maximize/restore toggle that pops the table into a + // full-canvas overlay. Used wherever the grid is shown inline (under a + // chart, or as the focused-table preview). 
+ maximizable?: boolean; } -export const FreeDataViewFC: FC = function DataView() { +export const FreeDataViewFC: FC = function DataView({ maximizable }) { + + const { t } = useTranslation(); + const [maximized, setMaximized] = React.useState(false); const dispatch = useDispatch(); @@ -32,6 +44,7 @@ export const FreeDataViewFC: FC = function DataView() { const focusedTableId = useMemo(() => { if (!focusedId) return undefined; if (focusedId.type === 'table') return focusedId.tableId; + if (focusedId.type !== 'chart') return undefined; const chartId = focusedId.chartId; const chart = allCharts.find(c => c.id === chartId); return chart?.tableRef; @@ -108,7 +121,7 @@ export const FreeDataViewFC: FC = function DataView() { ]; }, [targetTable, rowData, conceptShelfItems]); - return ( + const grid = ( @@ -124,4 +137,77 @@ export const FreeDataViewFC: FC = function DataView() { ); + + if (!maximizable) { + return grid; + } + + const toggleButton = ( + + setMaximized(m => !m)} + sx={{ + color: 'text.secondary', + '&:hover': { color: 'primary.main', backgroundColor: 'transparent' }, + }} + > + {maximized ? : } + + + ); + + // The toggle button sits just outside the table to the right (a slim panel), + // so it never overlaps the column headers and the card keeps its original look. + // In maximized mode the surrounding overlay already provides the card frame. + const cardSx = maximized ? { overflow: 'hidden' } : { + overflow: 'hidden', + borderRadius: '8px', + border: `1px solid ${borderColor.divider}`, + transition: 'box-shadow 0.2s ease', + '&:hover': { boxShadow: '0 0 8px rgba(25, 118, 210, 0.25)' }, + }; + const framed = ( + + + {grid} + + + {toggleButton} + + + ); + + if (maximized) { + const canvas = typeof document !== 'undefined' ? document.getElementById('vis-view-canvas') : null; + const overlay = ( + <> + {/* Transparent click-catcher — click outside to restore. Scoped to the visualization view. 
*/} + setMaximized(false)} + sx={{ position: 'absolute', inset: 0, zIndex: 1299 }} + /> + {/* Table overlay filling the visualization view. */} + + {framed} + + + ); + return ( + <> + {/* Keep the inline slot occupied so surrounding layout doesn't jump. */} + + {canvas ? ReactDOM.createPortal(overlay, canvas) : overlay} + + ); + } + + return framed; } \ No newline at end of file diff --git a/src/views/EncodingBox.tsx b/src/views/EncodingBox.tsx index bf1abf4f..9052f31c 100644 --- a/src/views/EncodingBox.tsx +++ b/src/views/EncodingBox.tsx @@ -48,7 +48,7 @@ import _ from 'lodash'; import '../scss/EncodingShelf.scss'; import AnimateHeight from 'react-animate-height'; -import { getIconFromDtype, getIconFromType, groupConceptItems } from './ViewUtils'; +import { getIconFromDtype, getIconFromType } from './ViewUtils'; import { getUrls, fetchWithIdentity } from '../app/utils'; import { apiRequest } from '../app/apiClient'; import { Type } from '../data/types'; @@ -543,37 +543,24 @@ export const EncodingBox: FC = function EncodingBox({ channel, let normalizedDisplay = ""; let handleSelectOption = (option: string) => { - if (conceptShelfItems.map(f => f.name).includes(option)) { - //console.log(`yah-haha: ${option}`); - updateEncProp("fieldID", (conceptShelfItems.find(f => f.name == option) as FieldItem).id); - } else { - if (option == "") { - console.log("nothing happens") - } else { - let newConept = { - id: `concept-${Date.now()}`, name: option, - source: "custom", tableRef: "custom", - } as FieldItem; - dispatch(dfActions.updateConceptItems(newConept)); - updateEncProp("fieldID", newConept.id); - } - + // The encoding shelf only accepts fields that already exist in the + // current table. Selecting anything else (a stale concept from another + // table, or a typed-but-nonexistent name) is ignored — creating new + // fields here is not allowed, since that would require re-deriving data. 
+ const fieldItem = conceptShelfItems.find(f => f.name == option); + const isAvailable = !!fieldItem && (!activeTable || activeTable.names.includes(option)); + if (isAvailable) { + updateEncProp("fieldID", (fieldItem as FieldItem).id); } } - let conceptGroups = groupConceptItems(conceptShelfItems, tables); - - let groupNames = [...new Set(conceptGroups.map(g => g.group))]; - conceptGroups.sort((a, b) => { - if (groupNames.indexOf(a.group) < groupNames.indexOf(b.group)) { - return -1; - } else if (groupNames.indexOf(a.group) > groupNames.indexOf(b.group)) { - return 1; - } else { - return activeTable && activeTable.names.includes(a.field.name) && !activeTable.names.includes(b.field.name) ? -1 : 1; - } - }) + // Field names selectable in this encoding shelf, listed in the same order as + // the columns of the current table. Only fields that exist in the table can + // be assigned here, so the table's column list is the source of truth — no + // need to derive or group them from the concept shelf. + let availableFieldNames = (activeTable ? activeTable.names : conceptShelfItems.map(f => f.name)) + .filter(name => name != ""); // Smart Popper component that switches between bottom-end and top-end const CustomPopper = (props: any) => { @@ -623,15 +610,10 @@ export const EncodingBox: FC = function EncodingBox({ channel, }} // value={tempValue} filterOptions={(options, params) => { - const filtered = filter(options, params); - const { inputValue } = params; - // Suggest the creation of a new value - const isExisting = options.some((option) => inputValue === option); - if (!isExisting) { - return [`${inputValue}`, ...filtered, ] - } else { - return [...filtered]; - } + // The encoding shelf only accepts fields that already exist in the + // current table — creating brand-new fields (which would require + // re-deriving data) is not allowed here. 
+ return filter(options, params); }} sx={{ flexGrow: 1, @@ -647,138 +629,54 @@ export const EncodingBox: FC = function EncodingBox({ channel, handleHomeEndKeys autoHighlight id={`autocomplete-${chartId}-${channel}`} - options={conceptGroups.map(g => g.field.name).filter(name => name != "")} + options={availableFieldNames} getOptionLabel={(option) => { // Value selected with enter, right from the input return option; }} - groupBy={(option) => { - let groupItem = conceptGroups.find(item => item.field.name == option); - if (groupItem && groupItem.field.name != "") { - return `${groupItem.group}`; - } else { - return t('encoding.createNewFieldGroup') - } - }} - renderGroup={(params) => ( - - {params.group} - - {params.children} - - - )} renderOption={(props, option) => { - let renderOption = (conceptShelfItems.map(f => f.name).includes(option)) ? option : `${option}`; - let otherStyle = option == `` ? {color: "darkgray", fontStyle: "italic"} : {} - - // Find the field item for this option - const fieldItem = conceptShelfItems.find(f => f.name === option); - - if (fieldItem) { - // Create a mini concept card - let backgroundColor = theme.palette.primary.main; - if (fieldItem.source == "original") { - backgroundColor = theme.palette.primary.light; - } else if (fieldItem.source == "custom") { - backgroundColor = theme.palette.custom.main; - } - - // Add overlay logic similar to ConceptCard - make fields not in focused table more transparent - let draggleCardHeaderBgOverlay = 'rgba(255, 255, 255, 0.9)'; - - // Add subtle tint for non-focused fields - if (activeTable && !activeTable.names.includes(fieldItem.name)) { - draggleCardHeaderBgOverlay = 'rgba(255, 255, 255, 1)'; - } - - // Extract only the compatible props for Card - const { key, ...cardProps } = props; - - return ( - handleSelectOption(option)} - sx={{ - minWidth: 80, - backgroundColor, - position: "relative", - border: "none", - cursor: "pointer", - margin: '2px 4px', - "&:hover": { - boxShadow: "0 2px 4px 0 
rgb(0 0 0 / 20%)" - } - }} - variant="outlined" - className={`data-field-list-item draggable-card`} - > - - - {getIconFromType(activeTable?.metadata[fieldItem.name]?.type || Type.Auto)} - - {fieldItem.name} - - - - - ); - } else { - // For non-existing options (like new field creation) - return ( - handleSelectOption(option)} - sx={{ - fontSize: "10px", - padding: '4px 6px', - margin: '2px 4px', - cursor: 'pointer', - border: '1px dashed #ccc', - borderRadius: '4px', - backgroundColor: 'rgba(0,0,0,0.02)', - height: '24px', - display: 'flex', - alignItems: 'center', - "&:hover": { - backgroundColor: 'rgba(0,0,0,0.05)' - }, - ...otherStyle - }} - > - {renderOption || t('encoding.newFieldNamePlaceholder')} - - ); - } + const { key, ...liProps } = props as any; + const dtype = activeTable?.metadata[option]?.type || Type.Auto; + return ( + handleSelectOption(option)} + sx={{ + display: 'flex', + alignItems: 'center', + gap: '6px', + fontSize: 11, + padding: '4px 8px !important', + cursor: 'pointer', + '&:hover': { backgroundColor: 'rgba(0,0,0,0.05)' }, + }} + > + {getIconFromType(dtype)} + + {option} + + + ); }} - freeSolo renderInput={(params) => ( { + // The MUI Autocomplete handles Enter on the input itself, + // and `autoHighlight` makes it auto-select the first option + // even when the typed text doesn't match. Intercept Enter in + // the capture phase: only let it through when the current + // input is an exact available field; otherwise neutralize it + // so a stray Enter never assigns a field or bubbles up to + // trigger an unrelated refresh/formulate. 
+ if (event.key === 'Enter') { + const value = (event.target as HTMLInputElement).value?.trim(); + if (!value || !availableFieldNames.includes(value)) { + event.preventDefault(); + event.stopPropagation(); + } + } + }} sx={{height: "24px", "& .MuiInput-root": {height: "24px", fontSize: "small"}}} /> )} slotProps={{ @@ -789,6 +687,10 @@ export const EncodingBox: FC = function EncodingBox({ channel, '& .MuiAutocomplete-listbox': { maxHeight: '600px !important' }, + '& .MuiAutocomplete-noOptions': { + fontSize: '11px', + padding: '6px 12px', + }, } } }} diff --git a/src/views/EncodingShelfCard.tsx b/src/views/EncodingShelfCard.tsx index 39faebd2..5a0f9c10 100644 --- a/src/views/EncodingShelfCard.tsx +++ b/src/views/EncodingShelfCard.tsx @@ -32,6 +32,7 @@ import { Theme, Slider, CircularProgress, + LinearProgress, Button, Collapse, Dialog, @@ -43,12 +44,12 @@ import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import React from 'react'; import { useDragLayer } from 'react-dnd'; -import { ThinkingBufferEffect } from '../components/FunComponents'; +import { ThinkingBufferEffect, WritingPencil } from '../components/FunComponents'; import { Channel, Chart, FieldItem, Trigger, duplicateChart, ChartStyleVariant, computeEncodingFingerprint, isVariantStale } from "../components/ComponentType"; import _ from 'lodash'; -const ConfigSlider: FC<{ +export const ConfigSlider: FC<{ value: number; propDef: { label: string; min?: number; max?: number; step?: number }; onCommit: (value: number) => void; @@ -134,14 +135,18 @@ import CloseIcon from '@mui/icons-material/Close'; import TipsAndUpdatesIcon from '@mui/icons-material/TipsAndUpdates'; import ArrowBackIcon from '@mui/icons-material/ArrowBack'; import PaletteOutlinedIcon from '@mui/icons-material/PaletteOutlined'; -import { IdeaChip } from './ChartRecBox'; -import { useFormulateData } from '../app/useFormulateData'; // Property and state of an encoding shelf export interface EncodingShelfCardProps { chartId: string; 
trigger?: Trigger; noBorder?: boolean; + // Render only the chat / follow-up box (+ ideas). Used by the floating + // chat FAB so the chat lives off-canvas. + chatOnly?: boolean; + // Render the encoding shelf without the chat box (+ no ideas). Used by the + // floating encoding popover at the top-right of the chart. + hideChat?: boolean; } @@ -305,14 +310,14 @@ export const TriggerCard: FC<{ * them to specific Vega-Lite config blocks (typography, color, gridlines, * background, title alignment, etc.). */ -interface StylePreset { +export interface StylePreset { key: string; label: string; description: string; instruction: string; } -const STYLE_PRESETS: StylePreset[] = [ +export const STYLE_PRESETS: StylePreset[] = [ { key: 'nyt', label: 'New York Times', @@ -334,13 +339,6 @@ const STYLE_PRESETS: StylePreset[] = [ instruction: 'Restyle this chart in the FiveThirtyEight (538) blog style.', }, - { - key: 'dark', - label: 'Dark Mode', - description: 'Dark theme', - instruction: - 'Restyle this chart for a dark theme.', - }, { key: 'presentation', label: 'Presentation', @@ -358,7 +356,7 @@ const STYLE_PRESETS: StylePreset[] = [ ]; -export const EncodingShelfCard: FC = function ({ chartId }) { +export const EncodingShelfCard: FC = function ({ chartId, chatOnly, hideChat }) { const { t } = useTranslation(); const theme = useTheme(); @@ -398,32 +396,10 @@ export const EncodingShelfCard: FC = function ({ chartId const [isRestyling, setIsRestyling] = useState(false); // Per-variant refresh in progress (variantId being refreshed, or null). const [refreshingVariantId, setRefreshingVariantId] = useState(null); - // Intent-classifier round-trip in progress. Distinct from isRestyling so - // the UI can show a single "thinking" state on the submit button covering - // classify → route → execute. See submitPrompt() and the discussion in - // chat about routing on Enter. 
- const [isClassifying, setIsClassifying] = useState(false); - // Phase shown in the inline status banner below the prompt input. Covers - // the whole submit pipeline so the user always knows what's happening: - // classifying → restyling | formulating → idle. - // Set explicitly inside submitPrompt() and cleared by the effect below - // that watches chartSynthesisInProgress for the data-agent path. - const [submitPhase, setSubmitPhase] = useState< - 'idle' | 'classifying' | 'restyling' | 'formulating' - >('idle'); const chartSynthesisInProgress = useSelector( (state: DataFormulatorState) => state.chartSynthesisInProgress, ); const isDataAgentRunning = chartSynthesisInProgress.includes(chartId); - // While we're in 'formulating' phase, watch the redux flag and clear the - // banner once the data agent finishes (success or error). The data agent - // is fire-and-forget from this card's perspective, so we can't rely on - // an explicit callback to mark completion. - useEffect(() => { - if (submitPhase === 'formulating' && !isDataAgentRunning) { - setSubmitPhase('idle'); - } - }, [submitPhase, isDataAgentRunning]); useEffect(() => { setPrompt(triggerPrompt); @@ -432,28 +408,11 @@ export const EncodingShelfCard: FC = function ({ chartId let encodingMap = chart?.encodingMap; const dispatch = useDispatch(); - const { streamIdeas, formulateData } = useFormulateData(); const [chartTypeMenuOpen, setChartTypeMenuOpen] = useState(false); - const [encodingHovered, setEncodingHovered] = useState(false); - - // Anchor for the bottom-left "style presets" menu in the follow-up - // speech bubble. A preset click sends a detailed style instruction - // straight to the restyle agent (no intent classification needed — - // these are guaranteed style-only changes by construction). 
- const [stylePresetAnchor, setStylePresetAnchor] = useState(null); - - // Auto-expand encoding shelf when dragging a concept or operator card - const { isDraggingField } = useDragLayer((monitor) => ({ - isDraggingField: monitor.isDragging() && - (monitor.getItemType() === 'concept-card' || monitor.getItemType() === 'operator-card'), - })); - const shouldExpand = encodingHovered || isDraggingField; - - // When no fields are assigned to any channel, show all channels expanded - const hasAnyField = Object.values(encodingMap).some(enc => enc?.fieldID); - const shouldExpandAll = !hasAnyField || shouldExpand; + // Encoding channels are always shown (no auto hide/expand on hover/drag). + const shouldExpandAll = true; let handleUpdateChartType = (newChartType: string) => { @@ -471,62 +430,6 @@ export const EncodingShelfCard: FC = function ({ chartId let isChartAvailable = checkChartAvailability(chart, conceptShelfItems, currentTable.rows); - // Consolidated chart state - maps chartId to its ideas, thinkingBuffer, and loading state - const [chartState, setChartState] = useState>({}); - const [ideaElapsed, setIdeaElapsed] = useState(0); - - // Get current chart's state - const currentState = chartState[chartId] || { ideas: [], thinkingBuffer: "", isLoading: false, phase: "" }; - const currentChartIdeas = currentState.ideas; - const thinkingBuffer = currentState.thinkingBuffer; - const isLoadingIdeas = currentState.isLoading; - const ideaPhase = currentState.phase; - - useEffect(() => { - if (!isLoadingIdeas) { setIdeaElapsed(0); return; } - // Tick once per second — fast enough to read as live, slow enough to - // stay readable; the loading indicator carries the liveness cue. - // Anchor to a start timestamp to avoid float drift. 
- const t0 = Date.now(); - const timer = setInterval(() => setIdeaElapsed(Math.floor((Date.now() - t0) / 1000)), 1000); - return () => clearInterval(timer); - }, [isLoadingIdeas]); - - const defaultChartState = { ideas: [] as any[], thinkingBuffer: "", isLoading: false, phase: "" }; - - const setIdeas = (ideas: {text: string, goal: string, tag: string}[]) => { - setChartState(prev => ({ - ...prev, - [chartId]: { ...defaultChartState, ...prev[chartId], ideas } - })); - }; - - const setThinkingBuffer = (thinkingBuffer: string) => { - setChartState(prev => ({ - ...prev, - [chartId]: { ...defaultChartState, ...prev[chartId], thinkingBuffer } - })); - }; - - const setIsLoadingIdeas = (isLoading: boolean) => { - setChartState(prev => ({ - ...prev, - [chartId]: { ...defaultChartState, ...prev[chartId], isLoading } - })); - }; - - const setIdeaPhase = (phase: string) => { - setChartState(prev => ({ - ...prev, - [chartId]: { ...defaultChartState, ...prev[chartId], phase } - })); - }; - let encodingBoxGroups = Object.entries(channelGroups) .filter(([group, channelList]) => channelList.some(ch => Object.keys(encodingMap).includes(ch))) .map(([group, channelList]) => { @@ -575,177 +478,6 @@ export const EncodingShelfCard: FC = function ({ chartId ...rootTables.map(t => t.id).filter(id => !priorityIds.includes(id)) ]; - let getIdeasForVisualization = async () => { - if (!currentTable || isLoadingIdeas) return; - - let chartAvailable = checkChartAvailability(chart, conceptShelfItems, currentTable.rows); - let currentChartPng = chartAvailable ? 
await vegaLiteSpecToPng(assembleVegaChart( - chart.chartType, chart.encodingMap, activeFields, currentTable.rows, - currentTable.metadata, 100, 80, false, chart.config)) : undefined; - if (currentChartPng) { - currentChartPng = await downscaleImageForAgent(currentChartPng); - } - - await streamIdeas({ - actionTableIds, - currentTable, - onIdeas: setIdeas, - onThinkingBuffer: setThinkingBuffer, - onLoadingChange: setIsLoadingIdeas, - onProgress: setIdeaPhase, - currentChartImage: currentChartPng, - currentDataSample: currentTable.rows.slice(0, 10), - }); - } - - // Function to handle idea chip click - const handleIdeaClick = (ideaText: string) => { - setPrompt(ideaText); - // Automatically start the data formulation process - deriveNewData(ideaText, 'ideate'); - }; - - - let deriveNewData = async ( - instruction: string, - mode: 'formulate' | 'ideate' = 'formulate', - overrideTableId?: string, - ) => { - - if (actionTableIds.length == 0) return; - - // Short-circuit: if all fields exist in source table, just reference it - if (currentTable.derive == undefined && instruction == "" && - (activeFields.length > 0 && activeCustomFields.length == 0) && - tables.some(t => t.derive == undefined && - activeFields.every(f => currentTable.names.includes(f.name)))) { - let tempTable = getDataTable(chart, tables, allCharts, conceptShelfItems, true); - dispatch(dfActions.updateTableRef({chartId: chartId, tableRef: tempTable.id})); - dispatch(dfActions.changeChartRunningStatus({chartId, status: true})); - setTimeout(function(){ - dispatch(dfActions.changeChartRunningStatus({chartId, status: false})); - dispatch(dfActions.clearUnReferencedTables()); - }, 400); - return; - } - - dispatch(dfActions.clearUnReferencedTables()); - - let fieldNamesStr = activeFields.map(f => f.name).reduce( - (a: string, b: string, i, array) => a + (i == 0 ? "" : (i < array.length - 1 ? 
', ' : ' and ')) + b, ""); - - const actionId = `deriveNewData_${String(Date.now())}`; - const originTableId = focusedTableId || currentTable.id; - const actionDescription = instruction || `Derive ${fieldNamesStr}`; - - // Build chart visualization context - let chartComplete = checkChartAvailability(chart, conceptShelfItems, currentTable.rows); - let chartSpec = (mode == 'formulate' && Object.keys(activeSimpleEncodings).length > 0) ? { - chart_type: chart.chartType, - encodings: activeSimpleEncodings, - ...(chart.config ? { config: chart.config } : {}) - } : undefined; - - let currentChartImage: string | null | undefined = undefined; - if (chartComplete && chartSpec) { - currentChartImage = await vegaLiteSpecToPng(assembleVegaChart( - chart.chartType, chart.encodingMap, activeFields, currentTable.rows, - currentTable.metadata, 100, 80, false, chart.config - )); - if (currentChartImage) { - currentChartImage = await downscaleImageForAgent(currentChartImage); - } - } - - let currentVisualization = (chartComplete && chartSpec) ? { - chart_spec: chartSpec, - ...(currentChartImage ? { chart_image: currentChartImage } : {}) - } : undefined; - let expectedVisualization = (!chartComplete && chartSpec) ? 
{ chart_spec: chartSpec } : undefined; - - let triggerChartSpec = duplicateChart(chart); - triggerChartSpec.source = "trigger"; - - formulateData({ - instruction, - mode, - actionTableIds, - currentTable, - overrideTableId, - currentVisualization, - expectedVisualization, - triggerChart: triggerChartSpec, - createChart: ({ candidateTable, refinedGoal, currentConcepts }) => { - let needToCreateNewChart = true; - let focusedChartId: string | undefined; - - if (mode != "ideate" && chart.chartType != "Auto" && overrideTableId != undefined && - allCharts.filter(c => c.source == "user").find(c => c.tableRef == overrideTableId)) { - let chartsFromOverrideTable = allCharts.filter(c => c.source == "user" && c.tableRef == overrideTableId); - let chartsWithSameEncoding = chartsFromOverrideTable.filter(c => { - let getSimpliedChartEnc = (ch: Chart) => { - return ch.chartType + ":" + Object.entries(ch.encodingMap) - .filter(([channel, enc]) => enc.fieldID != undefined) - .map(([channel, enc]) => `${channel}:${enc.fieldID}:${enc.aggregate}:${enc.sortOrder}:${enc.sortBy}:${enc.scheme}`) - .join(";"); - } - return getSimpliedChartEnc(c) == getSimpliedChartEnc(triggerChartSpec); - }); - if (chartsWithSameEncoding.length > 0) { - focusedChartId = chartsWithSameEncoding[0].id; - dispatch(dfActions.setFocused({ type: 'chart', chartId: focusedChartId })); - needToCreateNewChart = false; - } - } - - if (needToCreateNewChart) { - let newChart: Chart; - if (mode == "ideate" || chart.chartType == "Auto") { - newChart = resolveRecommendedChart(refinedGoal, currentConcepts, candidateTable); - } else if (chart.chartType == "Table") { - newChart = generateFreshChart(candidateTable.id, 'Table'); - } else { - newChart = structuredClone(chart) as Chart; - newChart.source = "user"; - newChart.id = `chart-${Date.now() - Math.floor(Math.random() * 10000)}`; - newChart.tableRef = candidateTable.id; - // Style variants belong to the chart they were authored - // against — don't carry them over to a 
follow-up chart. - // (See design-docs/28-chart-style-refinement-agent.md.) - newChart.styleVariants = undefined; - newChart.activeVariantId = undefined; - let chartEncodings = refinedGoal['chart']?.['encodings'] || refinedGoal['chart_encodings'] || {}; - newChart = resolveChartFields(newChart, currentConcepts, chartEncodings, candidateTable); - } - focusedChartId = newChart.id; - dispatch(dfActions.addAndFocusChart(newChart)); - } - return focusedChartId; - }, - onStarted: () => { - dispatch(dfActions.changeChartRunningStatus({chartId, status: true})); - }, - onSuccess: ({ displayInstruction, candidateTable, focusedChartId }) => { - if (chart.chartType == "Table" || chart.chartType == "Auto" || (existsWorkingTable == false)) { - dispatch(dfActions.deleteChartById(chartId)); - } - dispatch(dfActions.clearUnReferencedTables()); - dispatch(dfActions.clearUnReferencedCustomConcepts()); - dispatch(dfActions.setFocused({ type: 'chart', chartId: focusedChartId as string })); - dispatch(dfActions.addMessages({ - "timestamp": Date.now(), - "component": "chart builder", - "type": "success", - "value": t('encoding.formulationSucceeded', { fields: fieldNamesStr }) - })); - }, - onError: () => { - }, - onFinally: () => { - dispatch(dfActions.changeChartRunningStatus({chartId, status: false})); - }, - }); - } // --- Style variants (see design-docs/28-chart-style-refinement-agent.md) --- // Chip strip for navigating user-authored "skins" of the current chart's @@ -907,77 +639,6 @@ export const EncodingShelfCard: FC = function ({ chartId } }; - /** - * Single entry point for the input bubble's primary submit (Enter or the - * primary button). Routes the prompt to either the chart restyle agent - * (visual changes) or the data agent (data shape / chart-type changes) - * via a tiny LLM intent classifier. - * - * Style → data fallback: if the restyle agent comes back with - * out_of_scope (i.e. 
it decided this was actually a data change), we - * automatically retry with the data agent so the user doesn't have to - * re-press anything. The original out_of_scope toast is suppressed in - * that case to avoid the misleading "click formulate instead" hint. - * - * Heuristics-free: see src/app/intentClassifier.ts for the rationale - * behind a tiny LLM call vs. a keyword list (multilingual support). - */ - const submitPrompt = async () => { - const text = prompt.trim(); - if (!text) return; - if (isRestyling || isClassifying) return; - if (!activeModel) { - // Both agents need a model; the data agent path will surface its - // own error too, but failing fast here saves a classifier call. - dispatch(dfActions.addMessages({ - timestamp: Date.now(), - component: 'chart builder', - type: 'error', - value: 'No model is configured. Please select a model before submitting.', - })); - return; - } - - // If the chart isn't rendered yet there's nothing for the style - // agent to refine; just go straight to the data agent. - if (!isChartAvailable) { - setSubmitPhase('formulating'); - deriveNewData(text, 'formulate'); - return; - } - - setIsClassifying(true); - setSubmitPhase('classifying'); - let intent: 'style' | 'data' = 'data'; - try { - intent = await classifyChartIntent(text, activeModel); - } finally { - setIsClassifying(false); - } - - if (intent === 'data') { - setSubmitPhase('formulating'); - deriveNewData(text, 'formulate'); - return; - } - - // intent === 'style' — try restyle first, fall back to data on out_of_scope - setSubmitPhase('restyling'); - const result = await handleRestyleSubmit({ suppressOutOfScopeMessage: true }); - if (result === 'out_of_scope') { - // The restyle agent decided this was actually a data change. - // Hand off to the data agent. The banner switches from - // "restyling…" to "formulating data…" so the user sees the route - // change without an extra click. 
- setSubmitPhase('formulating'); - deriveNewData(text, 'formulate'); - // submitPhase will flip to 'idle' once the data agent finishes - // (see the chartSynthesisInProgress effect above). - } else { - // success or error — restyle path is fully done, clear banner. - setSubmitPhase('idle'); - } - }; /** * Refresh a stale variant: re-run its stored prompt against the @@ -1182,247 +843,12 @@ export const EncodingShelfCard: FC = function ({ chartId ) : null; - // zip multiple components together - const w: any = (a: any[], b: any[]) => a.length ? [a[0], ...w(b, a.slice(1))] : b; - - let formulateInputBox = - { - setPrompt(event.target.value); - }} - onKeyDown={(event: any) => { - if (event.key === 'Enter' && !event.shiftKey) { - event.preventDefault(); - if (prompt.trim().length > 0) { - // submitPrompt routes via the intent classifier: - // style requests go to the restyle agent; data / - // chart-type requests go to deriveNewData. - submitPrompt(); - } - } - }} - slotProps={{ - inputLabel: { shrink: true }, - }} - value={prompt} - placeholder={t('encoding.followUpChartPlaceholder')} - fullWidth - multiline - minRows={2} - maxRows={5} - /> - - {/* Left group: one-click style presets. Clicking the palette - icon opens a menu of curated "style sheets" (NYT, Economist, - FiveThirtyEight, minimal, dark mode, presentation, comic). - Each preset sends a detailed style instruction straight to the - restyle agent — bypassing the intent classifier since these - are guaranteed style-only changes. The user can still type - freeform instructions in the textbox above; the menu's - footer hint reminds them of that. 
*/} - - - setStylePresetAnchor(e.currentTarget)} - > - - - - - setStylePresetAnchor(null)} - anchorOrigin={{ vertical: 'bottom', horizontal: 'left' }} - transformOrigin={{ vertical: 'top', horizontal: 'left' }} - slotProps={{ - paper: { - sx: { minWidth: 220, maxWidth: 260, mt: 0.5 }, - }, - }} - > - - - {t('encoding.stylePresetsHeader')} - - - {STYLE_PRESETS.map((preset) => ( - { - setStylePresetAnchor(null); - // Style presets are unambiguous style changes — - // skip the intent classifier and send the - // detailed instruction straight to the restyle - // agent. We also drive submitPhase so the inline - // status banner above shows "restyling…". - setSubmitPhase('restyling'); - handleRestyleSubmit({ instructionOverride: preset.instruction }) - .finally(() => setSubmitPhase('idle')); - }} - sx={{ py: 0.5 }} - > - - {preset.label} - - - ))} - - - {t('encoding.stylePresetsHint')} - - - - - {/* Right group: tips/ideas + primary submit. */} - - 0 ? t('encoding.refreshIdeas') : t('encoding.getIdeas')}> - - getIdeasForVisualization()}> - {isLoadingIdeas - ? - : } - - - - {/* Primary submit. The Enter key and this button both go through - submitPrompt(), which uses an LLM intent classifier to route - between the restyle agent and the data agent. The brush / - style-only button was removed in favor of this unified entry - point — if the classifier (or the user) is wrong, the restyle - agent's out_of_scope signal triggers an automatic data-agent - fallback. The trigger-override button below is kept because - it does something neither path does (re-derive into the same - table). See src/app/intentClassifier.ts. */} - {trigger ? (() => { - const overrideTableId = tables.find(t => t.derive?.trigger === trigger)?.id; - return overrideTableId ? 
( - {t('encoding.formulateAndOverride')} {overrideTableId}}> - - { - deriveNewData(trigger!.interaction?.find(e => e.role === 'instruction')?.content || '', 'formulate', overrideTableId); - }}> - - - - ) : null; - })() - : - - - { - if (prompt.trim()) { - submitPrompt(); - } else { - // No text — only the field shelf has - // changes. Skip the classifier and run - // the data agent directly. - deriveNewData(prompt, 'formulate'); - } - }}> - {(isClassifying || isRestyling) - ? - : } - - - - } - - - let channelComponent = ( - + + {!chatOnly && (<> { handleAttachFiles(e.target.files); if (e.target) e.target.value = ''; }} + /> + { e.stopPropagation(); setUploadDialogOpen(true); }} + onClick={(e) => { e.stopPropagation(); fileInputRef.current?.click(); }} sx={{ p: 0.5, color: theme.palette.text.secondary, @@ -1683,44 +1872,31 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({ - {/* Agent mode toggle */} - - - {isChatFormulating ? ( ) : ( <> - {!isReportMode && ( + + + submitChat(t('chartRec.reportPrompt'), undefined, t('chartRec.askedForReport'))} + > + + + + submitChat(t('chartRec.exploreIdeasPrompt'), undefined, t('chartRec.askedForRecommendations'))} > @@ -1728,11 +1904,11 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({ - )} { if (pendingClarification) { @@ -1775,7 +1951,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({ message={draftNodes.find(d => d.derive?.status === 'running' && threadTableIds.has(d.derive.trigger.tableId)) ?.derive?.runningPlan} theme={theme} - color={isReportMode ? 
'warning' : 'primary'} + color={'primary'} onCancel={cancelAgent} /> )} @@ -1786,11 +1962,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({ {/* The input box */} {inputBox} - setUploadDialogOpen(false)} - initialTab="menu" - /> ); }; diff --git a/src/views/TiptapReportEditor.tsx b/src/views/TiptapReportEditor.tsx index b9083511..fcc8fbaf 100644 --- a/src/views/TiptapReportEditor.tsx +++ b/src/views/TiptapReportEditor.tsx @@ -11,34 +11,325 @@ import { TableRow } from '@tiptap/extension-table-row'; import { TableHeader } from '@tiptap/extension-table-header'; import { TableCell } from '@tiptap/extension-table-cell'; import { Markdown } from 'tiptap-markdown'; -import { Box, Button, IconButton, Menu, MenuItem, Tooltip, Divider, useTheme, Typography } from '@mui/material'; +import { Box, IconButton, Tooltip, Divider, Typography, CircularProgress, useTheme } from '@mui/material'; import { alpha } from '@mui/material/styles'; -import { WritingPencil, ShimmerText, WritingIndicator } from '../components/FunComponents'; +import { WritingIndicator } from '../components/FunComponents'; +import { getChartTemplate } from '../components/ChartTemplates'; import FormatBoldIcon from '@mui/icons-material/FormatBold'; import FormatItalicIcon from '@mui/icons-material/FormatItalic'; import FormatListBulletedIcon from '@mui/icons-material/FormatListBulleted'; import FormatListNumberedIcon from '@mui/icons-material/FormatListNumbered'; import FormatQuoteIcon from '@mui/icons-material/FormatQuote'; import TitleIcon from '@mui/icons-material/Title'; -import ContentCopyIcon from '@mui/icons-material/ContentCopy'; -import ImageIcon from '@mui/icons-material/Image'; -import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdf'; -import DownloadIcon from '@mui/icons-material/Download'; import CheckCircleIcon from '@mui/icons-material/CheckCircle'; +/** Compact "1.2s" / "850ms" style duration for inspection steps. 
*/ +function formatStepDuration(ms: number): string { + if (ms < 1000) return `${Math.round(ms)}ms`; + return `${(ms / 1000).toFixed(1)}s`; +} + export interface TiptapReportEditorProps { content: string; // HTML content (from processReport) - editable?: boolean; + streamingText?: string; // raw markdown, shown via typewriter while writing-phase streams + resolveChartImage?: (chartId: string) => { url: string; width: number; height: number } | undefined; // for streaming chart embeds + editable?: boolean; // edit mode on/off (formatting toolbar visible, content editable) + isGenerating?: boolean; // report is still streaming; suppress export actions, show status + generatingPhase?: 'inspecting' | 'writing'; // which phase the agent is in while generating + // accumulated inspect steps so the user sees what's happening; `charts` + // carries chart-type + display name so we can show a type icon next to it + inspectionSteps?: InspectStep[]; reportId?: string; // triggers re-focus when switching reports onUpdate?: (html: string) => void; - onCopyContent?: () => void | Promise; - onCopyImage?: () => void | Promise; - onDownloadPng?: () => void | Promise; - onExportPdf?: () => void | Promise; - copyContentSuccess?: boolean; - copyImageSuccess?: boolean; } +// ── Generating-status UI ─────────────────────────────────────────────────── +// While a report streams, the canvas shows (in order): a "thinking…" spinner +// before anything arrives → a list of inspection steps (each flips to a ✓ with +// a duration) → a trailing "thinking…" once all steps resolve → and finally a +// pencil "writing…" overlay glued to the bottom of the growing text. 
+ +export interface InspectStep { + label: string; + doneLabel?: string; // past-tense label shown once the step completes + done: boolean; + charts?: { chartType: string; name: string }[]; + startedAt?: number; // epoch ms when the tool call started + durationMs?: number; // wall time once the step is done +} + +/** Small fixed-size slot holding either a spinner or a ✓, aligned to text. + * Text stays uniformly muted; the icon carries the one bit of state color — + * a soft spinner while running, a green check once done (matching the data + * load chat's convention). */ +const StatusIcon: FC<{ done?: boolean }> = ({ done }) => ( + + {done + ? + : } + +); + +/** Spinner + gently pulsing label for "thinking…" / "still working" states. */ +const ThinkingRow: FC<{ label: string }> = ({ label }) => ( + + + + {label} + + +); + +/** A single inspection step: status icon, label + duration, then chart chips. */ +const InspectionStepRow: FC<{ step: InspectStep }> = ({ step }) => ( + + + + {/* Label and elapsed time sit together on the first line. */} + + + {step.done && step.doneLabel ? step.doneLabel : step.label} + + {step.done && step.durationMs != null && ( + + {formatStepDuration(step.durationMs)} + + )} + + {/* Each inspected chart gets its own line, even when there's only one. */} + {step.charts?.map((c, j) => ( + + + {getChartTemplate(c.chartType)?.icon} + + + {c.name} + + + ))} + + +); + +/** + * The in-flow status shown before the report text starts streaming: a muted + * title, then either a lone "thinking…" (nothing happening yet) or the + * accumulated inspection steps followed by a trailing "thinking…" once they + * all resolve. + */ +const InspectingStatus: FC<{ steps?: InspectStep[] }> = ({ steps }) => { + const { t } = useTranslation(); + return ( + + + {t('editor.workingTitle')} + + {steps?.length + ? 
steps.map((step, i) => ) + : null} + {(!steps?.length || steps.every(s => s.done)) && ( + + )} + + ); +}; + +/** Strip inline markdown emphasis markers for the lightweight streaming view. */ +function stripInlineMarkers(line: string): string { + return line + .replace(/\*\*(.+?)\*\*/g, '$1') + .replace(/(^|[^*])\*([^*]+)\*/g, '$1$2') + .replace(/`([^`]+)`/g, '$1'); +} + +/** Detect a chart-image line: ![caption](chart://id) or legacy [IMAGE(id)]. */ +function matchChartImageLine(line: string): { chartId: string; caption?: string } | null { + const md = line.match(/^!\[([^\]]*)\]\(chart:\/\/([^)]+)\)\s*$/); + if (md) return { caption: md[1] || undefined, chartId: md[2] }; + const legacy = line.match(/^\[IMAGE\(([^)]+)\)\]\s*$/); + if (legacy) return { chartId: legacy[1] }; + return null; +} + +type ResolveChartImage = (chartId: string) => { url: string; width: number; height: number } | undefined; + +/** + * Lightweight, line-based render of the streamed markdown. Good enough to read + * smoothly while text arrives; the real TipTap parse happens once on completion. + */ +const StreamingMarkdownLite: FC<{ text: string; caret?: React.ReactNode; resolveChartImage?: ResolveChartImage }> = ({ text, caret, resolveChartImage }) => { + const lines = text.split('\n'); + const lastIdx = lines.length - 1; + return ( + <> + {lines.map((line, i) => { + const tail = i === lastIdx ? 
caret : null; + const img = matchChartImageLine(line); + if (img) { + const cached = resolveChartImage?.(img.chartId); + if (cached) { + return ( + + + {tail} + + ); + } + return ( + + 📊 {img.caption || img.chartId}{tail} + + ); + } + const h = line.match(/^(#{1,3})\s+(.*)$/); + if (h) { + const level = h[1].length; + return ( + + {stripInlineMarkers(h[2])}{tail} + + ); + } + const li = line.match(/^[-*]\s+(.*)$/); + if (li) { + return ( + + + {stripInlineMarkers(li[1])}{tail} + + ); + } + return ( + + {stripInlineMarkers(line)}{tail} + + ); + })} + + ); +}; + +/** + * Typewriter buffer: smoothly reveals `text` regardless of how bursty the + * network deltas are. A rAF loop catches the displayed length up to the target, + * revealing more per frame when the backlog is large so it never falls behind. + */ +const StreamingText: FC<{ text: string; resolveChartImage?: ResolveChartImage }> = ({ text, resolveChartImage }) => { + const { t } = useTranslation(); + const textRef = useRef(text); + textRef.current = text; + const shownLenRef = useRef(0); + const [shown, setShown] = useStateReact(''); + + useEffect(() => { + let raf = 0; + let lastTime = performance.now(); + let lastTargetLen = 0; + let lastChunkTime = lastTime; + let fraction = 0; // sub-character reveal accumulator + + // Reveal rate in chars/ms, smoothed across chunks. Each time a chunk + // arrives we estimate the natural rate as (chunk size / time since the + // previous chunk), so the chunk is spread out over roughly the gap until + // the next one is expected — that feels like natural typing rather than + // dumping. Clamped to a sane min/max and floored so it never stalls. 
+ const MIN_RATE = 0.012; // ~12 chars/sec — slowest "typing" we allow + const MAX_RATE = 0.20; // ~200 chars/sec — cap so big bursts don't blur + let rate = 0.03; // initial guess until the first interval is known + + const tick = () => { + const now = performance.now(); + const dt = Math.min(now - lastTime, 100); // clamp tab-switch gaps + lastTime = now; + + const target = textRef.current; + let len = shownLenRef.current; + if (len > target.length) { len = 0; fraction = 0; } // report cleared/restarted + + // On each new chunk, re-estimate the natural typing rate from this + // chunk's size and the interval since the previous chunk arrived. + const arrived = target.length - lastTargetLen; + if (arrived > 0) { + const interval = Math.max(now - lastChunkTime, 1); + lastChunkTime = now; + lastTargetLen = target.length; + const chunkRate = arrived / interval; + rate = rate * 0.7 + chunkRate * 0.3; // EMA smoothing across chunks + } + + const backlog = target.length - len; + if (backlog > 0) { + // Pace at the smoothed rate, but never below the min typing speed, + // and lift slightly when the backlog is large so we don't drift + // permanently behind a fast stream. + const catchUp = backlog > 240 ? 1.6 : backlog > 80 ? 
1.25 : 1; + const effRate = Math.min(MAX_RATE, Math.max(MIN_RATE, rate) * catchUp); + fraction += effRate * dt; + const whole = Math.floor(fraction); + if (whole >= 1) { + fraction -= whole; + len = Math.min(target.length, len + whole); + shownLenRef.current = len; + setShown(target.slice(0, len)); + } + } + raf = requestAnimationFrame(tick); + }; + raf = requestAnimationFrame(tick); + return () => cancelAnimationFrame(raf); + }, []); + + return ( + + + } /> + + + + + ); +}; + /** Resizable image node view — drag bottom-right corner to resize */ const ResizableImageView: FC = ({ node, updateAttributes, selected }) => { const { src, alt, width, height } = node.attrs; @@ -175,20 +466,18 @@ const ToolbarButton: FC<{ export const TiptapReportEditor: FC = ({ content, + streamingText, + resolveChartImage, editable = true, + isGenerating = false, + generatingPhase, + inspectionSteps, reportId, onUpdate, - onCopyContent, - onCopyImage, - onDownloadPng, - onExportPdf, - copyContentSuccess = false, - copyImageSuccess = false, }) => { const theme = useTheme(); const { t } = useTranslation(); const isFocused = useRef(false); - const [imageMenuAnchor, setImageMenuAnchor] = useStateReact(null); const editor = useEditor({ extensions: [ @@ -247,6 +536,9 @@ export const TiptapReportEditor: FC = ({ // Always sync if the content contains new images (img tags) that aren't in the editor yet useEffect(() => { if (!editor) return; + // While the writing phase streams, the lightweight typewriter view owns the + // display — defer the (expensive) markdown parse until the stream completes. 
+ if (generatingPhase === 'writing') return; if (!isFocused.current) { editor.commands.setContent(content, { emitUpdate: false }); } else { @@ -258,7 +550,7 @@ export const TiptapReportEditor: FC = ({ editor.commands.setContent(content, { emitUpdate: false }); } } - }, [editor, content]); + }, [editor, content, generatingPhase]); const copyAsRichText = useCallback(async () => { if (!editor) return; @@ -278,70 +570,32 @@ export const TiptapReportEditor: FC = ({ if (!editor) return null; const iconSx = { fontSize: 16 }; - const exportIconSx = { fontSize: 15 }; - const exportButtonSx = { - minWidth: 0, - height: 26, - px: 0.75, - py: 0, - borderRadius: '4px', - textTransform: 'none', - fontSize: 12, - fontWeight: 400, - lineHeight: 1, - color: 'text.secondary', - borderColor: 'transparent', - backgroundColor: 'transparent', - '& .MuiButton-startIcon': { - mr: 0.5, - ml: 0, - color: 'inherit', - }, - '&:hover': { - color: 'primary.main', - borderColor: alpha(theme.palette.primary.main, 0.08), - backgroundColor: alpha(theme.palette.primary.main, 0.08), - }, - }; - const exportMenuItemSx = { - minHeight: 30, - px: 1.25, - py: 0.5, - fontSize: 12, - color: 'text.secondary', - '& .MuiSvgIcon-root': { - fontSize: 15, - mr: 0.75, - color: 'text.disabled', - }, - }; - const hasExportActions = !!(onCopyContent || onCopyImage || onDownloadPng || onExportPdf); - const imageMenuOpen = Boolean(imageMenuAnchor); return ( - - {/* Toolbar — always visible, disabled during generation */} + + {/* Toolbar — only in edit mode (formatting); hidden when reading or generating */} + {editable && ( + {editable && ( editor.chain().focus().toggleBold().run()} @@ -395,126 +649,13 @@ export const TiptapReportEditor: FC = ({ - {hasExportActions && editable && ( - - {onCopyContent && ( - - )} - {(onCopyImage || onDownloadPng) && ( - <> - - setImageMenuAnchor(null)} - anchorOrigin={{ vertical: 'bottom', horizontal: 'right' }} - transformOrigin={{ vertical: 'top', horizontal: 'right' }} - 
slotProps={{ - paper: { - sx: { - mt: 0.5, - borderRadius: '6px', - boxShadow: '0 1px 4px rgba(0,0,0,0.12)', - border: `1px solid ${alpha(theme.palette.divider, 0.5)}`, - } - } - }} - > - {onCopyImage && ( - { - setImageMenuAnchor(null); - void onCopyImage(); - }} - sx={exportMenuItemSx} - > - - {t('report.copyImage')} - - )} - {onDownloadPng && ( - { - setImageMenuAnchor(null); - void onDownloadPng(); - }} - sx={exportMenuItemSx} - > - - {t('report.downloadPng')} - - )} - - - )} - {onExportPdf && ( - - )} - )} - {!editable && ( - - - {t('editor.generating')} - - )} + )} {/* Editor */} = ({ }, }, }}> - - {/* Shimmer overlay while generating */} - {!editable && ( - - - + {/* While inspecting, the report is still empty — show progress. + While writing, a typewriter view reveals the streamed text + smoothly; TipTap takes over (one parse) once it completes. */} + {isGenerating && generatingPhase !== 'writing' ? ( + + ) : isGenerating && generatingPhase === 'writing' ? ( + + ) : ( + )} diff --git a/src/views/UnifiedDataUploadDialog.tsx b/src/views/UnifiedDataUploadDialog.tsx index bd7167f8..73d325e1 100644 --- a/src/views/UnifiedDataUploadDialog.tsx +++ b/src/views/UnifiedDataUploadDialog.tsx @@ -448,12 +448,14 @@ export interface DataLoadMenuProps { onSelectConnector?: (connector: ConnectorInstance) => void; /** * Called when the user submits a prompt from the top-level Data Loading - * Agent chat box. Implementations should open the agent chat surface - * with the prompt (and optional pasted/attached images) pre-filled — - * typically auto-sent. If not provided, the chat box falls back to - * `onSelectTab('extract')`. + * Agent chat box. Implementations should hand the payload off to the + * agent chat surface, which will auto-send it as a fresh user + * message. Attachments are file names (already uploaded to the + * session scratch space) — the chat surface re-injects them as + * `[Uploaded: name]` mentions when building the backend payload. 
+ * If not provided, the chat box falls back to `onSelectTab('extract')`. */ - onStartChat?: (prompt: string, images?: string[]) => void; + onStartChat?: (prompt: string, images: string[], attachments: string[]) => void; /** * True when a prior data-loading agent conversation exists in * state. When set together with `onResumeChat`, the menu renders @@ -605,22 +607,17 @@ export const DataLoadMenu: React.FC = ({ const submitAgentChat = () => { const text = agentInput.trim(); if (text.length === 0 && agentImages.length === 0 && agentAttachments.length === 0) { - // Empty submission — just open the chat surface. - if (onStartChat) onStartChat('', []); + // Empty submission — just surface the chat. + if (onStartChat) onStartChat('', [], []); else onSelectTab('extract'); return; } - // Augment the outgoing prompt with `[Uploaded: name]` lines so the - // agent sees attachments as text references, without polluting - // the editable input the user sees. - const mentions = agentAttachments - .map(name => t('dataLoading.uploaded', { name })) - .join('\n'); - const finalText = mentions - ? (text ? `${text}\n${mentions}` : mentions) - : text; + // Pass payload pieces unchanged — the chat surface builds the + // backend mentions itself. We deliberately do NOT pre-inject + // `[Uploaded: name]` into `text` here, so the visible message + // bubble stays clean and the file chips render uniformly. if (onStartChat) { - onStartChat(finalText, agentImages); + onStartChat(text, agentImages, agentAttachments); } else { onSelectTab('extract'); } @@ -631,14 +628,26 @@ export const DataLoadMenu: React.FC = ({ // Suggestions surfaced as a focus-time dropdown — sourced from a shared // factory so the in-session `DataLoadingChat` panel renders the exact - // same list. See `dataLoadingSuggestions.ts`. + // same list. See `dataLoadingSuggestions.ts`. 
Auto-run is routed + // through `onStartChat` so the parent dialog can dispatch its + // `clearChatMessages` + `setDataLoadingChatPending` sequence + // atomically — same path as a manual submit. const agentChatSuggestions = useMemo(() => buildDataLoadingSuggestions({ t, setInput: setAgentInput, setImages: setAgentImages, setAttachments: setAgentAttachments, ensureActiveWorkspace, - }), [t]); + requestAutoSend: onStartChat + ? (payload) => { + onStartChat(payload.text, payload.images, payload.attachments); + setAgentInput(''); + setAgentImages([]); + setAgentAttachments([]); + } + : undefined, + // eslint-disable-next-line react-hooks/exhaustive-deps + }), [t, onStartChat]); const agentChatBox = ( = ({ formData.append('file', file); apiRequest(getUrls().SCRATCH_UPLOAD_URL, { method: 'POST', body: formData, - }).then(() => { - setAgentAttachments(prev => [...prev, file.name]); + }).then(({ data }) => { + // The backend hash-suffixes the filename; store the + // server-assigned name so the `[Uploaded:]` mention + // resolves to the real scratch file. + const scratchName = (data?.path || `scratch/${file.name}`).replace(/^scratch\//, ''); + setAgentAttachments(prev => [...prev, scratchName]); }).catch(err => console.error('Upload failed:', err)); }} attachments={agentAttachments} @@ -1112,14 +1125,6 @@ export interface UnifiedDataUploadDialogProps { open: boolean; onClose: () => void; initialTab?: UploadTabType; - /** - * Optional initial prompt to hand off to the Data Loading Agent. When - * non-empty and `initialTab === 'extract'`, the prompt is pre-filled - * and auto-sent in the chat panel. - */ - initialChatPrompt?: string; - /** Optional images (data URLs) to seed the chat alongside `initialChatPrompt`. 
*/ - initialChatImages?: string[]; onConnectorsChanged?: () => void; } @@ -1127,8 +1132,6 @@ export const UnifiedDataUploadDialog: React.FC = ( open, onClose, initialTab = 'menu', - initialChatPrompt, - initialChatImages, onConnectorsChanged, }) => { const theme = useTheme(); @@ -1143,21 +1146,6 @@ export const UnifiedDataUploadDialog: React.FC = ( const existingNames = new Set(existingTables.map(t => t.id)); const [activeTab, setActiveTab] = useState(initialTab === 'menu' ? 'menu' : initialTab); - // Prompt to seed the agent chat with. Sourced from the `initialChatPrompt` - // prop when the dialog opens directly on 'extract', or set internally - // when the user submits the in-menu agent chat box. - const [seededChatPrompt, setSeededChatPrompt] = useState( - initialTab === 'extract' ? initialChatPrompt : undefined, - ); - const [seededChatImages, setSeededChatImages] = useState( - initialTab === 'extract' ? initialChatImages : undefined, - ); - const [autoSendSeededPrompt, setAutoSendSeededPrompt] = useState( - initialTab === 'extract' && ( - (!!initialChatPrompt && initialChatPrompt.trim().length > 0) - || (!!initialChatImages && initialChatImages.length > 0) - ), - ); const fileInputRef = useRef(null); const urlInputRef = useRef(null); @@ -1175,27 +1163,8 @@ export const UnifiedDataUploadDialog: React.FC = ( if (open) { setConnectorInstances([]); refreshConnectors(); - // Re-seed chat prompt/images from props each time the dialog opens. 
- if (initialTab === 'extract') { - setSeededChatPrompt(initialChatPrompt); - setSeededChatImages(initialChatImages); - const hasText = !!initialChatPrompt && initialChatPrompt.trim().length > 0; - const hasImages = !!initialChatImages && initialChatImages.length > 0; - setAutoSendSeededPrompt(hasText || hasImages); - // Opening the dialog with a fresh prompt/images means the - // user wants a new data-loading conversation; clear any - // stale messages from a previous session so the new query - // isn't appended to an unrelated thread. - if ((hasText || hasImages) && dataLoadingChatMessages.length > 0) { - dispatch(dfActions.clearChatMessages()); - } - } else { - setSeededChatPrompt(undefined); - setSeededChatImages(undefined); - setAutoSendSeededPrompt(false); - } } - }, [open, refreshConnectors, identityKey, initialTab, initialChatPrompt, initialChatImages]); + }, [open, refreshConnectors, identityKey]); // Storage is determined by backend config — no user toggle const isEphemeral = serverConfig.WORKSPACE_BACKEND === 'ephemeral'; @@ -1848,29 +1817,32 @@ export const UnifiedDataUploadDialog: React.FC = ( setActiveTab(`connector:${conn.id}` as UploadTabType); } }} - onStartChat={(prompt, images) => { + onStartChat={(prompt, images, attachments) => { const hasText = prompt.trim().length > 0; - const hasImages = !!images && images.length > 0; - // If a prior conversation exists, treat a - // new query from the menu as a fresh data - // reload and reset the chat. Without this - // the new prompt would be appended onto an - // unrelated thread, confusing the agent. - if ((hasText || hasImages) && dataLoadingChatMessages.length > 0) { - dispatch(dfActions.clearChatMessages()); + const hasImages = images.length > 0; + const hasAttachments = attachments.length > 0; + // Always surface the chat. 
If the user + // is starting a fresh query, clear any + // prior conversation and enqueue the new + // submission as a redux `pending` slot + // — `DataLoadingChat` consumes it on + // render and auto-sends. Doing both + // dispatches in the same tick keeps the + // handoff atomic; there's no prop race. + if (hasText || hasImages || hasAttachments) { + if (dataLoadingChatMessages.length > 0) { + dispatch(dfActions.clearChatMessages()); + } + dispatch(dfActions.setDataLoadingChatPending({ + text: prompt, images, attachments, + })); } - setSeededChatPrompt(prompt); - setSeededChatImages(images); - setAutoSendSeededPrompt(hasText || hasImages); setActiveTab('extract'); }} hasPriorConversation={dataLoadingChatMessages.length > 0} onResumeChat={() => { // Reopen the existing thread without // clearing messages or auto-sending. - setSeededChatPrompt(undefined); - setSeededChatImages(undefined); - setAutoSendSeededPrompt(false); setActiveTab('extract'); }} serverConfig={serverConfig} @@ -2403,11 +2375,7 @@ export const UnifiedDataUploadDialog: React.FC = ( {/* Extract Data Tab */} - + {/* Local Folder Tab */} diff --git a/src/views/ViewUtils.tsx b/src/views/ViewUtils.tsx index e54b9cad..c72a4a2d 100644 --- a/src/views/ViewUtils.tsx +++ b/src/views/ViewUtils.tsx @@ -143,7 +143,14 @@ const formatTemporalValue = (value: any, dataType: Type): string => { }; const formatDuration = (value: any): string => { - if (typeof value === 'number') { + if (typeof value === 'number' && Number.isFinite(value)) { + // The h/m/s format assumes the value is in milliseconds. When the value + // isn't a whole number of seconds (e.g. seconds-based columns like + // 0.083), flooring would collapse everything to "0s" and destroy the + // data — so fall back to the plain number instead of over-formatting. 
+ if (value === 0 || !Number.isInteger(value / 1_000)) { + return value.toLocaleString('en-US', { maximumFractionDigits: 4 }); + } const h = Math.floor(value / 3_600_000); const m = Math.floor((value % 3_600_000) / 60_000); const s = Math.floor((value % 60_000) / 1_000); diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx index 96d91d8c..698b5a72 100644 --- a/src/views/VisualizationView.tsx +++ b/src/views/VisualizationView.tsx @@ -15,28 +15,28 @@ import { ListItemIcon, ListItemText, MenuItem, - LinearProgress, Card, ListSubheader, Menu, CardContent, Slider, Dialog, + DialogTitle, DialogContent, TextField, - CircularProgress, Popover, + Popper, + Paper, + ClickAwayListener, Snackbar, Alert, Fade, Grow, - alpha, } from '@mui/material'; import _ from 'lodash'; -import { borderColor, transition } from '../app/tokens'; -import { WritingIndicator } from '../components/FunComponents'; +import { floatingPillSx } from '../app/tokens'; import ButtonGroup from '@mui/material/ButtonGroup'; @@ -44,26 +44,24 @@ import ButtonGroup from '@mui/material/ButtonGroup'; import '../scss/VisualizationView.scss'; import '../scss/DataView.scss'; import { useDispatch, useSelector } from 'react-redux'; -import { DataFormulatorState, dfActions, fetchChartInsight } from '../app/dfSlice'; +import { DataFormulatorState, dfActions } from '../app/dfSlice'; import { assembleVegaChart, extractFieldsFromEncodingMap, getUrls, prepVisTable, fetchWithIdentity } from '../app/utils'; import { displayRowsCache } from '../app/displayRowsCache'; -import { buildEmbeddedDataForChart } from '../app/restyle'; +import { buildEmbeddedDataForChart, applyVariantConfigUI } from '../app/restyle'; import { apiRequest } from '../app/apiClient'; import embed from 'vega-embed'; import { Chart, EncodingItem, EncodingMap, FieldItem, computeInsightKey } from '../components/ComponentType'; -import { DictTable } from "../components/ComponentType"; -import AddchartIcon from 
'@mui/icons-material/Addchart'; -import DeleteIcon from '@mui/icons-material/Delete'; import TerminalIcon from '@mui/icons-material/Terminal'; import QuestionAnswerIcon from '@mui/icons-material/QuestionAnswer'; +import TuneIcon from '@mui/icons-material/Tune'; import ContentCopyIcon from '@mui/icons-material/ContentCopy'; import ZoomInIcon from '@mui/icons-material/ZoomIn'; import ZoomOutIcon from '@mui/icons-material/ZoomOut'; -import FunctionsIcon from '@mui/icons-material/Functions'; import CasinoIcon from '@mui/icons-material/Casino'; import SaveAltIcon from '@mui/icons-material/SaveAlt'; import OpenInNewIcon from '@mui/icons-material/OpenInNew'; +import CloseIcon from '@mui/icons-material/Close'; import { AgentToyIcon, AnimatedAgentToyIcon } from './AgentToyIcon'; import { CHART_TEMPLATES, getChartTemplate } from '../components/ChartTemplates'; @@ -78,17 +76,16 @@ import 'prismjs/themes/prism.css'; //Example style, you can use another import { useTranslation } from 'react-i18next'; import { ChatDialog } from './ChatDialog'; -import { PlanStepsView } from './InteractionEntryCard'; -import { EncodingShelfThread } from './EncodingShelfThread'; +import { EncodingShelfCard } from './EncodingShelfCard'; +import { ChartQuickConfig } from './ChartQuickConfig'; +import { ChartVariantStrip } from './ChartVariantStrip'; import { CustomReactTable } from './ReactTable'; import { InsightIcon } from '../icons'; -import TableChartOutlinedIcon from '@mui/icons-material/TableChartOutlined'; import { FreeDataViewFC } from './DataView'; import { formatCellValue } from './ViewUtils'; import { dfSelectors } from '../app/dfSlice'; -import { ChartRecBox } from './ChartRecBox'; import { CodeExplanationCard, ConceptExplCards, extractConceptExplanations } from './ExplComponents'; import CodeIcon from '@mui/icons-material/Code'; @@ -239,6 +236,46 @@ export let SampleSizeEditor: FC<{ } +/** + * Recursively scale every width/height in a Vega-Lite spec by `factor`. 
+ * Used to apply the zoom resizer to style-variant specs, which bypass the + * compiler's canvas sizing. Handles numeric sizes, `{step: N}` band sizes, + * `config.view.continuousWidth/Height` (how continuous-scale charts encode + * their plot size), and nested view-composition specs (spec / layer / + * concat / facet). + */ +const scaleSpecSize = (node: any, factor: number): void => { + if (!node || typeof node !== 'object') return; + for (const dim of ['width', 'height'] as const) { + const v = node[dim]; + if (typeof v === 'number') { + node[dim] = Math.round(v * factor); + } else if (v && typeof v === 'object' && typeof v.step === 'number') { + node[dim] = { ...v, step: Math.round(v.step * factor) }; + } + } + // Continuous-scale charts (e.g. line/area with quantitative or temporal + // axes) carry no top-level numeric width/height; their plot size lives in + // config.view.continuousWidth / continuousHeight. Scale those too so the + // zoom resizer affects continuous variant charts, not just discrete ones. + const view = node.config?.view; + if (view && typeof view === 'object') { + for (const dim of ['continuousWidth', 'continuousHeight'] as const) { + if (typeof view[dim] === 'number') { + view[dim] = Math.round(view[dim] * factor); + } + } + } + for (const key of ['spec', 'layer', 'concat', 'hconcat', 'vconcat', 'facet'] as const) { + const child = node[key]; + if (Array.isArray(child)) { + child.forEach(c => scaleSpecSize(c, factor)); + } else if (child && typeof child === 'object') { + scaleSpecSize(child, factor); + } + } +}; + /** Main chart uses vega-embed (interactive tooltips). Static toSVG() removes hover behavior. */ const VegaChartRenderer: FC<{ chart: Chart; @@ -291,6 +328,22 @@ const VegaChartRenderer: FC<{ ); spec.data = { values: variantValues }; + // Apply the variant's generative-UI controls (agent-authored simple + // knobs) onto the spec using the user's current values. 
This is a + // pure "set value at path" transform (no code execution) and runs + // before size scaling so a control that touches width/height is + // still scaled by the resizer. See applyVariantConfigUI. + spec = applyVariantConfigUI(spec, activeVariant.configUI, activeVariant.configValues); + + // Variants bypass assembleVegaChart, so the zoom resizer's + // scaleFactor (which normally flows through the compiler's canvas + // sizing) wouldn't affect them. Apply it directly by scaling every + // width/height in the stored spec — numeric sizes and {step: N} + // band sizes alike — so the resizer works on restyled charts too. + if (scaleFactor !== 1) { + scaleSpecSize(spec, scaleFactor); + } + } else { spec = assembleVegaChart( chart.chartType, @@ -312,17 +365,6 @@ const VegaChartRenderer: FC<{ return; } - // Seed chart config with heuristic-computed defaults for properties - // the user hasn't explicitly set (e.g. independentYAxis toggle). - // Variants don't carry computed config — the agent's spec is final. 
- if (!activeVariant && spec._computedConfig) { - for (const [key, value] of Object.entries(spec._computedConfig)) { - if (chart.config?.[key] === undefined) { - dispatch(dfActions.updateChartConfig({ chartId: chart.id, key, value })); - } - } - } - spec['background'] = 'white'; // Inject the insight title into the Vega-Lite spec instead of rendering @@ -351,7 +393,7 @@ const VegaChartRenderer: FC<{ const embedResult: { current?: Awaited> } = {}; el.innerHTML = ''; - embed(el, { ...spec }, { actions: true, renderer: 'canvas' }) + embed(el, { ...spec }, { actions: false, renderer: 'canvas' }) .then((result) => { if (cancelled) { result.finalize(); @@ -405,8 +447,19 @@ const VegaChartRenderer: FC<{ id={elementId} sx={{ maxWidth: '100%', - overflow: 'visible', - '& .vega-embed': { margin: 'auto', overflow: 'visible' }, + overflow: 'hidden', + // vega-embed adds its `.vega-embed` class to THIS element (the + // div we pass to embed()) and renders the / as a + // direct child. Vega writes explicit inline width/height (in CSS + // px) on that canvas/svg, so we must override them with + // !important to let the chart shrink to the panel width while + // keeping its aspect ratio (height: auto). A descendant + // `.vega-embed` selector would NOT match — the class is on this + // element itself, not a child. + '& > canvas, & > svg': { + maxWidth: '100%', + height: 'auto !important', + }, }} /> @@ -431,9 +484,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { let focusedChartId = focusedId?.type === 'chart' ? focusedId.chartId : undefined; let chartSynthesisInProgress = useSelector((state: DataFormulatorState) => state.chartSynthesisInProgress) || []; - let synthesisRunning = focusedChartId ? 
chartSynthesisInProgress.includes(focusedChartId) : false; - let handleDeleteChart = () => { focusedChartId && dispatch(dfActions.deleteChartById(focusedChartId)) } - // Track the assembled Vega-Lite spec from the renderer so we can open it in the Vega Editor const [renderedSpec, setRenderedSpec] = useState(null); const handleSpecReady = useCallback((spec: any | null) => { setRenderedSpec(spec); }, []); @@ -470,15 +520,24 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems); - const [bottomTab, setBottomTab] = useState('data'); + const [codeDialogOpen, setCodeDialogOpen] = useState(false); const [localScaleFactor, setLocalScaleFactor] = useState(1); const [chatDialogOpen, setChatDialogOpen] = useState(false); + // Floating encoding-shelf popover. The button lives in the stable outer + // panel (not inside the chart's ), so it never remounts or shifts + // when the chart re-renders. We anchor the popover to that button via a ref. + const [encodingOpen, setEncodingOpen] = useState(false); + const editButtonRef = useRef(null); // Reset local UI state when focused chart changes useEffect(() => { - setBottomTab('data'); - setLocalScaleFactor(1); + setCodeDialogOpen(false); + // Restore the persisted zoom for the newly focused chart (stored on + // the Chart object so it survives switching charts and session + // save/load). Falls back to 1 for charts that have never been zoomed. + setLocalScaleFactor(focusedChart?.scaleFactor ?? 
1); setChatDialogOpen(false); + setEncodingOpen(false); }, [focusedChartId]); @@ -661,11 +720,11 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { let triggerTable = tables.find(t => t.derive?.trigger?.chart?.id == focusedChart?.id); - // Chart insight - const chartInsightInProgress = useSelector((state: DataFormulatorState) => state.chartInsightInProgress) || []; - const insightLoading = chartInsightInProgress.includes(focusedChart.id); - const currentInsightKey = computeInsightKey(focusedChart); - const insightFresh = focusedChart.insight?.key === currentInsightKey; + // Chart title: surfaced as the rendered chart heading. The title is kept + // only while its key matches the chart's current encoded fields (chartType + // + field ids), so it stays through property edits (e.g. sort order) but is + // dropped once the encoded fields change. + const titleFresh = !!focusedChart.title && focusedChart.titleKey === computeInsightKey(focusedChart); const actionBtnSx = { padding: '4px', @@ -681,18 +740,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { }, }; - let deleteButton = ( - - - { handleDeleteChart() }}> - - - - - ); - let transformCode = ""; if (table.derive?.code) { transformCode = `${table.derive.code}` @@ -707,6 +754,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { @@ -715,85 +763,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { ); - // Toggle buttons for bottom-panel content (icon + text label) - const toggleBtnSx = (active: boolean) => ({ - textTransform: 'none' as const, - fontSize: '0.7rem', - padding: '2px 8px', - borderRadius: '6px', - color: active ? 'primary.main' : 'text.secondary', - backgroundColor: active ? 
'rgba(25, 118, 210, 0.08)' : 'transparent', - transition: 'all 0.15s ease', - minWidth: 'auto', - '& .MuiButton-startIcon': { mr: 0.5 }, - '&:hover': { - backgroundColor: 'rgba(25, 118, 210, 0.08)', - color: 'primary.main', - }, - }); - - let dataButton = ( - - ); - - let derivedTableItems = hasDerived ? [ - , - ...(hasConcepts ? [ - - ] : []), - ] : []; - - let logButton = hasDerived ? ( - - - setChatDialogOpen(true)}> - - - - - ) : null; - - let insightButton = (!chartUnavailable && focusedChart.chartType !== "Table") ? ( - - ) : null; - - let chartActionButtons = [ - dataButton, - insightButton, - ...derivedTableItems, - , - logButton, - // vegaEditorButton, - deleteButton, - ] - - let chartMessage = ""; if (focusedChart.chartType == "Table") { chartMessage = t('chart.msgTable'); @@ -808,7 +777,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { } else if (table.derive) { chartMessage = t('chart.msgWarning'); } - let chartActionItems = isDataStale ? [] : ( {(table.virtual ? activeVisTableTotalRowCount > serverConfig.MAX_DISPLAY_ROWS : table.rows.length > serverConfig.MAX_DISPLAY_ROWS) && !(chartUnavailable || encodingShelfEmpty) ? ( @@ -844,8 +812,8 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { let focusedComponent = []; - let focusedElement = + let focusedElement = {/* Chart container chrome @@ -854,12 +822,11 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { positioned zoom-slider overlay (chartResizer, ~32px tall anchored top-left) never covers chart content. Without this, full-width charts like KPI grids run right up under the slider. - - pr: 28 → reserves a strip on the right for vega-embed's - actions menu ("..."), which floats at the top-right of the - Vega canvas and can otherwise hug / extend past the panel edge. - - minHeight: 280 → guarantees the Vega actions menu and its - dropdown have vertical room to render even when a chart's - intrinsic height is very small (e.g. 
one row of compact cards). + - pr: 28 → reserves a strip on the right for the floating + "edit chart" button overlay (see the focused-box in `content`). + - minHeight: 280 → guarantees the chart has vertical room to + render even when a chart's intrinsic height is very small + (e.g. one row of compact cards). These are view-level concerns and intentionally NOT solved per chart template. */} @@ -875,36 +842,42 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { scaleFactor={localScaleFactor} maxStretchFactor={config.maxStretchFactor} chartUnavailable={chartUnavailable} - insightTitle={insightFresh && focusedChart.insight?.title ? focusedChart.insight.title : undefined} + insightTitle={titleFresh ? focusedChart.title : undefined} onSpecReady={handleSpecReady} /> + {/* Quick chart-config controls (toggles/sliders/selects) for + fast in-place tweaks without opening the full encoding + popover. Kept INSIDE the chart-box so it reads as part of + the same chart component rather than drifting down toward + the data panel below. The bar also hosts the built-in + delete-chart action, so it always renders even when there + are no property controls (e.g. Table/Auto charts or while + synthesis is running — in which case property controls are + suppressed but delete stays reachable). */} + {chartActionItems} - + ; focusedComponent = [ + {/* Style-variant switcher now lives in the floating top toolbar + (see vis-view-canvas return) so it stays pinned alongside the + zoom resizer instead of scrolling with the chart content. 
*/} {focusedElement} - - {chartActionButtons} - , {(() => { - const panelBoxSx = { - margin: '8px auto 24px auto', padding: '8px', borderRadius: '8px', - border: `1px solid ${borderColor.divider}`, - transition: 'box-shadow 0.2s ease', - '&:hover': { boxShadow: '0 0 8px rgba(25, 118, 210, 0.25)' }, - }; return - {bottomTab === 'data' && (() => { + {(() => { const ROW_HEIGHT = 25; const HEADER_HEIGHT = 32; const FOOTER_HEIGHT = 32; @@ -932,107 +905,15 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { return sum + Math.max(80, Math.min(280, contentLen * 10)) + 60; }, ROW_ID_COL_WIDTH); const SCROLLBAR_WIDTH = 17; - const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16)); + // +34px gutter so the maximize button can sit just outside the table on the right. + const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16)) + 34; return ( - - + + ); })()} - {bottomTab === 'code' && hasDerived && ( - - - {(() => { - const derive = triggerTable?.derive || table.derive; - const interaction = derive?.trigger?.interaction; - const lastEntry = interaction?.[interaction.length - 1]; - const plan = lastEntry?.plan || ''; - const planSteps = plan ? (plan.includes('\x1E') ? plan.split('\x1E') : plan.split('\n')).filter((s: string) => s.trim()) : []; - if (planSteps.length > 0) { - return ( - - - {t('chart.agentLog')} - - - - ); - } - return null; - })()} - - - - )} - {bottomTab === 'concepts' && hasConcepts && ( - - - - )} - {bottomTab === 'insight' && ( - - {insightLoading ? ( - - - - ) : insightFresh && focusedChart.insight ? 
( - - - {(focusedChart.insight.takeaways || []).map((takeaway, i) => ( - alpha(theme.palette.background.paper, 0.5), - transition: transition.normal, - '&:hover': { - backgroundColor: (theme) => alpha(theme.palette.primary.main, 0.04), - }, - }}> - - {takeaway} - - - ))} - - - - ) : ( - - - {t('chart.noInsightAvailable')} - - - - )} - - )} ; })()} , @@ -1041,69 +922,210 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) { handleCloseDialog={() => setChatDialogOpen(false)} code={transformCode} dialog={triggerTable?.derive?.dialog || table.derive?.dialog as any[]} /> : null, + // Code inspector: derivation code + formula/concept metadata, opened from + // the floating top-right cluster. A clickaway/close dialog (not a bottom + // tab) so the bottom panel stays a pure data table. + hasDerived ? ( + setCodeDialogOpen(false)} + sx={{ '& .MuiDialog-paper': { maxHeight: '90%' } }} + maxWidth="md" fullWidth> + + + + {t('chart.code')} + + setCodeDialogOpen(false)}> + + + + + {hasConcepts && ( + + + {t('chart.derivedConcepts')} + + + + )} + + + + ) : null, ] - const ENCODING_SHELF_WIDTH = 240; - let content = [ - + {focusedComponent} , - /* Floating encoding shelf panel */ - - - - - + /* Encoding shelf popover, anchored to the floating "edit chart" button. + Rendered as a non-modal Popper (not a Modal-based Popover) so it does + NOT mount a full-viewport backdrop/focus-trap. That backdrop used to + swallow pointer events outside the panel, which broke dragging fields + from the data table into the encoding channels while the shelf is + open. A ClickAwayListener keeps the "click outside closes it" + behavior. It listens on `onMouseUp` (mirroring EncodingBox): MUI + menus/selects portal to document.body but remain REACT descendants of + this listener, so their events bubble through the React tree on + mouseUp (before the menu closes on click) and are correctly treated as + "inside" — picking a chart type therefore does not collapse the shelf. 
+ A native HTML5 drag from the table fires no mouseUp, so dragging a + field in does not close the shelf either. */ + + setEncodingOpen(false)} + > + + + {/* Footer: low-emphasis link to inspect the assembled + Vega-Lite spec in the external Vega editor. */} + + + + + + ] let [scaleMin, scaleMax] = [0.2, 2.4] + // Persist the zoom onto the chart so it survives switching charts. + // Called on commit (button click / slider release) rather than on every + // drag tick, to avoid churning the charts array ref mid-drag. + const persistScaleFactor = React.useCallback((value: number) => { + if (!focusedChartId) return; + dispatch(dfActions.updateChartScaleFactor({ + chartId: focusedChartId, + scaleFactor: value, + })); + }, [dispatch, focusedChartId]); + // Memoize chart resizer to avoid re-creating Material-UI components on every render let chartResizer = useMemo(() => - { - setLocalScaleFactor(s => Math.max(scaleMin, Math.round((s - 0.1) * 10) / 10)); + { + const next = Math.max(scaleMin, Math.round((localScaleFactor - 0.1) * 10) / 10); + setLocalScaleFactor(next); + persistScaleFactor(next); }}> { - setLocalScaleFactor(newValue as number); - }} /> + value={localScaleFactor} + onChange={(event: Event, newValue: number | number[]) => { + setLocalScaleFactor(newValue as number); + }} + onChangeCommitted={(event, newValue) => { + persistScaleFactor(newValue as number); + }} /> - = scaleMax} onClick={() => { - setLocalScaleFactor(s => Math.min(scaleMax, Math.round((s + 0.1) * 10) / 10)); + = scaleMax} onClick={() => { + const next = Math.min(scaleMax, Math.round((localScaleFactor + 0.1) * 10) / 10); + setLocalScaleFactor(next); + persistScaleFactor(next); }}> - , [localScaleFactor, t]); - - return - {synthesisRunning ? - - : ''} - {chartUnavailable ? 
"" : chartResizer} + , [localScaleFactor, t, persistScaleFactor]); + + return + {/* No full-screen block while the agent works: the previous chart + stays visible, and progress is signaled non-intrusively on the + chat box + encoding shelf (see EncodingShelfCard). */} + {/* Floating top toolbar: zoom resizer + style-variant strip live + together here (NOT inside the scrolling chart content), so every + control stays pinned to the top of the panel instead of some + floating and some scrolling away. pointerEvents are disabled on the + empty bar area so it never blocks chart interaction underneath. */} + *': { pointerEvents: 'auto' }, + }}> + {chartResizer} + {focusedChart && focusedChart.chartType !== 'Table' && focusedChart.chartType !== 'Auto' && ( + + )} + {/* Right-aligned floating cluster near the top-right: "inspect / + edit this chart" controls grouped together (agent log + code + + encoding shelf). Chart deletion lives in the chart property-config + bar below the chart. */} + + {hasDerived && ( + + setChatDialogOpen(true)} + sx={floatingPillSx}> + + + + )} + {/* Code inspector button — opens the derivation code + formula + metadata in a dialog. Only shown for derived tables. */} + {hasDerived && ( + + setCodeDialogOpen(true)} + sx={floatingPillSx}> + + + + )} + {/* Edit-chart (encoding shelf) button — opens the encoding shelf + popover; stays available even when the chart can't render yet, + so users can fix the encoding. */} + {focusedChart && focusedChart.chartType !== 'Table' && focusedChart.chartType !== 'Auto' && ( + + setEncodingOpen(o => !o)} + sx={{ + ...floatingPillSx, + ...(encodingOpen ? 
{ + backgroundColor: 'primary.main', + color: 'primary.contrastText', + '&:hover': { backgroundColor: 'primary.dark', color: 'primary.contrastText' }, + } : {}), + }}> + + + + )} + + {content} } @@ -1210,7 +1232,7 @@ export const VisualizationViewFC: FC = function VisualizationView } return ( - + @@ -1239,17 +1261,7 @@ export const VisualizationViewFC: FC = function VisualizationView const hasThread = hasRealCharts || hasDerivation; if (hasThread) { - return ( - <> - {focusedTableId ? : null} - - - {t('chart.orStartWithChartType')} - - - {chartSelectionBox} - - ); + return chartSelectionBox; } return ; })()} @@ -1281,18 +1293,15 @@ export const VisualizationViewFC: FC = function VisualizationView return sum + Math.max(80, Math.min(280, contentLen * 10)) + 60; }, ROW_ID_COL_WIDTH); const SCROLLBAR_WIDTH = 17; - const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16)); + // +34px gutter so the maximize button can sit just outside the table on the right. + const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16)) + 34; return ( - + ); })()} diff --git a/src/views/dataLoadingSuggestions.ts b/src/views/dataLoadingSuggestions.ts index 8d91b92b..f37e04f0 100644 --- a/src/views/dataLoadingSuggestions.ts +++ b/src/views/dataLoadingSuggestions.ts @@ -22,6 +22,12 @@ export interface DataLoadingSuggestion { onClick: () => void; } +export interface SuggestionPayload { + text: string; + images: string[]; + attachments: string[]; +} + export interface BuildSuggestionsArgs { t: TFunction; setInput: (value: string) => void; @@ -29,12 +35,22 @@ export interface BuildSuggestionsArgs { setAttachments: (names: string[]) => void; /** Optional hook that workspaces use to make sure a session exists before uploading. */ ensureActiveWorkspace?: () => void; + /** + * Optional auto-run hook. 
When provided, suggestions submit the + * complete payload immediately (after any required async upload / + * data-URL prep) instead of just pre-filling the input. Callers + * typically wire this to a redux pending-submission dispatch so the + * payload survives the parent→child handoff without prop races. + * When absent, the suggestion behaves like a paste: it only fills + * the input fields via the `set*` callbacks. + */ + requestAutoSend?: (payload: SuggestionPayload) => void; } const EXCEL_SAMPLE_NAME = 'climate-gas-indicator.xlsx'; export function buildDataLoadingSuggestions( - { t, setInput, setImages, setAttachments, ensureActiveWorkspace }: BuildSuggestionsArgs, + { t, setInput, setImages, setAttachments, ensureActiveWorkspace, requestAutoSend }: BuildSuggestionsArgs, ): DataLoadingSuggestion[] { const kindAsk = t('upload.agentChatSuggestion.kind.ask', { defaultValue: 'ask' }); const kindFind = t('upload.agentChatSuggestion.kind.find', { defaultValue: 'find' }); @@ -61,37 +77,38 @@ export function buildDataLoadingSuggestions( const iconSx = { fontSize: 14 }; + // Common: fill the input fields AND (if auto-run is enabled) submit + // the payload. Centralising the dual behaviour keeps every + // suggestion below short and consistent. 
+ const fillAndMaybeSend = (payload: SuggestionPayload) => { + setImages(payload.images); + setAttachments(payload.attachments); + setInput(payload.text); + requestAutoSend?.(payload); + }; + return [ { kind: kindAsk, label: askLabel, icon: React.createElement(QuestionAnswerOutlinedIcon, { sx: iconSx }), - onClick: () => { - setImages([]); - setAttachments([]); - setInput(askLabel); - }, + onClick: () => fillAndMaybeSend({ text: askLabel, images: [], attachments: [] }), }, { kind: kindFind, label: findLabel, icon: React.createElement(SearchIcon, { sx: iconSx }), - onClick: () => { - setImages([]); - setAttachments([]); - setInput(findLabel); - }, + onClick: () => fillAndMaybeSend({ text: findLabel, images: [], attachments: [] }), }, { kind: kindExtract, label: extractExcelLabel, icon: React.createElement(TableChartOutlinedIcon, { sx: iconSx }), onClick: () => { - // Surface the attachment chip synchronously so it is - // always present when the user hits send, even if the - // upload below is still mid-flight. The chip is what - // gets serialised into the outgoing `[Uploaded: name]` - // mention and ultimately the chat bubble. + // Surface the attachment chip / input synchronously so + // it is visible during the async upload. The auto-send + // (if enabled) waits until the upload completes so the + // backend can actually find the scratch file. setImages([]); setAttachments([EXCEL_SAMPLE_NAME]); setInput(extractExcelLabel); @@ -108,6 +125,18 @@ export function buildDataLoadingSuggestions( method: 'POST', body: formData, }); }) + .then(({ data }) => { + // The backend hash-suffixes the filename, so use the + // server-assigned name for the chip and the mention + // — otherwise the agent looks for a file that the + // upload renamed and reports it missing. 
+ const scratchName = (data?.path || `scratch/${EXCEL_SAMPLE_NAME}`).replace(/^scratch\//, ''); + setAttachments([scratchName]); + requestAutoSend?.({ + text: extractExcelLabel, images: [], + attachments: [scratchName], + }); + }) .catch(err => console.error('Sample Excel upload failed:', err)); }, }, @@ -116,16 +145,21 @@ export function buildDataLoadingSuggestions( label: extractImageLabel, icon: React.createElement(ImageOutlinedIcon, { sx: iconSx }), onClick: () => { + // Image needs to be read into a data URL before we can + // surface it as a chip or send it. Defer auto-send until + // the FileReader resolves. fetch(exampleImageTable) .then(res => res.blob()) .then(blob => { const reader = new FileReader(); reader.onload = () => { - if (reader.result) { - setImages([reader.result as string]); - setAttachments([]); - setInput(extractImageLabel); - } + if (!reader.result) return; + const dataUrl = reader.result as string; + fillAndMaybeSend({ + text: extractImageLabel, + images: [dataUrl], + attachments: [], + }); }; reader.readAsDataURL(blob); }); @@ -135,11 +169,7 @@ export function buildDataLoadingSuggestions( kind: kindExtract, label: extractTextLabel, icon: React.createElement(DescriptionOutlinedIcon, { sx: iconSx }), - onClick: () => { - setImages([]); - setAttachments([]); - setInput(extractTextPrompt); - }, + onClick: () => fillAndMaybeSend({ text: extractTextPrompt, images: [], attachments: [] }), }, ]; } diff --git a/src/views/threadLayout.ts b/src/views/threadLayout.ts new file mode 100644 index 00000000..fa793f2c --- /dev/null +++ b/src/views/threadLayout.ts @@ -0,0 +1,39 @@ +// Single source of truth for DataThread column geometry. +// +// Both the DataThread panel (which renders the thread columns) and +// DataFormulator (which snaps the resizable Allotment pane to whole-column +// widths) must agree on these values, otherwise the pane snap points won't +// line up with the actual rendered columns. Keep all width/padding tuning +// here. 
+ +/** Visual width of a single thread card / column (px). */ +export const CARD_WIDTH = 248; + +/** Horizontal gap between adjacent columns (px). */ +export const CARD_GAP = 8; + +/** Total horizontal padding inside the thread panel (left + right, px). */ +export const PANEL_PADDING = 32; + +/** Max number of columns the thread panel will ever lay out. */ +export const MAX_THREAD_COLUMNS = 3; + +/** + * Pixel width required to display exactly `n` columns: + * n cards + (n-1) gaps + panel padding. + */ +export const threadPaneWidth = (n: number): number => + n * CARD_WIDTH + Math.max(0, n - 1) * CARD_GAP + PANEL_PADDING; + +/** + * How many whole columns fit within `containerWidth`, clamped to + * [1, MAX_THREAD_COLUMNS]. Inverse of `threadPaneWidth`. + */ +export const fittableThreadColumns = (containerWidth: number): number => + Math.max( + 1, + Math.min( + MAX_THREAD_COLUMNS, + Math.floor((containerWidth - PANEL_PADDING + CARD_GAP) / (CARD_WIDTH + CARD_GAP)), + ), + ); diff --git a/src/views/experienceContext.ts b/src/views/workflowContext.ts similarity index 98% rename from src/views/experienceContext.ts rename to src/views/workflowContext.ts index 98ec1c80..dac6e006 100644 --- a/src/views/experienceContext.ts +++ b/src/views/workflowContext.ts @@ -2,8 +2,8 @@ // Licensed under the MIT License. /** - * experienceContext — pure helpers that turn DataFormulator state into - * the timeline payload sent to `/api/knowledge/distill-experience`. + * workflowContext — pure helpers that turn DataFormulator state into + * the timeline payload sent to `/api/knowledge/distill-workflow`. * * No React, no Redux. 
Used by: * - SessionDistill.collectSessionThreads (live distillation) diff --git a/tests/backend/agents/test_agent_knowledge_integration.py b/tests/backend/agents/test_agent_knowledge_integration.py deleted file mode 100644 index 3efc65df..00000000 --- a/tests/backend/agents/test_agent_knowledge_integration.py +++ /dev/null @@ -1,278 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Tests for DataAgent knowledge integration (Phase 3). - -Covers: -- Rules from KnowledgeStore injected into system prompt -- Both file-based rules and text-based rules coexist -- No rules → no User Rules section -- Library knowledge search and injection -- No matches → no injection -- Max 5 items limit -- search_knowledge / read_knowledge tool handlers -- Tool path traversal rejection -- Graceful degradation when knowledge store is unavailable -- Reasoning log records knowledge_search and knowledge_injected -""" - -from __future__ import annotations - -import os -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -from data_formulator.agents.data_agent import DataAgent - -pytestmark = [pytest.mark.backend] - -TEST_IDENTITY = "user:test-knowledge@example.com" - - -RULE_MD = """\ ---- -title: ROI Standard -tags: [finance] -created: 2026-04-26 -updated: 2026-04-26 ---- - -ROI = (revenue - cost) / cost -""" - -SKILL_MD = """\ ---- -title: Handle Missing Values -tags: [cleaning, pandas] -created: 2026-04-26 -updated: 2026-04-26 -source: agent_summarized ---- - -When encountering missing values, use fillna with median. 
-""" - - -@pytest.fixture() -def user_home(tmp_path): - """Prepare a user_home with knowledge entries.""" - rules_dir = tmp_path / "knowledge" / "rules" - rules_dir.mkdir(parents=True) - (rules_dir / "roi.md").write_text(RULE_MD, encoding="utf-8") - - exp_dir = tmp_path / "knowledge" / "experiences" / "cleaning" - exp_dir.mkdir(parents=True) - (exp_dir / "missing.md").write_text(SKILL_MD, encoding="utf-8") - - return tmp_path - - -@pytest.fixture() -def mock_client(): - c = MagicMock() - c.model = "test-model" - c.endpoint = "openai" - c.params = {"api_key": "test-key"} - return c - - -@pytest.fixture() -def mock_workspace(): - ws = MagicMock() - ws.get_fresh_name = MagicMock(return_value="test-table") - ws.user_home = None - return ws - - -def _make_agent(mock_client, mock_workspace, user_home, **kwargs): - mock_workspace.user_home = user_home - return DataAgent( - client=mock_client, - workspace=mock_workspace, - **kwargs, - ) - - -# ── Rules injection ────────────────────────────────────────────────────── - - -class TestRulesInjection: - def test_rules_injected_into_system_prompt(self, mock_client, mock_workspace, user_home): - agent = _make_agent(mock_client, mock_workspace, user_home) - prompt = agent._build_system_prompt() - assert "User Rules" in prompt - assert "MANDATORY" in prompt - assert "ROI Standard" in prompt - assert "ROI = (revenue - cost) / cost" in prompt - # User rules should appear BEFORE technical reference material - rules_pos = prompt.index("User Rules") - assert "Chart Creation Guide" in prompt - chart_guide_pos = prompt.index("Chart Creation Guide") - assert rules_pos < chart_guide_pos, ( - "User Rules must be injected before chart guide for higher attention" - ) - - def test_text_rules_and_knowledge_rules_coexist( - self, mock_client, mock_workspace, user_home - ): - agent = _make_agent( - mock_client, mock_workspace, user_home, - agent_exploration_rules="Always explain your reasoning", - ) - prompt = agent._build_system_prompt() - assert 
"Always explain your reasoning" in prompt - assert "ROI Standard" in prompt - - def test_no_rules_no_section(self, mock_client, mock_workspace, tmp_path): - (tmp_path / "knowledge" / "rules").mkdir(parents=True) - agent = _make_agent(mock_client, mock_workspace, tmp_path) - prompt = agent._build_system_prompt() - assert "User Rules" not in prompt - - def test_no_knowledge_store_graceful(self, mock_client, mock_workspace): - mock_workspace.user_home = None - agent = DataAgent( - client=mock_client, - workspace=mock_workspace, - ) - prompt = agent._build_system_prompt() - assert "User Rules" not in prompt - - -# ── Library knowledge injection ─────────────────────────────────────────── - - -class TestKnowledgeSearchInjection: - def test_relevant_knowledge_injected(self, mock_client, mock_workspace, user_home): - agent = _make_agent(mock_client, mock_workspace, user_home) - input_tables = [{"name": "sales_data"}] - messages = agent._build_initial_messages( - input_tables, "How to handle missing values?", - ) - user_msg = messages[1]["content"] - if isinstance(user_msg, list): - user_msg = "\n".join(p.get("text", "") for p in user_msg if p.get("type") == "text") - assert "[RELEVANT KNOWLEDGE]" in user_msg or agent._injected_knowledge == [] - - def test_no_match_no_injection(self, mock_client, mock_workspace, user_home): - agent = _make_agent(mock_client, mock_workspace, user_home) - input_tables = [{"name": "xyz_table"}] - messages = agent._build_initial_messages( - input_tables, "xyznonexistent query", - ) - user_msg = messages[1]["content"] - if isinstance(user_msg, list): - user_msg = "\n".join(p.get("text", "") for p in user_msg if p.get("type") == "text") - assert agent._injected_knowledge == [] - - def test_max_five_items(self, mock_client, mock_workspace, tmp_path): - rules_dir = tmp_path / "knowledge" / "rules" - rules_dir.mkdir(parents=True) - exp_dir = tmp_path / "knowledge" / "experiences" / "common" - exp_dir.mkdir(parents=True) - for i in range(10): - 
(exp_dir / f"exp-{i}.md").write_text( - f"---\ntitle: Common Experience {i}\ntags: [common]\n" - f"created: 2026-04-26\nupdated: 2026-04-26\n---\n" - f"Content about common topic {i}.\n", - encoding="utf-8", - ) - - agent = _make_agent(mock_client, mock_workspace, tmp_path) - results = agent._search_relevant_knowledge("common topic", []) - assert len(results) <= 5 - - -# ── Tool handlers ───────────────────────────────────────────────────────── - - -class TestKnowledgeToolHandlers: - def test_search_knowledge_returns_results(self, mock_client, mock_workspace, user_home): - agent = _make_agent(mock_client, mock_workspace, user_home) - result = agent._handle_search_knowledge({"query": "missing values"}) - assert "Handle Missing Values" in result - - def test_search_knowledge_no_match(self, mock_client, mock_workspace, user_home): - agent = _make_agent(mock_client, mock_workspace, user_home) - result = agent._handle_search_knowledge({"query": "xyznonexistent"}) - assert "No matching" in result - - def test_read_knowledge_returns_content(self, mock_client, mock_workspace, user_home): - agent = _make_agent(mock_client, mock_workspace, user_home) - result = agent._handle_read_knowledge( - {"category": "rules", "path": "roi.md"} - ) - assert "ROI = (revenue - cost) / cost" in result - - def test_read_knowledge_not_found(self, mock_client, mock_workspace, user_home): - agent = _make_agent(mock_client, mock_workspace, user_home) - result = agent._handle_read_knowledge( - {"category": "rules", "path": "ghost.md"} - ) - assert "not found" in result - - def test_read_knowledge_traversal_rejected(self, mock_client, mock_workspace, user_home): - agent = _make_agent(mock_client, mock_workspace, user_home) - result = agent._handle_read_knowledge( - {"category": "rules", "path": "../../etc/passwd.md"} - ) - assert "Invalid path" in result or "not found" in result.lower() - - def test_no_knowledge_store_returns_message(self, mock_client, mock_workspace): - mock_workspace.user_home = 
None - agent = DataAgent(client=mock_client, workspace=mock_workspace) - result = agent._handle_search_knowledge({"query": "anything"}) - assert "not available" in result - - result = agent._handle_read_knowledge({"category": "rules", "path": "file.md"}) - assert "not available" in result - - -# ── Graceful degradation ────────────────────────────────────────────────── - - -class TestGracefulDegradation: - def test_agent_works_without_knowledge(self, mock_client, mock_workspace): - """Agent with no user_home still constructs valid system prompt.""" - mock_workspace.user_home = None - agent = DataAgent( - client=mock_client, - workspace=mock_workspace, - ) - prompt = agent._build_system_prompt() - assert "data exploration agent" in prompt - - def test_empty_knowledge_dir(self, mock_client, mock_workspace, tmp_path): - """Agent with empty knowledge dir works normally.""" - (tmp_path / "knowledge" / "rules").mkdir(parents=True) - (tmp_path / "knowledge" / "experiences").mkdir(parents=True) - agent = _make_agent(mock_client, mock_workspace, tmp_path) - prompt = agent._build_system_prompt() - assert "User Rules" not in prompt - - -# ── Reasoning log integration ───────────────────────────────────────────── - - -class TestReasoningLogIntegration: - @patch.dict(os.environ, {"DF_AGENT_LOG": "on"}) - def test_session_start_includes_rules(self, mock_client, mock_workspace, user_home, tmp_path): - """session_start log event should be written (file-based check).""" - with patch.dict(os.environ, {"DATA_FORMULATOR_HOME": str(tmp_path)}): - agent = _make_agent( - mock_client, mock_workspace, user_home, - identity_id=TEST_IDENTITY, - ) - rlog = agent._reasoning_log - rlog.log( - "session_start", - rules_injected=["ROI Standard"], - knowledge_injected=agent._injected_knowledge, - ) - rlog.close() - # Logs are now stored system-level under DATA_FORMULATOR_HOME/agent-logs/ - logs_dir = tmp_path / "agent-logs" - jsonl_files = list(logs_dir.rglob("*.jsonl")) - assert len(jsonl_files) 
>= 1 diff --git a/tests/backend/agents/test_client_utils.py b/tests/backend/agents/test_client_utils.py index 02872a56..6fb81567 100644 --- a/tests/backend/agents/test_client_utils.py +++ b/tests/backend/agents/test_client_utils.py @@ -233,3 +233,169 @@ def test_gemini_prefix_applied_via_from_config(self): cfg = {"endpoint": "gemini", "model": "gemini-pro", "api_key": "k"} c = Client.from_config(cfg) assert c.model.startswith("gemini/") + + +# --------------------------------------------------------------------------- +# Ollama content-JSON -> tool_call salvage +# --------------------------------------------------------------------------- + +import json as _json +from types import SimpleNamespace + +from data_formulator.agents.client_utils import ( + _extract_json_objects, + _match_tool_from_obj, + _salvage_tool_calls_from_content, +) + + +def _core_action_tools(): + """The visualize / ask_user / delegate / execute_python_script schemas the + matcher disambiguates between.""" + return [ + {"type": "function", "function": { + "name": "execute_python_script", + "parameters": {"type": "object", + "properties": {"purpose": {"type": "string"}, + "code": {"type": "string"}}, + "required": ["purpose", "code"]}}}, + {"type": "function", "function": { + "name": "visualize", + "parameters": {"type": "object", + "properties": {"code": {"type": "string"}, + "output_variable": {"type": "string"}, + "chart": {"type": "object"}, + "title": {"type": "string"}}, + "required": ["code", "output_variable", "chart"]}}}, + {"type": "function", "function": { + "name": "ask_user", + "parameters": {"type": "object", + "properties": {"thought": {"type": "string"}, + "questions": {"type": "array"}}, + "required": ["questions"]}}}, + {"type": "function", "function": { + "name": "delegate", + "parameters": {"type": "object", + "properties": {"target": {"type": "string"}, + "options": {"type": "array"}}, + "required": ["target", "options"]}}}, + ] + + +class TestExtractJsonObjects: + def 
test_extracts_single_object(self): + assert _extract_json_objects('{"a": 1}') == ['{"a": 1}'] + + def test_ignores_braces_inside_strings(self): + text = '{"code": "x = {1: 2}; y = \\"}\\""}' + objs = _extract_json_objects(text) + assert len(objs) == 1 + assert _json.loads(objs[0])["code"] == 'x = {1: 2}; y = "}"' + + def test_extracts_object_from_markdown_fence(self): + text = 'Sure:\n```json\n{"tool": "visualize"}\n```\n' + objs = _extract_json_objects(text) + assert objs == ['{"tool": "visualize"}'] + + def test_no_object_returns_empty(self): + assert _extract_json_objects("just prose, no json") == [] + + +class TestMatchToolFromObj: + def test_explicit_wrapper_name_and_arguments(self): + obj = {"tool": "visualize", + "arguments": {"code": "df=1", "output_variable": "df", + "chart": {}}} + name, args = _match_tool_from_obj(obj, _core_action_tools()) + assert name == "visualize" + assert args["output_variable"] == "df" + + def test_bare_visualize_args_match_visualize_not_execute(self): + obj = {"output_variable": "t", "code": "df=1", "chart": {}} + name, _ = _match_tool_from_obj(obj, _core_action_tools()) + assert name == "visualize" + + def test_bare_execute_args_match_execute(self): + obj = {"purpose": "peek", "code": "print(1)"} + name, _ = _match_tool_from_obj(obj, _core_action_tools()) + assert name == "execute_python_script" + + def test_ask_user_shape(self): + obj = {"thought": "clarify", "questions": [{"text": "which?"}]} + name, _ = _match_tool_from_obj(obj, _core_action_tools()) + assert name == "ask_user" + + def test_nested_action_wrapper_shape(self): + # qwen2.5-coder emits this under the long agent prompt. 
+ obj = {"thought": "show it", + "action": {"name": "visualize", + "arguments": {"code": "df=1", "output_variable": "df", + "chart": {"chart_type": "Bar Chart"}}}} + name, args = _match_tool_from_obj(obj, _core_action_tools()) + assert name == "visualize" + assert args["output_variable"] == "df" + + def test_nested_tool_wrapper_shape(self): + obj = {"tool": {"name": "ask_user", + "arguments": {"questions": [{"text": "?"}]}}} + name, _ = _match_tool_from_obj(obj, _core_action_tools()) + assert name == "ask_user" + + def test_non_matching_object_returns_none(self): + assert _match_tool_from_obj({"answer": "42"}, _core_action_tools()) is None + + +class TestSalvageToolCallsFromContent: + def _resp(self, content, tool_calls=None): + msg = SimpleNamespace(content=content, tool_calls=tool_calls) + return SimpleNamespace(choices=[SimpleNamespace(message=msg, + finish_reason="stop")]) + + def test_salvages_visualize_action_from_content(self): + content = _json.dumps({"output_variable": "t", "code": "df=1", + "chart": {"chart_type": "Bar Chart"}}) + resp = self._resp(content) + out = _salvage_tool_calls_from_content(resp, _core_action_tools()) + msg = out.choices[0].message + assert msg.tool_calls and msg.tool_calls[0].function.name == "visualize" + assert msg.content is None + assert out.choices[0].finish_reason == "tool_calls" + assert _json.loads(msg.tool_calls[0].function.arguments)["output_variable"] == "t" + + def test_does_not_touch_response_with_native_tool_calls(self): + existing = [SimpleNamespace(function=SimpleNamespace(name="visualize", + arguments="{}"))] + resp = self._resp(None, tool_calls=existing) + out = _salvage_tool_calls_from_content(resp, _core_action_tools()) + assert out.choices[0].message.tool_calls is existing + + def test_plain_text_answer_left_untouched(self): + resp = self._resp("The dataset has 14 languages.") + out = _salvage_tool_calls_from_content(resp, _core_action_tools()) + assert not getattr(out.choices[0].message, "tool_calls", None) + 
assert out.choices[0].message.content == "The dataset has 14 languages." + + def test_no_tools_is_noop(self): + content = _json.dumps({"output_variable": "t", "code": "df=1", "chart": {}}) + resp = self._resp(content) + out = _salvage_tool_calls_from_content(resp, []) + assert not getattr(out.choices[0].message, "tool_calls", None) + + +class TestMatchToolWireFormats: + def test_openai_tool_calls_array_in_content(self): + obj = {"tool_calls": [{"id": "x", "type": "function", + "function": {"name": "visualize", + "arguments": {"code": "df=1", + "output_variable": "df", + "chart": {}}}}]} + name, args = _match_tool_from_obj(obj, _core_action_tools()) + assert name == "visualize" + assert args["output_variable"] == "df" + + def test_stringified_arguments_are_parsed(self): + obj = {"name": "execute_python_script", + "arguments": '{"purpose": "peek", "code": "print(1)"}'} + name, args = _match_tool_from_obj(obj, _core_action_tools()) + assert name == "execute_python_script" + assert args["code"] == "print(1)" diff --git a/tests/backend/agents/test_data_agent_clarification.py b/tests/backend/agents/test_data_agent_clarification.py deleted file mode 100644 index 16107e6c..00000000 --- a/tests/backend/agents/test_data_agent_clarification.py +++ /dev/null @@ -1,241 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
- -"""Tests for DataAgent structured clarification events.""" - -from __future__ import annotations - -import pytest - -from data_formulator.agents.data_agent import DataAgent - -pytestmark = [pytest.mark.backend] - - -class _FakeClient: - model = "test-model" - - -def _agent() -> DataAgent: - return DataAgent(client=_FakeClient(), workspace=None) - - -class TestDataAgentClarification: - def test_clarify_action_outputs_structured_questions(self, monkeypatch) -> None: - agent = _agent() - - def fake_get_next_action(trajectory, input_tables, outer_iteration=0): - yield { - "type": "agent_action", - "action_data": { - "action": "clarify", - "questions": [ - { - "text": "Which metric should I use?", - "responseType": "single_choice", - "options": ["Revenue", "Orders"], - } - ], - }, - "reason": "ok", - "llm_calls": 1, - } - - monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action) - - events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}])) - - assert events[-1]["type"] == "clarify" - assert events[-1]["questions"] == [ - { - "text": "Which metric should I use?", - "responseType": "single_choice", - "required": True, - "options": [{"label": "Revenue"}, {"label": "Orders"}], - } - ] - assert "message" not in events[-1] - assert "options" not in events[-1] - - def test_tool_rounds_exhausted_outputs_clarify_question(self, monkeypatch) -> None: - agent = _agent() - - def fake_get_next_action(trajectory, input_tables, outer_iteration=0): - yield { - "type": "agent_action", - "action_data": None, - "reason": "tool_rounds_exhausted", - "llm_calls": 12, - } - - monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action) - - events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}])) - - clarify = events[-1] - assert clarify["type"] == "clarify" - assert clarify["questions"][0]["text_code"] == "agent.clarifyExhausted" - assert clarify["questions"][0]["options"][0]["label_code"] == 
"agent.clarifyOptionContinue" - assert "id" not in clarify["questions"][0] - assert "id" not in clarify["questions"][0]["options"][0] - # auto_select was removed; the user is expected to pick an option. - assert "auto_select" not in clarify - - def test_clarify_action_preserves_multiple_question_option_groups(self, monkeypatch) -> None: - agent = _agent() - - def fake_get_next_action(trajectory, input_tables, outer_iteration=0): - yield { - "type": "agent_action", - "action_data": { - "action": "clarify", - "questions": [ - { - "text": "Which metric?", - "options": ["Revenue"], - }, - { - "text": "Which period?", - "options": [{"label": "Last 12 months"}], - }, - ], - }, - "reason": "ok", - "llm_calls": 1, - } - - monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action) - - events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}])) - - questions = events[-1]["questions"] - assert [q["text"] for q in questions] == ["Which metric?", "Which period?"] - assert questions[0]["options"] == [{"label": "Revenue"}] - assert questions[1]["options"] == [{"label": "Last 12 months"}] - # No id fields anywhere - for q in questions: - assert "id" not in q - for opt in q.get("options", []): - assert "id" not in opt - - -class TestDataAgentDelegate: - """Tests for the delegate action.""" - - def test_emits_delegate_event_for_data_loading(self, monkeypatch) -> None: - agent = _agent() - - def fake_get_next_action(trajectory, input_tables, outer_iteration=0): - yield { - "type": "agent_action", - "action_data": { - "action": "delegate", - "thought": "User asked about Q4 sales but no sales table is loaded.", - "target": "data_loading", - "message": "I don't see a sales table loaded — want to import one?", - "options": ["quarterly sales 2024"], - }, - "reason": "ok", - "llm_calls": 1, - } - - monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action) - - events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}])) - 
- evt = events[-1] - assert evt["type"] == "delegate" - assert evt["target"] == "data_loading" - assert evt["options"] == ["quarterly sales 2024"] - assert evt["message"] == "I don't see a sales table loaded — want to import one?" - assert evt["thought"] == "User asked about Q4 sales but no sales table is loaded." - assert "trajectory" in evt - assert evt["completed_step_count"] == 0 - - def test_emits_delegate_event_for_report_gen(self, monkeypatch) -> None: - agent = _agent() - - def fake_get_next_action(trajectory, input_tables, outer_iteration=0): - yield { - "type": "agent_action", - "action_data": { - "action": "delegate", - "target": "report_gen", - "message": "Pick an angle for the write-up:", - "options": [ - "Write a 200-word executive summary of regional trends.", - "Create a detailed analytical report on regional trends with category breakdowns.", - ], - }, - "reason": "ok", - "llm_calls": 1, - } - - monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action) - - events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}])) - - evt = events[-1] - assert evt["type"] == "delegate" - assert evt["target"] == "report_gen" - assert len(evt["options"]) == 2 - assert evt["options"][0] == "Write a 200-word executive summary of regional trends." - assert evt["options"][1].startswith("Create a detailed") - - def test_missing_prompt_yields_parse_error(self, monkeypatch) -> None: - agent = _agent() - - def fake_get_next_action(trajectory, input_tables, outer_iteration=0): - yield { - "type": "agent_action", - "action_data": { - "action": "delegate", - "target": "data_loading", - "message": "missing", - "options": [""], - }, - "reason": "ok", - "llm_calls": 1, - } - - monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action) - - events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}])) - - # Last event should be an error event (not a delegate). 
- assert events[-1]["type"] != "delegate" - - def test_normalizer_validates_fields(self) -> None: - with pytest.raises(ValueError): - DataAgent._normalize_delegate_action( - {"target": "", "options": ["x"]} - ) - with pytest.raises(ValueError): - DataAgent._normalize_delegate_action( - {"target": "unknown", "options": ["x"]} - ) - with pytest.raises(ValueError): - DataAgent._normalize_delegate_action( - {"target": "data_loading", "options": []} - ) - with pytest.raises(ValueError): - DataAgent._normalize_delegate_action( - {"target": "data_loading", "options": [" "]} - ) - # Normal multi-option report_gen payload. - out = DataAgent._normalize_delegate_action({ - "target": " report_gen ", - "message": " pick one ", - "options": [" Brief recap. ", " Full report. "], - }) - assert out == { - "target": "report_gen", - "message": "pick one", - "options": ["Brief recap.", "Full report."], - } - # Message is optional; >2 options are truncated to 2. - out2 = DataAgent._normalize_delegate_action({ - "target": "report_gen", - "options": ["A", "B", "C"], - }) - assert out2 == {"target": "report_gen", "options": ["A", "B"]} diff --git a/tests/backend/agents/test_duckdb_notes_prompt.py b/tests/backend/agents/test_duckdb_notes_prompt.py index 385313f3..43fd4385 100644 --- a/tests/backend/agents/test_duckdb_notes_prompt.py +++ b/tests/backend/agents/test_duckdb_notes_prompt.py @@ -1,29 +1,35 @@ -"""Ensure SHARED_DUCKDB_NOTES contains the non-ASCII identifier quoting rule. +"""Ensure the core skill's DuckDB notes contain the non-ASCII identifier quoting rule. -This is a regression guard: the DuckDB notes prompt must remind the LLM to -wrap non-ASCII identifiers in double quotes when generating DuckDB SQL. +This is a regression guard: the live ``AnalystAgent`` loads its chart-creation +guidance from the core skill body (``analyst/skills/core/SKILL.md``). The DuckDB +notes there must remind the LLM to wrap non-ASCII identifiers in double quotes +when generating DuckDB SQL. 
""" from __future__ import annotations +from pathlib import Path + import pytest -from data_formulator.agents.chart_creation_guide import SHARED_DUCKDB_NOTES +import data_formulator pytestmark = [pytest.mark.backend] +_CORE_SKILL_BODY = ( + Path(data_formulator.__file__).parent + / "analyst" + / "skills" + / "core" + / "SKILL.md" +).read_text(encoding="utf-8") + def test_duckdb_notes_mentions_non_ascii_double_quoting() -> None: - lower = SHARED_DUCKDB_NOTES.lower() + lower = _CORE_SKILL_BODY.lower() assert "non-ascii" in lower or "non ascii" in lower - assert "double quotes" in lower or '"' in SHARED_DUCKDB_NOTES + assert '"' in _CORE_SKILL_BODY def test_duckdb_notes_mentions_identifier_quoting_rule() -> None: """The prompt should contain an explicit quoting rule for identifiers.""" - assert "identifier" in SHARED_DUCKDB_NOTES.lower() - - -def test_duckdb_notes_is_not_excessively_long() -> None: - """Overly long DuckDB notes can confuse models about the JSON output format. - Keep it under 800 characters to avoid prompt bloat.""" - assert len(SHARED_DUCKDB_NOTES) < 800 + assert "identifier" in _CORE_SKILL_BODY.lower() diff --git a/tests/backend/agents/test_interactive_explore_context.py b/tests/backend/agents/test_interactive_explore_context.py deleted file mode 100644 index 5680da67..00000000 --- a/tests/backend/agents/test_interactive_explore_context.py +++ /dev/null @@ -1,127 +0,0 @@ -"""Tests for recommendation-question context construction and inspect behavior.""" -from __future__ import annotations - -from datetime import datetime, timezone -from types import SimpleNamespace -from unittest.mock import MagicMock, patch - -import pandas as pd -import pytest - -from data_formulator.agents.agent_interactive_explore import InteractiveExploreAgent -from data_formulator.agents.agent_utils import format_dataframe_sample_with_budget -from data_formulator.agents.context import build_lightweight_table_context -from data_formulator.datalake.workspace_metadata import ( - 
ColumnInfo, - TableMetadata, - WorkspaceMetadata, -) - -pytestmark = [pytest.mark.backend] - - -@pytest.fixture() -def workspace_with_metadata(): - workspace = MagicMock() - workspace.read_data_as_df.return_value = pd.DataFrame({ - "category": ["office", "electronics", "office", "furniture", "office"], - "profit": [10, 20, 15, 5, 30], - }) - workspace.get_relative_data_file_path.return_value = "data/sales.parquet" - - metadata = WorkspaceMetadata.create_new() - metadata.add_table(TableMetadata( - name="sales", - source_type="data_loader", - filename="sales.parquet", - file_type="parquet", - created_at=datetime.now(timezone.utc), - description="Sales performance table", - columns=[ - ColumnInfo("category", "text", description="Business category"), - ColumnInfo("profit", "float64", description="Net profit"), - ], - )) - workspace.get_metadata.return_value = metadata - return workspace - - -class TestRecommendationContext: - def test_lightweight_context_includes_metadata_and_field_values(self, workspace_with_metadata): - context = build_lightweight_table_context( - [{"name": "sales"}], - workspace_with_metadata, - ) - - assert "Sales performance table" in context - assert "Business category" in context - assert "Net profit" in context - assert "Field value samples" in context - assert "office" in context - assert "electronics" in context - assert "Numeric stats" in context - - def test_sample_rows_floor_down_to_fit_budget(self): - df = pd.DataFrame({ - "name": ["alpha" * 20, "beta" * 20, "gamma" * 20], - "value": [1, 2, 3], - }) - - sample, displayed_rows, truncated = format_dataframe_sample_with_budget( - df, - max_rows=3, - max_chars=150, - index=False, - ) - - assert len(sample) <= 150 - assert displayed_rows < 3 - assert truncated is True - - -class TestInteractiveExploreAgent: - def test_run_skips_inspect_round_by_default(self, workspace_with_metadata): - client = MagicMock() - client.get_completion.return_value = [ - SimpleNamespace( - choices=[ - 
SimpleNamespace( - delta=SimpleNamespace(content='{"type":"question","text":"Q","goal":"G","tag":"pivot"}\n') - ) - ] - ) - ] - - agent = InteractiveExploreAgent(client=client, workspace=workspace_with_metadata) - - with patch.object(agent, "_run_inspect_round", wraps=agent._run_inspect_round) as inspect_round: - chunks = list(agent.run([{"name": "sales"}])) - - assert inspect_round.call_count == 0 - text_chunks = [c for c in chunks if isinstance(c, str)] - assert text_chunks == ['{"type":"question","text":"Q","goal":"G","tag":"pivot"}\n'] - - def test_run_yields_progress_events_in_order(self, workspace_with_metadata): - """Progress events must appear before any LLM text chunks.""" - client = MagicMock() - client.get_completion.return_value = [ - SimpleNamespace( - choices=[ - SimpleNamespace( - delta=SimpleNamespace(content='{"type":"question","text":"Q","goal":"G","tag":"pivot"}\n') - ) - ] - ) - ] - - agent = InteractiveExploreAgent(client=client, workspace=workspace_with_metadata) - chunks = list(agent.run([{"name": "sales"}])) - - progress_events = [c for c in chunks if isinstance(c, dict) and c.get("type") == "progress"] - assert len(progress_events) == 2 - assert progress_events[0]["phase"] == "building_context" - assert progress_events[1]["phase"] == "generating" - - first_text_idx = next(i for i, c in enumerate(chunks) if isinstance(c, str)) - last_progress_idx = max(i for i, c in enumerate(chunks) if isinstance(c, dict) and c.get("type") == "progress") - assert last_progress_idx < first_text_idx diff --git a/tests/backend/agents/test_mini_agent.py b/tests/backend/agents/test_mini_agent.py new file mode 100644 index 00000000..dd7d8924 --- /dev/null +++ b/tests/backend/agents/test_mini_agent.py @@ -0,0 +1,481 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Unit tests for data_formulator.analyst.mini_agent.MiniAnalystAgent. 
+ +The mini agent makes a SINGLE analytic decision per run (one ``visualize`` or one +``explain``) with no multi-step loop. These tests cover the pure-logic seams (the +prompt, the reduced tool set, the JSON decision) plus end-to-end drives of +:meth:`run` with a scripted fake client and a stubbed core-skill dispatch, +asserting the key contracts: one decision, pure-text history, the two output +kinds, the two tool variations, and in-place repair of a failed chart. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from types import SimpleNamespace + +import pytest + +from data_formulator.analyst import mini_agent as ma +from data_formulator.analyst.mini_agent import MiniAnalystAgent + +pytestmark = [pytest.mark.backend] + + +_ACTION_NAMES = {"visualize", "ask_user", "delegate"} + + +def _mini_tools_list(allow_inspect: bool): + tools = [ + {"type": "function", "function": { + "name": "visualize", + "description": "Commit a data transform + chart.", + "parameters": {"type": "object", + "properties": {"code": {"type": "string"}, + "output_variable": {"type": "string"}, + "chart": {"type": "object"}}, + "required": ["code", "output_variable", "chart"]}}}, + ] + if allow_inspect: + tools.append({"type": "function", "function": { + "name": "execute_python_script", + "description": "Run a script.", + "parameters": {"type": "object", + "properties": {"code": {"type": "string"}}, + "required": ["code"]}}}) + tools.append(ma._EXPLAIN_TOOL) + return tools + + +def _resp(content): + msg = SimpleNamespace(content=content, tool_calls=None) + return SimpleNamespace(choices=[SimpleNamespace(message=msg, finish_reason="stop")]) + + +class _FakeRegistry: + def action_names(self): + return set(_ACTION_NAMES) + + +class _FakeRlog: + def log(self, *a, **k): + pass + + def close(self): + pass + + +def _bare_mini(allow_inspection=True): + """A MiniAnalystAgent with just the seams its decision logic touches stubbed + — no real LLM / sandbox / registry 
tool building. ``allow_inspection`` only + seeds the per-turn ``_decide(allow_inspect=...)`` input used by the ``_decide`` + helper below.""" + agent = MiniAnalystAgent.__new__(MiniAnalystAgent) + agent.allow_inspection = allow_inspection + agent.language_instruction = "" + agent.max_repair_attempts = 2 + agent.registry = _FakeRegistry() + agent._reasoning_log = _FakeRlog() + agent._session_id = "test-session" + agent.client = SimpleNamespace(model="test-model") + agent._run_explore_code = lambda code, tables: {"status": "ok", "stdout": "ROWS=3"} + agent._loaded_skill_tool_map = lambda: {} + agent._mini_tools = lambda allow_inspect: _mini_tools_list(allow_inspect) + return agent + + +def _decide(agent, scripted_contents, input_tables=None, allow_inspect=None): + """Run _decide with the client scripted to return ``scripted_contents`` in + order. Returns (events, decision_tuple, messages).""" + script = iter(scripted_contents) + agent._call_model = lambda messages: _resp(next(script)) + messages: list[dict] = [{"role": "system", "content": "sys"}, + {"role": "user", "content": "q"}] + gen = agent._decide( + messages, input_tables or [], 1, + allow_inspect=agent.allow_inspection if allow_inspect is None else allow_inspect) + events = [] + decision = None + try: + while True: + events.append(next(gen)) + except StopIteration as stop: + decision = stop.value + return events, decision, messages + + +# -------------------------------------------------------------------------- +# Prompt seams +# -------------------------------------------------------------------------- +class TestSystemPrompt: + def test_template_describes_both_output_kinds(self): + assert '"tool": "visualize"' in ma._MINI_PROMPT_TEMPLATE + assert '"tool": "explain"' in ma._MINI_PROMPT_TEMPLATE + assert "ONE JSON object" in ma._MINI_PROMPT_TEMPLATE + + def test_chart_reference_is_reduced_set(self): + # Exactly the seven reduced types, nothing more exotic. 
+ for t in ma._MINI_CHART_TYPES: + assert t in ma._MINI_CHART_REFERENCE + assert "Boxplot" not in ma._MINI_CHART_REFERENCE + assert "Waterfall" not in ma._MINI_CHART_REFERENCE + assert len(ma._MINI_CHART_TYPES) == 7 + + def test_inspection_note_present(self): + agent = _bare_mini() + out = agent._build_system_prompt() + assert "execute_python_script" in out + assert all(c in out for c in ma._MINI_CHART_TYPES) + + +class TestMiniTools: + def test_mini_tools_offer_visualize_explain_and_inspection(self): + from data_formulator.analyst.skills import build_registry + agent = MiniAnalystAgent.__new__(MiniAnalystAgent) + agent.registry = build_registry() + agent._loaded_skills = {"core"} + names = {(t.get("function") or {}).get("name") + for t in agent._mini_tools(allow_inspect=True)} + assert "visualize" in names + assert "explain" in names + assert "execute_python_script" in names + + def test_mini_tools_drop_inspection_when_unavailable(self): + from data_formulator.analyst.skills import build_registry + agent = MiniAnalystAgent.__new__(MiniAnalystAgent) + agent.registry = build_registry() + agent._loaded_skills = {"core"} + names = {(t.get("function") or {}).get("name") + for t in agent._mini_tools(allow_inspect=False)} + assert names == {"visualize", "explain"} + + +# -------------------------------------------------------------------------- +# The single decision +# -------------------------------------------------------------------------- +class TestDecide: + def test_visualize_in_one_shot(self): + agent = _bare_mini(allow_inspection=False) + viz = json.dumps({"thought": "bar it", "tool": "visualize", "arguments": { + "code": "out=df", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + events, decision, messages = _decide(agent, [viz]) + assert decision[0] == "visualize" + assert decision[1]["output_variable"] == "out" + # thought surfaced + assert any(e["type"] == "thinking_text" and e["content"] == "bar it" + for e in events) + # pure-text 
history: the assistant turn is the verbatim JSON + assert messages[-1]["role"] == "assistant" + assert all("tool_calls" not in m for m in messages) + assert all(m["role"] != "tool" for m in messages) + + def test_explain_action_ends_with_text(self): + agent = _bare_mini(allow_inspection=False) + exp = json.dumps({"tool": "explain", + "arguments": {"text": "There are 42 rows."}}) + _, decision, _ = _decide(agent, [exp]) + assert decision == ("explain", "There are 42 rows.") + + def test_plain_text_is_explain(self): + agent = _bare_mini(allow_inspection=False) + _, decision, messages = _decide(agent, ["The data covers 2019-2023."]) + assert decision[0] == "explain" + assert "2019" in decision[1] + assert messages[-1]["role"] == "assistant" + + def test_inspection_then_visualize_keeps_history_pure_text(self): + agent = _bare_mini(allow_inspection=True) + inspect = json.dumps({"tool": "execute_python_script", + "arguments": {"code": "print(1)"}}) + viz = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df", "output_variable": "out", + "chart": {"chart_type": "Line Chart"}}}) + events, decision, messages = _decide(agent, [inspect, viz]) + etypes = [e["type"] for e in events] + assert "tool_start" in etypes and "tool_result" in etypes + assert decision[0] == "visualize" + # the inspection observation came back as a [OBSERVATION] user turn + assert any(m["role"] == "user" and "[OBSERVATION]" in (m["content"] or "") + and "ROWS=3" in (m["content"] or "") for m in messages) + assert all("tool_calls" not in m for m in messages) + + def test_inspection_budget_is_one(self): + # Two inspection attempts: the second must be refused (no tool offered), + # nudging the model; the final visualize still commits. 
+ agent = _bare_mini(allow_inspection=True) + inspect = json.dumps({"tool": "execute_python_script", + "arguments": {"code": "print(1)"}}) + viz = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + # script: inspect, inspect(again -> refused as unknown), visualize + _, decision, messages = _decide(agent, [inspect, inspect, viz]) + assert decision[0] == "visualize" + # a correction nudge was issued for the second (now unavailable) inspect + assert any("not available" in (m["content"] or "") + for m in messages if m["role"] == "user") + + def test_missing_required_field_triggers_one_correction(self): + agent = _bare_mini(allow_inspection=False) + bad = json.dumps({"tool": "visualize", + "arguments": {"code": "out=df", "output_variable": "out"}}) + good = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + _, decision, messages = _decide(agent, [bad, good]) + assert decision[0] == "visualize" + assert decision[1].get("chart") + assert any("[OBSERVATION] ERROR" in (m["content"] or "") + for m in messages if m["role"] == "user") + + +# -------------------------------------------------------------------------- +# End-to-end run(): result/completion events + repair +# -------------------------------------------------------------------------- +class _DummySandbox: + def __enter__(self): + return self + + def __exit__(self, *a): + return False + + +def _prep_run(agent, scripted_contents, monkeypatch): + """Wire run()'s collaborators: scripted client, stub initial messages, no-op + sandbox + reasoning-log + explore ns.""" + script = iter(scripted_contents) + agent._call_model = lambda messages: _resp(next(script)) + agent._build_initial_messages = lambda *a, **k: [ + {"role": "system", "content": "sys"}, {"role": "user", "content": "q"}] + agent._explore_ns_dir = lambda: Path("/nonexistent/mini-test-ns") 
+ monkeypatch.setattr( + "data_formulator.sandbox.local_sandbox.SandboxSession", + lambda *a, **k: _DummySandbox()) + + +def _viz_result_event(): + return {"type": "result", "status": "success", "content": { + "question": "", + "result": {"code": "out=df", + "refined_goal": {"chart": {"chart_type": "Bar Chart"}, + "title": "T"}, + "content": {"rows": [{"x": 1, "y": 2}]}}}} + + +class TestRun: + def test_explain_run_emits_completion_summary(self, monkeypatch): + agent = _bare_mini(allow_inspection=False) + _prep_run(agent, [json.dumps( + {"tool": "explain", "arguments": {"text": "Sales are flat."}})], monkeypatch) + events = list(agent.run([{"name": "t"}], "is it growing?")) + comp = [e for e in events if e["type"] == "completion"] + assert comp and comp[0]["status"] == "success" + assert comp[0]["content"]["summary"] == "Sales are flat." + # an explain produces no result/chart + assert not any(e["type"] == "result" for e in events) + + def test_visualize_run_emits_result_then_completion(self, monkeypatch): + agent = _bare_mini(allow_inspection=False) + viz = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + _prep_run(agent, [viz], monkeypatch) + + def _viz_ok(*a, **k): + yield {"type": "action", "action": "visualize"} + yield _viz_result_event() + return "[OBSERVATION] Chart created." 
+ agent._dispatch_skill_action = _viz_ok + + events = list(agent.run([{"name": "t"}], "show sales")) + etypes = [e["type"] for e in events] + assert "result" in etypes + comp = [e for e in events if e["type"] == "completion"] + assert comp and comp[0]["status"] == "success" + assert comp[0]["content"]["total_steps"] >= 0 + + def test_failed_visualize_is_repaired_in_place(self, monkeypatch): + agent = _bare_mini(allow_inspection=False) + viz1 = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df.bad", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + viz2 = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + _prep_run(agent, [viz1, viz2], monkeypatch) + + calls = {"n": 0} + + def _viz_dispatch(*a, **k): + calls["n"] += 1 + if calls["n"] == 1: + yield {"type": "error", "message": "boom"} + return "[OBSERVATION – Step 1 FAILED]\n\nError: boom" + yield _viz_result_event() + return "[OBSERVATION] Chart created." + agent._dispatch_skill_action = _viz_dispatch + + events = list(agent.run([{"name": "t"}], "show sales")) + assert calls["n"] == 2 # one failure, one repair + assert any(e["type"] == "result" for e in events) + comp = [e for e in events if e["type"] == "completion"] + assert comp and comp[0]["status"] == "success" + + def test_repair_can_inspect_before_refixing(self, monkeypatch): + # The auto-revision loop may inspect the data to diagnose a failure + # (e.g. discover the real columns) before emitting a corrected chart. 
+ agent = _bare_mini(allow_inspection=True) + agent.max_repair_attempts = 1 + viz_bad = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df['rate']", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + inspect = json.dumps({"tool": "execute_python_script", + "arguments": {"code": "print(df.columns)"}}) + viz_good = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + # initial viz (fails) -> repair decides to inspect, then corrected viz + _prep_run(agent, [viz_bad, inspect, viz_good], monkeypatch) + + calls = {"n": 0} + + def _viz_dispatch(*a, **k): + calls["n"] += 1 + if calls["n"] == 1: + yield {"type": "error", "message": "KeyError rate"} + return "[OBSERVATION – Step 1 FAILED]\n\nError: KeyError - 'rate'" + yield _viz_result_event() + return "[OBSERVATION] Chart created." + agent._dispatch_skill_action = _viz_dispatch + + events = list(agent.run([{"name": "t"}], "show rate")) + # the repair turn ran an inspection before the corrected visualize + assert any(e["type"] == "tool_start" + and e.get("tool") == "execute_python_script" for e in events) + assert any(e["type"] == "result" for e in events) + comp = [e for e in events if e["type"] == "completion"] + assert comp and comp[0]["status"] == "success" + + def test_unrepairable_visualize_completes_without_chart(self, monkeypatch): + agent = _bare_mini(allow_inspection=False) + agent.max_repair_attempts = 0 # no repair budget + viz = json.dumps({"tool": "visualize", "arguments": { + "code": "out=df.bad", "output_variable": "out", + "chart": {"chart_type": "Bar Chart"}}}) + _prep_run(agent, [viz], monkeypatch) + + def _viz_fail(*a, **k): + yield {"type": "error", "message": "boom"} + return "[OBSERVATION – Step 1 FAILED]\n\nError: boom" + agent._dispatch_skill_action = _viz_fail + + events = list(agent.run([{"name": "t"}], "show sales")) + assert not any(e["type"] == "result" for e in events) + comp 
= [e for e in events if e["type"] == "completion"] + assert comp and comp[0]["status"] == "completed_no_viz" + # The run must not end silently: a failed chart surfaces an error event + # carrying the reason. In production the skill's own error event is + # dropped by the shell router, so run() re-surfaces it from the + # observation; here the message must reach the user with the cause. + errs = [e for e in events if e["type"] == "error" + and e.get("message_code") == "agent.miniNoChart"] + assert errs and "boom" in errs[0]["message"] + + def test_empty_reply_is_not_a_silent_explain(self, monkeypatch): + # A small model that returns nothing must not end the run with an empty + # completion; the summary falls back to a user-visible message. + agent = _bare_mini(allow_inspection=False) + _prep_run(agent, ["", ""], monkeypatch) # empty reply, then empty again + events = list(agent.run([{"name": "t"}], "is it growing?")) + comp = [e for e in events if e["type"] == "completion"] + assert comp and comp[0]["content"]["summary"].strip() + + + +# -------------------------------------------------------------------------- +# Plain-text transport seams (migrated from the removed simple_agent tests; +# MiniAnalystAgent now owns _catalog_reminder / _parse_action). 
+# -------------------------------------------------------------------------- + +def _proto_tools(): + """A representative tool list — an inspection tool with a unique required key + plus visualize — for exercising the generic protocol seams.""" + return [ + {"type": "function", "function": { + "name": "execute_python_script", + "description": "Run a script.", + "parameters": {"type": "object", + "properties": {"purpose": {"type": "string"}, + "code": {"type": "string"}}, + "required": ["purpose", "code"]}}}, + {"type": "function", "function": { + "name": "inspect_source_data", + "description": "Summarise source tables.", + "parameters": {"type": "object", + "properties": {"table_names": {"type": "array", + "items": {"type": "string"}}}, + "required": ["table_names"]}}}, + {"type": "function", "function": { + "name": "visualize", + "description": "Commit a data transform + chart.", + "parameters": {"type": "object", + "properties": {"code": {"type": "string"}, + "output_variable": {"type": "string"}, + "chart": {"type": "object"}}, + "required": ["code", "output_variable", "chart"]}}}, + ] + + +class TestCatalogReminder: + def test_splits_inspection_and_action_names(self): + agent = _bare_mini() + text = agent._catalog_reminder(_proto_tools()) + assert "execute_python_script" in text and "inspect_source_data" in text + assert "visualize" in text + # visualize is listed under Actions, not Inspection tools + inspect_part, action_part = text.split("Actions:") + assert "visualize" not in inspect_part + assert "visualize" in action_part + + +class TestParseAction: + def test_wrapped_tool_envelope(self): + content = json.dumps({"thought": "let's chart it", "tool": "visualize", + "arguments": {"code": "df=1", "output_variable": "df", + "chart": {}}}) + thought, name, args = MiniAnalystAgent._parse_action(content, _proto_tools()) + assert name == "visualize" + assert thought == "let's chart it" + assert args["output_variable"] == "df" + + def 
test_bare_args_matched_by_required_keys(self): + content = json.dumps({"table_names": ["t1", "t2"]}) + thought, name, args = MiniAnalystAgent._parse_action(content, _proto_tools()) + assert name == "inspect_source_data" + assert args["table_names"] == ["t1", "t2"] + + def test_nested_action_wrapper(self): + content = json.dumps({"thought": "go", "action": { + "name": "visualize", + "arguments": {"code": "df=1", "output_variable": "df", "chart": {}}}}) + thought, name, args = MiniAnalystAgent._parse_action(content, _proto_tools()) + assert name == "visualize" + assert thought == "go" + + def test_json_embedded_in_prose(self): + content = ('I will inspect first.\n' + '{"tool": "inspect_source_data", "arguments": {"table_names": ["t"]}}') + parsed = MiniAnalystAgent._parse_action(content, _proto_tools()) + assert parsed is not None + assert parsed[1] == "inspect_source_data" + + def test_plain_text_is_final_answer(self): + assert MiniAnalystAgent._parse_action( + "Here is the final summary.", _proto_tools()) is None + + def test_none_content(self): + assert MiniAnalystAgent._parse_action(None, _proto_tools()) is None diff --git a/tests/backend/agents/test_experience_distill.py b/tests/backend/agents/test_workflow_distill.py similarity index 74% rename from tests/backend/agents/test_experience_distill.py rename to tests/backend/agents/test_workflow_distill.py index a3b823c8..e44d4a86 100644 --- a/tests/backend/agents/test_experience_distill.py +++ b/tests/backend/agents/test_workflow_distill.py @@ -1,13 +1,13 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Tests for ExperienceDistillAgent and the /api/knowledge/distill-experience endpoint. +"""Tests for WorkflowDistillAgent and the /api/knowledge/distill-workflow endpoint. 
Covers: -- _extract_context_summary correctly extracts experience context +- _extract_context_summary correctly extracts workflow context - Output Markdown includes valid YAML front matter - front matter contains source: distill and source metadata -- Generated experience file written to correct directory +- Generated workflow file written to correct directory - category_hint controls sub-directory """ @@ -18,8 +18,14 @@ import flask import pytest -from data_formulator.agents.agent_experience_distill import ExperienceDistillAgent -from data_formulator.knowledge.store import parse_front_matter +from data_formulator.agents.agent_workflow_distill import WorkflowDistillAgent +from data_formulator.knowledge.store import ( + KNOWLEDGE_LIMITS, + WORKFLOW_HARD_MAX, + parse_front_matter, +) + +WORKFLOW_SOFT_LIMIT = KNOWLEDGE_LIMITS["workflows"] pytestmark = [pytest.mark.backend] @@ -73,7 +79,7 @@ }, ] -SAMPLE_EXPERIENCE_CONTEXT = { +SAMPLE_WORKFLOW_CONTEXT = { "context_id": "ws-1", "workspace_id": "ws-1", "workspace_name": "Sales Region Analysis", @@ -86,7 +92,7 @@ class TestExtractContextSummary: def test_renders_each_event_type(self): - summary = ExperienceDistillAgent._extract_context_summary(SAMPLE_EXPERIENCE_CONTEXT) + summary = WorkflowDistillAgent._extract_context_summary(SAMPLE_WORKFLOW_CONTEXT) # message events assert "[user→data-agent/prompt]" in summary assert "Show sales by region" in summary @@ -113,7 +119,7 @@ def test_renders_each_event_type(self): assert "encoding: x=region(nominal)" in summary def test_empty_events_returns_marker(self): - summary = ExperienceDistillAgent._extract_context_summary({}) + summary = WorkflowDistillAgent._extract_context_summary({}) assert summary == "(empty context)" def test_user_content_is_not_displaycontent(self): @@ -131,7 +137,7 @@ def test_user_content_is_not_displaycontent(self): }], }], } - summary = ExperienceDistillAgent._extract_context_summary(ctx) + summary = WorkflowDistillAgent._extract_context_summary(ctx) assert 
"raw text" in summary def test_skips_non_dict_events(self): @@ -140,7 +146,7 @@ def test_skips_non_dict_events(self): {"type": "message", "from": "user", "to": "data-agent", "role": "prompt", "content": "ok"}, ]}]} - summary = ExperienceDistillAgent._extract_context_summary(ctx) + summary = WorkflowDistillAgent._extract_context_summary(ctx) assert "[user→data-agent/prompt]" in summary # No crashes; the bogus entries are silently dropped. @@ -154,7 +160,7 @@ def test_create_table_basic(self): "sample_rows": [{"a": 1}], "code": "x = 1", }]}]} - summary = ExperienceDistillAgent._extract_context_summary(ctx) + summary = WorkflowDistillAgent._extract_context_summary(ctx) assert "[create_table] t1" in summary def test_create_chart_without_encoding(self): @@ -163,7 +169,7 @@ def test_create_chart_without_encoding(self): "related_table_id": "t1", "mark_or_type": "line", }]}]} - summary = ExperienceDistillAgent._extract_context_summary(ctx) + summary = WorkflowDistillAgent._extract_context_summary(ctx) assert "[create_chart] line on t1" in summary assert "encoding:" not in summary @@ -187,7 +193,7 @@ def test_renders_multi_thread_with_headers(self): }, ], } - summary = ExperienceDistillAgent._extract_context_summary(ctx) + summary = WorkflowDistillAgent._extract_context_summary(ctx) assert "### Thread 1 (id=leaf-a)" in summary assert "### Thread 2 (id=leaf-b)" in summary assert "load gas prices" in summary @@ -236,10 +242,10 @@ def _mock_client(self): def test_produces_valid_markdown(self): client = self._mock_client() - agent = ExperienceDistillAgent(client=client) + agent = WorkflowDistillAgent(client=client) with patch.object(agent, "_call_llm", return_value=MOCK_CONTEXT_RESPONSE): - result = agent.run(SAMPLE_EXPERIENCE_CONTEXT) + result = agent.run(SAMPLE_WORKFLOW_CONTEXT) assert result.startswith("---") meta, body = parse_front_matter(result) @@ -249,11 +255,11 @@ def test_produces_valid_markdown(self): def test_fallback_front_matter_added(self): client = 
self._mock_client() - agent = ExperienceDistillAgent(client=client) + agent = WorkflowDistillAgent(client=client) no_fm_response = "# Sales Analysis\n\nJust some content." with patch.object(agent, "_call_llm", return_value=no_fm_response): - result = agent.run(SAMPLE_EXPERIENCE_CONTEXT) + result = agent.run(SAMPLE_WORKFLOW_CONTEXT) assert result.startswith("---") meta, _ = parse_front_matter(result) @@ -261,11 +267,11 @@ def test_fallback_front_matter_added(self): assert meta["source_context"] == "ws-1" def test_retries_once_when_body_too_long(self): - """If first LLM call produces body > limit, agent retries with condensation prompt.""" + """If first LLM call produces body over the soft target, agent retries with condensation prompt.""" client = self._mock_client() - agent = ExperienceDistillAgent(client=client) + agent = WorkflowDistillAgent(client=client) - long_body = "x" * 3000 + long_body = "x" * (WORKFLOW_SOFT_LIMIT + 1000) long_response = ( "---\ntitle: Long\ntags: []\ncreated: 2026-01-01\n" "updated: 2026-01-01\nsource: distill\nsource_context: t1\n---\n\n" @@ -283,18 +289,18 @@ def fake_call_llm(messages): return short_response with patch.object(agent, "_call_llm", side_effect=fake_call_llm): - result = agent.run(SAMPLE_EXPERIENCE_CONTEXT) + result = agent.run(SAMPLE_WORKFLOW_CONTEXT) assert call_count == 2 _, body = parse_front_matter(result) - assert len(body.strip()) <= 2000 + assert len(body.strip()) <= WORKFLOW_SOFT_LIMIT def test_retry_asks_for_slack_under_limit(self): - """The retry prompt asks the model for less than the hard limit.""" + """The retry prompt asks the model for less than the soft target.""" client = self._mock_client() - agent = ExperienceDistillAgent(client=client) + agent = WorkflowDistillAgent(client=client) - long_body = "x" * 3000 + long_body = "x" * (WORKFLOW_SOFT_LIMIT + 1000) long_response = ( "---\ntitle: L\ntags: []\ncreated: 2026-01-01\n" "updated: 2026-01-01\nsource: distill\nsource_context: t1\n---\n\n" @@ -309,21 
+315,21 @@ def fake_call_llm(messages): return long_response if len(captured) == 1 else MOCK_CONTEXT_RESPONSE with patch.object(agent, "_call_llm", side_effect=fake_call_llm): - agent.run(SAMPLE_EXPERIENCE_CONTEXT) + agent.run(SAMPLE_WORKFLOW_CONTEXT) assert len(captured) == 2 retry_prompt = captured[1][-1]["content"] - # Must mention the slacked target (limit minus margin), not the raw limit. - expected_target = 2000 - agent.RETRY_MARGIN - assert f"within {expected_target} characters" in retry_prompt + # Must mention the slacked target (soft limit minus margin). + expected_target = WORKFLOW_SOFT_LIMIT - agent.RETRY_MARGIN + assert f"around {expected_target} characters" in retry_prompt def test_hard_trims_when_retry_still_over_limit(self): - """If the retry still overshoots, body is hard-trimmed to fit the limit.""" + """If the retry still blows past the hard ceiling, body is hard-trimmed to fit it.""" client = self._mock_client() - agent = ExperienceDistillAgent(client=client) + agent = WorkflowDistillAgent(client=client) - first_body = "x" * 3000 - retry_body = "y" * 2014 # mimics the real-world failure: 14 over + first_body = "x" * (WORKFLOW_SOFT_LIMIT + 1000) + retry_body = "y" * (WORKFLOW_HARD_MAX + 14) # mimics retry still over the ceiling front_matter = ( "---\ntitle: T\ntags: []\ncreated: 2026-01-01\n" "updated: 2026-01-01\nsource: distill\nsource_context: t1\n---\n\n" @@ -339,13 +345,13 @@ def fake_call_llm(messages): return resp with patch.object(agent, "_call_llm", side_effect=fake_call_llm): - result = agent.run(SAMPLE_EXPERIENCE_CONTEXT) + result = agent.run(SAMPLE_WORKFLOW_CONTEXT) # Both LLM calls happened. assert call_count == 2 - # Final body fits the hard limit (no save failure). + # Final body fits the hard ceiling (no save failure). _, body = parse_front_matter(result) - assert len(body.strip()) <= 2000 + assert len(body.strip()) <= WORKFLOW_HARD_MAX # Truncation marker is present so the user can see it was trimmed. 
assert "truncated" in body # Front matter preserved. @@ -355,7 +361,7 @@ def fake_call_llm(messages): def test_no_retry_when_body_within_limit(self): """If first LLM call is within limit, no retry happens.""" client = self._mock_client() - agent = ExperienceDistillAgent(client=client) + agent = WorkflowDistillAgent(client=client) call_count = 0 @@ -365,14 +371,14 @@ def fake_call_llm(messages): return MOCK_CONTEXT_RESPONSE with patch.object(agent, "_call_llm", side_effect=fake_call_llm): - agent.run(SAMPLE_EXPERIENCE_CONTEXT) + agent.run(SAMPLE_WORKFLOW_CONTEXT) assert call_count == 1 def test_language_instruction_injected_into_system_prompt(self): client = self._mock_client() zh_instruction = "[LANGUAGE INSTRUCTION]\nWrite in Simplified Chinese." - agent = ExperienceDistillAgent(client=client, language_instruction=zh_instruction) + agent = WorkflowDistillAgent(client=client, language_instruction=zh_instruction) captured_messages = [] @@ -381,7 +387,7 @@ def fake_call_llm(messages): return MOCK_CONTEXT_RESPONSE with patch.object(agent, "_call_llm", side_effect=fake_call_llm): - agent.run(SAMPLE_EXPERIENCE_CONTEXT) + agent.run(SAMPLE_WORKFLOW_CONTEXT) system_content = captured_messages[0]["content"] assert "[LANGUAGE INSTRUCTION]" in system_content @@ -389,7 +395,7 @@ def fake_call_llm(messages): def test_language_code_zh_injects_chinese_instruction(self): client = self._mock_client() - agent = ExperienceDistillAgent(client=client, language_code="zh") + agent = WorkflowDistillAgent(client=client, language_code="zh") captured_messages = [] @@ -398,7 +404,7 @@ def fake_call_llm(messages): return MOCK_CONTEXT_RESPONSE with patch.object(agent, "_call_llm", side_effect=fake_call_llm): - agent.run(SAMPLE_EXPERIENCE_CONTEXT) + agent.run(SAMPLE_WORKFLOW_CONTEXT) system_content = captured_messages[0]["content"] assert "Simplified Chinese" in system_content @@ -406,7 +412,7 @@ def fake_call_llm(messages): def test_language_code_en_no_extra_instruction(self): client = 
self._mock_client() - agent = ExperienceDistillAgent(client=client, language_code="en") + agent = WorkflowDistillAgent(client=client, language_code="en") captured_messages = [] @@ -415,27 +421,45 @@ def fake_call_llm(messages): return MOCK_CONTEXT_RESPONSE with patch.object(agent, "_call_llm", side_effect=fake_call_llm): - agent.run(SAMPLE_EXPERIENCE_CONTEXT) + agent.run(SAMPLE_WORKFLOW_CONTEXT) system_content = captured_messages[0]["content"] assert "in English" in system_content assert "[LANGUAGE INSTRUCTION]" not in system_content -# ── _experience_filename ────────────────────────────────────────────────── +# ── _workflow_filename ────────────────────────────────────────────────── -class TestExperienceFilename: - def test_derives_from_workspace_name(self): - from data_formulator.routes.knowledge import _experience_filename - name = _experience_filename("Sales Analysis Pattern") +class TestWorkflowFilename: + def test_derives_from_title(self): + from data_formulator.routes.knowledge import _workflow_filename + name = _workflow_filename("Sales Analysis Pattern") assert name.endswith(".md") assert "sales-analysis-pattern" in name.lower() - def test_fallback_when_workspace_name_blank(self): - from data_formulator.routes.knowledge import _experience_filename - name = _experience_filename(" ") - assert name == "session-experience.md" + def test_fallback_when_title_blank(self): + from data_formulator.routes.knowledge import _workflow_filename + name = _workflow_filename(" ") + assert name == "session-workflow.md" + + def test_rejects_path_traversal(self): + from data_formulator.routes.knowledge import _workflow_filename + # An LLM-supplied name must never escape the workflows directory. + for evil in ("../../etc/passwd", "..\\..\\win", "/etc/shadow", "a/b/c"): + name = _workflow_filename(evil) + assert "/" not in name + assert "\\" not in name + assert ".." 
not in name + assert name.endswith(".md") + + def test_strips_reserved_and_control_chars(self): + from data_formulator.routes.knowledge import _workflow_filename + name = _workflow_filename('sales:report*?"<>|\x00 v1') + assert name.endswith(".md") + for ch in ':*?"<>|\x00': + assert ch not in name + assert name == "sales-report-v1.md" # ── API endpoint ────────────────────────────────────────────────────────── @@ -453,7 +477,7 @@ def app(self, tmp_path): _app.register_blueprint(knowledge_bp) register_error_handlers(_app) - (tmp_path / "knowledge" / "experiences").mkdir(parents=True) + (tmp_path / "knowledge" / "workflows").mkdir(parents=True) with patch("data_formulator.routes.knowledge.get_identity_id", return_value="test-user"), \ patch("data_formulator.routes.knowledge.get_user_home", return_value=tmp_path): @@ -464,14 +488,14 @@ def client(self, app): return app.test_client() def test_missing_context_returns_error(self, client): - resp = client.post("/api/knowledge/distill-experience", + resp = client.post("/api/knowledge/distill-workflow", json={"model": {"endpoint": "openai", "model": "gpt-4o"}}) data = resp.get_json() assert data["status"] == "error" def test_missing_model_returns_error(self, client): - resp = client.post("/api/knowledge/distill-experience", - json={"experience_context": SAMPLE_EXPERIENCE_CONTEXT}) + resp = client.post("/api/knowledge/distill-workflow", + json={"workflow_context": SAMPLE_WORKFLOW_CONTEXT}) data = resp.get_json() assert data["status"] == "error" @@ -483,9 +507,9 @@ def test_missing_events_returns_error(self, client): "workspace_name": "Demo", "threads": [], } - resp = client.post("/api/knowledge/distill-experience", + resp = client.post("/api/knowledge/distill-workflow", json={ - "experience_context": bad_context, + "workflow_context": bad_context, "model": {"endpoint": "openai", "model": "gpt-4o", "api_key": "test"}, }) data = resp.get_json() @@ -497,9 +521,9 @@ def test_missing_events_field_returns_error(self, client): 
"workspace_id": "ws-1", "workspace_name": "Demo", } # no 'threads' key - resp = client.post("/api/knowledge/distill-experience", + resp = client.post("/api/knowledge/distill-workflow", json={ - "experience_context": bad_context, + "workflow_context": bad_context, "model": {"endpoint": "openai", "model": "gpt-4o", "api_key": "test"}, }) data = resp.get_json() @@ -508,22 +532,22 @@ def test_missing_events_field_returns_error(self, client): def test_successful_distill(self, client, tmp_path): with patch("data_formulator.routes.agents.get_client") as mock_gc, \ patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \ - patch("data_formulator.agents.agent_experience_distill.ExperienceDistillAgent.run", + patch("data_formulator.agents.agent_workflow_distill.WorkflowDistillAgent.run", return_value=MOCK_CONTEXT_RESPONSE): mock_gc.return_value = MagicMock() - resp = client.post("/api/knowledge/distill-experience", + resp = client.post("/api/knowledge/distill-workflow", json={ - "experience_context": SAMPLE_EXPERIENCE_CONTEXT, + "workflow_context": SAMPLE_WORKFLOW_CONTEXT, "model": {"endpoint": "openai", "model": "gpt-4o", "api_key": "test"}, }) data = resp.get_json() assert data["status"] == "success" - assert data["data"]["category"] == "experiences" + assert data["data"]["category"] == "workflows" assert data["data"]["path"].endswith(".md") # Verify file was written - exp_dir = tmp_path / "knowledge" / "experiences" + exp_dir = tmp_path / "knowledge" / "workflows" md_files = list(exp_dir.rglob("*.md")) assert len(md_files) >= 1 assert not (tmp_path / "agent-logs").exists() @@ -531,13 +555,13 @@ def test_successful_distill(self, client, tmp_path): def test_category_hint_creates_subdir(self, client, tmp_path): with patch("data_formulator.routes.agents.get_client") as mock_gc, \ patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \ - patch("data_formulator.agents.agent_experience_distill.ExperienceDistillAgent.run", + 
patch("data_formulator.agents.agent_workflow_distill.WorkflowDistillAgent.run", return_value=MOCK_CONTEXT_RESPONSE): mock_gc.return_value = MagicMock() - resp = client.post("/api/knowledge/distill-experience", + resp = client.post("/api/knowledge/distill-workflow", json={ - "experience_context": SAMPLE_EXPERIENCE_CONTEXT, + "workflow_context": SAMPLE_WORKFLOW_CONTEXT, "model": {"endpoint": "openai", "model": "gpt-4o", "api_key": "test"}, "category_hint": "sales", }) diff --git a/tests/backend/data/test_local_folder_loader.py b/tests/backend/data/test_local_folder_loader.py index c6d50804..5758d408 100644 --- a/tests/backend/data/test_local_folder_loader.py +++ b/tests/backend/data/test_local_folder_loader.py @@ -215,6 +215,11 @@ def test_fetch_tsv(self, data_dir: Path) -> None: loader.test_connection() table = loader.fetch_data_as_arrow("data.tsv") assert table.num_rows == 2 + # A TSV must split on tabs into separate columns, not collapse into one + # field like "id\tvalue" (regression: read_csv defaulted to a comma + # delimiter, so a tab-separated file became a single string column). + assert table.column_names == ["id", "value"] + assert table.column("value").to_pylist() == ["foo", "bar"] def test_fetch_parquet(self, data_dir: Path) -> None: loader = LocalFolderDataLoader({"root_dir": str(data_dir)}) diff --git a/tests/backend/data/test_workspace_manager.py b/tests/backend/data/test_workspace_manager.py index e2a00eb7..d82766c6 100644 --- a/tests/backend/data/test_workspace_manager.py +++ b/tests/backend/data/test_workspace_manager.py @@ -376,6 +376,13 @@ def test_legacy_workspace_with_only_yaml_appears_in_list(self, manager): yaml.safe_dump({"version": "1.1", "tables": {}}), encoding="utf-8", ) + # Pretend the legacy workspace had session state with tables. + (ws_dir / "session_state.json").write_text( + json.dumps({"tables": [{"id": "t1"}]}), + encoding="utf-8", + ) + # Trigger meta repair with a non-empty table count. 
+ manager.save_session_state("legacy_ws", {"tables": [{"id": "t1"}]}) ws_list = manager.list_workspaces() ids = [w["id"] for w in ws_list] @@ -385,16 +392,23 @@ def test_legacy_workspace_with_only_yaml_appears_in_list(self, manager): assert (ws_dir / WORKSPACE_META_FILENAME).exists() def test_legacy_workspace_with_only_session_state_appears_in_list(self, manager): - """A directory with only session_state.json should be auto-repaired.""" + """A directory with session_state.json (containing tables) is + auto-repaired and visible in list_workspaces. The displayName + is inferred from session_state.""" ws_dir = manager.root / "state_only" ws_dir.mkdir(parents=True) (ws_dir / "session_state.json").write_text( json.dumps({ - "tables": [], + "tables": [{"id": "t1", "name": "T1"}], "activeWorkspace": {"displayName": "My Old Session"}, }), encoding="utf-8", ) + # Re-save so meta is written with tableCount > 0. + manager.save_session_state("state_only", { + "tables": [{"id": "t1", "name": "T1"}], + "activeWorkspace": {"displayName": "My Old Session"}, + }) ws_list = manager.list_workspaces() ids = [w["id"] for w in ws_list] @@ -405,7 +419,9 @@ def test_legacy_workspace_with_only_session_state_appears_in_list(self, manager) assert entry["display_name"] == "My Old Session" def test_legacy_workspace_with_empty_dir_appears_in_list(self, manager): - """Even a bare directory (no metadata files at all) should be listed.""" + """A bare directory with no metadata at all is auto-repaired by + _ensure_meta (meta.json gets created with fallback displayName) + and appears in list_workspaces.""" ws_dir = manager.root / "bare" ws_dir.mkdir(parents=True) @@ -413,7 +429,7 @@ def test_legacy_workspace_with_empty_dir_appears_in_list(self, manager): ids = [w["id"] for w in ws_list] assert "bare" in ids - # workspace_meta.json auto-created with fallback displayName = dir name + # Auto-repair created the meta with a fallback displayName. 
meta = json.loads((ws_dir / WORKSPACE_META_FILENAME).read_text(encoding="utf-8")) assert meta["displayName"] == "bare" @@ -452,3 +468,43 @@ def test_move_legacy_workspace_auto_repairs_meta(self, tmp_path): # Destination should have workspace_meta.json dst_ws = dst.get_workspace_path("old_ws") assert (dst_ws / WORKSPACE_META_FILENAME).exists() + + +class TestEmptyWorkspaceVisibility: + """list_workspaces() lists every workspace directory, including + empty "Untitled Session" entries from abandoned data-loading + chats. Users manage (rename/delete) these themselves via the + sidebar — they are not hidden.""" + + def test_empty_workspace_is_visible(self, manager): + manager.create_workspace("ghost") + # No save_session_state — meta has no tableCount/chartCount. + + ws_list = manager.list_workspaces() + + assert any(w["id"] == "ghost" for w in ws_list) + assert manager.workspace_exists("ghost") + + def test_workspace_with_tables_is_visible(self, manager): + manager.create_workspace("real") + manager.save_session_state("real", { + "tables": [{"id": "t1", "name": "T1"}], + "activeWorkspace": {"id": "real", "displayName": "Real"}, + }) + + ws_list = manager.list_workspaces() + + assert any(w["id"] == "real" for w in ws_list) + + def test_zero_count_workspace_is_visible(self, manager): + """A workspace whose tables were all deleted (zero tables) still + appears in the list — the user decides whether to remove it.""" + manager.create_workspace("emptied") + manager.save_session_state("emptied", { + "tables": [], + "activeWorkspace": {"id": "emptied", "displayName": "Emptied"}, + }) + + ws_list = manager.list_workspaces() + + assert any(w["id"] == "emptied" for w in ws_list) diff --git a/tests/backend/errors/test_api_error_protocol_contract.py b/tests/backend/errors/test_api_error_protocol_contract.py index b68a9563..2e377934 100644 --- a/tests/backend/errors/test_api_error_protocol_contract.py +++ b/tests/backend/errors/test_api_error_protocol_contract.py @@ -86,7 +86,7 @@ 
class TestStreamingErrorProtocol: def test_stream_preflight_error_uses_json_error_envelope(self, agents_client): resp = agents_client.post( - "/api/agent/data-agent-streaming", + "/api/agent/analyst-streaming", data="not json", content_type="text/plain", ) @@ -95,7 +95,7 @@ def test_stream_preflight_error_uses_json_error_envelope(self, agents_client): assert body["status"] == "error" assert body["error"]["code"] == ErrorCode.INVALID_REQUEST - def test_data_agent_streaming_emits_top_level_type_events(self, agents_client): + def test_analyst_streaming_emits_top_level_type_events(self, agents_client): agent_instance = MagicMock() agent_instance.run.return_value = [ {"type": "text_delta", "content": "hello"}, @@ -107,10 +107,10 @@ def test_data_agent_streaming_emits_top_level_type_events(self, agents_client): patch("data_formulator.routes.agents.get_client", return_value=object()), patch("data_formulator.routes.agents.get_workspace", return_value=object()), patch("data_formulator.datalake.workspace.get_user_home", return_value=object()), - patch("data_formulator.routes.agents.DataAgent", return_value=agent_instance), + patch("data_formulator.routes.agents.AnalystAgent", return_value=agent_instance), ): resp = agents_client.post( - "/api/agent/data-agent-streaming", + "/api/agent/analyst-streaming", json={ "model": {}, "input_tables": [], diff --git a/tests/backend/knowledge/test_knowledge_store.py b/tests/backend/knowledge/test_knowledge_store.py index 2444195b..f69ce37c 100644 --- a/tests/backend/knowledge/test_knowledge_store.py +++ b/tests/backend/knowledge/test_knowledge_store.py @@ -5,11 +5,11 @@ Covers: - list_all, read, write, delete for each category -- path depth constraints (rules=flat, experiences=1 sub-dir) +- path depth constraints (rules=flat, workflows=1 sub-dir) - .md extension enforcement - ConfinedDir traversal rejection - front matter parsing and graceful degradation -- search: title, tags, filename, body matching + ranking + limit +- search: title, 
filename, body matching + ranking + limit - search skips alwaysApply rules (they are injected via system prompt) - tokenization: English stopwords, CJK/ASCII mixed splitting - scoring: partial token match, source discount, table_names boost @@ -73,21 +73,20 @@ def test_lists_rules(self, store, tmp_path): items = store.list_all("rules") assert len(items) == 1 assert items[0]["title"] == "ROI Calculation" - assert items[0]["tags"] == ["finance", "computation"] assert items[0]["path"] == "roi.md" assert items[0]["source"] == "manual" - def test_lists_experiences_in_subdirs(self, store, tmp_path): - exp_dir = tmp_path / "knowledge" / "experiences" / "cleaning" + def test_lists_workflows_in_subdirs(self, store, tmp_path): + exp_dir = tmp_path / "knowledge" / "workflows" / "cleaning" exp_dir.mkdir(parents=True) (exp_dir / "missing.md").write_text(SAMPLE_MD_SKILL, encoding="utf-8") - items = store.list_all("experiences") + items = store.list_all("workflows") assert len(items) == 1 assert items[0]["path"] == "cleaning/missing.md" def test_empty_category_returns_empty(self, store): - items = store.list_all("experiences") + items = store.list_all("workflows") assert items == [] def test_front_matter_title_fallback_to_stem(self, store, tmp_path): @@ -139,9 +138,9 @@ def test_preserves_existing_front_matter(self, store): content = store.read("rules", "fm.md") assert "title: ROI Calculation" in content - def test_writes_experiences_in_subdir(self, store, tmp_path): - store.write("experiences", "cleaning/handle-missing.md", SAMPLE_MD_SKILL) - assert (tmp_path / "knowledge" / "experiences" / "cleaning" / "handle-missing.md").exists() + def test_writes_workflows_in_subdir(self, store, tmp_path): + store.write("workflows", "cleaning/handle-missing.md", SAMPLE_MD_SKILL) + assert (tmp_path / "knowledge" / "workflows" / "cleaning" / "handle-missing.md").exists() # ── CRUD: delete ────────────────────────────────────────────────────────── @@ -169,12 +168,12 @@ def 
test_rules_subdir_rejected(self): with pytest.raises(ValueError, match="sub-directories"): KnowledgeStore.validate_path("rules", "sub/file.md") - def test_experiences_one_subdir_ok(self): - KnowledgeStore.validate_path("experiences", "cat/file.md") + def test_workflows_one_subdir_ok(self): + KnowledgeStore.validate_path("workflows", "cat/file.md") - def test_experiences_two_subdirs_rejected(self): + def test_workflows_two_subdirs_rejected(self): with pytest.raises(ValueError, match="one level"): - KnowledgeStore.validate_path("experiences", "cat/sub/file.md") + KnowledgeStore.validate_path("workflows", "cat/sub/file.md") def test_skills_rejected_as_invalid(self): with pytest.raises(ValueError, match="Invalid category"): @@ -228,7 +227,7 @@ def _setup_knowledge(self, store, tmp_path): rules_dir = tmp_path / "knowledge" / "rules" (rules_dir / "roi.md").write_text(SAMPLE_MD, encoding="utf-8") - exp_dir = tmp_path / "knowledge" / "experiences" / "cleaning" + exp_dir = tmp_path / "knowledge" / "workflows" / "cleaning" exp_dir.mkdir(parents=True) (exp_dir / "missing.md").write_text(SAMPLE_MD_SKILL, encoding="utf-8") @@ -237,11 +236,6 @@ def test_search_by_title(self, store): assert len(results) >= 1 assert results[0]["title"] == "Handle Missing Values" - def test_search_by_tags(self, store): - results = store.search("pandas") - assert len(results) >= 1 - assert results[0]["title"] == "Handle Missing Values" - def test_search_by_filename(self, store): results = store.search("missing") assert len(results) >= 1 @@ -269,7 +263,7 @@ def test_max_results_limit(self, store, tmp_path): assert len(results) <= 5 def test_search_filters_by_category(self, store): - results = store.search("ROI", categories=["experiences"]) + results = store.search("ROI", categories=["workflows"]) assert len(results) == 0 def test_search_skips_always_apply_rules(self, store, tmp_path): @@ -304,13 +298,12 @@ def test_partial_token_match_finds_results(self, store): assert results[0]["title"] == "Handle 
Missing Values" def test_table_names_boost(self, store, tmp_path): - """Entries tagged with a session table name get boosted.""" - exp_dir = tmp_path / "knowledge" / "experiences" / "analysis" + """Entries mentioning a session table name (title/body) get boosted.""" + exp_dir = tmp_path / "knowledge" / "workflows" / "analysis" exp_dir.mkdir(parents=True) (exp_dir / "sales-tip.md").write_text( - "---\ntitle: Sales Analysis Tips\n" - "tags: [sales_data, revenue]\nsource: manual\n---\n" - "When analysing sales, check for seasonality.\n", + "---\ntitle: Sales Analysis Tips\nsource: manual\n---\n" + "When analysing sales_data, check for seasonality.\n", encoding="utf-8", ) results = store.search("analysis tips", table_names=["sales_data"]) @@ -319,7 +312,7 @@ def test_table_names_boost(self, store, tmp_path): def test_non_manual_source_discounted(self, store, tmp_path): """Non-manual entries score lower than equivalent manual entries.""" - exp_dir = tmp_path / "knowledge" / "experiences" + exp_dir = tmp_path / "knowledge" / "workflows" (exp_dir / "auto-tip.md").write_text( "---\ntitle: Tip One\ntags: [tip]\nsource: distill\n---\nSome tip.\n", encoding="utf-8", @@ -328,7 +321,7 @@ def test_non_manual_source_discounted(self, store, tmp_path): "---\ntitle: Tip One\ntags: [tip]\nsource: manual\n---\nSome tip.\n", encoding="utf-8", ) - results = store.search("Tip One", categories=["experiences"]) + results = store.search("Tip One", categories=["workflows"]) assert len(results) == 2 assert results[0]["source"] == "manual" assert results[1]["source"] == "distill" @@ -472,7 +465,7 @@ def test_all_stopwords_returns_empty(self): class TestMatchScore: def test_single_token_title_hit(self): score = KnowledgeStore._match_score( - "ROI", "ROI Calculation", [], "roi", "", + "ROI", "ROI Calculation", "roi", "", ) assert score > 0 @@ -481,49 +474,49 @@ def test_partial_tokens_accumulate(self): score = KnowledgeStore._match_score( "quarterly sales trend", "Sales Trend Analysis", - [], 
"analysis", "", + "analysis", "", ) assert score > 0 def test_whole_string_bonus(self): full = KnowledgeStore._match_score( - "ROI", "ROI Calculation", [], "roi", "", + "ROI", "ROI Calculation", "roi", "", ) no_title = KnowledgeStore._match_score( - "ROI", "Something Else", [], "roi", "", + "ROI", "Something Else", "roi", "", ) assert full > no_title def test_source_discount(self): manual = KnowledgeStore._match_score( - "ROI", "ROI Guide", ["finance"], "roi", "", + "ROI", "ROI Guide", "roi", "", source="manual", ) auto = KnowledgeStore._match_score( - "ROI", "ROI Guide", ["finance"], "roi", "", + "ROI", "ROI Guide", "roi", "", source="distill", ) assert auto == pytest.approx(manual * 0.9) def test_table_names_boost(self): without = KnowledgeStore._match_score( - "analysis", "Analysis Tips", ["sales_data"], "tips", "", + "analysis", "Analysis Tips", "tips", "about sales_data", ) with_tn = KnowledgeStore._match_score( - "analysis", "Analysis Tips", ["sales_data"], "tips", "", + "analysis", "Analysis Tips", "tips", "about sales_data", table_names=["sales_data"], ) assert with_tn > without def test_no_match_returns_zero(self): score = KnowledgeStore._match_score( - "xyznonexistent", "ROI Calculation", ["finance"], "roi", "body text", + "xyznonexistent", "ROI Calculation", "roi", "body text", ) assert score == 0 def test_cjk_mixed_query_matches(self): """Chinese+English query should match via extracted ASCII tokens.""" score = KnowledgeStore._match_score( - "帮我分析ROI", "ROI Calculation", ["finance"], "roi", "", + "帮我分析ROI", "ROI Calculation", "roi", "", ) assert score > 0 diff --git a/tests/backend/routes/test_agent_diagnostics_wiring.py b/tests/backend/routes/test_agent_diagnostics_wiring.py index 10cf8f24..f3dbcef5 100644 --- a/tests/backend/routes/test_agent_diagnostics_wiring.py +++ b/tests/backend/routes/test_agent_diagnostics_wiring.py @@ -69,7 +69,7 @@ def _make_llm_exception(body: str = "connection timeout") -> Exception: class TestDataRecAgentWiring: def 
_make_agent(self): - from data_formulator.agents.agent_data_rec import DataRecAgent + from eval_rec_ts.agent_data_rec import DataRecAgent client = MagicMock() workspace = MagicMock() workspace.get_fresh_name.return_value = "d-result_df" @@ -78,7 +78,7 @@ def _make_agent(self): model_info={"provider": "test", "model": "mock"}, ) - @patch("data_formulator.agents.agent_data_rec.supplement_missing_block") + @patch("eval_rec_ts.agent_data_rec.supplement_missing_block") @patch("data_formulator.sandbox.create_sandbox") def test_normal_response_has_diagnostics(self, mock_sandbox_factory, mock_supplement) -> None: mock_supplement.return_value = ( @@ -122,7 +122,7 @@ def test_exception_response_has_error_diagnostics(self) -> None: assert diag["agent"] == "DataRecAgent" assert diag["error"] == "rate limit" - @patch("data_formulator.agents.agent_data_rec.supplement_missing_block") + @patch("eval_rec_ts.agent_data_rec.supplement_missing_block") @patch("data_formulator.sandbox.create_sandbox") def test_execution_exception_diagnostics_are_sanitized(self, mock_sandbox_factory, mock_supplement) -> None: mock_supplement.return_value = ( @@ -163,7 +163,7 @@ def test_execution_exception_diagnostics_are_sanitized(self, mock_sandbox_factor class TestDataTransformAgentWiring: def _make_agent(self): - from data_formulator.agents.agent_data_transform import DataTransformationAgent + from eval_rec_ts.agent_data_transform import DataTransformationAgent client = MagicMock() workspace = MagicMock() workspace.get_fresh_name.return_value = "d-result_df" @@ -172,7 +172,7 @@ def _make_agent(self): model_info={"provider": "test", "model": "mock"}, ) - @patch("data_formulator.agents.agent_data_transform.supplement_missing_block") + @patch("eval_rec_ts.agent_data_transform.supplement_missing_block") @patch("data_formulator.sandbox.create_sandbox") def test_normal_response_has_diagnostics(self, mock_sandbox_factory, mock_supplement) -> None: mock_supplement.return_value = ( @@ -215,7 +215,7 @@ def 
test_exception_response_has_error_diagnostics(self) -> None: assert diag["agent"] == "DataTransformationAgent" assert diag["error"] == "server error" - @patch("data_formulator.agents.agent_data_transform.supplement_missing_block") + @patch("eval_rec_ts.agent_data_transform.supplement_missing_block") @patch("data_formulator.sandbox.create_sandbox") def test_execution_exception_diagnostics_are_sanitized(self, mock_sandbox_factory, mock_supplement) -> None: mock_supplement.return_value = ( diff --git a/tests/backend/routes/test_chart_insight_route.py b/tests/backend/routes/test_chart_insight_route.py deleted file mode 100644 index f6ba3c41..00000000 --- a/tests/backend/routes/test_chart_insight_route.py +++ /dev/null @@ -1,178 +0,0 @@ -"""Tests for /api/agent/chart-insight route. - -Validates input validation (missing image, missing model, non-vision model), -success path, and agent error handling via AppError. -""" -from __future__ import annotations - -import json -from unittest.mock import MagicMock, patch - -import flask -import pytest - -from data_formulator.errors import AppError, ErrorCode - -pytestmark = [pytest.mark.backend] - - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture() -def app(): - """Minimal Flask app with agent_bp and error handlers registered.""" - test_app = flask.Flask(__name__) - test_app.config["TESTING"] = True - - from data_formulator.error_handler import register_error_handlers - from data_formulator.routes.agents import agent_bp - test_app.register_blueprint(agent_bp) - register_error_handlers(test_app) - - return test_app - - -@pytest.fixture() -def client(app): - return app.test_client() - - -def _valid_body(**overrides): - body = { - "chart_image": "iVBORw0KGgoAAAA==", - "chart_type": "Bar Chart", - "field_names": ["x", "y"], - "input_tables": [{"name": "t1", "rows": [{"x": 1}]}], - "model": 
{"provider": "openai", "model": "gpt-4o", "name": "gpt-4o"}, - } - body.update(overrides) - return body - - -# --------------------------------------------------------------------------- -# Input validation -# --------------------------------------------------------------------------- - -class TestChartInsightValidation: - - def test_non_json_request_returns_error(self, client) -> None: - resp = client.post( - "/api/agent/chart-insight", - data="not json", - content_type="text/plain", - ) - data = resp.get_json() - assert data["status"] == "error" - assert data["error"]["code"] == ErrorCode.INVALID_REQUEST - - def test_missing_chart_image_returns_error(self, client) -> None: - resp = client.post( - "/api/agent/chart-insight", - json=_valid_body(chart_image=""), - ) - data = resp.get_json() - assert data["status"] == "error" - assert data["error"]["code"] == ErrorCode.VALIDATION_ERROR - - def test_missing_model_returns_error(self, client) -> None: - resp = client.post( - "/api/agent/chart-insight", - json=_valid_body(model=None), - ) - data = resp.get_json() - assert data["status"] == "error" - assert data["error"]["code"] == ErrorCode.INVALID_REQUEST - - -# --------------------------------------------------------------------------- -# Success path -# --------------------------------------------------------------------------- - -class TestChartInsightSuccess: - - @patch("data_formulator.routes.agents._get_knowledge_store") - @patch("data_formulator.routes.agents.get_workspace") - @patch("data_formulator.routes.agents.get_identity_id", return_value="test-user") - @patch("data_formulator.routes.agents.get_client") - @patch("data_formulator.routes.agents.ChartInsightAgent") - def test_success_returns_title_and_takeaways( - self, - MockAgent, - mock_get_client, - mock_get_identity, - mock_get_workspace, - mock_get_ks, - client, - ) -> None: - agent_instance = MagicMock() - agent_instance.run.return_value = [{ - "status": "ok", - "title": "Key Insights", - "takeaways": 
["Point A", "Point B"], - }] - MockAgent.return_value = agent_instance - - resp = client.post("/api/agent/chart-insight", json=_valid_body()) - assert resp.status_code == 200 - data = resp.get_json() - assert data["status"] == "success" - assert data["data"]["title"] == "Key Insights" - assert data["data"]["takeaways"] == ["Point A", "Point B"] - - -# --------------------------------------------------------------------------- -# Agent failure paths -# --------------------------------------------------------------------------- - -class TestChartInsightAgentErrors: - - @patch("data_formulator.routes.agents._get_knowledge_store") - @patch("data_formulator.routes.agents.get_workspace") - @patch("data_formulator.routes.agents.get_identity_id", return_value="test-user") - @patch("data_formulator.routes.agents.get_client") - @patch("data_formulator.routes.agents.ChartInsightAgent") - def test_empty_candidates_returns_agent_error( - self, MockAgent, mock_client, mock_id, mock_ws, mock_ks, client, - ) -> None: - MockAgent.return_value.run.return_value = [] - - resp = client.post("/api/agent/chart-insight", json=_valid_body()) - data = resp.get_json() - assert data["status"] == "error" - assert data["error"]["code"] == ErrorCode.AGENT_ERROR - - @patch("data_formulator.routes.agents._get_knowledge_store") - @patch("data_formulator.routes.agents.get_workspace") - @patch("data_formulator.routes.agents.get_identity_id", return_value="test-user") - @patch("data_formulator.routes.agents.get_client") - @patch("data_formulator.routes.agents.ChartInsightAgent") - def test_candidate_status_not_ok_returns_agent_error( - self, MockAgent, mock_client, mock_id, mock_ws, mock_ks, client, - ) -> None: - MockAgent.return_value.run.return_value = [{"status": "error", "content": "parse fail"}] - - resp = client.post("/api/agent/chart-insight", json=_valid_body()) - data = resp.get_json() - assert data["status"] == "error" - assert data["error"]["code"] == ErrorCode.AGENT_ERROR - - 
@patch("data_formulator.routes.agents._get_knowledge_store") - @patch("data_formulator.routes.agents.get_workspace") - @patch("data_formulator.routes.agents.get_identity_id", return_value="test-user") - @patch("data_formulator.routes.agents.get_client") - @patch("data_formulator.routes.agents.ChartInsightAgent") - def test_llm_exception_returns_classified_error( - self, MockAgent, mock_client, mock_id, mock_ws, mock_ks, client, - ) -> None: - exc = Exception("Error code: 401 - Unauthorized, invalid api key") - MockAgent.return_value.run.side_effect = exc - - resp = client.post("/api/agent/chart-insight", json=_valid_body()) - data = resp.get_json() - assert data["status"] == "error" - assert data["error"]["code"] in ( - ErrorCode.LLM_AUTH_FAILED, - ErrorCode.LLM_UNKNOWN_ERROR, - ) diff --git a/tests/backend/routes/test_derive_data_repair_loop.py b/tests/backend/routes/test_derive_data_repair_loop.py deleted file mode 100644 index 93fb64cf..00000000 --- a/tests/backend/routes/test_derive_data_repair_loop.py +++ /dev/null @@ -1,387 +0,0 @@ -"""Integration tests for the derive-data and refine-data repair loop improvements. 
- -Covers: -- Repair loop triggers on both 'error' and 'other error' statuses -- Empty results list does not crash (IndexError guard) -- Followup exceptions are caught gracefully with safe generic messages -- get-recommendation-questions never leaks exception details to the client -""" -from __future__ import annotations - -import json -import shutil -from contextlib import contextmanager -from unittest.mock import MagicMock, patch - -import pytest -from flask import Flask - -from data_formulator.routes.agents import agent_bp - -pytestmark = [pytest.mark.backend] - -MODULE = "data_formulator.routes.agents" - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def _make_ok_result(code: str = "x = 1") -> dict: - return { - "status": "ok", - "code": code, - "content": {"rows": [], "virtual": {"table_name": "t", "row_count": 0}}, - "dialog": [{"role": "system", "content": "..."}], - "agent": "DataRecAgent", - "refined_goal": {}, - } - - -def _make_error_result(status: str = "error", content: str = "some error") -> dict: - return { - "status": status, - "code": "bad_code()", - "content": content, - "dialog": [{"role": "system", "content": "..."}], - "agent": "DataRecAgent", - "refined_goal": {}, - } - - -@contextmanager -def _mock_workspace(): - """Yield a (mock_workspace, tmp_workspace_cm) that stubs out workspace deps.""" - ws = MagicMock() - ws.list_tables.return_value = set() - - @contextmanager - def fake_temp_data(ws_inner, temp_data): - yield ws_inner - - yield ws, fake_temp_data - - -def _build_app(): - from data_formulator.error_handler import register_error_handlers - - app = Flask(__name__) - app.config["TESTING"] = True - app.config["CLI_ARGS"] = {"max_display_rows": 100} - app.register_blueprint(agent_bp) - register_error_handlers(app) - return app - - -def _derive_data_payload(**overrides) -> dict: - base = { - "token": "test-token", - 
"model": {"endpoint": "openai", "model": "gpt-4", "api_key": "k", "api_base": "http://x"}, - "input_tables": [{"name": "t1", "rows": [{"a": 1}]}], - "extra_prompt": "do something", - "max_repair_attempts": 1, - } - base.update(overrides) - return base - - -def _refine_data_payload(**overrides) -> dict: - base = { - "token": "test-token", - "model": {"endpoint": "openai", "model": "gpt-4", "api_key": "k", "api_base": "http://x"}, - "input_tables": [{"name": "t1", "rows": [{"a": 1}]}], - "dialog": [{"role": "system", "content": "..."}], - "new_instruction": "fix it", - "latest_data_sample": [{"a": 1}], - "max_repair_attempts": 1, - } - base.update(overrides) - return base - - -# --------------------------------------------------------------------------- -# derive-data: repair loop status matching -# --------------------------------------------------------------------------- - -class TestDeriveDataRepairLoop: - - def _post_derive(self, client, payload): - return client.post( - "/api/agent/derive-data", - data=json.dumps(payload), - content_type="application/json", - ) - - def test_repair_loop_triggers_on_other_error(self) -> None: - """'other error' status should enter the repair loop (not just 'error').""" - app = _build_app() - - mock_agent = MagicMock() - mock_agent.run.return_value = [_make_error_result(status="other error")] - mock_agent.followup.return_value = [_make_ok_result()] - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.DataRecAgent", return_value=mock_agent), - patch(f"{MODULE}.sign_result"), - ): - with app.test_client() as client: - resp = self._post_derive(client, _derive_data_payload()) - - data = resp.get_json() - assert data["status"] == "success" - assert 
data["data"]["results"][0]["status"] == "ok" - mock_agent.followup.assert_called_once() - - def test_repair_loop_skips_when_status_is_ok(self) -> None: - """When initial result is 'ok', repair loop should not execute.""" - app = _build_app() - - mock_agent = MagicMock() - mock_agent.run.return_value = [_make_ok_result()] - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.DataRecAgent", return_value=mock_agent), - patch(f"{MODULE}.sign_result"), - ): - with app.test_client() as client: - resp = self._post_derive(client, _derive_data_payload()) - - data = resp.get_json() - assert data["data"]["results"][0]["status"] == "ok" - mock_agent.followup.assert_not_called() - - def test_empty_results_does_not_crash(self) -> None: - """If agent.run() returns an empty list, no IndexError should occur.""" - app = _build_app() - - mock_agent = MagicMock() - mock_agent.run.return_value = [] - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.DataRecAgent", return_value=mock_agent), - patch(f"{MODULE}.sign_result"), - ): - with app.test_client() as client: - resp = self._post_derive(client, _derive_data_payload()) - - data = resp.get_json() - assert data["status"] == "success" - assert data["data"]["results"] == [] - - def test_followup_exception_is_caught(self) -> None: - """If agent.followup() raises, the error should be caught and a safe - classified message returned (no raw exception text).""" - app = _build_app() - - mock_agent = MagicMock() - 
mock_agent.run.return_value = [_make_error_result(status="error")] - mock_agent.followup.side_effect = RuntimeError("LLM connection timeout") - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.DataRecAgent", return_value=mock_agent), - patch(f"{MODULE}.sign_result"), - ): - with app.test_client() as client: - resp = self._post_derive(client, _derive_data_payload()) - - data = resp.get_json() - assert data["status"] == "success" - result = data["data"]["results"][0] - assert result["status"] == "error" - # classify_llm_error maps "timeout" → safe timeout message - assert "timed out" in result["content"].lower() or "timeout" in result["content"].lower() - # Raw exception text must not leak - assert "LLM connection timeout" not in result["content"] - - -# --------------------------------------------------------------------------- -# refine-data: same repair loop tests -# --------------------------------------------------------------------------- - -class TestRefineDataRepairLoop: - - def _post_refine(self, client, payload): - return client.post( - "/api/agent/refine-data", - data=json.dumps(payload), - content_type="application/json", - ) - - def test_repair_loop_triggers_on_other_error(self) -> None: - app = _build_app() - - mock_agent = MagicMock() - mock_agent.followup.side_effect = [ - [_make_error_result(status="other error")], - [_make_ok_result()], - ] - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.DataTransformationAgent", 
return_value=mock_agent), - patch(f"{MODULE}.sign_result"), - ): - with app.test_client() as client: - resp = self._post_refine(client, _refine_data_payload()) - - data = resp.get_json() - assert data["data"]["results"][0]["status"] == "ok" - assert mock_agent.followup.call_count == 2 - - def test_empty_results_does_not_crash(self) -> None: - app = _build_app() - - mock_agent = MagicMock() - mock_agent.followup.return_value = [] - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.DataTransformationAgent", return_value=mock_agent), - patch(f"{MODULE}.sign_result"), - ): - with app.test_client() as client: - resp = self._post_refine(client, _refine_data_payload()) - - data = resp.get_json() - assert data["status"] == "success" - assert data["data"]["results"] == [] - - def test_followup_exception_in_repair_is_caught(self) -> None: - """Followup exception returns a safe classified message, not raw exception text.""" - app = _build_app() - - mock_agent = MagicMock() - mock_agent.followup.side_effect = [ - [_make_error_result(status="error")], - RuntimeError("API key expired"), - ] - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.DataTransformationAgent", return_value=mock_agent), - patch(f"{MODULE}.sign_result"), - ): - with app.test_client() as client: - resp = self._post_refine(client, _refine_data_payload()) - - data = resp.get_json() - result = data["data"]["results"][0] - assert result["status"] == "error" - # Raw exception text must not appear - 
assert "API key expired" not in result["content"] - # Should be classified as a model request failure (generic fallback) - assert result["content"] in ( - "Model request failed", - "Authentication failed — please check your API key", - ) - - -# --------------------------------------------------------------------------- -# get-recommendation-questions: error message uses classify_llm_error -# --------------------------------------------------------------------------- - -class TestGetRecommendationQuestionsError: - - def test_error_message_is_classified_not_raw(self) -> None: - """Error response uses classify_llm_error — safe pre-defined message, - not the raw exception text.""" - app = _build_app() - - mock_agent = MagicMock() - mock_agent.run.side_effect = ValueError("column 'x' not found in table") - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.InteractiveExploreAgent", return_value=mock_agent), - ): - with app.test_client() as client: - resp = client.post( - "/api/agent/get-recommendation-questions", - data=json.dumps({ - "model": {"endpoint": "openai", "model": "gpt-4", - "api_key": "k", "api_base": "http://x"}, - "input_tables": [{"name": "t", "rows": []}], - }), - content_type="application/json", - ) - - lines = resp.data.decode("utf-8").strip().split("\n") - assert len(lines) >= 1 - error_events = [json.loads(l) for l in lines - if l.strip() and json.loads(l).get("type") == "error"] - assert len(error_events) == 1 - - err = error_events[0]["error"] - assert "column 'x' not found" not in err["message"] - assert err["retry"] is False - - def test_error_message_never_leaks_api_keys(self) -> None: - """Even when exception contains API keys, classify_and_wrap_llm_error - returns a safe pre-defined 
message without any raw exception text.""" - app = _build_app() - - mock_agent = MagicMock() - mock_agent.run.side_effect = RuntimeError("auth failed api_key=sk-secret123 for model") - - with _mock_workspace() as (ws, fake_ctx): - with ( - patch(f"{MODULE}.get_client", return_value=MagicMock()), - patch(f"{MODULE}.get_identity_id", return_value="test-user"), - patch(f"{MODULE}.get_workspace", return_value=ws), - patch(f"{MODULE}.get_language_instruction", return_value=""), - patch(f"{MODULE}.InteractiveExploreAgent", return_value=mock_agent), - ): - with app.test_client() as client: - resp = client.post( - "/api/agent/get-recommendation-questions", - data=json.dumps({ - "model": {"endpoint": "openai", "model": "gpt-4", - "api_key": "k", "api_base": "http://x"}, - "input_tables": [{"name": "t", "rows": []}], - }), - content_type="application/json", - ) - - lines = resp.data.decode("utf-8").strip().split("\n") - error_events = [json.loads(l) for l in lines - if l.strip() and json.loads(l).get("type") == "error"] - assert len(error_events) >= 1 - err = error_events[0]["error"] - assert "sk-secret123" not in err["message"] - assert err["code"] == "LLM_AUTH_FAILED" diff --git a/tests/backend/routes/test_knowledge_routes.py b/tests/backend/routes/test_knowledge_routes.py index ddc2b7ab..f5ac69ff 100644 --- a/tests/backend/routes/test_knowledge_routes.py +++ b/tests/backend/routes/test_knowledge_routes.py @@ -167,7 +167,7 @@ def test_delete_nonexistent(self, client): class TestKnowledgeSearch: def test_search_returns_results(self, client, tmp_path): - exp_dir = tmp_path / "knowledge" / "experiences" / "finance" + exp_dir = tmp_path / "knowledge" / "workflows" / "finance" exp_dir.mkdir(parents=True, exist_ok=True) (exp_dir / "roi.md").write_text(SAMPLE_MD, encoding="utf-8") @@ -191,7 +191,7 @@ def test_search_invalid_category(self, client): assert data["status"] == "error" def test_search_filters_by_category(self, client, tmp_path): - exp_dir = tmp_path / "knowledge" / 
"experiences" / "finance" + exp_dir = tmp_path / "knowledge" / "workflows" / "finance" exp_dir.mkdir(parents=True, exist_ok=True) (exp_dir / "roi.md").write_text(SAMPLE_MD, encoding="utf-8") @@ -202,7 +202,7 @@ def test_search_filters_by_category(self, client, tmp_path): assert len(data["data"]["results"]) == 0 -SESSION_EXPERIENCE_CONTEXT = { +SESSION_WORKFLOW_CONTEXT = { "context_id": "ws-1", "workspace_id": "ws-1", "workspace_name": "Gasoline prices 2024", @@ -233,6 +233,7 @@ def test_search_filters_by_category(self, client, tmp_path): DISTILLED_MD = """\ --- subtitle: monthly sales aggregation +filename: monthly sales tags: [sales, time-series] created: 2026-05-06 updated: 2026-05-06 @@ -251,37 +252,37 @@ def test_search_filters_by_category(self, client, tmp_path): """ -class TestDistillExperience: - def test_distill_experience_from_context(self, client, tmp_path): +class TestDistillWorkflow: + def test_distill_workflow_from_context(self, client, tmp_path): with patch("data_formulator.routes.agents.get_client", return_value=object()), \ patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \ patch( - "data_formulator.agents.agent_experience_distill." - "ExperienceDistillAgent.run", + "data_formulator.agents.agent_workflow_distill." 
+ "WorkflowDistillAgent.run", return_value=DISTILLED_MD, ) as run: - resp = client.post("/api/knowledge/distill-experience", json={ - "experience_context": SESSION_EXPERIENCE_CONTEXT, + resp = client.post("/api/knowledge/distill-workflow", json={ + "workflow_context": SESSION_WORKFLOW_CONTEXT, "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) data = resp.get_json() assert data["status"] == "success" - assert data["data"]["category"] == "experiences" - assert (tmp_path / "knowledge" / "experiences" / data["data"]["path"]).exists() + assert data["data"]["category"] == "workflows" + assert (tmp_path / "knowledge" / "workflows" / data["data"]["path"]).exists() assert not (tmp_path / "agent-logs").exists() run.assert_called_once() - def test_distill_experience_llm_timeout_returns_structured_error(self, client): + def test_distill_workflow_llm_timeout_returns_structured_error(self, client): with patch("data_formulator.routes.agents.get_client", return_value=object()), \ patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \ patch( - "data_formulator.agents.agent_experience_distill." - "ExperienceDistillAgent.run", + "data_formulator.agents.agent_workflow_distill." 
+ "WorkflowDistillAgent.run", side_effect=TimeoutError("request timed out"), ): - resp = client.post("/api/knowledge/distill-experience", json={ - "experience_context": SESSION_EXPERIENCE_CONTEXT, + resp = client.post("/api/knowledge/distill-workflow", json={ + "workflow_context": SESSION_WORKFLOW_CONTEXT, "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) @@ -291,55 +292,60 @@ def test_distill_experience_llm_timeout_returns_structured_error(self, client): assert data["error"]["code"] == "LLM_TIMEOUT" assert data["error"]["retry"] is True - def test_distill_experience_missing_context(self, client): - resp = client.post("/api/knowledge/distill-experience", json={ + def test_distill_workflow_missing_context(self, client): + resp = client.post("/api/knowledge/distill-workflow", json={ "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) data = resp.get_json() assert data["status"] == "error" - def test_distill_experience_missing_threads(self, client): - bad_context = {k: v for k, v in SESSION_EXPERIENCE_CONTEXT.items() if k != "threads"} - resp = client.post("/api/knowledge/distill-experience", json={ - "experience_context": bad_context, + def test_distill_workflow_missing_threads(self, client): + bad_context = {k: v for k, v in SESSION_WORKFLOW_CONTEXT.items() if k != "threads"} + resp = client.post("/api/knowledge/distill-workflow", json={ + "workflow_context": bad_context, "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) data = resp.get_json() assert data["status"] == "error" - def test_distill_experience_missing_workspace(self, client): - bad_context = {k: v for k, v in SESSION_EXPERIENCE_CONTEXT.items() + def test_distill_workflow_missing_workspace(self, client): + bad_context = {k: v for k, v in SESSION_WORKFLOW_CONTEXT.items() if k not in ("workspace_id", "workspace_name")} - resp = client.post("/api/knowledge/distill-experience", json={ - "experience_context": bad_context, + resp = 
client.post("/api/knowledge/distill-workflow", json={ + "workflow_context": bad_context, "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) data = resp.get_json() assert data["status"] == "error" - def test_distill_session_overrides_title_with_workspace_name(self, client, tmp_path): - """Session-scoped distillation composes 'Experience from : '.""" + def test_distill_session_uses_descriptive_title(self, client, tmp_path): + """Session-scoped distillation uses the agent subtitle as the title.""" with patch("data_formulator.routes.agents.get_client", return_value=object()), \ patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \ patch( - "data_formulator.agents.agent_experience_distill." - "ExperienceDistillAgent.run", + "data_formulator.agents.agent_workflow_distill." + "WorkflowDistillAgent.run", return_value=DISTILLED_MD, ): - resp = client.post("/api/knowledge/distill-experience", json={ - "experience_context": SESSION_EXPERIENCE_CONTEXT, + resp = client.post("/api/knowledge/distill-workflow", json={ + "workflow_context": SESSION_WORKFLOW_CONTEXT, "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) data = resp.get_json() assert data["status"] == "success" path = data["data"]["path"] - # Filename is derived from the workspace name, not the LLM subtitle. - assert path == "gasoline-prices-2024.md" - saved = (tmp_path / "knowledge" / "experiences" / path).read_text(encoding="utf-8") - assert "title: 'Experience from Gasoline prices 2024: monthly sales aggregation'" in saved \ - or "title: \"Experience from Gasoline prices 2024: monthly sales aggregation\"" in saved \ - or "title: Experience from Gasoline prices 2024: monthly sales aggregation" in saved + # Filename is derived from the short agent-emitted `filename` hint, + # not the long descriptive title. 
+ assert path == "monthly-sales.md" + saved = (tmp_path / "knowledge" / "workflows" / path).read_text(encoding="utf-8") + assert "title: monthly sales aggregation" in saved \ + or "title: 'monthly sales aggregation'" in saved \ + or "title: \"monthly sales aggregation\"" in saved + # No legacy "Workflow from :" prefix on the title. + assert "Workflow from" not in saved + # The filename hint is consumed, not persisted in the front matter. + assert "filename:" not in saved # Workspace stamps are present so the file can be looked up later. assert "source_workspace_id: ws-1" in saved assert "source_workspace_name: Gasoline prices 2024" in saved @@ -347,42 +353,46 @@ def test_distill_session_overrides_title_with_workspace_name(self, client, tmp_p assert "## Method" in saved def test_distill_session_upserts_existing_workspace_file(self, client, tmp_path): - """Re-distilling the same workspace overwrites the same file.""" + """Re-distilling the same workspace replaces the prior file.""" + second_md = DISTILLED_MD.replace( + "filename: monthly sales", + "filename: annual revenue", + ) with patch("data_formulator.routes.agents.get_client", return_value=object()), \ patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \ patch( - "data_formulator.agents.agent_experience_distill." - "ExperienceDistillAgent.run", - return_value=DISTILLED_MD, + "data_formulator.agents.agent_workflow_distill." + "WorkflowDistillAgent.run", + side_effect=[DISTILLED_MD, second_md], ): - client.post("/api/knowledge/distill-experience", json={ - "experience_context": SESSION_EXPERIENCE_CONTEXT, + client.post("/api/knowledge/distill-workflow", json={ + "workflow_context": SESSION_WORKFLOW_CONTEXT, "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) - # Re-distill: workspace renamed, so the slug changes — old file - # should be removed in favour of the new one. 
- renamed = {**SESSION_EXPERIENCE_CONTEXT, "workspace_name": "Diesel 2024"} - resp = client.post("/api/knowledge/distill-experience", json={ - "experience_context": renamed, + # Re-distill: the filename hint changes, so the slug changes — old + # file should be removed in favour of the new one (matched by + # source_workspace_id). + resp = client.post("/api/knowledge/distill-workflow", json={ + "workflow_context": SESSION_WORKFLOW_CONTEXT, "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) data = resp.get_json() assert data["status"] == "success" new_path = data["data"]["path"] - exp_dir = tmp_path / "knowledge" / "experiences" + exp_dir = tmp_path / "knowledge" / "workflows" # Stale slug deleted, new slug present. - assert not (exp_dir / "gasoline-prices-2024.md").exists() + assert not (exp_dir / "monthly-sales.md").exists() assert (exp_dir / new_path).exists() - assert new_path == "diesel-2024.md" + assert new_path == "annual-revenue.md" - def test_distill_session_skips_subtitle_double_prefix(self, client, tmp_path): - """Update-mode runs that re-emit a prefixed title don't double-prefix.""" - # Simulate a prior run where the LLM echoed an Experience-prefixed title + def test_distill_session_strips_legacy_title_prefix(self, client, tmp_path): + """Update-mode runs strip any legacy 'Workflow from :' prefix.""" + # Simulate a prior run where the LLM echoed a Workflow-prefixed title # without a subtitle. prior_md = ( "---\n" - "title: 'Experience from Gasoline prices 2024: prior insight'\n" + "title: 'Workflow from Gasoline prices 2024: prior insight'\n" "tags: [a]\n" "created: 2026-05-06\n" "updated: 2026-05-06\n" @@ -392,17 +402,18 @@ def test_distill_session_skips_subtitle_double_prefix(self, client, tmp_path): with patch("data_formulator.routes.agents.get_client", return_value=object()), \ patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \ patch( - "data_formulator.agents.agent_experience_distill." 
- "ExperienceDistillAgent.run", + "data_formulator.agents.agent_workflow_distill." + "WorkflowDistillAgent.run", return_value=prior_md, ): - resp = client.post("/api/knowledge/distill-experience", json={ - "experience_context": SESSION_EXPERIENCE_CONTEXT, + resp = client.post("/api/knowledge/distill-workflow", json={ + "workflow_context": SESSION_WORKFLOW_CONTEXT, "model": {"endpoint": "openai", "key": "x", "model": "gpt"}, }) data = resp.get_json() assert data["status"] == "success" - saved = (tmp_path / "knowledge" / "experiences" / data["data"]["path"]).read_text(encoding="utf-8") - # The "Experience from ..." prefix is stripped before re-prefixing. - assert saved.count("Experience from") == 1 + saved = (tmp_path / "knowledge" / "workflows" / data["data"]["path"]).read_text(encoding="utf-8") + # The legacy "Workflow from ..." prefix is fully stripped. + assert "Workflow from" not in saved + assert "prior insight" in saved diff --git a/tests/frontend/unit/app/chartInsight.test.ts b/tests/frontend/unit/app/chartInsight.test.ts deleted file mode 100644 index 106fdb89..00000000 --- a/tests/frontend/unit/app/chartInsight.test.ts +++ /dev/null @@ -1,119 +0,0 @@ -/** - * Tests for Chart Insight fetchChartInsight thunk behavior. - * - * Covers: - * - rejected reducer: message type varies by error name - * (AbortError = silent, TimeoutError = timeout msg, ChartImageNotReady = image msg, other = generic) - */ -import { describe, it, expect, vi, beforeEach } from 'vitest'; - -// We test the reducer logic by building a minimal extraReducers matcher. -// Since the reducer is tightly coupled to createSlice, we test via the -// slice's reducer function directly. 
- -// Mock all heavy deps before importing the slice -vi.mock('../../../../src/app/utils', () => ({ - fetchWithIdentity: vi.fn(), - getTriggers: vi.fn(() => []), - getUrls: vi.fn(() => ({ - CHART_INSIGHT_URL: '/api/agent/chart-insight', - })), - computeContentHash: vi.fn(() => 'hash'), -})); -vi.mock('../../../../src/app/chartCache', () => ({ - getChartPngDataUrl: vi.fn(), -})); -vi.mock('../../../../src/app/workspaceService', () => ({ - deleteTablesFromWorkspace: vi.fn(), -})); -vi.mock('../../../../src/app/identity', () => ({ - Identity: {}, - IdentityType: { BROWSER: 'browser' }, - getBrowserId: vi.fn(() => 'browser-id'), -})); -vi.mock('../../../../src/app/store', () => ({ - store: { - getState: vi.fn(() => ({})), - dispatch: vi.fn(), - }, -})); -vi.mock('../../../../src/i18n', () => ({ - default: { - t: (key: string, params?: Record) => { - if (key === 'messages.chartInsightTimedOut') { - return `Chart insight timed out after ${params?.seconds}s`; - } - if (key === 'messages.chartInsightImageNotReady') { - return 'Chart image was not ready'; - } - if (key === 'messages.chartInsightFailed') { - return 'Failed to generate chart insight'; - } - return key; - }, - }, -})); - -// --------------------------------------------------------------------------- -// Tests — rejected reducer message discrimination -// --------------------------------------------------------------------------- - -describe('fetchChartInsight rejected reducer', () => { - // We import the reducer and simulate action.error shapes - // The reducer reads: action.error.name, action.error.message, action.meta.arg.chartId - let reducer: any; - let fetchChartInsight: any; - let initialState: any; - - beforeEach(async () => { - const mod = await import('../../../../src/app/dfSlice'); - reducer = mod.dataFormulatorSlice.reducer; - fetchChartInsight = mod.fetchChartInsight; - - initialState = { - ...mod.dataFormulatorSlice.getInitialState(), - chartInsightInProgress: ['chart-1'], - }; - }); - - function 
makeRejectedAction(errorName: string, errorMessage: string = 'test') { - return { - type: fetchChartInsight.rejected.type, - meta: { arg: { chartId: 'chart-1' } }, - error: { name: errorName, message: errorMessage }, - }; - } - - it('AbortError produces no message', () => { - const state = reducer(initialState, makeRejectedAction('AbortError')); - expect(state.messages).toHaveLength(0); - expect(state.chartInsightInProgress).not.toContain('chart-1'); - }); - - it('TimeoutError produces a timeout warning with seconds', () => { - const state = reducer(initialState, makeRejectedAction('TimeoutError')); - expect(state.messages).toHaveLength(1); - expect(state.messages[0].type).toBe('warning'); - expect(state.messages[0].value).toContain('timed out'); - expect(state.messages[0].value).toContain(String(initialState.config.formulateTimeoutSeconds)); - }); - - it('ChartImageNotReady produces an image-not-ready warning', () => { - const state = reducer(initialState, makeRejectedAction('ChartImageNotReady')); - expect(state.messages).toHaveLength(1); - expect(state.messages[0].type).toBe('warning'); - expect(state.messages[0].value).toContain('not ready'); - }); - - it('generic error produces a warning with the error message', () => { - const state = reducer(initialState, makeRejectedAction('Error', 'Model returned nonsense')); - expect(state.messages).toHaveLength(1); - expect(state.messages[0].type).toBe('warning'); - expect(state.messages[0].value).toBe('Model returned nonsense'); - }); - - it('removes chartId from chartInsightInProgress', () => { - const state = reducer(initialState, makeRejectedAction('Error')); - expect(state.chartInsightInProgress).not.toContain('chart-1'); - }); -}); diff --git a/tests/frontend/unit/app/i18nLocales.test.ts b/tests/frontend/unit/app/i18nLocales.test.ts new file mode 100644 index 00000000..dd6c9933 --- /dev/null +++ b/tests/frontend/unit/app/i18nLocales.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, it } from "vitest"; + +import 
en from "../../../../src/i18n/locales/en"; +import zh from "../../../../src/i18n/locales/zh"; + +type TranslationValue = string | Record; +type TranslationMap = Record; + +function collectKeys(value: TranslationMap, prefix = ""): Set { + const keys = new Set(); + + for (const [key, child] of Object.entries(value)) { + const nextPrefix = prefix ? `${prefix}.${key}` : key; + if (typeof child === "string") { + keys.add(nextPrefix); + } else { + for (const childKey of collectKeys(child, nextPrefix)) { + keys.add(childKey); + } + } + } + + return keys; +} + +describe("i18n locale bundles", () => { + it("keeps Simplified Chinese translation keys aligned with English", () => { + expect(collectKeys(zh)).toEqual(collectKeys(en)); + }); +}); diff --git a/tests/frontend/unit/lib/agents-chart/flint_py_extract.test.ts b/tests/frontend/unit/lib/agents-chart/flint_py_extract.test.ts new file mode 100644 index 00000000..54fcc888 --- /dev/null +++ b/tests/frontend/unit/lib/agents-chart/flint_py_extract.test.ts @@ -0,0 +1,240 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. +// +// Comprehensive fixture extractor for the Flint-Py compatibility suite. +// +// Walks GALLERY_TREE, collects every page rendered with a VegaLite-relevant +// backend (single library='vegalite' OR render='triple', which always includes +// VL), and runs the JS `assembleVegaLite` on every TestCase produced by every +// referenced generator. For each case we write: +// flint-py/tests/fixtures//input.json +// flint-py/tests/fixtures//expected.json (only on JS success) +// flint-py/tests/fixtures//meta.json (always) +// +// A top-level `manifest.json` records every case with its status, chart type, +// gallery section/category/page provenance, and any JS error message. 
+ +import { describe, it } from 'vitest'; +import * as fs from 'node:fs'; +import * as path from 'node:path'; + +import { GALLERY_TREE, TEST_GENERATORS } from '../../../../../src/lib/agents-chart/test-data'; +import { assembleVegaLite } from '../../../../../src/lib/agents-chart'; +import type { TestCase } from '../../../../../src/lib/agents-chart/test-data/types'; +import type { ChartAssemblyInput, ChartEncoding } from '../../../../../src/lib/agents-chart/core/types'; + +const CANVAS_SIZE = { width: 400, height: 300 } as const; +const DEFAULT_OPTIONS = { addTooltips: true } as const; + +const FIXTURES_ROOT = path.resolve(__dirname, '../../../../../flint-py/tests/fixtures'); + +interface ManifestEntry { + slug: string; + title: string; + chartType: string; + section: string; + category: string; + page: string; + generator: string; + library: 'vegalite' | 'triple'; + status: 'js_success' | 'js_error'; + jsError?: string; + fixtureDir?: string; +} + +function slugify(s: string): string { + return s + .toLowerCase() + .replace(/[^a-z0-9]+/g, '_') + .replace(/^_+|_+$/g, '') + .slice(0, 80); +} + +/** Convert a TestCase into the ChartAssemblyInput that ChartGallery passes to assembleVegaLite. 
*/ +function testCaseToInput(tc: TestCase): ChartAssemblyInput { + const encodings: Record = {}; + for (const [channel, ei] of Object.entries(tc.encodingMap)) { + if (ei && ei.fieldID) { + const entry: ChartEncoding = { field: ei.fieldID }; + if (ei.dtype) entry.type = ei.dtype as any; + if (ei.aggregate) entry.aggregate = ei.aggregate as any; + if (ei.sortOrder) entry.sortOrder = ei.sortOrder as any; + if (ei.sortBy) entry.sortBy = ei.sortBy; + if (ei.scheme) entry.scheme = ei.scheme; + encodings[channel] = entry; + } + } + + const semanticTypes: Record = {}; + for (const [name, meta] of Object.entries(tc.metadata)) { + if (meta.semanticType) semanticTypes[name] = meta.semanticType; + } + if (tc.semanticAnnotations) { + for (const [name, ann] of Object.entries(tc.semanticAnnotations)) { + semanticTypes[name] = ann; + } + } + + return { + data: { values: tc.data }, + semantic_types: semanticTypes, + chart_spec: { + chartType: tc.chartType, + encodings, + canvasSize: CANVAS_SIZE, + ...(tc.chartProperties ? { chartProperties: tc.chartProperties } : {}), + }, + options: { ...DEFAULT_OPTIONS, ...(tc.assembleOptions ?? {}) }, + }; +} + +/** Walk GALLERY_TREE and collect every (section, category, page, generator) tuple + * that produces VegaLite output. Each generator may appear under multiple pages; + * we de-duplicate by generator key, preferring the first page that referenced it. 
+ */ +interface PageRef { + section: string; + category: string; + page: string; + library: 'vegalite' | 'triple'; +} + +function collectVlGeneratorRefs(): Map { + const seen = new Map(); + for (const section of GALLERY_TREE) { + for (const category of section.categories) { + for (const page of category.pages) { + const isVl = (page.render === 'single' && page.library === 'vegalite') + || page.render === 'triple'; + if (!isVl) continue; + for (const gen of page.generatorKeys) { + if (!seen.has(gen)) { + seen.set(gen, { + section: section.id, + category: category.id, + page: page.id, + library: page.render === 'triple' ? 'triple' : 'vegalite', + }); + } + } + } + } + } + return seen; +} + +describe('flint-py fixture extraction (full gallery)', () => { + fs.mkdirSync(FIXTURES_ROOT, { recursive: true }); + const manifest: ManifestEntry[] = []; + const refs = collectVlGeneratorRefs(); + + for (const [genKey, ref] of refs) { + describe(genKey, () => { + const generator = TEST_GENERATORS[genKey]; + if (!generator) { + it('skip — generator not registered', () => { + manifest.push({ + slug: `_missing__${slugify(genKey)}`, + title: '(generator not registered)', + chartType: '', + section: ref.section, + category: ref.category, + page: ref.page, + generator: genKey, + library: ref.library, + status: 'js_error', + jsError: 'generator key not found in TEST_GENERATORS', + }); + }); + return; + } + + let cases: TestCase[]; + try { + cases = generator(); + } catch (e: any) { + it('skip — generator threw', () => { + manifest.push({ + slug: `_gen_threw__${slugify(genKey)}`, + title: '(generator threw)', + chartType: '', + section: ref.section, + category: ref.category, + page: ref.page, + generator: genKey, + library: ref.library, + status: 'js_error', + jsError: `generator() threw: ${e?.message || String(e)}`, + }); + }); + return; + } + + cases.forEach((tc, idx) => { + const slug = `${slugify(genKey)}__${String(idx).padStart(2, '0')}__${slugify(tc.title || `case${idx}`)}`; + 
it(tc.title || `case ${idx}`, () => { + const dir = path.join(FIXTURES_ROOT, slug); + fs.mkdirSync(dir, { recursive: true }); + + const entry: ManifestEntry = { + slug, + title: tc.title || `case ${idx}`, + chartType: tc.chartType, + section: ref.section, + category: ref.category, + page: ref.page, + generator: genKey, + library: ref.library, + status: 'js_success', + fixtureDir: slug, + }; + + let input: ChartAssemblyInput; + try { + input = testCaseToInput(tc); + } catch (e: any) { + entry.status = 'js_error'; + entry.jsError = `testCaseToInput threw: ${e?.message || String(e)}`; + manifest.push(entry); + fs.writeFileSync(path.join(dir, 'meta.json'), JSON.stringify(entry, null, 2)); + return; + } + + let spec: unknown; + try { + spec = assembleVegaLite(input); + } catch (e: any) { + entry.status = 'js_error'; + entry.jsError = e?.message || String(e); + manifest.push(entry); + fs.writeFileSync( + path.join(dir, 'input.json'), + JSON.stringify({ title: tc.title, description: tc.description, chartType: tc.chartType, input }, null, 2), + ); + fs.writeFileSync(path.join(dir, 'meta.json'), JSON.stringify(entry, null, 2)); + return; + } + + fs.writeFileSync( + path.join(dir, 'input.json'), + JSON.stringify({ title: tc.title, description: tc.description, chartType: tc.chartType, input }, null, 2), + ); + fs.writeFileSync( + path.join(dir, 'expected.json'), + JSON.stringify(spec, null, 2), + ); + fs.writeFileSync(path.join(dir, 'meta.json'), JSON.stringify(entry, null, 2)); + manifest.push(entry); + }); + }); + }); + } + + it('writes the fixture manifest', () => { + manifest.sort((a, b) => a.slug.localeCompare(b.slug)); + fs.writeFileSync( + path.join(FIXTURES_ROOT, 'manifest.json'), + JSON.stringify(manifest, null, 2), + ); + }); +}); diff --git a/tests/frontend/unit/lib/agents-chart/sortAction.test.ts b/tests/frontend/unit/lib/agents-chart/sortAction.test.ts new file mode 100644 index 00000000..b9152bbd --- /dev/null +++ 
b/tests/frontend/unit/lib/agents-chart/sortAction.test.ts @@ -0,0 +1,210 @@ +import { describe, expect, it } from 'vitest'; +import { makeSortAction } from '../../../../../src/lib/agents-chart'; +import { assembleVegaLite } from '../../../../../src/lib/agents-chart'; + +const baseCanvas = { width: 400, height: 300 }; + +describe('makeSortAction (Sort encoding action)', () => { + const action = makeSortAction(); + + describe('get — derive control value from base encodings', () => { + it('returns undefined (Default) when no sort is set', () => { + const enc = { x: { field: 'cat', type: 'nominal' as const }, y: { field: 'val', aggregate: 'sum' as const } }; + expect(action.get(enc)).toBeUndefined(); + }); + + it('reads value sort from sortBy referencing the measure channel', () => { + const enc = { + x: { field: 'cat', type: 'nominal' as const, sortBy: 'y', sortOrder: 'descending' as const }, + y: { field: 'val', aggregate: 'sum' as const }, + }; + expect(action.get(enc)).toBe('value-desc'); + }); + + it('treats a bare label sort (sortOrder, no sortBy) as Default', () => { + const enc = { + x: { field: 'cat', type: 'nominal' as const, sortOrder: 'ascending' as const }, + y: { field: 'val', aggregate: 'sum' as const }, + }; + expect(action.get(enc)).toBeUndefined(); + }); + + it('treats unrepresentable sorts (custom order / by-color) as Default', () => { + const enc = { + x: { field: 'cat', type: 'nominal' as const, sortBy: '["B","A"]' }, + y: { field: 'val', aggregate: 'sum' as const }, + }; + expect(action.get(enc)).toBeUndefined(); + }); + + it('detects a horizontal orientation (measure on x, category on y)', () => { + const enc = { + x: { field: 'val', aggregate: 'sum' as const }, + y: { field: 'cat', type: 'nominal' as const, sortBy: 'x', sortOrder: 'ascending' as const }, + }; + expect(action.get(enc)).toBe('value-asc'); + }); + + it('returns undefined when the category axis is temporal (not sortable)', () => { + const enc = { + x: { field: 'month', type: 
'temporal' as const }, + y: { field: 'val', type: 'quantitative' as const, aggregate: 'sum' as const }, + }; + expect(action.get(enc)).toBeUndefined(); + }); + + it('returns undefined when both axes are quantitative (scatter)', () => { + const enc = { + x: { field: 'a', type: 'quantitative' as const }, + y: { field: 'b', type: 'quantitative' as const }, + }; + expect(action.get(enc)).toBeUndefined(); + }); + }); + + describe('isApplicable — type-aware visibility gate', () => { + it('is applicable when a discrete category + measure pair exists', () => { + const enc = { x: { field: 'cat', type: 'nominal' as const }, y: { field: 'val', aggregate: 'sum' as const } }; + expect(action.isApplicable?.({ encodings: enc })).toBe(true); + }); + + it('is not applicable for a temporal-x time series', () => { + const enc = { + x: { field: 'month', type: 'temporal' as const }, + y: { field: 'val', type: 'quantitative' as const, aggregate: 'sum' as const }, + }; + expect(action.isApplicable?.({ encodings: enc })).toBe(false); + }); + + it('is not applicable when no measure axis exists', () => { + const enc = { x: { field: 'cat', type: 'nominal' as const }, y: { field: 'cat2', type: 'nominal' as const } }; + expect(action.isApplicable?.({ encodings: enc })).toBe(false); + }); + }); + + describe('set — compose the override onto the category channel', () => { + const enc = { x: { field: 'cat', type: 'nominal' as const }, y: { field: 'val', aggregate: 'sum' as const } }; + + it('value-desc writes sortBy=measure + descending on the category channel', () => { + const next = action.set(enc, 'value-desc'); + expect(next.x.sortBy).toBe('y'); + expect(next.x.sortOrder).toBe('descending'); + }); + + it('Default (undefined) clears both sort fields', () => { + const sorted = action.set(enc, 'value-desc'); + const cleared = action.set(sorted, undefined); + expect(cleared.x.sortBy).toBeUndefined(); + expect(cleared.x.sortOrder).toBeUndefined(); + }); + + it('does not mutate the input encodings', 
() => { + action.set(enc, 'value-desc'); + expect(enc.x).not.toHaveProperty('sortBy'); + }); + + it('targets the category channel under horizontal orientation', () => { + const horizontal = { x: { field: 'val', aggregate: 'sum' as const }, y: { field: 'cat', type: 'nominal' as const } }; + const next = action.set(horizontal, 'value-asc'); + expect(next.y.sortBy).toBe('x'); + expect(next.y.sortOrder).toBe('ascending'); + expect(next.x.sortBy).toBeUndefined(); + }); + + it('is a no-op when there is no discrete category axis (temporal x)', () => { + const temporal = { + x: { field: 'month', type: 'temporal' as const }, + y: { field: 'val', type: 'quantitative' as const, aggregate: 'sum' as const }, + }; + const next = action.set(temporal, 'value-desc'); + expect(next).toBe(temporal); + }); + }); + + describe('end-to-end: override composed by the compiler', () => { + const data = { + values: [ + { category: 'A', value: 20 }, + { category: 'B', value: 50 }, + { category: 'C', value: 10 }, + ], + }; + + it('value-desc override sorts the bar x-axis by the measure', () => { + const spec = assembleVegaLite({ + data, + semantic_types: { category: 'Category', value: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'category' }, y: { field: 'value', aggregate: 'sum' } }, + chartProperties: { sort: 'value-desc' }, + canvasSize: baseCanvas, + }, + }); + expect(spec.encoding.x.sort).toBe('-y'); + }); + + it('no override leaves the template default ordering', () => { + const spec = assembleVegaLite({ + data, + semantic_types: { category: 'Category', value: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'category' }, y: { field: 'value', aggregate: 'sum' } }, + canvasSize: baseCanvas, + }, + }); + expect(spec.encoding.x.sort).not.toBe('-y'); + }); + + it('applies value-desc when the measure type is auto (resolved by the compiler)', () => { + // The y measure has no explicit `type` and no aggregate — its + // 
quantitative-ness is only known after semantic resolution. The + // override must still compose (regression: previously no-op'd). + const spec = assembleVegaLite({ + data: { + values: [ + { category: 'A', value: 20 }, + { category: 'B', value: 50 }, + { category: 'C', value: 10 }, + ], + }, + semantic_types: { category: 'Category', value: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'category' }, y: { field: 'value' } }, + chartProperties: { sort: 'value-desc' }, + canvasSize: baseCanvas, + }, + }); + expect(spec.encoding.x.sort).toBe('-y'); + }); + + it('value-desc overrides a field’s intrinsic ordinal ordering', () => { + // Ordinal category with canonical levels would normally sort by those + // levels; an explicit value sort must win over the intrinsic order. + const spec = assembleVegaLite({ + data: { + values: [ + { budget: 'Under $10M', pct: 65 }, + { budget: '$10M-$30M', pct: 62 }, + { budget: '$30M-$70M', pct: 64 }, + { budget: '$70M-$150M', pct: 76 }, + { budget: '$150M+', pct: 97 }, + ], + }, + semantic_types: { + budget: { semanticType: 'Category', sortOrder: ['Under $10M', '$10M-$30M', '$30M-$70M', '$70M-$150M', '$150M+'] }, + pct: 'Percentage', + }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'budget', type: 'ordinal' }, y: { field: 'pct' } }, + chartProperties: { sort: 'value-desc' }, + canvasSize: baseCanvas, + }, + }); + expect(spec.encoding.x.sort).toBe('-y'); + }); + }); +}); diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/bandedLabelAngle.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/bandedLabelAngle.test.ts new file mode 100644 index 00000000..7dfeb4c5 --- /dev/null +++ b/tests/frontend/unit/lib/agents-chart/vegalite/bandedLabelAngle.test.ts @@ -0,0 +1,65 @@ +import { describe, expect, it } from 'vitest'; +import { assembleVegaLite } from '../../../../../../src/lib/agents-chart'; + +const canvas = { width: 400, height: 300 }; + +/** + * Numeric labels on a 
banded (discrete) x-axis must not be forced horizontal + * when they would crowd — many/wide numbers should rotate. Few, short numbers + * stay horizontal. A continuous (non-banded) quantitative axis is left to + * Vega-Lite's own overlap handling. + */ +describe('banded x-axis numeric label angle', () => { + it('rotates many wide numeric labels on a banded ordinal x-axis', () => { + const values = Array.from({ length: 30 }, (_, i) => ({ + bucket: 1000000 + i * 125000, + count: 10 + (i % 7), + })); + const spec: any = assembleVegaLite({ + data: { values }, + semantic_types: { bucket: 'Quantity', count: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'bucket', type: 'ordinal' }, y: { field: 'count' } }, + canvasSize: canvas, + }, + }); + expect(spec.config.axisX.labelAngle).toBe(-45); + }); + + it('keeps a few short numeric labels horizontal', () => { + const values = [ + { bucket: 1, count: 10 }, + { bucket: 2, count: 20 }, + { bucket: 3, count: 15 }, + ]; + const spec: any = assembleVegaLite({ + data: { values }, + semantic_types: { bucket: 'Quantity', count: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'bucket', type: 'ordinal' }, y: { field: 'count' } }, + canvasSize: canvas, + }, + }); + expect(spec.config.axisX.labelAngle).toBe(0); + }); + + it('leaves a continuous (non-banded) quantitative x-axis to VL overlap handling', () => { + const values = Array.from({ length: 25 }, (_, i) => ({ + bucket: 1000000 + i * 125000, + count: 10 + (i % 7), + })); + const spec: any = assembleVegaLite({ + data: { values }, + semantic_types: { bucket: 'Quantity', count: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'bucket' }, y: { field: 'count' } }, + canvasSize: canvas, + }, + }); + // Continuous axis: no forced labelAngle override from banded-label logic. 
+ expect(spec.config.axisX?.labelAngle).toBeUndefined(); + }); +}); diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/chartOptionApplicability.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/chartOptionApplicability.test.ts new file mode 100644 index 00000000..d1818441 --- /dev/null +++ b/tests/frontend/unit/lib/agents-chart/vegalite/chartOptionApplicability.test.ts @@ -0,0 +1,189 @@ +import { describe, expect, it } from 'vitest'; +import { assembleVegaLite } from '../../../../../../src/lib/agents-chart'; + +const canvas = { width: 600, height: 400 }; + +/** Keys of options Flint reports as applicable for a rendered spec. */ +const applicableKeys = (spec: any): string[] => + (spec._options ?? []).filter((o: any) => o.applicable).map((o: any) => o.key); +/** Whether a given option key is carried in the catalog at all. */ +const hasOption = (spec: any, key: string): boolean => + (spec._options ?? []).some((o: any) => o.key === key); + +describe('stackMode applicability (gated on a series/color channel)', () => { + const rows = [ + { region: 'N', cat: 'a', val: 3 }, { region: 'N', cat: 'b', val: 5 }, + { region: 'S', cat: 'a', val: 2 }, { region: 'S', cat: 'b', val: 4 }, + ]; + + it('is applicable when color (the series dimension) is bound', () => { + const spec = assembleVegaLite({ + data: { values: rows }, + semantic_types: { region: 'Category', cat: 'Category', val: 'Quantity' }, + chart_spec: { + chartType: 'Stacked Bar Chart', + encodings: { x: { field: 'region' }, y: { field: 'val' }, color: { field: 'cat' } }, + canvasSize: canvas, + }, + }) as any; + expect(applicableKeys(spec)).toContain('stackMode'); + }); + + it('is NOT applicable without a color channel (nothing to stack)', () => { + const spec = assembleVegaLite({ + data: { values: rows }, + semantic_types: { region: 'Category', val: 'Quantity' }, + chart_spec: { + chartType: 'Stacked Bar Chart', + encodings: { x: { field: 'region' }, y: { field: 'val' } }, + canvasSize: canvas, + }, + }) 
as any; + expect(hasOption(spec, 'stackMode')).toBe(true); + expect(applicableKeys(spec)).not.toContain('stackMode'); + }); +}); + +describe('independentYAxis applicability (faceted + quantitative y)', () => { + const facetRows = [ + { g: 'A', x: 'p', y: 1 }, { g: 'A', x: 'q', y: 2 }, + { g: 'B', x: 'p', y: 100 }, { g: 'B', x: 'q', y: 300 }, + ]; + + it('is applicable when faceted with a quantitative y of diverging ranges', () => { + const spec = assembleVegaLite({ + data: { values: facetRows }, + semantic_types: { g: 'Category', x: 'Category', y: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'x' }, y: { field: 'y' }, column: { field: 'g' } }, + canvasSize: canvas, + }, + }) as any; + expect(applicableKeys(spec)).toContain('independentYAxis'); + }); + + it('is NOT applicable when not faceted', () => { + const spec = assembleVegaLite({ + data: { values: facetRows }, + semantic_types: { x: 'Category', y: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'x' }, y: { field: 'y' } }, + canvasSize: canvas, + }, + }) as any; + expect(applicableKeys(spec)).not.toContain('independentYAxis'); + }); +}); + +describe('showPercent applicability (additive, single-sign, non-zero total)', () => { + function barTable(values: any[], semantic_types: any) { + return assembleVegaLite({ + data: { values }, + semantic_types, + chart_spec: { + chartType: 'Bar Table', + encodings: { y: { field: 'cat' }, x: { field: 'val' } }, + canvasSize: canvas, + }, + }) as any; + } + + it('is applicable for an additive single-sign measure with a non-zero total', () => { + const spec = barTable( + [{ cat: 'a', val: 10 }, { cat: 'b', val: 20 }, { cat: 'c', val: 30 }], + { cat: 'Category', val: 'Quantity' }, + ); + expect(applicableKeys(spec)).toContain('showPercent'); + }); + + it('is NOT applicable for a mixed-sign measure (share would be misleading)', () => { + const spec = barTable( + [{ cat: 'a', val: 10 }, { cat: 'b', val: -20 }, 
{ cat: 'c', val: 5 }], + { cat: 'Category', val: 'Number' }, + ); + expect(hasOption(spec, 'showPercent')).toBe(true); + expect(applicableKeys(spec)).not.toContain('showPercent'); + }); +}); + +describe('xAxisType applicability (date-like x with dual interpretation)', () => { + // Year-month strings: the resolver classifies these as temporal, but the + // modest distinct set is equally readable as discrete category labels. + const monthRows = [ + { month: '2010-01', cost: 17.8 }, { month: '2011-04', cost: 20.1 }, + { month: '2012-06', cost: 19.0 }, { month: '2013-09', cost: 19.9 }, + { month: '2014-11', cost: 21.0 }, + ]; + + function barWithX(values: any[], semantic_types: any, chartProperties?: any) { + return assembleVegaLite({ + data: { values }, + semantic_types, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'month' }, y: { field: 'cost' } }, + canvasSize: canvas, + ...(chartProperties ? { chartProperties } : {}), + }, + }) as any; + } + + it('is applicable for a date-like temporal x with a modest distinct count', () => { + const spec = barWithX(monthRows, { month: 'YearMonth', cost: 'Quantity' }); + expect(applicableKeys(spec)).toContain('xAxisType'); + }); + + it('forces a discrete (nominal) x when the user picks "nominal"', () => { + const spec = barWithX( + monthRows, { month: 'YearMonth', cost: 'Quantity' }, { xAxisType: 'nominal' }, + ); + // Override flows through to the encoding type the whole pipeline sees. + expect(spec.encoding?.x?.type).toBe('nominal'); + // The control stays visible after an explicit choice. 
+ expect(applicableKeys(spec)).toContain('xAxisType'); + }); + + it('is NOT applicable for a plain categorical x (no temporal interpretation)', () => { + const spec = assembleVegaLite({ + data: { values: [{ region: 'N', cost: 3 }, { region: 'S', cost: 5 }] }, + semantic_types: { region: 'Category', cost: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'region' }, y: { field: 'cost' } }, + canvasSize: canvas, + }, + }) as any; + expect(hasOption(spec, 'xAxisType')).toBe(true); + expect(applicableKeys(spec)).not.toContain('xAxisType'); + }); + + it('offers yAxisType for a date-like temporal y (transposed/horizontal bar)', () => { + const spec = assembleVegaLite({ + data: { values: monthRows }, + semantic_types: { month: 'YearMonth', cost: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { y: { field: 'month' }, x: { field: 'cost' } }, + canvasSize: canvas, + }, + }) as any; + expect(applicableKeys(spec)).toContain('yAxisType'); + }); + + it('forces a discrete (nominal) y when the user picks "nominal"', () => { + const spec = assembleVegaLite({ + data: { values: monthRows }, + semantic_types: { month: 'YearMonth', cost: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { y: { field: 'month' }, x: { field: 'cost' } }, + canvasSize: canvas, + chartProperties: { yAxisType: 'nominal' }, + }, + }) as any; + expect(spec.encoding?.y?.type).toBe('nominal'); + expect(applicableKeys(spec)).toContain('yAxisType'); + }); +}); diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/closedDomainStacking.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/closedDomainStacking.test.ts new file mode 100644 index 00000000..623e8aa4 --- /dev/null +++ b/tests/frontend/unit/lib/agents-chart/vegalite/closedDomainStacking.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, it } from 'vitest'; +import { assembleVegaLite } from '../../../../../../src/lib/agents-chart'; + +const canvas = { width: 600, height: 400 
}; + +/** + * Regression: a closed-domain measure (Correlation, intrinsic [-1, 1]) on a bar + * chart that stacks — either via a color series or via repeated categories with + * no color — must NOT keep the intrinsic clamp domain, or the stacked bars + * overflow/clip past the fixed axis bound. + */ +describe('closed-domain stacked bar overflow', () => { + it('drops the intrinsic [-1,1] clamp when a color series stacks past the bound', () => { + const products = ['A', 'B', 'C', 'D']; + const series = ['s1', 's2', 's3', 's4']; + const values: any[] = []; + for (const p of products) { + for (const s of series) values.push({ product: p, series: s, corr: 0.9 }); + } + const spec = assembleVegaLite({ + data: { values }, + semantic_types: { product: 'Category', series: 'Category', corr: 'Correlation' }, + chart_spec: { + chartType: 'Stacked Bar Chart', + encodings: { x: { field: 'product' }, y: { field: 'corr' }, color: { field: 'series' } }, + canvasSize: canvas, + }, + }); + expect(spec.encoding.y.scale?.domain).toBeUndefined(); + expect(spec.encoding.y.scale?.clamp).toBeUndefined(); + }); + + it('drops the intrinsic clamp when repeated categories stack with NO color', () => { + const products = ['A', 'B', 'C', 'D', 'E']; + const values: any[] = []; + for (const p of products) { + for (let i = 0; i < 4; i++) { + values.push({ product: p, corr: p === 'C' ? 
-0.21 : 0.9 }); + } + } + const spec = assembleVegaLite({ + data: { values }, + semantic_types: { product: 'Category', corr: 'Correlation' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'product' }, y: { field: 'corr' } }, + canvasSize: canvas, + }, + }); + expect(spec.encoding.y.scale?.domain).toBeUndefined(); + expect(spec.encoding.y.scale?.clamp).toBeUndefined(); + }); + + it('detects overflow on the negative side even when signed totals would cancel', () => { + // Per category: three +0.5 and four -0.5 → signed sum = -0.5 (within [-1,1]), + // but the negative stack reaches -2.0, overflowing the lower bound. + const products = ['A', 'B']; + const values: any[] = []; + for (const p of products) { + for (let i = 0; i < 3; i++) values.push({ product: p, corr: 0.5 }); + for (let i = 0; i < 4; i++) values.push({ product: p, corr: -0.5 }); + } + const spec = assembleVegaLite({ + data: { values }, + semantic_types: { product: 'Category', corr: 'Correlation' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'product' }, y: { field: 'corr' } }, + canvasSize: canvas, + }, + }); + expect(spec.encoding.y.scale?.domain).toBeUndefined(); + }); + + it('keeps the intrinsic [-1,1] domain for a non-stacking chart (one row per category)', () => { + const values = [ + { product: 'A', corr: 0.9 }, + { product: 'B', corr: -0.21 }, + { product: 'C', corr: 0.4 }, + ]; + const spec = assembleVegaLite({ + data: { values }, + semantic_types: { product: 'Category', corr: 'Correlation' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'product' }, y: { field: 'corr' } }, + canvasSize: canvas, + }, + }); + expect(spec.encoding.y.scale?.domain).toEqual([-1, 1]); + }); +}); diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/logScale.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/logScale.test.ts new file mode 100644 index 00000000..ab8d011e --- /dev/null +++ 
b/tests/frontend/unit/lib/agents-chart/vegalite/logScale.test.ts @@ -0,0 +1,157 @@ +import { describe, expect, it } from 'vitest'; +import { assembleVegaLite, getChartOptions } from '../../../../../../src/lib/agents-chart'; + +const canvas = { width: 500, height: 400 }; + +/** Keys of options Flint reports as applicable for a rendered spec. */ +const applicableKeys = (spec: any): string[] => + (spec._options ?? []).filter((o: any) => o.applicable).map((o: any) => o.key); +/** Look up a single option descriptor on a rendered spec. */ +const optionFor = (spec: any, key: string): any => + (spec._options ?? []).find((o: any) => o.key === key); + +// Wide-range positive values (≥ 6 orders of magnitude) so the engine's +// conservative log recommendation fires, and the offer-eligibility (≥ 3 +// decades) is comfortably met. +const wideX = Array.from({ length: 12 }, (_, i) => ({ + x: Math.pow(10, i * 0.7), // 1 … ~10^7.7 + y: i + 1, +})); + +function scatter(encodings: any, chartProperties?: any) { + return assembleVegaLite({ + data: { values: wideX }, + semantic_types: { x: 'Quantity', y: 'Number' }, + chart_spec: { + chartType: 'Scatter Plot', + encodings, + canvasSize: canvas, + chartProperties, + }, + }) as any; +} + +describe('per-axis log scale: offer eligibility + user override', () => { + it('offers logScale_x on a wide-range continuous quantitative position axis', () => { + const spec = scatter({ x: { field: 'x' }, y: { field: 'y' } }); + expect(applicableKeys(spec)).toContain('logScale_x'); + }); + + it('does NOT offer log on a narrow-range axis', () => { + const narrow = Array.from({ length: 12 }, (_, i) => ({ x: 10 + i, y: i })); + const spec = assembleVegaLite({ + data: { values: narrow }, + semantic_types: { x: 'Number', y: 'Number' }, + chart_spec: { + chartType: 'Scatter Plot', + encodings: { x: { field: 'x' }, y: { field: 'y' } }, + canvasSize: canvas, + }, + }) as any; + expect(applicableKeys(spec)).not.toContain('logScale_x'); + }); + + it("unset 
follows the engine recommendation (log for wide-range additive measure)", () => { + const spec = scatter({ x: { field: 'x' }, y: { field: 'y' } }); + expect(spec.encoding.x.scale?.type).toBe('log'); + // and the option's resolved value reflects that recommendation + expect(optionFor(spec, 'logScale_x')?.value).toBe(true); + }); + + it("false overrides the recommendation and forces a linear axis", () => { + const spec = scatter({ x: { field: 'x' }, y: { field: 'y' } }, { logScale_x: false }); + expect(spec.encoding.x.scale?.type).not.toBe('log'); + // still offered, so the user can revert + expect(applicableKeys(spec)).toContain('logScale_x'); + }); + + it("true forces a log axis even when the engine would not recommend it", () => { + // Generic 'Number' over a moderate (non-recommended) range: default stays linear. + const vals = Array.from({ length: 12 }, (_, i) => ({ x: (i + 1) * 50, y: i })); + const auto = assembleVegaLite({ + data: { values: vals }, + semantic_types: { x: 'Number', y: 'Number' }, + chart_spec: { + chartType: 'Scatter Plot', + encodings: { x: { field: 'x' }, y: { field: 'y' } }, + canvasSize: canvas, + }, + }) as any; + expect(auto.encoding.x.scale?.type).not.toBe('log'); + + const forced = assembleVegaLite({ + data: { values: vals }, + semantic_types: { x: 'Number', y: 'Number' }, + chart_spec: { + chartType: 'Scatter Plot', + encodings: { x: { field: 'x' }, y: { field: 'y' } }, + canvasSize: canvas, + chartProperties: { logScale_x: true }, + }, + }) as any; + expect(forced.encoding.x.scale?.type).toBe('log'); + }); + + it("uses symlog for a true toggle when the data contains zeros", () => { + const withZeros = [{ x: 0, y: 0 }, ...Array.from({ length: 11 }, (_, i) => ({ x: Math.pow(10, i * 0.6), y: i + 1 }))]; + const spec = assembleVegaLite({ + data: { values: withZeros }, + semantic_types: { x: 'Number', y: 'Number' }, + chart_spec: { + chartType: 'Scatter Plot', + encodings: { x: { field: 'x' }, y: { field: 'y' } }, + canvasSize: canvas, + 
chartProperties: { logScale_x: true }, + }, + }) as any; + expect(spec.encoding.x.scale?.type).toBe('symlog'); + }); + + it('never offers log on a length-cognitive bar chart, even with wide-range data', () => { + const spec = assembleVegaLite({ + data: { values: wideX.map((d, i) => ({ cat: `c${i}`, val: d.x })) }, + semantic_types: { cat: 'Category', val: 'Quantity' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'cat' }, y: { field: 'val' } }, + canvasSize: canvas, + }, + }) as any; + // Length marks never even carry the log-scale option in their catalog. + expect((spec._options ?? []).find((o: any) => o.key.startsWith('logScale'))).toBeUndefined(); + }); + + it('offers log only on the quantitative value axis of a line chart (not the temporal axis)', () => { + const series = Array.from({ length: 12 }, (_, i) => ({ + t: `2020-${String((i % 12) + 1).padStart(2, '0')}-01`, + v: Math.pow(10, i * 0.7), + })); + const spec = assembleVegaLite({ + data: { values: series }, + semantic_types: { t: 'Date', v: 'Quantity' }, + chart_spec: { + chartType: 'Line Chart', + encodings: { x: { field: 't' }, y: { field: 'v' } }, + canvasSize: canvas, + }, + }) as any; + expect(applicableKeys(spec)).toContain('logScale_y'); + expect(applicableKeys(spec)).not.toContain('logScale_x'); + }); + + it('getChartOptions reports the same applicable options as the rendered spec', () => { + const input = { + data: { values: wideX }, + semantic_types: { x: 'Quantity', y: 'Number' }, + chart_spec: { + chartType: 'Scatter Plot', + encodings: { x: { field: 'x' }, y: { field: 'y' } }, + canvasSize: canvas, + }, + }; + const spec = assembleVegaLite(input) as any; + const options = getChartOptions(input); + expect(options).toEqual(spec._options); + expect(options.filter(o => o.applicable).map(o => o.key)).toContain('logScale_x'); + }); +}); diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/zeroBaseline.test.ts 
b/tests/frontend/unit/lib/agents-chart/vegalite/zeroBaseline.test.ts new file mode 100644 index 00000000..aaf32803 --- /dev/null +++ b/tests/frontend/unit/lib/agents-chart/vegalite/zeroBaseline.test.ts @@ -0,0 +1,161 @@ +import { describe, expect, it } from 'vitest'; +import { assembleVegaLite } from '../../../../../../src/lib/agents-chart'; + +const canvas = { width: 500, height: 400 }; + +/** Keys of options Flint reports as applicable for a rendered spec. */ +const applicableKeys = (spec: any): string[] => + (spec._options ?? []).filter((o: any) => o.applicable).map((o: any) => o.key); +/** Look up a single option descriptor on a rendered spec. */ +const optionFor = (spec: any, key: string): any => + (spec._options ?? []).find((o: any) => o.key === key); + +/** Resolve the y scale across the possible spec nestings (top / layer / facet). */ +function yScale(spec: any): any { + return ( + spec?.encoding?.y?.scale ?? + spec?.spec?.encoding?.y?.scale ?? + (Array.isArray(spec?.layer) + ? spec.layer.find((l: any) => l.encoding?.y?.scale)?.encoding?.y?.scale + : undefined) ?? + (Array.isArray(spec?.spec?.layer) + ? spec.spec.layer.find((l: any) => l.encoding?.y?.scale)?.encoding?.y?.scale + : undefined) + ); +} + +/** Does the resolved y scale anchor the axis at zero? */ +function yIncludesZero(spec: any): boolean { + const scale = yScale(spec); + if (!scale) return false; + if (scale.zero === true) return true; + if (Array.isArray(scale.domain)) return scale.domain[0] === 0; + if (scale.domainMin === 0) return true; + return false; +} + +/** A scatter plot (position-cognitive) with a typed quantitative y axis. */ +function scatterY(yType: string, yValues: number[], chartProperties?: any) { + const values = yValues.map((v, i) => ({ x: i + 1, y: v })); + return assembleVegaLite({ + data: { values }, + // x = Number (zero-meaningful → forced → never offers includeZero_x), + // so only the y axis is under test. 
+ semantic_types: { x: 'Number', y: yType }, + chart_spec: { + chartType: 'Scatter Plot', + encodings: { x: { field: 'x' }, y: { field: 'y' } }, + canvasSize: canvas, + chartProperties, + }, + }) as any; +} + +describe('zero-baseline toggle: offered only when the choice is a genuine toss-up', () => { + it('does NOT offer Zero Y for an arbitrary type away from zero (zero is meaningless → just fit data)', () => { + // Temperature = arbitrary; zero is not a meaningful reference, so the + // engine fits the data and there is nothing to debate. + const spec = scatterY('Temperature', [60, 70, 80, 90, 100]); + expect(applicableKeys(spec)).not.toContain('includeZero_y'); + expect(yIncludesZero(spec)).toBe(false); + }); + + it('does NOT offer Zero Y for a contextual type close to zero (engine confidently includes zero)', () => { + // Percentage = contextual; data 5–25 hugs zero (proximity 0.2) → engine + // includes zero and is confident enough that no toggle is needed. + const spec = scatterY('Percentage', [5, 10, 15, 20, 25]); + expect(applicableKeys(spec)).not.toContain('includeZero_y'); + expect(yIncludesZero(spec)).toBe(true); + }); + + it('does NOT offer Zero Y for a meaningful type whose data already spans toward zero', () => { + // Price = meaningful; data 10–40 (proximity 0.25) already spans most of the + // way to zero, so including zero barely changes the view → keep zero on + // silently, no toggle. + const spec = scatterY('Price', [10, 20, 30, 40]); + expect(applicableKeys(spec)).not.toContain('includeZero_y'); + expect(yIncludesZero(spec)).toBe(true); + }); + + it('offers Zero Y for a meaningful type far from zero on a position mark (default ON)', () => { + // Price = meaningful; data 1000–1200 (proximity 0.83) sits far from zero, so + // anchoring at zero would crush the data into a thin band — a real + // zoom-vs-anchor toss-up. Toggle is offered, recommended ON. 
+ const spec = scatterY('Price', [1000, 1050, 1100, 1150, 1200]); + expect(applicableKeys(spec)).toContain('includeZero_y'); + expect(optionFor(spec, 'includeZero_y')?.value).toBe(true); + expect(yIncludesZero(spec)).toBe(true); + }); + + it('does NOT offer Zero Y for an unknown/unrecognized type (no opinion to debate)', () => { + const spec = scatterY('Mystery', [60, 70, 80, 90]); + expect(applicableKeys(spec)).not.toContain('includeZero_y'); + }); + + it('does NOT offer Zero Y on a bar chart (length mark — baseline is structural)', () => { + const spec = assembleVegaLite({ + data: { + values: [ + { cat: 'a', y: 60 }, { cat: 'b', y: 70 }, + { cat: 'c', y: 80 }, { cat: 'd', y: 90 }, + ], + }, + semantic_types: { cat: 'Category', y: 'Temperature' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'cat' }, y: { field: 'y' } }, + canvasSize: canvas, + }, + }) as any; + expect(applicableKeys(spec)).not.toContain('includeZero_y'); + }); + + it('does NOT offer Zero Y for a meaningful type on a bar chart (mandatory baseline)', () => { + const spec = assembleVegaLite({ + data: { + values: [ + { cat: 'a', y: 10 }, { cat: 'b', y: 20 }, + { cat: 'c', y: 30 }, { cat: 'd', y: 40 }, + ], + }, + semantic_types: { cat: 'Category', y: 'Price' }, + chart_spec: { + chartType: 'Bar Chart', + encodings: { x: { field: 'cat' }, y: { field: 'y' } }, + canvasSize: canvas, + }, + }) as any; + expect(applicableKeys(spec)).not.toContain('includeZero_y'); + }); +}); + +describe('zero-baseline toggle: the choice drives the rendered axis', () => { + it('unset follows the engine decision (arbitrary away from zero → fits data)', () => { + const spec = scatterY('Temperature', [60, 70, 80, 90, 100]); + expect(yIncludesZero(spec)).toBe(false); + }); + + it('ON forces the axis to include zero', () => { + const spec = scatterY('Temperature', [60, 70, 80, 90, 100], { includeZero_y: true }); + expect(yIncludesZero(spec)).toBe(true); + // stays offered so the user can revert + 
expect(applicableKeys(spec)).toContain('includeZero_y'); + }); + + it('OFF fits the data even over a zero-anchored semantic domain', () => { + // Percentage close to zero would default to a zero baseline (and a + // [0,100]-style intrinsic floor). Turning the toggle OFF must win: the + // axis fits the data and is NOT re-pinned to zero. + const spec = scatterY('Percentage', [5, 10, 15, 20, 25], { includeZero_y: false }); + expect(yIncludesZero(spec)).toBe(false); + expect(applicableKeys(spec)).toContain('includeZero_y'); + }); + + it('OFF fits the data for a meaningful type on a line/point chart', () => { + // Price 0.8–2.0 (the screenshot case): default ON shows zero, but turning + // the toggle OFF fits the data instead of crushing it against the baseline. + const spec = scatterY('Price', [0.8, 1.0, 1.4, 1.8, 2.0], { includeZero_y: false }); + expect(yIncludesZero(spec)).toBe(false); + expect(applicableKeys(spec)).toContain('includeZero_y'); + }); +}); diff --git a/tests/frontend/unit/views/ClarificationPanel.test.tsx b/tests/frontend/unit/views/ClarificationPanel.test.tsx index 86274f34..c0648d30 100644 --- a/tests/frontend/unit/views/ClarificationPanel.test.tsx +++ b/tests/frontend/unit/views/ClarificationPanel.test.tsx @@ -16,6 +16,8 @@ vi.mock('react-i18next', () => ({ 'chartRec.clarificationQuestionLabel': `${params?.index}.`, 'chartRec.optionalClarification': '(optional)', 'chartRec.freeTextClarificationPlaceholder': 'Type your answer...', + 'chartRec.customAnswerPlaceholder': 'Or type your own answer...', + 'chartRec.confirmAnswer': 'Confirm answer', 'chartRec.freeTextClarificationHint': 'Type your answer in the chat box below.', }; return labels[key] || key; @@ -82,7 +84,7 @@ describe('ClarificationPanel', () => { expect(onSubmit).not.toHaveBeenCalled(); }); - it('shows a chat-box hint for free-text questions and renders no input', () => { + it('renders an inline input under a free-text question and submits it tagged to that question', () => { const 
onSubmit = vi.fn(); render( @@ -96,8 +98,116 @@ describe('ClarificationPanel', () => { />, ); - expect(screen.getByText('Type your answer in the chat box below.')).toBeInTheDocument(); - expect(screen.queryByPlaceholderText('Type your answer...')).toBeNull(); + // No "use the chat box" hint anymore — the panel is self-contained. + expect(screen.queryByText('Type your answer in the chat box below.')).toBeNull(); + + // The input sits inline under the question (its own answer field), not the + // choice-only override. + const input = screen.getByPlaceholderText('Type your answer...'); + expect(input).toBeInTheDocument(); + expect(screen.queryByPlaceholderText('Or type your own answer...')).toBeNull(); + + // Empty input → nothing to submit yet. expect(onSubmit).not.toHaveBeenCalled(); + + fireEvent.change(input, { target: { value: 'Focus on 2024.' } }); + fireEvent.keyDown(input, { key: 'Enter' }); + + // Tagged to the question it answers (index 0), not a generic freeform blob. + expect(onSubmit).toHaveBeenCalledWith([{ + question_index: 0, + answer: 'Focus on 2024.', + source: 'free_text', + }]); + }); + + it('lets a single-choice question take a typed answer instead of a chip', () => { + const onSubmit = vi.fn(); + + render( + , + ); + + // single_choice now offers BOTH the chip and its own freeform field. + expect(screen.getByRole('button', { name: 'Revenue' })).toBeInTheDocument(); + const input = screen.getByPlaceholderText('Or type your own answer...'); + + fireEvent.change(input, { target: { value: 'Actually, profit margin.' } }); + fireEvent.keyDown(input, { key: 'Enter' }); + + // Tagged to question 0 as a free_text answer (not a generic -1 override). 
+ expect(onSubmit).toHaveBeenCalledWith([{ + question_index: 0, + answer: 'Actually, profit margin.', + source: 'free_text', + }]); + }); + + it('supersedes a selected option when the user types a custom answer', () => { + const onSelectAnswer = vi.fn(); + const onClearAnswer = vi.fn(); + + render( + , + ); + + const input = screen.getByPlaceholderText('Or type your own answer...'); + fireEvent.change(input, { target: { value: 'profit margin' } }); + + // Typing records a free_text answer (autoSubmit=false) that overrides the + // prior option pick. + expect(onSelectAnswer).toHaveBeenCalledWith( + 0, + { question_index: 0, answer: 'profit margin', source: 'free_text' }, + false, + ); + + // Clearing the field removes the answer entirely. + fireEvent.change(input, { target: { value: '' } }); + expect(onClearAnswer).toHaveBeenCalledWith(0); + }); + + it('records a typed answer live and submits it on Enter', () => { + const onSubmit = vi.fn(); + + render( + , + ); + + const input = screen.getByPlaceholderText('Type your answer...'); + fireEvent.change(input, { target: { value: 'Focus on 2024.' 
} }); + fireEvent.keyDown(input, { key: 'Enter' }); + + expect(onSubmit).toHaveBeenCalledWith([{ + question_index: 0, + answer: 'Focus on 2024.', + source: 'free_text', + }]); }); }); diff --git a/tests/frontend/unit/views/formatCellValue.test.ts b/tests/frontend/unit/views/formatCellValue.test.ts index 9f1269b3..4901ccfb 100644 --- a/tests/frontend/unit/views/formatCellValue.test.ts +++ b/tests/frontend/unit/views/formatCellValue.test.ts @@ -87,7 +87,15 @@ describe('formatCellValue', () => { expect(formatCellValue(3600000, Type.Duration)).toBe('1h'); expect(formatCellValue(90000, Type.Duration)).toBe('1m 30s'); expect(formatCellValue(5000, Type.Duration)).toBe('5s'); - expect(formatCellValue(0, Type.Duration)).toBe('0s'); + expect(formatCellValue(0, Type.Duration)).toBe('0'); + }); + + it('should not over-format sub-second Duration values', () => { + // Seconds-based columns (e.g. timestamp_sec: 0, 0.083, 0.167) must not + // collapse to "0s" — show the plain number instead. + expect(formatCellValue(0.083, Type.Duration)).toBe('0.083'); + expect(formatCellValue(0.167, Type.Duration)).toBe('0.167'); + expect(formatCellValue(1.5, Type.Duration)).toBe('1.5'); }); it('should pass through non-numeric Duration as string', () => { diff --git a/yarn.lock b/yarn.lock index cbfaebd1..c2ee214e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -290,7 +290,7 @@ "@epic-web/invariant@^1.0.0": version "1.0.0" - resolved "https://registry.npmjs.org/@epic-web/invariant/-/invariant-1.0.0.tgz#1073e5dee6dd540410784990eb73e4acd25c9813" + resolved "https://registry.npmjs.org/@epic-web/invariant/-/invariant-1.0.0.tgz" integrity sha512-lrTPqgvfFQtR/eY/qkIzp98OGdNJu0m5ji3q/nJI8v3SXkRKEnWiOxMmbvcSoAIzv/cGiuvRy57k4suKQSAdwA== "@esbuild/aix-ppc64@0.27.7": @@ -298,16 +298,16 @@ resolved "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz" integrity sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg== -"@esbuild/android-arm64@0.27.7": - 
version "0.27.7" - resolved "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz" - integrity sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ== - "@esbuild/android-arm@0.27.7": version "0.27.7" resolved "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz" integrity sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ== +"@esbuild/android-arm64@0.27.7": + version "0.27.7" + resolved "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz" + integrity sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ== + "@esbuild/android-x64@0.27.7": version "0.27.7" resolved "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz" @@ -333,16 +333,16 @@ resolved "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz" integrity sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ== -"@esbuild/linux-arm64@0.27.7": - version "0.27.7" - resolved "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz" - integrity sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A== - "@esbuild/linux-arm@0.27.7": version "0.27.7" resolved "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz" integrity sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA== +"@esbuild/linux-arm64@0.27.7": + version "0.27.7" + resolved "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz" + integrity sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A== + "@esbuild/linux-ia32@0.27.7": version "0.27.7" resolved "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz" @@ -473,7 +473,7 @@ minimatch "^3.1.5" strip-json-comments "^3.1.1" -"@eslint/js@9.39.4", 
"@eslint/js@^9.15.0": +"@eslint/js@^9.15.0", "@eslint/js@9.39.4": version "9.39.4" resolved "https://registry.npmjs.org/@eslint/js/-/js-9.39.4.tgz" integrity sha512-nE7DEIchvtiFTwBw4Lfbu59PG+kCofhjsKaCWzxTpt4lfRjRMqG6uMBzKXuEcyXhOHoUp9riAm7/aWYGhXZ9cw== @@ -691,18 +691,6 @@ dependencies: "@babel/runtime" "^7.29.2" -"@mui/utils@9.0.0": - version "9.0.0" - resolved "https://registry.npmjs.org/@mui/utils/-/utils-9.0.0.tgz" - integrity sha512-bQcqyg/gjULUqTuyUjSAFr6LQGLvtkNtDbJerAtoUn9kGZ0hg5QJiN1PLHMLbeFpe3te1831uq7GFl2ITokGdg== - dependencies: - "@babel/runtime" "^7.29.2" - "@mui/types" "^9.0.0" - "@types/prop-types" "^15.7.15" - clsx "^2.1.1" - prop-types "^15.8.1" - react-is "^19.2.4" - "@mui/utils@^7.3.9": version "7.3.9" resolved "https://registry.npmjs.org/@mui/utils/-/utils-7.3.9.tgz" @@ -715,6 +703,18 @@ prop-types "^15.8.1" react-is "^19.2.3" +"@mui/utils@9.0.0": + version "9.0.0" + resolved "https://registry.npmjs.org/@mui/utils/-/utils-9.0.0.tgz" + integrity sha512-bQcqyg/gjULUqTuyUjSAFr6LQGLvtkNtDbJerAtoUn9kGZ0hg5QJiN1PLHMLbeFpe3te1831uq7GFl2ITokGdg== + dependencies: + "@babel/runtime" "^7.29.2" + "@mui/types" "^9.0.0" + "@types/prop-types" "^15.7.15" + clsx "^2.1.1" + prop-types "^15.8.1" + react-is "^19.2.4" + "@mui/x-internals@^9.1.0": version "9.1.0" resolved "https://registry.npmjs.org/@mui/x-internals/-/x-internals-9.1.0.tgz" @@ -739,71 +739,16 @@ prop-types "^15.8.1" react-transition-group "^4.4.5" -"@parcel/watcher-android-arm64@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-android-arm64/-/watcher-android-arm64-2.5.6.tgz#5f32e0dba356f4ac9a11068d2a5c134ca3ba6564" - integrity sha512-YQxSS34tPF/6ZG7r/Ih9xy+kP/WwediEUsqmtf0cuCV5TPPKw/PQHRhueUo6JdeFJaqV3pyjm0GdYjZotbRt/A== +"@oxc-project/types@=0.122.0": + version "0.122.0" + resolved "https://registry.npmjs.org/@oxc-project/types/-/types-0.122.0.tgz" + integrity sha512-oLAl5kBpV4w69UtFZ9xqcmTi+GENWOcPF7FCrczTiBbmC0ibXxCwyvZGbO39rCVEuLGAZM84DH0pUIyyv/YJzA== 
"@parcel/watcher-darwin-arm64@2.5.6": version "2.5.6" resolved "https://registry.npmjs.org/@parcel/watcher-darwin-arm64/-/watcher-darwin-arm64-2.5.6.tgz" integrity sha512-Z2ZdrnwyXvvvdtRHLmM4knydIdU9adO3D4n/0cVipF3rRiwP+3/sfzpAwA/qKFL6i1ModaabkU7IbpeMBgiVEA== -"@parcel/watcher-darwin-x64@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-darwin-x64/-/watcher-darwin-x64-2.5.6.tgz#bf05d76a78bc15974f15ec3671848698b0838063" - integrity sha512-HgvOf3W9dhithcwOWX9uDZyn1lW9R+7tPZ4sug+NGrGIo4Rk1hAXLEbcH1TQSqxts0NYXXlOWqVpvS1SFS4fRg== - -"@parcel/watcher-freebsd-x64@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-freebsd-x64/-/watcher-freebsd-x64-2.5.6.tgz#8bc26e9848e7303ac82922a5ae1b1ef1bdb48a53" - integrity sha512-vJVi8yd/qzJxEKHkeemh7w3YAn6RJCtYlE4HPMoVnCpIXEzSrxErBW5SJBgKLbXU3WdIpkjBTeUNtyBVn8TRng== - -"@parcel/watcher-linux-arm-glibc@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-linux-arm-glibc/-/watcher-linux-arm-glibc-2.5.6.tgz#1328fee1deb0c2d7865079ef53a2ba4cc2f8b40a" - integrity sha512-9JiYfB6h6BgV50CCfasfLf/uvOcJskMSwcdH1PHH9rvS1IrNy8zad6IUVPVUfmXr+u+Km9IxcfMLzgdOudz9EQ== - -"@parcel/watcher-linux-arm-musl@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-linux-arm-musl/-/watcher-linux-arm-musl-2.5.6.tgz#bad0f45cb3e2157746db8b9d22db6a125711f152" - integrity sha512-Ve3gUCG57nuUUSyjBq/MAM0CzArtuIOxsBdQ+ftz6ho8n7s1i9E1Nmk/xmP323r2YL0SONs1EuwqBp2u1k5fxg== - -"@parcel/watcher-linux-arm64-glibc@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-linux-arm64-glibc/-/watcher-linux-arm64-glibc-2.5.6.tgz#b75913fbd501d9523c5f35d420957bf7d0204809" - integrity sha512-f2g/DT3NhGPdBmMWYoxixqYr3v/UXcmLOYy16Bx0TM20Tchduwr4EaCbmxh1321TABqPGDpS8D/ggOTaljijOA== - -"@parcel/watcher-linux-arm64-musl@2.5.6": - version "2.5.6" - resolved 
"https://registry.npmjs.org/@parcel/watcher-linux-arm64-musl/-/watcher-linux-arm64-musl-2.5.6.tgz#da5621a6a576070c8c0de60dea8b46dc9c3827d4" - integrity sha512-qb6naMDGlbCwdhLj6hgoVKJl2odL34z2sqkC7Z6kzir8b5W65WYDpLB6R06KabvZdgoHI/zxke4b3zR0wAbDTA== - -"@parcel/watcher-linux-x64-glibc@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-linux-x64-glibc/-/watcher-linux-x64-glibc-2.5.6.tgz#ce437accdc4b30f93a090b4a221fd95cd9b89639" - integrity sha512-kbT5wvNQlx7NaGjzPFu8nVIW1rWqV780O7ZtkjuWaPUgpv2NMFpjYERVi0UYj1msZNyCzGlaCWEtzc+exjMGbQ== - -"@parcel/watcher-linux-x64-musl@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-linux-x64-musl/-/watcher-linux-x64-musl-2.5.6.tgz#02400c54b4a67efcc7e2327b249711920ac969e2" - integrity sha512-1JRFeC+h7RdXwldHzTsmdtYR/Ku8SylLgTU/reMuqdVD7CtLwf0VR1FqeprZ0eHQkO0vqsbvFLXUmYm/uNKJBg== - -"@parcel/watcher-win32-arm64@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-win32-arm64/-/watcher-win32-arm64-2.5.6.tgz#caae3d3c7583ca0a7171e6bd142c34d20ea1691e" - integrity sha512-3ukyebjc6eGlw9yRt678DxVF7rjXatWiHvTXqphZLvo7aC5NdEgFufVwjFfY51ijYEWpXbqF5jtrK275z52D4Q== - -"@parcel/watcher-win32-ia32@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-win32-ia32/-/watcher-win32-ia32-2.5.6.tgz#9ac922550896dfe47bfc5ae3be4f1bcaf8155d6d" - integrity sha512-k35yLp1ZMwwee3Ez/pxBi5cf4AoBKYXj00CZ80jUz5h8prpiaQsiRPKQMxoLstNuqe2vR4RNPEAEcjEFzhEz/g== - -"@parcel/watcher-win32-x64@2.5.6": - version "2.5.6" - resolved "https://registry.npmjs.org/@parcel/watcher-win32-x64/-/watcher-win32-x64-2.5.6.tgz#73fdafba2e21c448f0e456bbe13178d8fe11739d" - integrity sha512-hbQlYcCq5dlAX9Qx+kFb0FHue6vbjlf0FrNzSKdYK2APUf7tGfGxQCk2ihEREmbR6ZMc0MVAD5RIX/41gpUzTw== - "@parcel/watcher@^2.4.1": version "2.5.6" resolved "https://registry.npmjs.org/@parcel/watcher/-/watcher-2.5.6.tgz" @@ -883,141 +828,26 @@ resolved 
"https://registry.npmjs.org/@remix-run/router/-/router-1.23.2.tgz" integrity sha512-Ic6m2U/rMjTkhERIa/0ZtXJP17QUi2CbWE7cqx4J58M8aA3QTfW+2UlQ4psvTX9IO1RfNVhK3pcpdjej7L+t2w== +"@rolldown/binding-darwin-arm64@1.0.0-rc.11": + version "1.0.0-rc.11" + resolved "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.11.tgz" + integrity sha512-7WQgR8SfOPwmDZGFkThUvsmd/nwAWv91oCO4I5LS7RKrssPZmOt7jONN0cW17ydGC1n/+puol1IpoieKqQidmg== + "@rolldown/pluginutils@1.0.0-beta.27": version "1.0.0-beta.27" resolved "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz" integrity sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA== -"@rollup/rollup-android-arm-eabi@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.0.tgz#7e158ddfc16f78da99c0d5ccbae6cae403ef3284" - integrity sha512-WOhNW9K8bR3kf4zLxbfg6Pxu2ybOUbB2AjMDHSQx86LIF4rH4Ft7vmMwNt0loO0eonglSNy4cpD3MKXXKQu0/A== - -"@rollup/rollup-android-arm64@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.0.tgz#49f4ae0e22b6f9ffbcd3818b9a0758fa2d10b1cd" - integrity sha512-u6JHLll5QKRvjciE78bQXDmqRqNs5M/3GVqZeMwvmjaNODJih/WIrJlFVEihvV0MiYFmd+ZyPr9wxOVbPAG2Iw== +"@rolldown/pluginutils@1.0.0-rc.11": + version "1.0.0-rc.11" + resolved "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.11.tgz" + integrity sha512-xQO9vbwBecJRv9EUcQ/y0dzSTJgA7Q6UVN7xp6B81+tBGSLVAK03yJ9NkJaUA7JFD91kbjxRSC/mDnmvXzbHoQ== "@rollup/rollup-darwin-arm64@4.60.0": version "4.60.0" resolved "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.0.tgz" integrity sha512-qEF7CsKKzSRc20Ciu2Zw1wRrBz4g56F7r/vRwY430UPp/nt1x21Q/fpJ9N5l47WWvJlkNCPJz3QRVw008fi7yA== -"@rollup/rollup-darwin-x64@4.60.0": - version "4.60.0" - resolved 
"https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.0.tgz#1bf7a92b27ebdd5e0d1d48503c7811160773be1a" - integrity sha512-WADYozJ4QCnXCH4wPB+3FuGmDPoFseVCUrANmA5LWwGmC6FL14BWC7pcq+FstOZv3baGX65tZ378uT6WG8ynTw== - -"@rollup/rollup-freebsd-arm64@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.0.tgz#5ccf537b99c5175008444702193ad0b1c36f7f16" - integrity sha512-6b8wGHJlDrGeSE3aH5mGNHBjA0TTkxdoNHik5EkvPHCt351XnigA4pS7Wsj/Eo9Y8RBU6f35cjN9SYmCFBtzxw== - -"@rollup/rollup-freebsd-x64@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.0.tgz#1196ecd7bf4e128624ef83cd1f9d785114474a77" - integrity sha512-h25Ga0t4jaylMB8M/JKAyrvvfxGRjnPQIR8lnCayyzEjEOx2EJIlIiMbhpWxDRKGKF8jbNH01NnN663dH638mA== - -"@rollup/rollup-linux-arm-gnueabihf@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.0.tgz#cc147633a4af229fee83a737bf2334fbac3dc28e" - integrity sha512-RzeBwv0B3qtVBWtcuABtSuCzToo2IEAIQrcyB/b2zMvBWVbjo8bZDjACUpnaafaxhTw2W+imQbP2BD1usasK4g== - -"@rollup/rollup-linux-arm-musleabihf@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.0.tgz#3559f9f060153ea54594a42c3b87a297bedcc26e" - integrity sha512-Sf7zusNI2CIU1HLzuu9Tc5YGAHEZs5Lu7N1ssJG4Tkw6e0MEsN7NdjUDDfGNHy2IU+ENyWT+L2obgWiguWibWQ== - -"@rollup/rollup-linux-arm64-gnu@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.0.tgz#e91f887b154123485cfc4b59befe2080fcd8f2df" - integrity sha512-DX2x7CMcrJzsE91q7/O02IJQ5/aLkVtYFryqCjduJhUfGKG6yJV8hxaw8pZa93lLEpPTP/ohdN4wFz7yp/ry9A== - -"@rollup/rollup-linux-arm64-musl@4.60.0": - version "4.60.0" - resolved 
"https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.0.tgz#660752f040df9ba44a24765df698928917c0bf21" - integrity sha512-09EL+yFVbJZlhcQfShpswwRZ0Rg+z/CsSELFCnPt3iK+iqwGsI4zht3secj5vLEs957QvFFXnzAT0FFPIxSrkQ== - -"@rollup/rollup-linux-loong64-gnu@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.0.tgz#cb0e939a5fa479ccef264f3f45b31971695f869c" - integrity sha512-i9IcCMPr3EXm8EQg5jnja0Zyc1iFxJjZWlb4wr7U2Wx/GrddOuEafxRdMPRYVaXjgbhvqalp6np07hN1w9kAKw== - -"@rollup/rollup-linux-loong64-musl@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.0.tgz#42f86fbc82cd1a81be2d346476dd3231cf5ee442" - integrity sha512-DGzdJK9kyJ+B78MCkWeGnpXJ91tK/iKA6HwHxF4TAlPIY7GXEvMe8hBFRgdrR9Ly4qebR/7gfUs9y2IoaVEyog== - -"@rollup/rollup-linux-ppc64-gnu@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.0.tgz#39776a647a789dc95ea049277c5ef8f098df77f9" - integrity sha512-RwpnLsqC8qbS8z1H1AxBA1H6qknR4YpPR9w2XX0vo2Sz10miu57PkNcnHVaZkbqyw/kUWfKMI73jhmfi9BRMUQ== - -"@rollup/rollup-linux-ppc64-musl@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.0.tgz#466f20029a8e8b3bb2954c7ddebc9586420cac2c" - integrity sha512-Z8pPf54Ly3aqtdWC3G4rFigZgNvd+qJlOE52fmko3KST9SoGfAdSRCwyoyG05q1HrrAblLbk1/PSIV+80/pxLg== - -"@rollup/rollup-linux-riscv64-gnu@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.0.tgz#cff9877c78f12e7aa6246f6902ad913e99edb2b7" - integrity sha512-3a3qQustp3COCGvnP4SvrMHnPQ9d1vzCakQVRTliaz8cIp/wULGjiGpbcqrkv0WrHTEp8bQD/B3HBjzujVWLOA== - -"@rollup/rollup-linux-riscv64-musl@4.60.0": - version "4.60.0" - resolved 
"https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.0.tgz#9a762fb99b5a82a921017f56491b7e892b9fb17d" - integrity sha512-pjZDsVH/1VsghMJ2/kAaxt6dL0psT6ZexQVrijczOf+PeP2BUqTHYejk3l6TlPRydggINOeNRhvpLa0AYpCWSQ== - -"@rollup/rollup-linux-s390x-gnu@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.0.tgz#9d25ad8ac7dab681935baf78ac5ea92d14629cdf" - integrity sha512-3ObQs0BhvPgiUVZrN7gqCSvmFuMWvWvsjG5ayJ3Lraqv+2KhOsp+pUbigqbeWqueGIsnn+09HBw27rJ+gYK4VQ== - -"@rollup/rollup-linux-x64-gnu@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.0.tgz#5e5139e11819fa38a052368da79422cb4afcf466" - integrity sha512-EtylprDtQPdS5rXvAayrNDYoJhIz1/vzN2fEubo3yLE7tfAw+948dO0g4M0vkTVFhKojnF+n6C8bDNe+gDRdTg== - -"@rollup/rollup-linux-x64-gnu@^4.24.4": - version "4.60.4" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.4.tgz#23c9bf79771d804fb87415eb0767569f273261e5" - integrity sha512-Boiz5+MsaROEWDf+GGEwF8VMHGhlUoQMtIPjOgA5fv4osupqTVnJteQNKJwUcnUog2G55jYXH7KZFFiJe0TEzQ== - -"@rollup/rollup-linux-x64-musl@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.0.tgz#b6211d46e11b1f945f5504cc794fce839331ed08" - integrity sha512-k09oiRCi/bHU9UVFqD17r3eJR9bn03TyKraCrlz5ULFJGdJGi7VOmm9jl44vOJvRJ6P7WuBi/s2A97LxxHGIdw== - -"@rollup/rollup-openbsd-x64@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.0.tgz#e6e09eebaa7012bb9c7331b437a9e992bd94ca35" - integrity sha512-1o/0/pIhozoSaDJoDcec+IVLbnRtQmHwPV730+AOD29lHEEo4F5BEUB24H0OBdhbBBDwIOSuf7vgg0Ywxdfiiw== - -"@rollup/rollup-openharmony-arm64@4.60.0": - version "4.60.0" - resolved 
"https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.0.tgz#f7d99ae857032498e57a5e7259fb7100fd24a87e" - integrity sha512-pESDkos/PDzYwtyzB5p/UoNU/8fJo68vcXM9ZW2V0kjYayj1KaaUfi1NmTUTUpMn4UhU4gTuK8gIaFO4UGuMbA== - -"@rollup/rollup-win32-arm64-msvc@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.0.tgz#41e392f5d9f3bf1253fdaf2f6d6f6b1bfc452856" - integrity sha512-hj1wFStD7B1YBeYmvY+lWXZ7ey73YGPcViMShYikqKT1GtstIKQAtfUI6yrzPjAy/O7pO0VLXGmUVWXQMaYgTQ== - -"@rollup/rollup-win32-ia32-msvc@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.0.tgz#f41b0490be0e5d3cf459b4dc076a192b532adea9" - integrity sha512-SyaIPFoxmUPlNDq5EHkTbiKzmSEmq/gOYFI/3HHJ8iS/v1mbugVa7dXUzcJGQfoytp9DJFLhHH4U3/eTy2Bq4w== - -"@rollup/rollup-win32-x64-gnu@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.0.tgz#0fcf9f1fcb750f0317b13aac3b3231687e6397a5" - integrity sha512-RdcryEfzZr+lAr5kRm2ucN9aVlCCa2QNq4hXelZxb8GG0NJSazq44Z3PCCc8wISRuCVnGs0lQJVX5Vp6fKA+IA== - -"@rollup/rollup-win32-x64-msvc@4.60.0": - version "4.60.0" - resolved "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.0.tgz#3afdb30405f6d4248df5e72e1ca86c5eab55fab8" - integrity sha512-PrsWNQ8BuE00O3Xsx3ALh2Df8fAj9+cvvX9AIA6o4KpATR98c9mud4XtDWVvsEuyia5U4tVSTKygawyJkjm60w== - "@standard-schema/spec@^1.1.0": version "1.1.0" resolved "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz" @@ -1028,61 +858,6 @@ resolved "https://registry.npmjs.org/@swc/core-darwin-arm64/-/core-darwin-arm64-1.15.21.tgz" integrity sha512-SA8SFg9dp0qKRH8goWsax6bptFE2EdmPf2YRAQW9WoHGf3XKM1bX0nd5UdwxmC5hXsBUZAYf7xSciCler6/oyA== -"@swc/core-darwin-x64@1.15.21": - version "1.15.21" - resolved 
"https://registry.npmjs.org/@swc/core-darwin-x64/-/core-darwin-x64-1.15.21.tgz#05ff28c00a7045d9760c847e19604fff02b6e3ea" - integrity sha512-//fOVntgowz9+V90lVsNCtyyrtbHp3jWH6Rch7MXHXbcvbLmbCTmssl5DeedUWLLGiAAW1wksBdqdGYOTjaNLw== - -"@swc/core-linux-arm-gnueabihf@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.15.21.tgz#d52a0fac1933fe4e4180a196417053571d6c255f" - integrity sha512-meNI4Sh6h9h8DvIfEc0l5URabYMSuNvyisLmG6vnoYAS43s8ON3NJR8sDHvdP7NJTrLe0q/x2XCn6yL/BeHcZg== - -"@swc/core-linux-arm64-gnu@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.15.21.tgz#32cd1b9d0d4be4d53ccfbc122ac61289f37735b9" - integrity sha512-QrXlNQnHeXqU2EzLlnsPoWEh8/GtNJLvfMiPsDhk+ht6Xv8+vhvZ5YZ/BokNWSIZiWPKLAqR0M7T92YF5tmD3g== - -"@swc/core-linux-arm64-musl@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.15.21.tgz#0993e8b2ffac4f1141fa7b158e8dd982c2476c1a" - integrity sha512-8/yGCMO333ultDaMQivE5CjO6oXDPeeg1IV4sphojPkb0Pv0i6zvcRIkgp60xDB+UxLr6VgHgt+BBgqS959E9g== - -"@swc/core-linux-ppc64-gnu@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-linux-ppc64-gnu/-/core-linux-ppc64-gnu-1.15.21.tgz#5f6765d9a36235d95fd5c69f6d848973e85d8180" - integrity sha512-ucW0HzPx0s1dgRvcvuLSPSA/2Kk/VYTv9st8qe1Kc22Gu0Q0rH9+6TcBTmMuNIp0Xs4BPr1uBttmbO1wEGI49Q== - -"@swc/core-linux-s390x-gnu@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-linux-s390x-gnu/-/core-linux-s390x-gnu-1.15.21.tgz#f96779dc2ba8d47298bca3ceaa961e0f460aa0bd" - integrity sha512-ulTnOGc5I7YRObE/9NreAhQg94QkiR5qNhhcUZ1iFAYjzg/JGAi1ch+s/Ixe61pMIr8bfVrF0NOaB0f8wjaAfA== - -"@swc/core-linux-x64-gnu@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.15.21.tgz#0ffe779d5fd060bfb7992176f51d317c81c6aaaf" - 
integrity sha512-D0RokxtM+cPvSqJIKR6uja4hbD+scI9ezo95mBhfSyLUs9wnPPl26sLp1ZPR/EXRdYm3F3S6RUtVi+8QXhT24Q== - -"@swc/core-linux-x64-musl@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.15.21.tgz#2ea9fab26555d27c715aed6a08604a8296e4af50" - integrity sha512-nER8u7VeRfmU6fMDzl1NQAbbB/G7O2avmvCOwIul1uGkZ2/acbPH+DCL9h5+0yd/coNcxMBTL6NGepIew+7C2w== - -"@swc/core-win32-arm64-msvc@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.15.21.tgz#b401f34f38d744ca2b800bf2574ef5f7b20ca52f" - integrity sha512-+/AgNBnjYugUA8C0Do4YzymgvnGbztv7j8HKSQLvR/DQgZPoXQ2B3PqB2mTtGh/X5DhlJWiqnunN35JUgWcAeQ== - -"@swc/core-win32-ia32-msvc@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.15.21.tgz#c761e981725d137abd7abcecff88d1dc2d76baad" - integrity sha512-IkSZj8PX/N4HcaFhMQtzmkV8YSnuNoJ0E6OvMwFiOfejPhiKXvl7CdDsn1f4/emYEIDO3fpgZW9DTaCRMDxaDA== - -"@swc/core-win32-x64-msvc@1.15.21": - version "1.15.21" - resolved "https://registry.npmjs.org/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.15.21.tgz#4878cd851b4f98033e19fca78953201aef736edd" - integrity sha512-zUyWso7OOENB6e1N1hNuNn8vbvLsTdKQ5WKLgt/JcBNfJhKy/6jmBmqI3GXk/MyvQKd5SLvP7A0F36p7TeDqvw== - "@swc/core@^1.12.11": version "1.15.21" resolved "https://registry.npmjs.org/@swc/core/-/core-1.15.21.tgz" @@ -1268,22 +1043,22 @@ resolved "https://registry.npmjs.org/@tiptap/extension-strike/-/extension-strike-3.22.2.tgz" integrity sha512-YFC3elKU1L8PiGbcB6tqd/7vWPF5IbydJz0POJpHzSjstX+VfT8VsvS7ubxVuSIWQ11kGkH3mzX6LX8JHsHZxg== -"@tiptap/extension-table-cell@^3.23.6": +"@tiptap/extension-table-cell@^3.22.2": version "3.23.6" resolved "https://registry.npmjs.org/@tiptap/extension-table-cell/-/extension-table-cell-3.23.6.tgz" integrity sha512-hS9TmmvRlT9/ikT+0ukACS+hmJuii4zQaH47cg3oJkz/Fv7O7tL7GZniKtK6l2OUZGPhY+4SV2RkDB6bD7DXfw== 
-"@tiptap/extension-table-header@^3.23.6": +"@tiptap/extension-table-header@^3.22.2": version "3.23.6" resolved "https://registry.npmjs.org/@tiptap/extension-table-header/-/extension-table-header-3.23.6.tgz" integrity sha512-D6o0a1cJXUU0xWakainBFGPnGHinQkPcdu1YqGd/PoFANY38lnuZt/NW2O/OLfLXu5LXDRfpqF1+dsKww27dUA== -"@tiptap/extension-table-row@^3.23.6": +"@tiptap/extension-table-row@^3.22.2": version "3.23.6" resolved "https://registry.npmjs.org/@tiptap/extension-table-row/-/extension-table-row-3.23.6.tgz" integrity sha512-OauWVzkyRQg0rKOqM/a3PuKPc1S7YXMb1LRN7Nh8Ytvglvd7GFRTbl1lVqdZRaz4Jzopag4PQnriIZfMPUpxWw== -"@tiptap/extension-table@^3.23.6": +"@tiptap/extension-table@^3.22.2": version "3.23.6" resolved "https://registry.npmjs.org/@tiptap/extension-table/-/extension-table-3.23.6.tgz" integrity sha512-XbhZXjhsS6AP7ThoZxjAnNs+NiR81YRori25l6E+ORqB7quiPkIXOAi5h4AIpkn/CYIqze6ere11lWsYpDjtaQ== @@ -1618,7 +1393,7 @@ dependencies: "@types/estree" "*" -"@types/estree@*", "@types/estree@1.0.8", "@types/estree@^1.0.0", "@types/estree@^1.0.6", "@types/estree@^1.0.8": +"@types/estree@*", "@types/estree@^1.0.0", "@types/estree@^1.0.6", "@types/estree@^1.0.8", "@types/estree@1.0.8": version "1.0.8" resolved "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz" integrity sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w== @@ -1789,7 +1564,7 @@ resolved "https://registry.npmjs.org/@types/validator/-/validator-13.15.10.tgz" integrity sha512-T8L6i7wCuyoK8A/ZeLYt1+q0ty3Zb9+qbSSvrIVitzT3YjZqkTZ40IbRsPanlB4h1QB3JVL1SYCdR6ngtFYcuA== -"@typescript-eslint/eslint-plugin@8.57.2", "@typescript-eslint/eslint-plugin@^8.16.0": +"@typescript-eslint/eslint-plugin@^8.16.0", "@typescript-eslint/eslint-plugin@8.57.2": version "8.57.2" resolved "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.57.2.tgz" integrity sha512-NZZgp0Fm2IkD+La5PR81sd+g+8oS6JwJje+aRWsDocxHkjyRw0J5L5ZTlN3LI1LlOcGL7ph3eaIUmTXMIjLk0w== @@ -1803,7 
+1578,7 @@ natural-compare "^1.4.0" ts-api-utils "^2.4.0" -"@typescript-eslint/parser@8.57.2", "@typescript-eslint/parser@^8.16.0": +"@typescript-eslint/parser@^8.16.0", "@typescript-eslint/parser@8.57.2": version "8.57.2" resolved "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.57.2.tgz" integrity sha512-30ScMRHIAD33JJQkgfGW1t8CURZtjc2JpTrq5n2HFhOefbAhb7ucc7xJwdWcrEtqUIYJ73Nybpsggii6GtAHjA== @@ -1831,7 +1606,7 @@ "@typescript-eslint/types" "8.57.2" "@typescript-eslint/visitor-keys" "8.57.2" -"@typescript-eslint/tsconfig-utils@8.57.2", "@typescript-eslint/tsconfig-utils@^8.57.2": +"@typescript-eslint/tsconfig-utils@^8.57.2", "@typescript-eslint/tsconfig-utils@8.57.2": version "8.57.2" resolved "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.57.2.tgz" integrity sha512-3Lm5DSM+DCowsUOJC+YqHHnKEfFh5CoGkj5Z31NQSNF4l5wdOwqGn99wmwN/LImhfY3KJnmordBq/4+VDe2eKw== @@ -1847,7 +1622,7 @@ debug "^4.4.3" ts-api-utils "^2.4.0" -"@typescript-eslint/types@8.57.2", "@typescript-eslint/types@^8.57.2": +"@typescript-eslint/types@^8.57.2", "@typescript-eslint/types@8.57.2": version "8.57.2" resolved "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.57.2.tgz" integrity sha512-/iZM6FnM4tnx9csuTxspMW4BOSegshwX5oBDznJ7S4WggL7Vczz5d2W11ecc4vRrQMQHXRSxzrCsyG5EsPPTbA== @@ -2067,6 +1842,11 @@ argparse@^2.0.1: resolved "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz" integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q== +aria-query@^5.0.0, aria-query@^5.3.2: + version "5.3.2" + resolved "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz" + integrity sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw== + aria-query@5.3.0: version "5.3.0" resolved "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz" @@ -2074,11 +1854,6 @@ aria-query@5.3.0: dependencies: dequal "^2.0.3" -aria-query@^5.0.0, aria-query@^5.3.2: - 
version "5.3.2" - resolved "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz" - integrity sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw== - array-buffer-byte-length@^1.0.1, array-buffer-byte-length@^1.0.2: version "1.0.2" resolved "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.2.tgz" @@ -2456,6 +2231,11 @@ comma-separated-tokens@^2.0.0: resolved "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz" integrity sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg== +commander@^8.3.0: + version "8.3.0" + resolved "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz" + integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww== + commander@2: version "2.20.3" resolved "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz" @@ -2466,11 +2246,6 @@ commander@7: resolved "https://registry.npmjs.org/commander/-/commander-7.2.0.tgz" integrity sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw== -commander@^8.3.0: - version "8.3.0" - resolved "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz" - integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww== - compress-commons@^4.1.2: version "4.1.2" resolved "https://registry.npmjs.org/compress-commons/-/compress-commons-4.1.2.tgz" @@ -2532,7 +2307,7 @@ crelt@^1.0.0: cross-env@^10.1.0: version "10.1.0" - resolved "https://registry.npmjs.org/cross-env/-/cross-env-10.1.0.tgz#cfd2a6200df9ed75bfb9cb3d7ce609c13ea21783" + resolved "https://registry.npmjs.org/cross-env/-/cross-env-10.1.0.tgz" integrity sha512-GsYosgnACZTADcmEyJctkJIoqAhHjttw7RsFrVoJNXbsWWqaq6Ym+7kZjq6mS45O0jij6vtiReppKQEtqWy6Dw== dependencies: "@epic-web/invariant" "^1.0.0" @@ -2577,7 +2352,7 @@ culori@^4.0.2: resolved 
"https://registry.npmjs.org/culori/-/culori-4.0.2.tgz" integrity sha512-1+BhOB8ahCn4O0cep0Sh2l9KCOfOdY+BXJnKMHFFzDEouSr/el18QwXEMRlOj9UY5nCeA8UN3a/82rUWRBeyBw== -"d3-array@1 - 3", "d3-array@2 - 3", "d3-array@2.10.0 - 3", "d3-array@2.5.0 - 3", d3-array@3, d3-array@3.2.4, d3-array@^3.2.0, d3-array@^3.2.4: +d3-array@^3.2.0, d3-array@^3.2.4, "d3-array@1 - 3", "d3-array@2 - 3", "d3-array@2.10.0 - 3", "d3-array@2.5.0 - 3", d3-array@3, d3-array@3.2.4: version "3.2.4" resolved "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz" integrity sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg== @@ -2607,7 +2382,7 @@ d3-chord@3: dependencies: d3-path "1 - 3" -"d3-color@1 - 3", d3-color@3, d3-color@^3.1.0: +d3-color@^3.1.0, "d3-color@1 - 3", d3-color@3: version "3.1.0" resolved "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz" integrity sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA== @@ -2619,7 +2394,7 @@ d3-contour@4: dependencies: d3-array "^3.2.0" -d3-delaunay@6, d3-delaunay@^6.0.4: +d3-delaunay@^6.0.4, d3-delaunay@6: version "6.0.4" resolved "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.4.tgz" integrity sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A== @@ -2639,7 +2414,7 @@ d3-delaunay@6, d3-delaunay@^6.0.4: d3-dispatch "1 - 3" d3-selection "3" -"d3-dsv@1 - 3", d3-dsv@3, d3-dsv@^3.0.1: +d3-dsv@^3.0.1, "d3-dsv@1 - 3", d3-dsv@3: version "3.0.1" resolved "https://registry.npmjs.org/d3-dsv/-/d3-dsv-3.0.1.tgz" integrity sha512-UG6OvdI5afDIFP9w4G0mNq50dSOsXHJaRE8arAS5o9ApWnIElp8GZw1Dun8vP8OyHOZ/QJUKUJwxiiCCnUwm+Q== @@ -2660,7 +2435,7 @@ d3-fetch@3: dependencies: d3-dsv "1 - 3" -d3-force@3, d3-force@^3.0.0: +d3-force@^3.0.0, d3-force@3: version "3.0.0" resolved "https://registry.npmjs.org/d3-force/-/d3-force-3.0.0.tgz" integrity 
sha512-zxV/SsA+U4yte8051P4ECydjD/S+qeYtnaIyAs9tgHCqfguma/aAQDjo85A9Z6EKhBirHRJHXIgJUlffT4wdLg== @@ -2669,7 +2444,7 @@ d3-force@3, d3-force@^3.0.0: d3-quadtree "1 - 3" d3-timer "1 - 3" -"d3-format@1 - 3", d3-format@3, d3-format@^3.1.0: +d3-format@^3.1.0, "d3-format@1 - 3", d3-format@3: version "3.1.2" resolved "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz" integrity sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg== @@ -2683,26 +2458,26 @@ d3-geo-projection@^4.0.0: d3-array "1 - 3" d3-geo "1.12.0 - 3" -"d3-geo@1.12.0 - 3", d3-geo@3, d3-geo@^3.1.1: +d3-geo@^3.1.1, "d3-geo@1.12.0 - 3", d3-geo@3: version "3.1.1" resolved "https://registry.npmjs.org/d3-geo/-/d3-geo-3.1.1.tgz" integrity sha512-637ln3gXKXOwhalDzinUgY83KzNWZRKbYubaG+fGVuc/dxO64RRljtCTnf5ecMyE1RIdtqpkVcq0IbtU2S8j2Q== dependencies: d3-array "2.5.0 - 3" -d3-hierarchy@3, d3-hierarchy@^3.1.2: +d3-hierarchy@^3.1.2, d3-hierarchy@3: version "3.1.2" resolved "https://registry.npmjs.org/d3-hierarchy/-/d3-hierarchy-3.1.2.tgz" integrity sha512-FX/9frcub54beBdugHjDCdikxThEqjnR93Qt7PvQTOHxyiNCAlvMrHhclk3cD5VeAaq9fxmfRp+CnWw9rEMBuA== -"d3-interpolate@1 - 3", "d3-interpolate@1.2.0 - 3", d3-interpolate@3, d3-interpolate@^3.0.1: +d3-interpolate@^3.0.1, "d3-interpolate@1 - 3", "d3-interpolate@1.2.0 - 3", d3-interpolate@3: version "3.0.1" resolved "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz" integrity sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g== dependencies: d3-color "1 - 3" -"d3-path@1 - 3", d3-path@3, d3-path@^3.1.0: +d3-path@^3.1.0, "d3-path@1 - 3", d3-path@3: version "3.1.0" resolved "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz" integrity sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ== @@ -2722,7 +2497,7 @@ d3-random@3: resolved "https://registry.npmjs.org/d3-random/-/d3-random-3.0.1.tgz" integrity 
sha512-FXMe9GfxTxqd5D6jFsQ+DJ8BJS4E/fT5mqqdjovykEB2oFbTMDVdg1MGFxfQW+FBOGoB++k8swBrgwSHT1cUXQ== -d3-scale-chromatic@3, d3-scale-chromatic@^3.1.0: +d3-scale-chromatic@^3.1.0, d3-scale-chromatic@3: version "3.1.0" resolved "https://registry.npmjs.org/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz" integrity sha512-A3s5PWiZ9YCXFye1o246KoscMWqf8BsD9eRiJ3He7C9OBaxKhAd5TFCdEx/7VbKtxxTsu//1mMJFrEt572cEyQ== @@ -2730,7 +2505,7 @@ d3-scale-chromatic@3, d3-scale-chromatic@^3.1.0: d3-color "1 - 3" d3-interpolate "1 - 3" -d3-scale@4, d3-scale@^4.0.2: +d3-scale@^4.0.2, d3-scale@4: version "4.0.2" resolved "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz" integrity sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ== @@ -2746,28 +2521,28 @@ d3-scale@4, d3-scale@^4.0.2: resolved "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz" integrity sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ== -d3-shape@3, d3-shape@^3.2.0: +d3-shape@^3.2.0, d3-shape@3: version "3.2.0" resolved "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz" integrity sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA== dependencies: d3-path "^3.1.0" -"d3-time-format@2 - 4", d3-time-format@4, d3-time-format@^4.1.0: +d3-time-format@^4.1.0, "d3-time-format@2 - 4", d3-time-format@4: version "4.1.0" resolved "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz" integrity sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg== dependencies: d3-time "1 - 3" -"d3-time@1 - 3", "d3-time@2.1.1 - 3", d3-time@3, d3-time@^3.1.0: +d3-time@^3.1.0, "d3-time@1 - 3", "d3-time@2.1.1 - 3", d3-time@3: version "3.1.0" resolved "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz" integrity sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q== dependencies: d3-array "2 - 3" 
-"d3-timer@1 - 3", d3-timer@3, d3-timer@^3.0.1: +d3-timer@^3.0.1, "d3-timer@1 - 3", d3-timer@3: version "3.0.1" resolved "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz" integrity sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA== @@ -2953,15 +2728,6 @@ devlop@^1.0.0, devlop@^1.1.0: dependencies: dequal "^2.0.0" -dnd-core@14.0.1: - version "14.0.1" - resolved "https://registry.npmjs.org/dnd-core/-/dnd-core-14.0.1.tgz" - integrity sha512-+PVS2VPTgKFPYWo3vAFEA8WPbTf7/xo43TifH9G8S1KqnrQu0o77A3unrF5yOugy4mIz7K5wAVFHUcha7wsz6A== - dependencies: - "@react-dnd/asap" "^4.0.0" - "@react-dnd/invariant" "^2.0.0" - redux "^4.1.1" - dnd-core@^16.0.1: version "16.0.1" resolved "https://registry.npmjs.org/dnd-core/-/dnd-core-16.0.1.tgz" @@ -2971,6 +2737,15 @@ dnd-core@^16.0.1: "@react-dnd/invariant" "^4.0.1" redux "^4.2.0" +dnd-core@14.0.1: + version "14.0.1" + resolved "https://registry.npmjs.org/dnd-core/-/dnd-core-14.0.1.tgz" + integrity sha512-+PVS2VPTgKFPYWo3vAFEA8WPbTf7/xo43TifH9G8S1KqnrQu0o77A3unrF5yOugy4mIz7K5wAVFHUcha7wsz6A== + dependencies: + "@react-dnd/asap" "^4.0.0" + "@react-dnd/invariant" "^2.0.0" + redux "^4.1.1" + doctrine@^2.1.0: version "2.1.0" resolved "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz" @@ -2996,14 +2771,7 @@ dom-helpers@^5.0.1: "@babel/runtime" "^7.8.7" csstype "^3.0.2" -dompurify@*, dompurify@^3.4.2: - version "3.4.7" - resolved "https://registry.npmjs.org/dompurify/-/dompurify-3.4.7.tgz#e2702ea4fd5d83467f1baef62309466ce7d44a82" - integrity sha512-2jBxDJY4RR06tQNy4w5FlFH7kfxsQZlufd0sbv+chfHCxeJwrFw2baUDsSwvBISD4K4RDbd0PTfy3uNXsR6siA== - optionalDependencies: - "@types/trusted-types" "^2.0.7" - -dompurify@^3.4.0: +dompurify@*, dompurify@^3.4.0: version "3.4.5" resolved "https://registry.npmjs.org/dompurify/-/dompurify-3.4.5.tgz" integrity sha512-OrwIBKsdNSVEeubdJ1HBv/wNENRM9ytAVCv7YXt//A3vPdVMNuACRqK9mXCGCBW2ln7BT/A4X0jXHo2Gu89miA== @@ -3851,7 +3619,7 @@ inflight@^1.0.4: once 
"^1.3.0" wrappy "1" -inherits@2, inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.0, inherits@~2.0.3: +inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.0, inherits@~2.0.3, inherits@2: version "2.0.4" resolved "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz" integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== @@ -4263,6 +4031,13 @@ levn@^0.4.1: prelude-ls "^1.2.1" type-check "~0.4.0" +lie@~3.3.0: + version "3.3.0" + resolved "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz" + integrity sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ== + dependencies: + immediate "~3.0.5" + lie@3.1.1: version "3.1.1" resolved "https://registry.npmjs.org/lie/-/lie-3.1.1.tgz" @@ -4270,12 +4045,29 @@ lie@3.1.1: dependencies: immediate "~3.0.5" -lie@~3.3.0: - version "3.3.0" - resolved "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz" - integrity sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ== +lightningcss-darwin-arm64@1.32.0: + version "1.32.0" + resolved "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz" + integrity sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ== + +lightningcss@^1.32.0: + version "1.32.0" + resolved "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz" + integrity sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ== dependencies: - immediate "~3.0.5" + detect-libc "^2.0.3" + optionalDependencies: + lightningcss-android-arm64 "1.32.0" + lightningcss-darwin-arm64 "1.32.0" + lightningcss-darwin-x64 "1.32.0" + lightningcss-freebsd-x64 "1.32.0" + lightningcss-linux-arm-gnueabihf "1.32.0" + lightningcss-linux-arm64-gnu "1.32.0" + lightningcss-linux-arm64-musl "1.32.0" + lightningcss-linux-x64-gnu "1.32.0" + lightningcss-linux-x64-musl "1.32.0" + lightningcss-win32-arm64-msvc 
"1.32.0" + lightningcss-win32-x64-msvc "1.32.0" lines-and-columns@^1.1.6: version "1.2.4" @@ -5051,7 +4843,7 @@ pathe@^2.0.3: resolved "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz" integrity sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w== -picocolors@1.1.1, picocolors@^1.1.1: +picocolors@^1.1.1, picocolors@1.1.1: version "1.1.1" resolved "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz" integrity sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA== @@ -5066,7 +4858,7 @@ possible-typed-array-names@^1.0.0: resolved "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz" integrity sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg== -postcss@^8.5.6: +postcss@^8.5.6, postcss@^8.5.8: version "8.5.8" resolved "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz" integrity sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg== @@ -5335,7 +5127,7 @@ react-animate-on-change@^2.2.0: react-arborist@3.7.0: version "3.7.0" - resolved "https://registry.npmjs.org/react-arborist/-/react-arborist-3.7.0.tgz#b39156f1fe4bb31477118c57905827a3ba4d0ec5" + resolved "https://registry.npmjs.org/react-arborist/-/react-arborist-3.7.0.tgz" integrity sha512-gh2SoO0eXQVSP6zxXMGqFeXF+l2uabDGBVn0+RKqy/s7mrG5xGnfM5mhyB67cMVobC3vWYLqe6HGh7ZEZadW/w== dependencies: react-dnd "^14.0.3" @@ -5523,7 +5315,33 @@ react@^18.2.0: dependencies: loose-envify "^1.1.0" -readable-stream@^2.0.0, readable-stream@^2.0.2, readable-stream@^2.0.5, readable-stream@~2.3.6: +readable-stream@^2.0.0: + version "2.3.8" + resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz" + integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA== + dependencies: + core-util-is "~1.0.0" + inherits "~2.0.3" + isarray "~1.0.0" + 
process-nextick-args "~2.0.0" + safe-buffer "~5.1.1" + string_decoder "~1.1.1" + util-deprecate "~1.0.1" + +readable-stream@^2.0.2: + version "2.3.8" + resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz" + integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA== + dependencies: + core-util-is "~1.0.0" + inherits "~2.0.3" + isarray "~1.0.0" + process-nextick-args "~2.0.0" + safe-buffer "~5.1.1" + string_decoder "~1.1.1" + util-deprecate "~1.0.1" + +readable-stream@^2.0.5: version "2.3.8" resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz" integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA== @@ -5545,6 +5363,19 @@ readable-stream@^3.1.1, readable-stream@^3.4.0, readable-stream@^3.6.0: string_decoder "^1.1.1" util-deprecate "^1.0.1" +readable-stream@~2.3.6: + version "2.3.8" + resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz" + integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA== + dependencies: + core-util-is "~1.0.0" + inherits "~2.0.3" + isarray "~1.0.0" + process-nextick-args "~2.0.0" + safe-buffer "~5.1.1" + string_decoder "~1.1.1" + util-deprecate "~1.0.1" + readdir-glob@^1.1.2: version "1.1.3" resolved "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.3.tgz" @@ -5575,7 +5406,14 @@ redux-thunk@^2.4.2: resolved "https://registry.npmjs.org/redux-thunk/-/redux-thunk-2.4.2.tgz" integrity sha512-+P3TjtnP0k/FEjcBL5FZpoovtvrTNT/UXd4/sluaSyrURlSlhLSzEdfsTBW7WsKB6yPvgd7q/iZPICFjW4o57Q== -redux@^4.1.1, redux@^4.2.0, redux@^4.2.1: +redux@^4.1.1: + version "4.2.1" + resolved "https://registry.npmjs.org/redux/-/redux-4.2.1.tgz" + integrity sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w== + dependencies: + "@babel/runtime" "^7.9.2" + +redux@^4.2.0, redux@^4.2.1: version "4.2.1" 
resolved "https://registry.npmjs.org/redux/-/redux-4.2.1.tgz" integrity sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w== @@ -5687,6 +5525,30 @@ robust-predicates@^3.0.2: resolved "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.3.tgz" integrity sha512-NS3levdsRIUOmiJ8FZWCP7LG3QpJyrs/TE0Zpf1yvZu8cAJJ6QMW92H1c7kWpdIHo8RvmLxN/o2JXTKHp74lUA== +rolldown@1.0.0-rc.11: + version "1.0.0-rc.11" + resolved "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.11.tgz" + integrity sha512-NRjoKMusSjfRbSYiH3VSumlkgFe7kYAa3pzVOsVYVFY3zb5d7nS+a3KGQ7hJKXuYWbzJKPVQ9Wxq2UvyK+ENpw== + dependencies: + "@oxc-project/types" "=0.122.0" + "@rolldown/pluginutils" "1.0.0-rc.11" + optionalDependencies: + "@rolldown/binding-android-arm64" "1.0.0-rc.11" + "@rolldown/binding-darwin-arm64" "1.0.0-rc.11" + "@rolldown/binding-darwin-x64" "1.0.0-rc.11" + "@rolldown/binding-freebsd-x64" "1.0.0-rc.11" + "@rolldown/binding-linux-arm-gnueabihf" "1.0.0-rc.11" + "@rolldown/binding-linux-arm64-gnu" "1.0.0-rc.11" + "@rolldown/binding-linux-arm64-musl" "1.0.0-rc.11" + "@rolldown/binding-linux-ppc64-gnu" "1.0.0-rc.11" + "@rolldown/binding-linux-s390x-gnu" "1.0.0-rc.11" + "@rolldown/binding-linux-x64-gnu" "1.0.0-rc.11" + "@rolldown/binding-linux-x64-musl" "1.0.0-rc.11" + "@rolldown/binding-openharmony-arm64" "1.0.0-rc.11" + "@rolldown/binding-wasm32-wasi" "1.0.0-rc.11" + "@rolldown/binding-win32-arm64-msvc" "1.0.0-rc.11" + "@rolldown/binding-win32-x64-msvc" "1.0.0-rc.11" + rollup@^4.43.0: version "4.60.0" resolved "https://registry.npmjs.org/rollup/-/rollup-4.60.0.tgz" @@ -5747,7 +5609,7 @@ safe-array-concat@^1.1.3: has-symbols "^1.1.0" isarray "^2.0.5" -safe-buffer@^5.0.1, safe-buffer@~5.2.0: +safe-buffer@^5.0.1: version "5.2.1" resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz" integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== @@ -5757,6 +5619,11 @@ 
safe-buffer@~5.1.0, safe-buffer@~5.1.1: resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz" integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g== +safe-buffer@~5.2.0: + version "5.2.1" + resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz" + integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== + safe-push-apply@^1.0.0: version "1.0.0" resolved "https://registry.npmjs.org/safe-push-apply/-/safe-push-apply-1.0.0.tgz" @@ -5947,7 +5814,7 @@ solid-js@^1.9.5: seroval "~1.5.0" seroval-plugins "~1.5.0" -"source-map-js@>=0.6.2 <2.0.0", source-map-js@^1.2.1: +source-map-js@^1.2.1, "source-map-js@>=0.6.2 <2.0.0": version "1.2.1" resolved "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz" integrity sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA== @@ -5985,6 +5852,20 @@ stop-iteration-iterator@^1.1.0: es-errors "^1.3.0" internal-slot "^1.1.0" +string_decoder@^1.1.1: + version "1.3.0" + resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz" + integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== + dependencies: + safe-buffer "~5.2.0" + +string_decoder@~1.1.1: + version "1.1.1" + resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz" + integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg== + dependencies: + safe-buffer "~5.1.0" + string-width@^7.0.0, string-width@^7.2.0: version "7.2.0" resolved "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz" @@ -6062,20 +5943,6 @@ string.prototype.trimstart@^1.0.8: define-properties "^1.2.1" es-object-atoms "^1.0.0" -string_decoder@^1.1.1: - version "1.3.0" - resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz" - integrity 
sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== - dependencies: - safe-buffer "~5.2.0" - -string_decoder@~1.1.1: - version "1.1.1" - resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz" - integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg== - dependencies: - safe-buffer "~5.1.0" - stringify-entities@^4.0.0: version "4.0.4" resolved "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz" @@ -6218,9 +6085,9 @@ tldts@^7.0.5: tldts-core "^7.0.27" tmp@^0.2.0: - version "0.2.7" - resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.2.7.tgz#26f4db11d1601ce8012dcb8a798ece1c06a99059" - integrity sha512-e0votIpp4Uo2AJYSzVHV6xCcawuiez3DzqDAbrTc3YxBkplN6e+dM13ZeIcZnDg/QpSuU2zfZ3rzwY8ukEnaXw== + version "0.2.5" + resolved "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz" + integrity sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow== topojson-client@^3.1.0: version "3.1.0" @@ -6263,16 +6130,16 @@ ts-api-utils@^2.4.0: resolved "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz" integrity sha512-OJ/ibxhPlqrMM0UiNHJ/0CKQkoKF243/AEmplt3qpRgkW8VG7IfOS41h7V8TjITqdByHzrjcS/2si+y4lIh8NA== -tslib@2.3.0: - version "2.3.0" - resolved "https://registry.npmjs.org/tslib/-/tslib-2.3.0.tgz" - integrity sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg== - tslib@^2.8.1, tslib@~2.8.1: version "2.8.1" resolved "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz" integrity sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w== +tslib@2.3.0: + version "2.3.0" + resolved "https://registry.npmjs.org/tslib/-/tslib-2.3.0.tgz" + integrity sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg== + tunnel-agent@^0.6.0: version "0.6.0" resolved 
"https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz" @@ -6503,18 +6370,6 @@ vega-dataflow@^6.1.0, vega-dataflow@~6.1.0: vega-loader "^5.1.0" vega-util "^2.1.0" -vega-embed@6.5.1: - version "6.5.1" - resolved "https://registry.npmjs.org/vega-embed/-/vega-embed-6.5.1.tgz" - integrity sha512-yz/L1bN3+fLOpgXVb/8sCRv4GlZpD2/ngeKJAFRiHTIRm5zK6W0KuqZZvyGaO7E4s7RuYjW1TWhRIOqh5rS5hA== - dependencies: - fast-json-patch "^3.0.0-1" - json-stringify-pretty-compact "^2.0.0" - semver "^7.1.3" - vega-schema-url-parser "^1.1.0" - vega-themes "^2.8.2" - vega-tooltip "^0.22.0" - vega-embed@^6.21.0: version "6.29.0" resolved "https://registry.npmjs.org/vega-embed/-/vega-embed-6.29.0.tgz" @@ -6529,6 +6384,18 @@ vega-embed@^6.21.0: vega-themes "^2.15.0" vega-tooltip "^0.35.2" +vega-embed@6.5.1: + version "6.5.1" + resolved "https://registry.npmjs.org/vega-embed/-/vega-embed-6.5.1.tgz" + integrity sha512-yz/L1bN3+fLOpgXVb/8sCRv4GlZpD2/ngeKJAFRiHTIRm5zK6W0KuqZZvyGaO7E4s7RuYjW1TWhRIOqh5rS5hA== + dependencies: + fast-json-patch "^3.0.0-1" + json-stringify-pretty-compact "^2.0.0" + semver "^7.1.3" + vega-schema-url-parser "^1.1.0" + vega-themes "^2.8.2" + vega-tooltip "^0.22.0" + vega-encode@~5.1.0: version "5.1.0" resolved "https://registry.npmjs.org/vega-encode/-/vega-encode-5.1.0.tgz" @@ -6791,7 +6658,17 @@ vega-typings@~2.1.0: vega-expression "^6.1.0" vega-util "^2.1.0" -vega-util@^1.13.1, vega-util@^1.17.2, vega-util@^1.17.4: +vega-util@^1.13.1: + version "1.17.4" + resolved "https://registry.npmjs.org/vega-util/-/vega-util-1.17.4.tgz" + integrity sha512-+y3ZW7dEqM8Ck+KRsd+jkMfxfE7MrQxUyIpNjkfhIpGEreym+aTn7XUw1DKXqclr8mqTQvbilPo16B3lnBr0wA== + +vega-util@^1.17.2: + version "1.17.4" + resolved "https://registry.npmjs.org/vega-util/-/vega-util-1.17.4.tgz" + integrity sha512-+y3ZW7dEqM8Ck+KRsd+jkMfxfE7MrQxUyIpNjkfhIpGEreym+aTn7XUw1DKXqclr8mqTQvbilPo16B3lnBr0wA== + +vega-util@^1.17.4: version "1.17.4" resolved "https://registry.npmjs.org/vega-util/-/vega-util-1.17.4.tgz" 
integrity sha512-+y3ZW7dEqM8Ck+KRsd+jkMfxfE7MrQxUyIpNjkfhIpGEreym+aTn7XUw1DKXqclr8mqTQvbilPo16B3lnBr0wA== @@ -6893,7 +6770,20 @@ vfile@^6.0.0: "@types/unist" "^3.0.0" vfile-message "^4.0.0" -"vite@^6.0.0 || ^7.0.0 || ^8.0.0", vite@^7.3.3: +"vite@^6.0.0 || ^7.0.0 || ^8.0.0": + version "8.0.2" + resolved "https://registry.npmjs.org/vite/-/vite-8.0.2.tgz" + integrity sha512-1gFhNi+bHhRE/qKZOJXACm6tX4bA3Isy9KuKF15AgSRuRazNBOJfdDemPBU16/mpMxApDPrWvZ08DcLPEoRnuA== + dependencies: + lightningcss "^1.32.0" + picomatch "^4.0.3" + postcss "^8.5.8" + rolldown "1.0.0-rc.11" + tinyglobby "^0.2.15" + optionalDependencies: + fsevents "~2.3.3" + +vite@^7.3.3: version "7.3.3" resolved "https://registry.npmjs.org/vite/-/vite-7.3.3.tgz" integrity sha512-/4XH147Ui7OGTjg3HbdWe5arnZQSbfuRzdr9Ec7TQi5I7R+ir0Rlc9GIvD4v0XZurELqA035KVXJXpR61xhiTA==