From 0f8fa8394be658279a455d268194cd2d5a3ee700 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Thu, 28 May 2026 22:03:13 -0700
Subject: [PATCH 01/29] Data-loading chat: hand off prompts via a redux pending slot; misc fixes

---
 .../datalake/workspace_manager.py             |  11 +-
 src/app/dfSlice.tsx                           |  28 ++
 src/views/DataFormulator.tsx                  |  34 ++-
 src/views/DataLoadingChat.tsx                 | 263 +++++++++---------
 src/views/DataThread.tsx                      |  16 +-
 src/views/UnifiedDataUploadDialog.tsx         | 140 ++++------
 src/views/dataLoadingSuggestions.ts           |  82 ++++--
 tests/backend/data/test_workspace_manager.py  |  64 ++++-
 8 files changed, 369 insertions(+), 269 deletions(-)

diff --git a/py-src/data_formulator/datalake/workspace_manager.py b/py-src/data_formulator/datalake/workspace_manager.py
index 679452ca..23e37176 100644
--- a/py-src/data_formulator/datalake/workspace_manager.py
+++ b/py-src/data_formulator/datalake/workspace_manager.py
@@ -169,6 +169,10 @@ def list_workspaces(self) -> list[dict]:
         workspace.  If a workspace directory lacks this file (legacy),
         it is auto-repaired via :meth:`_ensure_meta`.
 
+        Every workspace directory is listed, including empty
+        "Untitled Session" entries from data-loading chats. Users
+        manage (rename/delete) these themselves via the sidebar.
+
         Returns list of {"id": str, "display_name": str, "updated_at": str}.
         """
         workspaces = []
@@ -184,13 +188,16 @@ def list_workspaces(self) -> list[dict]:
             except Exception:
                 continue
 
+            tc = meta.get("tableCount")
+            cc = meta.get("chartCount")
+
             workspaces.append({
                 "id": child.name,
                 "display_name": meta.get("displayName", child.name),
                 "created_at": meta.get("createdAt") or meta.get("updatedAt"),
                 "updated_at": meta.get("updatedAt"),
-                "table_count": meta.get("tableCount"),
-                "chart_count": meta.get("chartCount"),
+                "table_count": tc,
+                "chart_count": cc,
             })
 
         workspaces.sort(key=lambda w: w.get("updated_at") or "", reverse=True)
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index a3fe5add..89b075ab 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -210,6 +210,17 @@ export interface DataFormulatorState {
      * Transient — not persisted.
      */
     dataLoadingChatResetCounter: number;
+    /**
+     * Pending submission queued for the data-loading chat. Set by any
+     * surface that wants to hand a prompt off to the chat (the menu
+     * agent input box, suggestion auto-run, external dialog callers).
+     * `DataLoadingChat` consumes it in an effect: it clears the slot and
+     * sends the carried payload as a fresh user message. Using a single
+     * redux slot (instead of props + a reset counter) eliminates the
+     * cross-tick race where the parent's pre-clear would otherwise
+     * cancel the auto-send for the new prompt. Transient — not persisted.
+     */
+    dataLoadingChatPending: { text: string; images: string[]; attachments: string[] } | null;
     /**
      * Pending hand-off from the Data Agent to a peer agent. Set by the
      * Data Agent's `delegate` action card; consumed by `DataFormulator`
@@ -299,6 +310,7 @@ const initialState: DataFormulatorState = {
     dataLoadingChatMessages: [],
     dataLoadingChatInProgress: false,
     dataLoadingChatResetCounter: 0,
+    dataLoadingChatPending: null,
     agentHandoffRequest: null,
 
     generatedReports: [],
@@ -720,6 +732,7 @@ export const dataFormulatorSlice = createSlice({
             state.dataLoadingChatMessages = [];
             state.dataLoadingChatInProgress = false;
             state.dataLoadingChatResetCounter = (state.dataLoadingChatResetCounter ?? 0) + 1;
+            state.dataLoadingChatPending = null;
 
             state.generatedReports = [];
 
@@ -837,6 +850,7 @@ export const dataFormulatorSlice = createSlice({
                 config: { ...initialState.config, ...(saved.config || {}) },
                 dataCleanBlocks: saved.dataCleanBlocks || [],
                 dataLoadingChatMessages: saved.dataLoadingChatMessages || [],
+                dataLoadingChatPending: null,
                 generatedReports: saved.generatedReports || [],
 
                 // Reset transient fields
@@ -1665,6 +1679,20 @@ export const dataFormulatorSlice = createSlice({
             state.dataLoadingChatMessages = [];
             state.dataLoadingChatInProgress = false;
             state.dataLoadingChatResetCounter = (state.dataLoadingChatResetCounter ?? 0) + 1;
+            // Note: `dataLoadingChatPending` is intentionally left
+            // alone. Callers that want "fresh slate + auto-send the
+            // new prompt" dispatch `clearChatMessages` followed by
+            // `setDataLoadingChatPending` in the same tick — clearing
+            // pending here would race with that ordering.
+        },
+        setDataLoadingChatPending: (
+            state,
+            action: PayloadAction<{ text: string; images: string[]; attachments: string[] }>,
+        ) => {
+            state.dataLoadingChatPending = action.payload;
+        },
+        clearDataLoadingChatPending: (state) => {
+            state.dataLoadingChatPending = null;
         },
         confirmTableLoad: (state, action: PayloadAction<{messageId: string, tableName: string}>) => {
             const msg = state.dataLoadingChatMessages.find(m => m.id === action.payload.messageId);
diff --git a/src/views/DataFormulator.tsx b/src/views/DataFormulator.tsx
index 2c21c15b..00e8086e 100644
--- a/src/views/DataFormulator.tsx
+++ b/src/views/DataFormulator.tsx
@@ -301,24 +301,37 @@ export const DataFormulatorFC = ({ }) => {
     // State for unified data upload dialog
     const [uploadDialogOpen, setUploadDialogOpen] = useState(false);
     const [uploadDialogInitialTab, setUploadDialogInitialTab] = useState<UploadTabType>('menu');
-    const [uploadDialogInitialChatPrompt, setUploadDialogInitialChatPrompt] = useState<string | undefined>(undefined);
-    const [uploadDialogInitialChatImages, setUploadDialogInitialChatImages] = useState<string[] | undefined>(undefined);
 
     // Loading state for sessions (from Redux, shared with App.tsx)
     const sessionLoading = useSelector((state: DataFormulatorState) => state.sessionLoading);
     const sessionLoadingLabel = useSelector((state: DataFormulatorState) => state.sessionLoadingLabel);
 
-    const openUploadDialog = (tab: UploadTabType, initialChatPrompt?: string, initialChatImages?: string[]) => {
+    const openUploadDialog = (tab: UploadTabType) => {
         // If no workspace is active, generate an ID (backend creates folder lazily on first data op)
         if (!activeWorkspace) {
             dispatch(dfActions.setActiveWorkspace({ id: generateSessionId(), displayName: 'Untitled Session' }));
         }
         setUploadDialogInitialTab(tab);
-        setUploadDialogInitialChatPrompt(initialChatPrompt);
-        setUploadDialogInitialChatImages(initialChatImages);
         setUploadDialogOpen(true);
     };
 
+    // Seed the Data Loading chat through the single redux `pending` slot,
+    // then navigate to the extract tab. This is the one channel that
+    // carries text, images, AND file attachments as first-class fields —
+    // replacing the older `initialChatPrompt/Images` props that silently
+    // dropped file attachments (they had no dedicated field and only
+    // survived if their name was baked into the prompt text).
+    const startDataLoadingChat = (text: string, images: string[] = [], attachments: string[] = []) => {
+        if (text.trim().length > 0 || images.length > 0 || attachments.length > 0) {
+            // Fresh query replaces any prior conversation.
+            if (dataLoadingChatMessages.length > 0) {
+                dispatch(dfActions.clearChatMessages());
+            }
+            dispatch(dfActions.setDataLoadingChatPending({ text, images, attachments }));
+        }
+        openUploadDialog('extract');
+    };
+
     // Honor cross-component requests to hand off to the Data Loading
     // chat seeded with a prompt (e.g. Data Agent's `delegate` card with
     // target='data_loading'). Hand-offs targeting other agents (e.g.
@@ -326,7 +339,7 @@ export const DataFormulatorFC = ({ }) => {
     const agentHandoffRequest = useSelector((state: DataFormulatorState) => state.agentHandoffRequest);
     useEffect(() => {
         if (agentHandoffRequest && agentHandoffRequest.target === 'data_loading') {
-            openUploadDialog('extract', agentHandoffRequest.prompt, agentHandoffRequest.images);
+            startDataLoadingChat(agentHandoffRequest.prompt, agentHandoffRequest.images ?? [], []);
             dispatch(dfActions.clearAgentHandoffRequest());
         }
         // openUploadDialog is stable enough for this purpose; we only react
@@ -730,7 +743,7 @@ export const DataFormulatorFC = ({ }) => {
                             openUploadDialog(`connector:${conn.id}` as UploadTabType);
                         }
                     }}
-                    onStartChat={(prompt, images) => openUploadDialog('extract', prompt, images)}
+                    onStartChat={(prompt, images, attachments) => startDataLoadingChat(prompt, images, attachments)}
                     hasPriorConversation={dataLoadingChatMessages.length > 0}
                     onResumeChat={() => openUploadDialog('extract')}
                     serverConfig={serverConfig}
@@ -933,16 +946,9 @@ export const DataFormulatorFC = ({ }) => {
                     open={uploadDialogOpen}
                     onClose={() => {
                         setUploadDialogOpen(false);
-                        // Clear one-shot seed values so the next dialog
-                        // open (e.g. via the upload button) doesn't
-                        // re-fire the agent with a stale prompt/image.
-                        setUploadDialogInitialChatPrompt(undefined);
-                        setUploadDialogInitialChatImages(undefined);
                         refreshPageConnectors();
                     }}
                     initialTab={uploadDialogInitialTab}
-                    initialChatPrompt={uploadDialogInitialChatPrompt}
-                    initialChatImages={uploadDialogInitialChatImages}
                     onConnectorsChanged={handleConnectorsChanged}
                 />
                 {/* Loading overlay for session loading */}
diff --git a/src/views/DataLoadingChat.tsx b/src/views/DataLoadingChat.tsx
index 379e29fb..9fe59000 100644
--- a/src/views/DataLoadingChat.tsx
+++ b/src/views/DataLoadingChat.tsx
@@ -60,7 +60,11 @@ const getUniqueTableName = (baseName: string, existingNames: Set<string>): strin
 // Modern monospace font stack for code blocks
 const CODE_FONT = '"SF Mono", "Cascadia Code", "Fira Code", Menlo, Consolas, "Liberation Mono", monospace';
 
-const MarkdownContent: React.FC<{ content: string }> = ({ content }) => {
+// Memoized so typing in the chat input (which re-renders the parent
+// `DataLoadingChat` on every keystroke) doesn't re-parse every assistant
+// message through react-markdown. `content` is a stable string per
+// committed message, so the default shallow equality is sufficient.
+const MarkdownContent = React.memo(({ content }: { content: string }) => {
     return (
         <Box sx={{ wordBreak: 'break-word' }}>
             <Markdown
@@ -168,7 +172,7 @@ const MarkdownContent: React.FC<{ content: string }> = ({ content }) => {
             </Markdown>
         </Box>
     );
-};
+});
 
 // ---------------------------------------------------------------------------
 // Inline table preview — compact notebook-style
@@ -317,10 +321,16 @@ const CodeBlockView: React.FC<{ block: CodeExecution }> = ({ block }) => {
 // Single chat message bubble
 // ---------------------------------------------------------------------------
 
-const ChatBubble: React.FC<{
+// Memoized so typing in the chat input doesn't re-render every prior
+// bubble (each one renders MarkdownContent + potentially code blocks /
+// table previews, which is expensive on long threads). The parent
+// stabilises `existingNames` via useMemo so memo equality holds across
+// keystrokes.
+const ChatBubble = React.memo<{
     message: ChatMessage;
     existingNames: Set<string>;
-}> = ({ message, existingNames }) => {
+    onTableLoaded?: () => void;
+}>(({ message, existingNames, onTableLoaded }) => {
     const theme = useTheme();
     const { t } = useTranslation();
     const dispatch = useDispatch<AppDispatch>();
@@ -340,6 +350,9 @@ const ChatBubble: React.FC<{
                 if (table) {
                     dispatch(loadTable({ table: { ...table, source: { type: 'extract' as const } } }));
                     dispatch(dfActions.confirmTableLoad({ messageId: message.id, tableName: pending.name }));
+                    // Loading data is a deliberate commit — return the
+                    // user to the canvas (the dialog closes via this hook).
+                    onTableLoaded?.();
                 }
             }
         } catch (err) {
@@ -468,6 +481,11 @@ const ChatBubble: React.FC<{
                                 }));
                             }
                             dispatch(dfActions.markLoadPlanConfirmed({ messageId: message.id }));
+                            if (selected.length > 0) {
+                                // Return the user to the canvas after a
+                                // deliberate batch load.
+                                onTableLoaded?.();
+                            }
                         }}
                     />
                 )}
@@ -493,7 +511,7 @@ const ChatBubble: React.FC<{
             </Box>
         </Box>
     );
-};
+});
 
 // ---------------------------------------------------------------------------
 // Tool call label mapping
@@ -517,7 +535,10 @@ interface ToolStep {
     label: string;
 }
 
-const StreamingIndicator: React.FC<{ content: string; toolSteps: ToolStep[] }> = ({ content, toolSteps }) => {
+// Memoized so an unrelated parent re-render (e.g. typing) doesn't
+// reflow the shimmer animation. Props are state values that only change
+// during an active stream.
+const StreamingIndicator = React.memo<{ content: string; toolSteps: ToolStep[] }>(({ content, toolSteps }) => {
     const theme = useTheme();
     return (
         <Box sx={{ mb: 2 }}>
@@ -579,55 +600,56 @@ const StreamingIndicator: React.FC<{ content: string; toolSteps: ToolStep[] }> =
             )}
         </Box>
     );
-};
+});
 
 // ---------------------------------------------------------------------------
 // Main chat component
 // ---------------------------------------------------------------------------
 
-export interface DataLoadingChatProps {
-    /**
-     * Optional initial text to pre-fill the chat input when the component
-     * mounts (or when the value changes). Used by external entry points
-     * (e.g. landing page quick-chat box) that want to hand off a prompt
-     * to the agent.
-     */
-    initialPrompt?: string;
-    /**
-     * Optional images (data URLs) to seed alongside `initialPrompt` —
-     * used when an external surface (e.g. landing-page agent box) has
-     * already collected pasted/attached images and is handing them off.
-     */
-    initialImages?: string[];
-    /**
-     * If true, automatically send the `initialPrompt` once on mount/change.
-     * Otherwise the prompt is only pre-filled and the user presses Enter.
-     */
-    autoSendInitialPrompt?: boolean;
+interface DataLoadingChatProps {
+    /** Called after a table is successfully loaded into the app. The
+     *  upload dialog wires this to its close handler so loading data
+     *  returns the user to the canvas. */
+    onTableLoaded?: () => void;
 }
 
-export const DataLoadingChat: React.FC<DataLoadingChatProps> = ({
-    initialPrompt,
-    initialImages,
-    autoSendInitialPrompt,
-}) => {
+export const DataLoadingChat: React.FC<DataLoadingChatProps> = ({ onTableLoaded }) => {
     const theme = useTheme();
     const { t } = useTranslation();
     const dispatch = useDispatch<AppDispatch>();
 
+    // Keep the latest callback in a ref so the stable `handleTableLoaded`
+    // identity below doesn't bust `ChatBubble`'s memoization even when the
+    // parent passes a fresh closure each render.
+    const onTableLoadedRef = useRef(onTableLoaded);
+    onTableLoadedRef.current = onTableLoaded;
+    const handleTableLoaded = useCallback(() => {
+        onTableLoadedRef.current?.();
+    }, []);
+
     const chatMessages = useSelector((state: DataFormulatorState) => state.dataLoadingChatMessages);
     const chatInProgress = useSelector((state: DataFormulatorState) => state.dataLoadingChatInProgress);
-    // External reset signal — bumped by `clearChatMessages` (manual reset
-    // button, new menu-level query, full session reset). When it changes
-    // we abort any in-flight stream, drop partial UI state, and re-seed
-    // from props if the parent provided a new prompt/images. Without
-    // this, an in-flight stream's eventual dispatches would leak into
-    // the freshly-cleared thread.
+    // External reset signal — bumped by `clearChatMessages` (manual
+    // reset button, fresh menu submission, full session reset). Used
+    // here only to abort an in-flight stream and invalidate any
+    // late-arriving dispatches from that stream via `sessionRef`.
     const chatResetCounter = useSelector((state: DataFormulatorState) => state.dataLoadingChatResetCounter ?? 0);
+    // Pending submission queued by an external surface (menu agent
+    // box, suggestion auto-run, external dialog caller). When set, we
+    // consume it in a useEffect: clear the slot first, then send the
+    // carried payload as a fresh user message via `sendMessage`.
+    // Single redux signal = no prop race.
+    const pendingSubmission = useSelector((state: DataFormulatorState) => state.dataLoadingChatPending);
     const existingTables = useSelector((state: DataFormulatorState) => state.tables);
     const activeModel = useSelector(dfSelectors.getActiveModel);
     const frontendRowLimit = useSelector((state: DataFormulatorState) => state.config?.frontendRowLimit ?? 2_000_000);
-    const existingNames = new Set(existingTables.map(tbl => tbl.id));
+    // Stable reference across renders that don't actually change the
+    // table list — without this, every keystroke in the chat input
+    // would rebuild the Set and bust `ChatBubble`'s memo equality.
+    const existingNames = React.useMemo(
+        () => new Set(existingTables.map(tbl => tbl.id)),
+        [existingTables],
+    );
 
     const [prompt, setPrompt] = useState('');
     const [userImages, setUserImages] = useState<string[]>([]);
@@ -654,95 +676,44 @@ export const DataLoadingChat: React.FC<DataLoadingChatProps> = ({
     // Auto-focus input
     useEffect(() => { inputRef.current?.focus(); }, []);
 
-    // ---- External initial prompt handling -------------------------------
-    // Pre-fill the input (and optionally auto-send) when `initialPrompt`
-    // is provided. Used by external surfaces (e.g. landing-page quick chat
-    // box) to hand off text to the agent. Auto-send only fires for a
-    // fresh conversation — we never auto-resend on remount mid-chat.
-    const hasExistingMessages = chatMessages.length > 0;
-    const [pendingAutoSend, setPendingAutoSend] = useState(false);
+    // ---- Reset handling -------------------------------------------------
+    // On external reset (counter bump from `clearChatMessages`): abort
+    // any in-flight stream, invalidate the current session token, and
+    // clear local input/streaming UI state. We deliberately do NOT
+    // re-seed anything here — a reset means "clean slate"; any new
+    // submission arrives separately via `pendingSubmission`.
     useEffect(() => {
-        // Detect external reset: abort, invalidate in-flight session,
-        // and clear all local UI state before re-seeding. Including
-        // `chatResetCounter` in the dep list also guarantees that an
-        // identical-prompt re-submission (same `initialPrompt` string)
-        // still triggers a fresh auto-send — otherwise the deps would
-        // be unchanged and the effect would skip.
-        const isReset = chatResetCounter !== lastResetRef.current;
-        if (isReset) {
-            lastResetRef.current = chatResetCounter;
-            sessionRef.current += 1;
-            abortControllerRef.current?.abort();
-            abortControllerRef.current = null;
-            setStreamingContent('');
-            setStreamingToolSteps([]);
-            setPrompt('');
-            setUserImages([]);
-            setUserAttachments([]);
-            setPendingAutoSend(false);
-        }
-
-        // Extract `[Uploaded: name]` mentions from the seeded prompt and
-        // surface them as chips. The mention template is locale-aware,
-        // so we build the regex from the current i18n value rather than
-        // hard-coding the English form.
-        const mentionTemplate = t('dataLoading.uploaded', { name: '__DF_NAME__' });
-        const mentionPattern = mentionTemplate
-            .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
-            .replace('__DF_NAME__', '(.+?)');
-        const mentionRegex = new RegExp(mentionPattern, 'g');
-        let seededPrompt = initialPrompt || '';
-        const extractedNames: string[] = [];
-        if (seededPrompt) {
-            let match: RegExpExecArray | null;
-            while ((match = mentionRegex.exec(seededPrompt)) !== null) {
-                extractedNames.push(match[1]);
-            }
-            if (extractedNames.length > 0) {
-                seededPrompt = seededPrompt
-                    .replace(new RegExp(`\\n?${mentionPattern}`, 'g'), '')
-                    .trim();
-            }
-        }
-
-        const hasText = seededPrompt.trim().length > 0;
-        const hasImages = !!initialImages && initialImages.length > 0;
-        const hasAttachments = extractedNames.length > 0;
-        // Skip re-seeding the input on a user-initiated reset — the
-        // reset is meant to restore a clean slate, not re-populate the
-        // input with the prompt the user just cleared.
-        if (!isReset) {
-            if (hasText) setPrompt(seededPrompt);
-            if (hasAttachments) setUserAttachments(extractedNames);
-            if (hasImages) {
-                // Always replace, never append. The prop is a "seed" — each
-                // change represents a fresh handoff from the parent, not an
-                // additive update. Appending caused the same image to stack
-                // up every time the parent re-rendered with a new array ref.
-                setUserImages([...initialImages!]);
-            }
-        }
-        // Auto-send only on a genuinely fresh open (no prior messages,
-        // and not a user-initiated reset). Resetting means the user wants
-        // a clean slate — re-running the seeded prompt against their will
-        // would defeat the purpose of the reset button.
-        if (autoSendInitialPrompt && !isReset && (hasText || hasImages || hasAttachments) && !hasExistingMessages) {
-            setPendingAutoSend(true);
-        }
-        // eslint-disable-next-line react-hooks/exhaustive-deps
-    }, [initialPrompt, initialImages, autoSendInitialPrompt, chatResetCounter]);
+        if (chatResetCounter === lastResetRef.current) return;
+        lastResetRef.current = chatResetCounter;
+        sessionRef.current += 1;
+        abortControllerRef.current?.abort();
+        abortControllerRef.current = null;
+        setStreamingContent('');
+        setStreamingToolSteps([]);
+        setPrompt('');
+        setUserImages([]);
+        setUserAttachments([]);
+    }, [chatResetCounter]);
 
     const stopGeneration = () => { abortControllerRef.current?.abort(); };
 
     // ---- Send message ----
-    const sendMessage = useCallback(() => {
-        const text = prompt.trim();
-        if (!text && userImages.length === 0 && userAttachments.length === 0) return;
+    // Accepts an optional explicit payload so callers (suggestion
+    // auto-run, pending-submission consume) can submit the exact
+    // values they just chose without waiting for React state to flush.
+    // Reading via the `prompt`/`userImages`/`userAttachments` closures
+    // alone would be racy with batching and could submit the previous
+    // round's values on a fresh handoff.
+    const sendMessage = useCallback((explicit?: { text: string; images: string[]; attachments: string[] }) => {
+        const text = (explicit?.text ?? prompt).trim();
+        const imgs = explicit?.images ?? userImages;
+        const atts = explicit?.attachments ?? userAttachments;
+        if (!text && imgs.length === 0 && atts.length === 0) return;
         if (chatInProgress) return;
-        const imageAttachments: ChatAttachment[] = userImages.map((url, i) => ({
+        const imageAttachments: ChatAttachment[] = imgs.map((url, i) => ({
             type: 'image' as const, name: `image-${i + 1}`, url,
         }));
-        const fileAttachments: ChatAttachment[] = userAttachments.map(name => ({
+        const fileAttachments: ChatAttachment[] = atts.map(name => ({
             type: 'file' as const, name,
         }));
         const attachments: ChatAttachment[] = [...imageAttachments, ...fileAttachments];
@@ -751,7 +722,7 @@ export const DataLoadingChat: React.FC<DataLoadingChatProps> = ({
         // chips (rendered from `attachments`). The agent payload below
         // re-injects `[Uploaded: name]` mentions so the backend still
         // sees the file references inline.
-        const displayText = text || (userImages.length > 0 ? t('dataLoading.defaultImageMessage') : '');
+        const displayText = text || (imgs.length > 0 ? t('dataLoading.defaultImageMessage') : '');
 
         const userMsg: ChatMessage = {
             id: `msg-${Date.now()}-user`, role: 'user',
@@ -967,25 +938,48 @@ export const DataLoadingChat: React.FC<DataLoadingChatProps> = ({
                 }
             }
         })();
-    }, [prompt, userImages, chatInProgress, chatMessages, activeModel, existingTables, dispatch, streamingContent, t]);
+    }, [prompt, userImages, userAttachments, chatInProgress, chatMessages, activeModel, existingTables, dispatch, streamingContent, t]);
 
-    // Auto-send the initial prompt once it has been applied to state.
+    // Consume a queued submission from any external surface (menu
+    // agent input, suggestion auto-run, or a cross-component handoff
+    // routed through `startDataLoadingChat`). Single redux signal,
+    // single consumer — no prop race.
+    //
+    // Idempotency note: under React.StrictMode (dev), effects are
+    // intentionally double-invoked on mount with the *same* closure,
+    // so the `clearDataLoadingChatPending` dispatch in the first run
+    // isn't visible to the second run. `lastConsumedRef` tracks the
+    // exact payload object we've already sent, so the second
+    // invocation short-circuits before calling `sendMessage` again.
+    const lastConsumedRef = useRef<typeof pendingSubmission>(null);
     useEffect(() => {
-        if (!pendingAutoSend) return;
+        if (!pendingSubmission) return;
+        if (pendingSubmission === lastConsumedRef.current) return;
         if (chatInProgress) return;
-        if (prompt.trim().length === 0 && userImages.length === 0) return;
-        setPendingAutoSend(false);
-        sendMessage();
-    }, [pendingAutoSend, prompt, userImages, chatInProgress, sendMessage]);
+        lastConsumedRef.current = pendingSubmission;
+        const payload = pendingSubmission;
+        dispatch(dfActions.clearDataLoadingChatPending());
+        sendMessage(payload);
+    }, [pendingSubmission, chatInProgress, sendMessage, dispatch]);
 
     // Reuse the shared sample-task list so this in-session panel stays in
     // sync with the upload-dialog entry point (`UnifiedDataUploadDialog`).
+    // Auto-run is wired through the redux pending slot so the click —
+    // even on a chat with prior history — atomically clears the thread
+    // and queues the new submission.
     const focusSuggestions = React.useMemo(() => buildDataLoadingSuggestions({
         t,
         setInput: setPrompt,
         setImages: setUserImages,
         setAttachments: setUserAttachments,
-    }), [t]);
+        requestAutoSend: (payload) => {
+            if (chatMessages.length > 0) {
+                dispatch(dfActions.clearChatMessages());
+            }
+            dispatch(dfActions.setDataLoadingChatPending(payload));
+        },
+        // Depend on the message count so the history check above is never stale.
+    }), [t, dispatch, chatMessages.length]);
 
     const isEmpty = chatMessages.length === 0 && !streamingContent;
 
@@ -1047,7 +1041,7 @@ export const DataLoadingChat: React.FC<DataLoadingChatProps> = ({
                 ) : (
                     <>
                         {chatMessages.map((msg) => (
-                            <ChatBubble key={msg.id} message={msg} existingNames={existingNames} />
+                            <ChatBubble key={msg.id} message={msg} existingNames={existingNames} onTableLoaded={handleTableLoaded} />
                         ))}
                         {streamingContent !== '' && <StreamingIndicator content={streamingContent} toolSteps={streamingToolSteps} />}
                         {chatInProgress && !streamingContent && <StreamingIndicator content="" toolSteps={streamingToolSteps} />}
@@ -1065,7 +1059,7 @@ export const DataLoadingChat: React.FC<DataLoadingChatProps> = ({
                         onChange={setPrompt}
                         images={userImages}
                         onImagesChange={setUserImages}
-                        onSend={sendMessage}
+                        onSend={() => sendMessage()}
                         onStop={stopGeneration}
                         inProgress={chatInProgress}
                         placeholder={t('dataLoading.placeholder')}
@@ -1076,8 +1070,13 @@ export const DataLoadingChat: React.FC<DataLoadingChatProps> = ({
                             formData.append('file', file);
                             apiRequest(getUrls().SCRATCH_UPLOAD_URL, {
                                 method: 'POST', body: formData,
-                            }).then(() => {
-                                setUserAttachments(prev => [...prev, file.name]);
+                            }).then(({ data }) => {
+                                // The backend hash-suffixes the filename
+                                // (e.g. `name_a1b2c3d4.xlsx`). Store the
+                                // server-assigned name so the `[Uploaded:]`
+                                // mention points to the real scratch file.
+                                const scratchName = (data?.path || `scratch/${file.name}`).replace(/^scratch\//, '');
+                                setUserAttachments(prev => [...prev, scratchName]);
                             }).catch(err => console.error('Upload failed:', err));
                         }}
                         attachments={userAttachments}
diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx
index ae0cc9f1..c69a71a5 100644
--- a/src/views/DataThread.tsx
+++ b/src/views/DataThread.tsx
@@ -1751,6 +1751,9 @@ let SingleThreadGroupView: FC<{
     const TIMELINE_GAP = '4px'; // gap between timeline and card content
     const DOT_SIZE = 6;
     const CARD_PY = '6px'; // vertical padding for each timeline row
+    // Mirror the left timeline gutter on the right so cards sit visually
+    // centred in their column instead of hugging the right edge.
+    const CARD_CONTENT_PR = `${TIMELINE_WIDTH}px`;
 
     // CSS `border-style: dashed` stretches dashes to fit each element's
     // height, so stacked segments end up with mismatched dash lengths.  A
@@ -1907,7 +1910,7 @@ let SingleThreadGroupView: FC<{
                         {isLast && hasContinuationBelow && <Box sx={{ flex: '1 1 0', minHeight: 2, ...dashedLineSx }} />}
                         {isLast && !hasContinuationBelow && <Box sx={{ flex: '1 1 0', minHeight: 2 }} />}
                     </Box>
-                    <Box sx={{ flex: 1, minWidth: 0, py: '4px', pl: TIMELINE_GAP, display: 'flex', alignItems: 'center' }}>
+                    <Box sx={{ flex: 1, minWidth: 0, py: '4px', pl: TIMELINE_GAP, pr: CARD_CONTENT_PR, display: 'flex', alignItems: 'center' }}>
                         {item.element}
                     </Box>
                 </Box>
@@ -1983,7 +1986,7 @@ let SingleThreadGroupView: FC<{
                         {isLast && hasContinuationBelow && <Box sx={{ flex: '1 1 0', minHeight: 2, ...dashedLineSx }} />}
                         {isLast && !hasContinuationBelow && <Box sx={{ flex: '1 1 0', minHeight: 2 }} />}
                     </Box>
-                    <Box sx={{ flex: 1, minWidth: 0, py: CARD_PY, pl: TIMELINE_GAP }}>
+                    <Box sx={{ flex: 1, minWidth: 0, py: CARD_PY, pl: TIMELINE_GAP, pr: CARD_CONTENT_PR }}>
                         {item.element}
                     </Box>
                 </Box>
@@ -2006,7 +2009,7 @@ let SingleThreadGroupView: FC<{
                         {isLast && hasContinuationBelow && <Box sx={{ flex: '1 1 0', minHeight: 2, ...dashedLineSx }} />}
                         {isLast && !hasContinuationBelow && <Box sx={{ flex: '1 1 0', minHeight: 2 }} />}
                     </Box>
-                    <Box sx={{ flex: 1, minWidth: 0, py: CARD_PY, pl: TIMELINE_GAP }}>
+                    <Box sx={{ flex: 1, minWidth: 0, py: CARD_PY, pl: TIMELINE_GAP, pr: CARD_CONTENT_PR }}>
                         {item.element}
                     </Box>
                 </Box>
@@ -2054,7 +2057,7 @@ let SingleThreadGroupView: FC<{
                     )}
                     {isLast && !hasContinuationBelow && <Box sx={{ flex: '1 1 0', minHeight: 6 }} />}
                 </Box>
-                <Box sx={{ flex: 1, minWidth: 0, py: item.type === 'used-table' ? '1px' : CARD_PY, pl: TIMELINE_GAP,
+                <Box sx={{ flex: 1, minWidth: 0, py: item.type === 'used-table' ? '1px' : CARD_PY, pl: TIMELINE_GAP, pr: CARD_CONTENT_PR,
                     ...(item.type === 'used-table' && { display: 'flex', alignItems: 'center' }),
                 }}>
                     {item.element}
@@ -3119,7 +3122,10 @@ export const DataThread: FC<{sx?: SxProps}> = function ({ sx }) {
     // benefit, since the segments would just stack in the same single column.
     const CARD_GAP = 12; // padding + spacing between cards in a column
     const PANEL_PADDING = 16;
-    const CARD_WIDTH = 220;
+    // 220px visual card width + 14px right gutter (CARD_CONTENT_PR, derived
+    // from TIMELINE_WIDTH in SingleThreadGroupView — keep the two in sync) so
+    // cards keep their original size while the margin balances the left gutter.
+    const CARD_WIDTH = 234;
     const COLUMN_WIDTH = CARD_WIDTH + CARD_GAP;
     // n columns need: n*CARD_WIDTH + (n-1)*CARD_GAP + PANEL_PADDING
     // Solving for n: n <= (containerWidth - PANEL_PADDING + CARD_GAP) / COLUMN_WIDTH
diff --git a/src/views/UnifiedDataUploadDialog.tsx b/src/views/UnifiedDataUploadDialog.tsx
index bd7167f8..73d325e1 100644
--- a/src/views/UnifiedDataUploadDialog.tsx
+++ b/src/views/UnifiedDataUploadDialog.tsx
@@ -448,12 +448,14 @@ export interface DataLoadMenuProps {
     onSelectConnector?: (connector: ConnectorInstance) => void;
     /**
      * Called when the user submits a prompt from the top-level Data Loading
-     * Agent chat box. Implementations should open the agent chat surface
-     * with the prompt (and optional pasted/attached images) pre-filled —
-     * typically auto-sent. If not provided, the chat box falls back to
-     * `onSelectTab('extract')`.
+     * Agent chat box. Implementations should hand the payload off to the
+     * agent chat surface, which will auto-send it as a fresh user
+     * message. Attachments are file names (already uploaded to the
+     * session scratch space) — the chat surface re-injects them as
+     * `[Uploaded: name]` mentions when building the backend payload.
+     * If not provided, the chat box falls back to `onSelectTab('extract')`.
      */
-    onStartChat?: (prompt: string, images?: string[]) => void;
+    onStartChat?: (prompt: string, images: string[], attachments: string[]) => void;
     /**
      * True when a prior data-loading agent conversation exists in
      * state. When set together with `onResumeChat`, the menu renders
@@ -605,22 +607,17 @@ export const DataLoadMenu: React.FC<DataLoadMenuProps> = ({
     const submitAgentChat = () => {
         const text = agentInput.trim();
         if (text.length === 0 && agentImages.length === 0 && agentAttachments.length === 0) {
-            // Empty submission — just open the chat surface.
-            if (onStartChat) onStartChat('', []);
+            // Empty submission — just surface the chat.
+            if (onStartChat) onStartChat('', [], []);
             else onSelectTab('extract');
             return;
         }
-        // Augment the outgoing prompt with `[Uploaded: name]` lines so the
-        // agent sees attachments as text references, without polluting
-        // the editable input the user sees.
-        const mentions = agentAttachments
-            .map(name => t('dataLoading.uploaded', { name }))
-            .join('\n');
-        const finalText = mentions
-            ? (text ? `${text}\n${mentions}` : mentions)
-            : text;
+        // Pass payload pieces unchanged — the chat surface builds the
+        // backend mentions itself. We deliberately do NOT pre-inject
+        // `[Uploaded: name]` into `text` here, so the visible message
+        // bubble stays clean and the file chips render uniformly.
         if (onStartChat) {
-            onStartChat(finalText, agentImages);
+            onStartChat(text, agentImages, agentAttachments);
         } else {
             onSelectTab('extract');
         }
@@ -631,14 +628,26 @@ export const DataLoadMenu: React.FC<DataLoadMenuProps> = ({
 
     // Suggestions surfaced as a focus-time dropdown — sourced from a shared
     // factory so the in-session `DataLoadingChat` panel renders the exact
-    // same list. See `dataLoadingSuggestions.ts`.
+    // same list. See `dataLoadingSuggestions.ts`. Auto-run is routed
+    // through `onStartChat` so the parent dialog can dispatch its
+    // `clearChatMessages` + `setDataLoadingChatPending` sequence
+    // atomically — same path as a manual submit.
     const agentChatSuggestions = useMemo(() => buildDataLoadingSuggestions({
         t,
         setInput: setAgentInput,
         setImages: setAgentImages,
         setAttachments: setAgentAttachments,
         ensureActiveWorkspace,
-    }), [t]);
+        requestAutoSend: onStartChat
+            ? (payload) => {
+                  onStartChat(payload.text, payload.images, payload.attachments);
+                  setAgentInput('');
+                  setAgentImages([]);
+                  setAgentAttachments([]);
+              }
+            : undefined,
+        // eslint-disable-next-line react-hooks/exhaustive-deps
+    }), [t, onStartChat]);
     const agentChatBox = (
         <Box sx={{ display: 'flex', flexDirection: 'column', width: '100%', maxWidth: 640 }}>
             <Box
@@ -708,8 +717,12 @@ export const DataLoadMenu: React.FC<DataLoadMenuProps> = ({
                     formData.append('file', file);
                     apiRequest(getUrls().SCRATCH_UPLOAD_URL, {
                         method: 'POST', body: formData,
-                    }).then(() => {
-                        setAgentAttachments(prev => [...prev, file.name]);
+                    }).then(({ data }) => {
+                        // The backend hash-suffixes the filename; store the
+                        // server-assigned name so the `[Uploaded:]` mention
+                        // resolves to the real scratch file.
+                        const scratchName = (data?.path || `scratch/${file.name}`).replace(/^scratch\//, '');
+                        setAgentAttachments(prev => [...prev, scratchName]);
                     }).catch(err => console.error('Upload failed:', err));
                 }}
                 attachments={agentAttachments}
@@ -1112,14 +1125,6 @@ export interface UnifiedDataUploadDialogProps {
     open: boolean;
     onClose: () => void;
     initialTab?: UploadTabType;
-    /**
-     * Optional initial prompt to hand off to the Data Loading Agent. When
-     * non-empty and `initialTab === 'extract'`, the prompt is pre-filled
-     * and auto-sent in the chat panel.
-     */
-    initialChatPrompt?: string;
-    /** Optional images (data URLs) to seed the chat alongside `initialChatPrompt`. */
-    initialChatImages?: string[];
     onConnectorsChanged?: () => void;
 }
 
@@ -1127,8 +1132,6 @@ export const UnifiedDataUploadDialog: React.FC<UnifiedDataUploadDialogProps> = (
     open,
     onClose,
     initialTab = 'menu',
-    initialChatPrompt,
-    initialChatImages,
     onConnectorsChanged,
 }) => {
     const theme = useTheme();
@@ -1143,21 +1146,6 @@ export const UnifiedDataUploadDialog: React.FC<UnifiedDataUploadDialogProps> = (
     const existingNames = new Set(existingTables.map(t => t.id));
 
     const [activeTab, setActiveTab] = useState<UploadTabType>(initialTab === 'menu' ? 'menu' : initialTab);
-    // Prompt to seed the agent chat with. Sourced from the `initialChatPrompt`
-    // prop when the dialog opens directly on 'extract', or set internally
-    // when the user submits the in-menu agent chat box.
-    const [seededChatPrompt, setSeededChatPrompt] = useState<string | undefined>(
-        initialTab === 'extract' ? initialChatPrompt : undefined,
-    );
-    const [seededChatImages, setSeededChatImages] = useState<string[] | undefined>(
-        initialTab === 'extract' ? initialChatImages : undefined,
-    );
-    const [autoSendSeededPrompt, setAutoSendSeededPrompt] = useState<boolean>(
-        initialTab === 'extract' && (
-            (!!initialChatPrompt && initialChatPrompt.trim().length > 0)
-            || (!!initialChatImages && initialChatImages.length > 0)
-        ),
-    );
     const fileInputRef = useRef<HTMLInputElement>(null);
     const urlInputRef = useRef<HTMLInputElement>(null);
 
@@ -1175,27 +1163,8 @@ export const UnifiedDataUploadDialog: React.FC<UnifiedDataUploadDialogProps> = (
         if (open) {
             setConnectorInstances([]);
             refreshConnectors();
-            // Re-seed chat prompt/images from props each time the dialog opens.
-            if (initialTab === 'extract') {
-                setSeededChatPrompt(initialChatPrompt);
-                setSeededChatImages(initialChatImages);
-                const hasText = !!initialChatPrompt && initialChatPrompt.trim().length > 0;
-                const hasImages = !!initialChatImages && initialChatImages.length > 0;
-                setAutoSendSeededPrompt(hasText || hasImages);
-                // Opening the dialog with a fresh prompt/images means the
-                // user wants a new data-loading conversation; clear any
-                // stale messages from a previous session so the new query
-                // isn't appended to an unrelated thread.
-                if ((hasText || hasImages) && dataLoadingChatMessages.length > 0) {
-                    dispatch(dfActions.clearChatMessages());
-                }
-            } else {
-                setSeededChatPrompt(undefined);
-                setSeededChatImages(undefined);
-                setAutoSendSeededPrompt(false);
-            }
         }
-    }, [open, refreshConnectors, identityKey, initialTab, initialChatPrompt, initialChatImages]);
+    }, [open, refreshConnectors, identityKey]);
 
     // Storage is determined by backend config — no user toggle
     const isEphemeral = serverConfig.WORKSPACE_BACKEND === 'ephemeral';
@@ -1848,29 +1817,32 @@ export const UnifiedDataUploadDialog: React.FC<UnifiedDataUploadDialogProps> = (
                                     setActiveTab(`connector:${conn.id}` as UploadTabType);
                                 }
                             }}
-                            onStartChat={(prompt, images) => {
+                            onStartChat={(prompt, images, attachments) => {
                                 const hasText = prompt.trim().length > 0;
-                                const hasImages = !!images && images.length > 0;
-                                // If a prior conversation exists, treat a
-                                // new query from the menu as a fresh data
-                                // reload and reset the chat. Without this
-                                // the new prompt would be appended onto an
-                                // unrelated thread, confusing the agent.
-                                if ((hasText || hasImages) && dataLoadingChatMessages.length > 0) {
-                                    dispatch(dfActions.clearChatMessages());
+                                const hasImages = images.length > 0;
+                                const hasAttachments = attachments.length > 0;
+                                // Always surface the chat. If the user
+                                // is starting a fresh query, clear any
+                                // prior conversation and enqueue the new
+                                // submission as a redux `pending` slot
+                                // — `DataLoadingChat` consumes it on
+                                // render and auto-sends. Doing both
+                                // dispatches in the same tick keeps the
+                                // handoff atomic; there's no prop race.
+                                if (hasText || hasImages || hasAttachments) {
+                                    if (dataLoadingChatMessages.length > 0) {
+                                        dispatch(dfActions.clearChatMessages());
+                                    }
+                                    dispatch(dfActions.setDataLoadingChatPending({
+                                        text: prompt, images, attachments,
+                                    }));
                                 }
-                                setSeededChatPrompt(prompt);
-                                setSeededChatImages(images);
-                                setAutoSendSeededPrompt(hasText || hasImages);
                                 setActiveTab('extract');
                             }}
                             hasPriorConversation={dataLoadingChatMessages.length > 0}
                             onResumeChat={() => {
                                 // Reopen the existing thread without
                                 // clearing messages or auto-sending.
-                                setSeededChatPrompt(undefined);
-                                setSeededChatImages(undefined);
-                                setAutoSendSeededPrompt(false);
                                 setActiveTab('extract');
                             }}
                             serverConfig={serverConfig}
@@ -2403,11 +2375,7 @@ export const UnifiedDataUploadDialog: React.FC<UnifiedDataUploadDialogProps> = (
 
                 {/* Extract Data Tab */}
                 <TabPanel value={activeTab} index="extract">
-                    <DataLoadingChat
-                        initialPrompt={seededChatPrompt}
-                        initialImages={seededChatImages}
-                        autoSendInitialPrompt={autoSendSeededPrompt}
-                    />
+                    <DataLoadingChat onTableLoaded={handleClose} />
                 </TabPanel>
 
                 {/* Local Folder Tab */}
diff --git a/src/views/dataLoadingSuggestions.ts b/src/views/dataLoadingSuggestions.ts
index 8d91b92b..f37e04f0 100644
--- a/src/views/dataLoadingSuggestions.ts
+++ b/src/views/dataLoadingSuggestions.ts
@@ -22,6 +22,12 @@ export interface DataLoadingSuggestion {
     onClick: () => void;
 }
 
+export interface SuggestionPayload {
+    text: string;
+    images: string[];
+    attachments: string[];
+}
+
 export interface BuildSuggestionsArgs {
     t: TFunction;
     setInput: (value: string) => void;
@@ -29,12 +35,22 @@ export interface BuildSuggestionsArgs {
     setAttachments: (names: string[]) => void;
     /** Optional hook that workspaces use to make sure a session exists before uploading. */
     ensureActiveWorkspace?: () => void;
+    /**
+     * Optional auto-run hook. When provided, suggestions submit the
+     * complete payload immediately (after any required async upload /
+     * data-URL prep) instead of just pre-filling the input. Callers
+     * typically wire this to a redux pending-submission dispatch so the
+     * payload survives the parent→child handoff without prop races.
+     * When absent, the suggestion behaves like a paste: it only fills
+     * the input fields via the `set*` callbacks.
+     */
+    requestAutoSend?: (payload: SuggestionPayload) => void;
 }
 
 const EXCEL_SAMPLE_NAME = 'climate-gas-indicator.xlsx';
 
 export function buildDataLoadingSuggestions(
-    { t, setInput, setImages, setAttachments, ensureActiveWorkspace }: BuildSuggestionsArgs,
+    { t, setInput, setImages, setAttachments, ensureActiveWorkspace, requestAutoSend }: BuildSuggestionsArgs,
 ): DataLoadingSuggestion[] {
     const kindAsk = t('upload.agentChatSuggestion.kind.ask', { defaultValue: 'ask' });
     const kindFind = t('upload.agentChatSuggestion.kind.find', { defaultValue: 'find' });
@@ -61,37 +77,38 @@ export function buildDataLoadingSuggestions(
 
     const iconSx = { fontSize: 14 };
 
+    // Common: fill the input fields AND (if auto-run is enabled) submit
+    // the payload in one step. The Excel sample does not use this: it
+    // fills first and auto-sends only after its upload completes.
+    const fillAndMaybeSend = (payload: SuggestionPayload) => {
+        setImages(payload.images);
+        setAttachments(payload.attachments);
+        setInput(payload.text);
+        requestAutoSend?.(payload);
+    };
+
     return [
         {
             kind: kindAsk,
             label: askLabel,
             icon: React.createElement(QuestionAnswerOutlinedIcon, { sx: iconSx }),
-            onClick: () => {
-                setImages([]);
-                setAttachments([]);
-                setInput(askLabel);
-            },
+            onClick: () => fillAndMaybeSend({ text: askLabel, images: [], attachments: [] }),
         },
         {
             kind: kindFind,
             label: findLabel,
             icon: React.createElement(SearchIcon, { sx: iconSx }),
-            onClick: () => {
-                setImages([]);
-                setAttachments([]);
-                setInput(findLabel);
-            },
+            onClick: () => fillAndMaybeSend({ text: findLabel, images: [], attachments: [] }),
         },
         {
             kind: kindExtract,
             label: extractExcelLabel,
             icon: React.createElement(TableChartOutlinedIcon, { sx: iconSx }),
             onClick: () => {
-                // Surface the attachment chip synchronously so it is
-                // always present when the user hits send, even if the
-                // upload below is still mid-flight. The chip is what
-                // gets serialised into the outgoing `[Uploaded: name]`
-                // mention and ultimately the chat bubble.
+                // Surface the attachment chip / input synchronously so
+                // it is visible during the async upload. The auto-send
+                // (if enabled) waits until the upload completes so the
+                // backend can actually find the scratch file.
                 setImages([]);
                 setAttachments([EXCEL_SAMPLE_NAME]);
                 setInput(extractExcelLabel);
@@ -108,6 +125,18 @@ export function buildDataLoadingSuggestions(
                             method: 'POST', body: formData,
                         });
                     })
+                    .then(({ data }) => {
+                        // The backend hash-suffixes the filename, so use the
+                        // server-assigned name for the chip and the mention
+                        // — otherwise the agent looks for a file that the
+                        // upload renamed and reports it missing.
+                        const scratchName = (data?.path || `scratch/${EXCEL_SAMPLE_NAME}`).replace(/^scratch\//, '');
+                        setAttachments([scratchName]);
+                        requestAutoSend?.({
+                            text: extractExcelLabel, images: [],
+                            attachments: [scratchName],
+                        });
+                    })
                     .catch(err => console.error('Sample Excel upload failed:', err));
             },
         },
@@ -116,16 +145,21 @@ export function buildDataLoadingSuggestions(
             label: extractImageLabel,
             icon: React.createElement(ImageOutlinedIcon, { sx: iconSx }),
             onClick: () => {
+                // The image must be read into a data URL before it can be
+                // shown as a chip or sent, so filling the input and any
+                // auto-send are deferred until the FileReader's `onload` fires.
                 fetch(exampleImageTable)
                     .then(res => res.blob())
                     .then(blob => {
                         const reader = new FileReader();
                         reader.onload = () => {
-                            if (reader.result) {
-                                setImages([reader.result as string]);
-                                setAttachments([]);
-                                setInput(extractImageLabel);
-                            }
+                            if (!reader.result) return;
+                            const dataUrl = reader.result as string;
+                            fillAndMaybeSend({
+                                text: extractImageLabel,
+                                images: [dataUrl],
+                                attachments: [],
+                            });
                         };
                         reader.readAsDataURL(blob);
                     });
@@ -135,11 +169,7 @@ export function buildDataLoadingSuggestions(
             kind: kindExtract,
             label: extractTextLabel,
             icon: React.createElement(DescriptionOutlinedIcon, { sx: iconSx }),
-            onClick: () => {
-                setImages([]);
-                setAttachments([]);
-                setInput(extractTextPrompt);
-            },
+            onClick: () => fillAndMaybeSend({ text: extractTextPrompt, images: [], attachments: [] }),
         },
     ];
 }
diff --git a/tests/backend/data/test_workspace_manager.py b/tests/backend/data/test_workspace_manager.py
index e2a00eb7..d82766c6 100644
--- a/tests/backend/data/test_workspace_manager.py
+++ b/tests/backend/data/test_workspace_manager.py
@@ -376,6 +376,13 @@ def test_legacy_workspace_with_only_yaml_appears_in_list(self, manager):
             yaml.safe_dump({"version": "1.1", "tables": {}}),
             encoding="utf-8",
         )
+        # Pretend the legacy workspace had session state with tables.
+        (ws_dir / "session_state.json").write_text(
+            json.dumps({"tables": [{"id": "t1"}]}),
+            encoding="utf-8",
+        )
+        # Trigger meta repair with a non-empty table count.
+        manager.save_session_state("legacy_ws", {"tables": [{"id": "t1"}]})
 
         ws_list = manager.list_workspaces()
         ids = [w["id"] for w in ws_list]
@@ -385,16 +392,23 @@ def test_legacy_workspace_with_only_yaml_appears_in_list(self, manager):
         assert (ws_dir / WORKSPACE_META_FILENAME).exists()
 
     def test_legacy_workspace_with_only_session_state_appears_in_list(self, manager):
-        """A directory with only session_state.json should be auto-repaired."""
+        """A directory holding session_state.json (with tables), re-saved so
+        its meta records a table count, is visible in list_workspaces. The
+        displayName comes from the session state's activeWorkspace."""
         ws_dir = manager.root / "state_only"
         ws_dir.mkdir(parents=True)
         (ws_dir / "session_state.json").write_text(
             json.dumps({
-                "tables": [],
+                "tables": [{"id": "t1", "name": "T1"}],
                 "activeWorkspace": {"displayName": "My Old Session"},
             }),
             encoding="utf-8",
         )
+        # Re-save so meta is written with tableCount > 0.
+        manager.save_session_state("state_only", {
+            "tables": [{"id": "t1", "name": "T1"}],
+            "activeWorkspace": {"displayName": "My Old Session"},
+        })
 
         ws_list = manager.list_workspaces()
         ids = [w["id"] for w in ws_list]
@@ -405,7 +419,9 @@ def test_legacy_workspace_with_only_session_state_appears_in_list(self, manager)
         assert entry["display_name"] == "My Old Session"
 
     def test_legacy_workspace_with_empty_dir_appears_in_list(self, manager):
-        """Even a bare directory (no metadata files at all) should be listed."""
+        """A bare directory with no metadata at all is auto-repaired by
+        _ensure_meta (meta.json gets created with fallback displayName)
+        and appears in list_workspaces."""
         ws_dir = manager.root / "bare"
         ws_dir.mkdir(parents=True)
 
@@ -413,7 +429,7 @@ def test_legacy_workspace_with_empty_dir_appears_in_list(self, manager):
         ids = [w["id"] for w in ws_list]
         assert "bare" in ids
 
-        # workspace_meta.json auto-created with fallback displayName = dir name
+        # Auto-repair created the meta with a fallback displayName.
         meta = json.loads((ws_dir / WORKSPACE_META_FILENAME).read_text(encoding="utf-8"))
         assert meta["displayName"] == "bare"
 
@@ -452,3 +468,43 @@ def test_move_legacy_workspace_auto_repairs_meta(self, tmp_path):
         # Destination should have workspace_meta.json
         dst_ws = dst.get_workspace_path("old_ws")
         assert (dst_ws / WORKSPACE_META_FILENAME).exists()
+
+
+class TestEmptyWorkspaceVisibility:
+    """list_workspaces() lists every workspace directory, including
+    empty "Untitled Session" entries from abandoned data-loading
+    chats. Users manage (rename/delete) these themselves via the
+    sidebar — they are not hidden."""
+
+    def test_empty_workspace_is_visible(self, manager):
+        manager.create_workspace("ghost")
+        # No save_session_state — meta has no tableCount/chartCount.
+
+        ws_list = manager.list_workspaces()
+
+        assert any(w["id"] == "ghost" for w in ws_list)
+        assert manager.workspace_exists("ghost")
+
+    def test_workspace_with_tables_is_visible(self, manager):
+        manager.create_workspace("real")
+        manager.save_session_state("real", {
+            "tables": [{"id": "t1", "name": "T1"}],
+            "activeWorkspace": {"id": "real", "displayName": "Real"},
+        })
+
+        ws_list = manager.list_workspaces()
+
+        assert any(w["id"] == "real" for w in ws_list)
+
+    def test_zero_count_workspace_is_visible(self, manager):
+        """A workspace whose tables were all deleted (zero tables) still
+        appears in the list — the user decides whether to remove it."""
+        manager.create_workspace("emptied")
+        manager.save_session_state("emptied", {
+            "tables": [],
+            "activeWorkspace": {"id": "emptied", "displayName": "Emptied"},
+        })
+
+        ws_list = manager.list_workspaces()
+
+        assert any(w["id"] == "emptied" for w in ws_list)

From 18cf3603dfd7da2197f4ba131e80ecb2fab285f3 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Thu, 28 May 2026 22:21:14 -0700
Subject: [PATCH 02/29] small fixes

---
 src/views/DataFormulator.tsx    |  4 +--
 src/views/DataSourceSidebar.tsx | 64 ++++++++-------------------------
 2 files changed, 17 insertions(+), 51 deletions(-)

diff --git a/src/views/DataFormulator.tsx b/src/views/DataFormulator.tsx
index 00e8086e..b340a477 100644
--- a/src/views/DataFormulator.tsx
+++ b/src/views/DataFormulator.tsx
@@ -571,7 +571,7 @@ export const DataFormulatorFC = ({ }) => {
     const fixedSplitPane = ( 
         <Box sx={{display: 'flex', flexDirection: 'row', height: '100%'}}>
             <DataSourceSidebar
-                onOpenUploadDialog={(tab) => openUploadDialog((tab ?? 'add-connection') as UploadTabType)}
+                onOpenUploadDialog={(tab) => openUploadDialog((tab ?? 'menu') as UploadTabType)}
                 connectorRefreshKey={connectorRefreshKey}
             />
             <Box ref={containerRef} className="outer-allotment" sx={{
@@ -936,7 +936,7 @@ export const DataFormulatorFC = ({ }) => {
                 {tables.length > 0 ? fixedSplitPane : (
                     <Box sx={{ display: 'flex', flexDirection: 'row', height: '100%' }}>
                         <DataSourceSidebar
-                            onOpenUploadDialog={(tab) => openUploadDialog((tab ?? 'add-connection') as UploadTabType)}
+                            onOpenUploadDialog={(tab) => openUploadDialog((tab ?? 'menu') as UploadTabType)}
                             connectorRefreshKey={connectorRefreshKey}
                         />
                         {dataUploadRequestBox}
diff --git a/src/views/DataSourceSidebar.tsx b/src/views/DataSourceSidebar.tsx
index a8c4715d..7381a423 100644
--- a/src/views/DataSourceSidebar.tsx
+++ b/src/views/DataSourceSidebar.tsx
@@ -42,7 +42,6 @@ import { VirtualizedCatalogTree } from '../components/VirtualizedCatalogTree';
 
 import StorageIcon from '@mui/icons-material/Storage';
 import AddIcon from '@mui/icons-material/Add';
-import FileUploadOutlinedIcon from '@mui/icons-material/FileUploadOutlined';
 import FolderOpenIcon from '@mui/icons-material/FolderOpen';
 import FolderOutlinedIcon from '@mui/icons-material/FolderOutlined';
 import UploadFileIcon from '@mui/icons-material/UploadFile';
@@ -51,9 +50,6 @@ import ChevronLeftIcon from '@mui/icons-material/ChevronLeft';
 import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
 import ChevronRightIcon from '@mui/icons-material/ChevronRight';
 import RefreshIcon from '@mui/icons-material/Refresh';
-import ContentPasteOutlinedIcon from '@mui/icons-material/ContentPasteOutlined';
-import SmartToyOutlinedIcon from '@mui/icons-material/SmartToyOutlined';
-import LinkOutlinedIcon from '@mui/icons-material/LinkOutlined';
 import LinkOffOutlinedIcon from '@mui/icons-material/LinkOffOutlined';
 import DeleteOutlineIcon from '@mui/icons-material/DeleteOutline';
 import EditOutlinedIcon from '@mui/icons-material/EditOutlined';
@@ -159,7 +155,7 @@ export const DataSourceSidebar: React.FC<{
     // built-in sample_datasets connector is shown there, giving users
     // something useful to explore immediately. The upgrade message only
     // appears when they try to add a new connector or link a folder.
-    const [initialTab, setInitialTab] = useState<'upload' | 'sources' | 'sessions' | 'knowledge'>('sources');
+    const [initialTab, setInitialTab] = useState<'sources' | 'sessions' | 'knowledge'>('sources');
 
     // External callers (e.g. SaveExperienceButton on success) can ask the
     // sidebar to open and switch to a specific tab.
@@ -277,6 +273,18 @@ export const DataSourceSidebar: React.FC<{
                 pt: 1,
                 gap: 0.5,
             }}>
+                {/* Primary action — adding data is the main task. Styled like
+                    the view-switcher icons but kept in primary color as a
+                    subtle cue; opens the upload dialog (landing menu). */}
+                <Tooltip title={t('sidebar.openUpload', { defaultValue: 'Add data' })} placement="right">
+                    <IconButton size="small" onClick={() => onOpenUploadDialog?.()} sx={{
+                        color: 'primary.main',
+                        borderRadius: 1,
+                        '&:hover': { bgcolor: 'action.hover' },
+                    }}>
+                        <AddIcon fontSize="small" />
+                    </IconButton>
+                </Tooltip>
                 <Tooltip title={t('sidebar.sessions', { defaultValue: 'Saved workspaces' })} placement="right">
                     <IconButton size="small" onClick={() => { setInitialTab('sessions'); if (!isOpen) toggle(); else if (initialTab !== 'sessions') setInitialTab('sessions'); else toggle(); }} sx={{
                         color: isOpen && initialTab === 'sessions' ? 'primary.main' : 'text.secondary',
@@ -295,15 +303,6 @@ export const DataSourceSidebar: React.FC<{
                         <RelationalDBIcon fontSize="small" />
                     </IconButton>
                 </Tooltip>
-                <Tooltip title={t('sidebar.openUpload', { defaultValue: 'Add data' })} placement="right">
-                    <IconButton size="small" onClick={() => { setInitialTab('upload'); if (!isOpen) toggle(); else if (initialTab !== 'upload') setInitialTab('upload'); else toggle(); }} sx={{
-                        color: isOpen && initialTab === 'upload' ? 'primary.main' : 'text.secondary',
-                        bgcolor: isOpen && initialTab === 'upload' ? 'action.selected' : 'transparent',
-                        borderRadius: 1,
-                    }}>
-                        <FileUploadOutlinedIcon fontSize="small" />
-                    </IconButton>
-                </Tooltip>
                 <Tooltip title={t('sidebar.knowledge', { defaultValue: 'Agent knowledge' })} placement="right">
                     <IconButton size="small" onClick={() => { setInitialTab('knowledge'); if (!isOpen) toggle(); else if (initialTab !== 'knowledge') setInitialTab('knowledge'); else toggle(); }} sx={{
                         color: isOpen && initialTab === 'knowledge' ? 'primary.main' : 'text.secondary',
@@ -347,7 +346,7 @@ const DataSourceSidebarPanel: React.FC<{
     panelWidth: number;
     onOpenUploadDialog?: (tab?: string) => void;
     onCollapse: () => void;
-    initialTab?: 'upload' | 'sources' | 'sessions' | 'knowledge';
+    initialTab?: 'sources' | 'sessions' | 'knowledge';
     connectorRefreshKey?: number;
     disableConnectors?: boolean;
 }> = ({ panelWidth, onOpenUploadDialog, onCollapse, initialTab = 'sources', connectorRefreshKey = 0, disableConnectors = false }) => {
@@ -419,7 +418,7 @@ const DataSourceSidebarPanel: React.FC<{
     const [searchingCatalog, setSearchingCatalog] = useState<Record<string, boolean>>({});
 
     // Sidebar tab: 'sources' or 'sessions' or 'knowledge'
-    const [activeTab, setActiveTab] = useState<'upload' | 'sources' | 'sessions' | 'knowledge'>(initialTab);
+    const [activeTab, setActiveTab] = useState<'sources' | 'sessions' | 'knowledge'>(initialTab);
 
     // Sync tab when rail icon switches it
     useEffect(() => {
@@ -1292,39 +1291,6 @@ const DataSourceSidebarPanel: React.FC<{
             overflow: 'hidden',
         }}>
 
-            {/* ── Upload Data tab ── */}
-            {activeTab === 'upload' && (
-            <Box sx={{ flex: 1, display: 'flex', flexDirection: 'column', overflow: 'hidden' }}>
-                <Box sx={{ display: 'flex', alignItems: 'center', px: 1.5, py: 0.75, borderBottom: `1px solid ${borderColor.view}`, flexShrink: 0 }}>
-                    <Typography sx={{ fontSize: 13, fontWeight: 500, color: 'text.primary', flex: 1 }}>
-                        {t('sidebar.uploadData', { defaultValue: 'Upload Data' })}
-                    </Typography>
-                    <Tooltip title={t('sidebar.collapse', { defaultValue: 'Collapse' })} placement="bottom">
-                        <IconButton size="small" onClick={onCollapse} sx={{ p: 0.5, color: 'text.disabled', '&:hover': { color: 'text.secondary' } }}>
-                            <ChevronLeftIcon sx={{ fontSize: 16 }} />
-                        </IconButton>
-                    </Tooltip>
-                </Box>
-                <Box sx={{ flex: 1, overflowY: 'auto', overflowX: 'hidden', overscrollBehavior: 'contain', py: 0.5 }}>
-                    {[
-                        { icon: <UploadFileIcon sx={{ fontSize: 16, color: 'text.secondary' }} />, label: t('upload.uploadFile', { defaultValue: 'Upload file' }), tab: 'upload' },
-                        { icon: <ContentPasteOutlinedIcon sx={{ fontSize: 16, color: 'text.secondary' }} />, label: t('upload.pasteData', { defaultValue: 'Paste data' }), tab: 'paste' },
-                        { icon: <SmartToyOutlinedIcon sx={{ fontSize: 16, color: 'text.secondary' }} />, label: t('upload.extractData', { defaultValue: 'Data Assistant' }), tab: 'extract' },
-                        { icon: <LinkOutlinedIcon sx={{ fontSize: 16, color: 'text.secondary' }} />, label: t('upload.loadFromUrl', { defaultValue: 'Load from URL' }), tab: 'url' },
-                    ].map((item, i) => (
-                        <Box
-                            key={i}
-                            onClick={() => onOpenUploadDialog?.(item.tab)}
-                            sx={{ display: 'flex', alignItems: 'center', gap: 0.75, px: 1.5, py: 0.75, cursor: 'pointer', color: 'text.primary', '&:hover': { bgcolor: 'action.hover' }, userSelect: 'none' }}
-                        >
-                            {item.icon}
-                            <Typography noWrap sx={{ fontSize: 12, fontWeight: 500 }}>{item.label}</Typography>
-                        </Box>
-                    ))}
-                </Box>
-            </Box>
-            )}
-
             {/* ── Data Connectors tab ──
                 Sample datasets remain available even when external
                 connectors are disabled; the Add Connector / Link Folder

From 4cb0a2f4f5e32bd5132a84b77cdcb3cc12f50f56 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Thu, 28 May 2026 22:50:07 -0700
Subject: [PATCH 03/29] cleanup

---
 src/views/ChartRecBox.tsx       | 14 +++++++-------
 src/views/SimpleChartRecBox.tsx | 34 +++++----------------------------
 2 files changed, 12 insertions(+), 36 deletions(-)

diff --git a/src/views/ChartRecBox.tsx b/src/views/ChartRecBox.tsx
index 872dc3c1..9eb26085 100644
--- a/src/views/ChartRecBox.tsx
+++ b/src/views/ChartRecBox.tsx
@@ -292,10 +292,10 @@ export const ChartRecBox: FC<ChartRecBoxProps> = function ({ tableId, placeHolde
                         type={current ? undefined : 'button'}
                         onClick={current ? undefined : () => dispatch(dfActions.setFocused({ type: 'table', tableId: table.id }))}
                         sx={{
-                            display: 'inline-flex', alignItems: 'center', gap: current ? '6px' : '3px',
+                            display: 'inline-flex', alignItems: 'center', gap: '3px',
                             border: 'none', background: 'transparent', p: 0,
                             fontFamily: theme.typography.fontFamily,
-                            fontSize: current ? 16 : 11, lineHeight: 1.4,
+                            fontSize: 11, lineHeight: 1.4,
                             color: current ? 'primary.main' : 'text.secondary',
                             fontWeight: current ? 600 : 400,
                             cursor: current ? 'default' : 'pointer',
@@ -304,7 +304,7 @@ export const ChartRecBox: FC<ChartRecBoxProps> = function ({ tableId, placeHolde
                             '&:hover': current ? undefined : { color: 'primary.main' },
                         }}
                     >
-                        <TableIcon sx={{ fontSize: current ? 16 : 12, color: 'inherit' }} />
+                        <TableIcon sx={{ fontSize: 12, color: 'inherit' }} />
                         {table.displayId}
                     </Box>
                 );
@@ -682,10 +682,10 @@ export const ChartRecBox: FC<ChartRecBoxProps> = function ({ tableId, placeHolde
                     );
                 };
 
-                // Center cluster auto-scales with chart count; neighbour
-                // clusters are halved and dimmed to read as context.
-                const centerN = Math.min(chartsForTable(currentTable.id).length, 8);
-                const centerScale = centerN <= 3 ? 1 : centerN <= 5 ? 0.82 : 0.66;
+                // All clusters render at the same scale; the current
+                // cluster is only distinguished by not being dimmed and by
+                // showing more thumbnails.
+                const centerScale = 0.5;
                 const sideScale = 0.5;
 
                 return (
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index c96dec65..5e7bd63b 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -41,7 +41,7 @@ import StopIcon from '@mui/icons-material/Stop';
 import AutoGraphIcon from '@mui/icons-material/AutoGraph';
 import DescriptionOutlinedIcon from '@mui/icons-material/DescriptionOutlined';
 import { UnifiedDataUploadDialog } from './UnifiedDataUploadDialog';
-import { transition } from '../app/tokens';
+import { borderColor, transition } from '../app/tokens';
 import { Theme } from '@mui/material/styles';
 import { useTranslation } from 'react-i18next';
 import { shouldAutoFocusGeneratedChart } from '../app/agentInteractionPolicy';
@@ -1380,12 +1380,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
     }, [pendingClarification, dispatch, t]);
 
     const isReportMode = selectedAgent === 'report';
-    const gradientBorder = isReportMode
-        ? `linear-gradient(135deg, ${alpha(theme.palette.warning.main, 0.6)}, ${alpha(theme.palette.warning.dark, 0.5)})`
-        : `linear-gradient(135deg, ${alpha(theme.palette.primary.main, 0.6)}, ${alpha(theme.palette.secondary.main, 0.55)})`;
-    const workingBorder = isReportMode
-        ? `linear-gradient(135deg, ${alpha(theme.palette.warning.main, 0.3)}, ${alpha(theme.palette.warning.dark, 0.25)})`
-        : `linear-gradient(135deg, ${alpha(theme.palette.primary.main, 0.3)}, ${alpha(theme.palette.secondary.main, 0.25)})`;
 
     // Landing / "no thread yet" highlight: when the user has loaded data
     // but hasn't started an exploration on the focused table (no real
@@ -1419,12 +1413,9 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             mx: 1, mb: 1, mt: 0.5,
             px: 1.25, pt: 1, pb: 0.5,
             borderRadius: '12px',
-            // The 2-tone border is drawn by the `::before` gradient
-            // overlay below (works through border-radius + masks). We
-            // intentionally leave the Card's own border off so the two
-            // don't fight; focus state uses a shadow halo instead of a
-            // border-color shift.
-            border: 'none',
+            // Standard single-tone input style (matches AgentChatInput): a
+            // solid divider border that turns the accent color on focus.
+            border: `1px solid ${borderColor.divider}`,
             outline: 'none',
             position: 'relative',
             overflow: isChatFormulating ? 'hidden' : 'visible',
@@ -1454,24 +1445,9 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             } : {}),
             '&:focus-within': {
                 animation: 'none',
+                borderColor: isReportMode ? theme.palette.warning.main : theme.palette.primary.main,
                 boxShadow: `0 0 0 2px ${alpha(isReportMode ? theme.palette.warning.main : theme.palette.primary.main, 0.15)}, 0 2px 10px rgba(32, 33, 36, 0.14)`,
             },
-            // Gradient border via pseudo-element (works with border-radius)
-            '&::before': {
-                content: '""',
-                position: 'absolute',
-                inset: 0,
-                borderRadius: 'inherit',
-                padding: '1.5px',
-                background: isChatFormulating 
-                    ? workingBorder 
-                    : gradientBorder,
-                WebkitMask: 'linear-gradient(#fff 0 0) content-box, linear-gradient(#fff 0 0)',
-                WebkitMaskComposite: 'xor',
-                maskComposite: 'exclude',
-                pointerEvents: 'none',
-                zIndex: 3,
-            },
         }}
         >
             {clarificationQuestions?.kind === 'clarification' && clarificationQuestions.questions && pendingClarification && !isChatFormulating && (

From c616338d67c4c12cb7290139ce046213abc44098 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 29 May 2026 00:06:50 -0700
Subject: [PATCH 04/29] some updates

---
 src/views/DataView.tsx          | 92 +++++++++++++++++++++++++++++++--
 src/views/VisualizationView.tsx | 20 ++++---
 2 files changed, 98 insertions(+), 14 deletions(-)

diff --git a/src/views/DataView.tsx b/src/views/DataView.tsx
index aa1263d0..f6c4a79f 100644
--- a/src/views/DataView.tsx
+++ b/src/views/DataView.tsx
@@ -2,11 +2,15 @@
 // Licensed under the MIT License.
 
 import React, { FC, useEffect, useMemo, useCallback } from 'react';
+import ReactDOM from 'react-dom';
 
 import _ from 'lodash';
 
-import { Typography, Box, Link, Breadcrumbs, useTheme, Fade } from '@mui/material';
+import { Typography, Box, Link, Breadcrumbs, useTheme, Fade, IconButton, Tooltip } from '@mui/material';
 import { alpha } from '@mui/material/styles';
+import { useTranslation } from 'react-i18next';
+import OpenInFullIcon from '@mui/icons-material/OpenInFull';
+import CloseFullscreenIcon from '@mui/icons-material/CloseFullscreen';
 
 import '../scss/DataView.scss';
 
@@ -16,11 +20,19 @@ import { useDispatch, useSelector } from 'react-redux';
 import { Type } from '../data/types';
 import { SelectableDataGrid } from './SelectableDataGrid';
 import { formatCellValue, getColumnAlign } from './ViewUtils';
+import { borderColor } from '../app/tokens';
 
 export interface FreeDataViewProps {
+    // When true, render a maximize/restore toggle that pops the table into a
+    // full-canvas overlay. Used wherever the grid is shown inline (under a
+    // chart, or as the focused-table preview).
+    maximizable?: boolean;
 }
 
-export const FreeDataViewFC: FC<FreeDataViewProps> = function DataView() {
+export const FreeDataViewFC: FC<FreeDataViewProps> = function DataView({ maximizable }) {
+
+    const { t } = useTranslation();
+    const [maximized, setMaximized] = React.useState(false);
 
     const dispatch = useDispatch();
 
@@ -32,6 +44,7 @@ export const FreeDataViewFC: FC<FreeDataViewProps> = function DataView() {
     const focusedTableId = useMemo(() => {
         if (!focusedId) return undefined;
         if (focusedId.type === 'table') return focusedId.tableId;
+        if (focusedId.type !== 'chart') return undefined;
         const chartId = focusedId.chartId;
         const chart = allCharts.find(c => c.id === chartId);
         return chart?.tableRef;
@@ -108,7 +121,7 @@ export const FreeDataViewFC: FC<FreeDataViewProps> = function DataView() {
         ];
     }, [targetTable, rowData, conceptShelfItems]);
 
-    return (
+    const grid = (
         <Box sx={{height: "100%", display: "flex", flexDirection: "column", background: "rgba(0,0,0,0.02)"}}>
             <Fade in={true} timeout={600} key={targetTable?.id}>
                 <Box sx={{height: '100%'}}>
@@ -124,4 +137,77 @@ export const FreeDataViewFC: FC<FreeDataViewProps> = function DataView() {
             </Fade>
         </Box>
     );
+
+    if (!maximizable) {
+        return grid;
+    }
+
+    const toggleButton = (
+        <Tooltip title={maximized ? t('chart.restoreTable', { defaultValue: 'Restore' }) : t('chart.maximizeTable', { defaultValue: 'Maximize table' })} placement="left">
+            <IconButton
+                size="small"
+                onClick={() => setMaximized(m => !m)}
+                sx={{
+                    color: 'text.secondary',
+                    '&:hover': { color: 'primary.main', backgroundColor: 'transparent' },
+                }}
+            >
+                {maximized ? <CloseFullscreenIcon sx={{ fontSize: 16 }} /> : <OpenInFullIcon sx={{ fontSize: 16 }} />}
+            </IconButton>
+        </Tooltip>
+    );
+
+    // The toggle button sits just outside the table to the right (a slim panel),
+    // so it never overlaps the column headers and the card keeps its original look.
+    // In maximized mode the surrounding overlay already provides the card frame.
+    const cardSx = maximized ? { overflow: 'hidden' } : {
+        overflow: 'hidden',
+        borderRadius: '8px',
+        border: `1px solid ${borderColor.divider}`,
+        transition: 'box-shadow 0.2s ease',
+        '&:hover': { boxShadow: '0 0 8px rgba(25, 118, 210, 0.25)' },
+    };
+    const framed = (
+        <Box sx={{ height: '100%', width: '100%', display: 'flex', flexDirection: 'row' }}>
+            <Box sx={{ flex: 1, minWidth: 0, ...cardSx }}>
+                {grid}
+            </Box>
+            <Box sx={{ flexShrink: 0, display: 'flex', alignItems: 'flex-start', pt: 0.25, pl: 0.25 }}>
+                {toggleButton}
+            </Box>
+        </Box>
+    );
+
+    if (maximized) {
+        const canvas = typeof document !== 'undefined' ? document.getElementById('vis-view-canvas') : null;
+        const overlay = (
+            <>
+                {/* Transparent click-catcher — click outside to restore. Scoped to the visualization view. */}
+                <Box
+                    onClick={() => setMaximized(false)}
+                    sx={{ position: 'absolute', inset: 0, zIndex: 1299 }}
+                />
+                {/* Table overlay filling the visualization view. */}
+                <Box sx={{
+                    position: 'absolute', inset: 12, zIndex: 1300,
+                    borderRadius: '8px', overflow: 'hidden',
+                    border: `1px solid ${borderColor.divider}`,
+                    boxShadow: '0 8px 32px rgba(0,0,0,0.2)',
+                    backgroundColor: 'background.paper',
+                    p: 0.5,
+                }}>
+                    {framed}
+                </Box>
+            </>
+        );
+        return (
+            <>
+                {/* Keep the inline slot occupied so surrounding layout doesn't jump. */}
+                <Box sx={{ height: '100%', width: '100%' }} />
+                {canvas ? ReactDOM.createPortal(overlay, canvas) : overlay}
+            </>
+        );
+    }
+
+    return framed;
 }
\ No newline at end of file
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 96d91d8c..7b6d18b4 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -932,11 +932,12 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                             return sum + Math.max(80, Math.min(280, contentLen * 10)) + 60;
                         }, ROW_ID_COL_WIDTH);
                         const SCROLLBAR_WIDTH = 17;
-                        const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16));
+                        // +34px gutter so the maximize button can sit just outside the table on the right.
+                        const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16)) + 34;
 
                         return (
-                            <Box sx={{ ...panelBoxSx, padding: 0, height: adaptiveHeight, width: adaptiveWidth, overflow: 'hidden', flexShrink: 0 }}>
-                                <FreeDataViewFC />
+                            <Box sx={{ margin: '8px auto 24px auto', padding: 0, height: adaptiveHeight, width: adaptiveWidth, overflow: 'hidden', flexShrink: 0 }}>
+                                <FreeDataViewFC maximizable />
                             </Box>
                         );
                     })()}
@@ -1096,7 +1097,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
         </Tooltip>
     </Stack>, [localScaleFactor, t]);
 
-    return <Box ref={componentRef} sx={{overflow: "hidden", display: 'flex', flex: 1, position: 'relative'}}>
+    return <Box ref={componentRef} id="vis-view-canvas" sx={{overflow: "hidden", display: 'flex', flex: 1, position: 'relative'}}>
         {synthesisRunning ? <Box sx={{
                 position: "absolute", height: "calc(100%)", width: "calc(100%)", zIndex: 1001, 
                 backgroundColor: "rgba(243, 243, 243, 0.8)", display: "flex", alignItems: "center"
@@ -1210,7 +1211,7 @@ export const VisualizationViewFC: FC<VisPanelProps> = function VisualizationView
             }
         </Box>
         return (
-            <Box sx={{ width: "100%", overflow: "hidden", display: "flex", flexDirection: "row" }}>
+            <Box id="vis-view-canvas" sx={{ width: "100%", overflow: "hidden", display: "flex", flexDirection: "row", position: 'relative' }}>
                 <Box sx={{ overflow: "hidden", display: 'flex', flex: 1 }}>
                     <Box className="vis-scroll" sx={{ display: 'flex', overflowY: 'auto', overflowX: 'hidden', flexDirection: 'column', flex: 1 }}>
                         <Box sx={{ minHeight: 'min(75vh, 600px)', width: '100%', display: 'flex', flexDirection: 'column', flex: 1, justifyContent: 'center', alignItems: 'center' }}>
@@ -1281,18 +1282,15 @@ export const VisualizationViewFC: FC<VisPanelProps> = function VisualizationView
                                 return sum + Math.max(80, Math.min(280, contentLen * 10)) + 60;
                             }, ROW_ID_COL_WIDTH);
                             const SCROLLBAR_WIDTH = 17;
-                            const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16));
+                            // +34px gutter so the maximize button can sit just outside the table on the right.
+                            const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16)) + 34;
                             return (
                                 <Box sx={{
                                     margin: '8px auto 24px auto', padding: 0,
                                     height: adaptiveHeight, width: adaptiveWidth,
-                                    borderRadius: '8px',
-                                    border: `1px solid ${borderColor.divider}`,
-                                    transition: 'box-shadow 0.2s ease',
-                                    '&:hover': { boxShadow: '0 0 8px rgba(25, 118, 210, 0.25)' },
                                     overflow: 'hidden', flexShrink: 0,
                                 }}>
-                                    <FreeDataViewFC />
+                                    <FreeDataViewFC maximizable />
                                 </Box>
                             );
                         })()}

From a59ec9e04fd3c0c447f3fbffbe2a0f0573b7dd04 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 29 May 2026 09:25:24 -0700
Subject: [PATCH 05/29] some cleanup

---
 src/views/DataFormulator.tsx    | 22 ++++----
 src/views/DataThread.tsx        | 14 ++---
 src/views/SimpleChartRecBox.tsx | 98 ++++++++++++++++++++++++++-------
 src/views/threadLayout.ts       | 39 +++++++++++++
 4 files changed, 133 insertions(+), 40 deletions(-)
 create mode 100644 src/views/threadLayout.ts

diff --git a/src/views/DataFormulator.tsx b/src/views/DataFormulator.tsx
index b340a477..90547525 100644
--- a/src/views/DataFormulator.tsx
+++ b/src/views/DataFormulator.tsx
@@ -40,6 +40,7 @@ import { DndProvider } from 'react-dnd'
 import { HTML5Backend } from 'react-dnd-html5-backend'
 import { toolName } from '../app/App';
 import { DataThread } from './DataThread';
+import { threadPaneWidth } from './threadLayout';
 
 import dfLogo from '../assets/df-logo.png';
 import exampleImageTable from "../assets/example-image-table.png";
@@ -443,12 +444,9 @@ export const DataFormulatorFC = ({ }) => {
         //boxShadow: '0 0 5px rgba(0,0,0,0.1)',
     }
 
-    // Discrete column snapping for DataThread
-    const CARD_WIDTH = 220;
-    const CARD_GAP = 12;
-    const COLUMN_WIDTH = CARD_WIDTH + CARD_GAP;
-    const PANE_PADDING = 48;
-    const columnSize = (n: number) => n * COLUMN_WIDTH + PANE_PADDING;
+    // Discrete column snapping for DataThread.
+    // Column geometry is defined once in ./threadLayout and shared with
+    // DataThread so the pane snap points line up with the rendered columns.
     const allotmentRef = useRef<AllotmentHandle>(null);
     const containerRef = useRef<HTMLDivElement>(null);
 
@@ -459,13 +457,13 @@ export const DataFormulatorFC = ({ }) => {
         let bestCols = 1;
         let bestDist = Infinity;
         for (let n = 1; n <= 3; n++) {
-            const dist = Math.abs(raw - columnSize(n));
+            const dist = Math.abs(raw - threadPaneWidth(n));
             if (dist < bestDist) {
                 bestDist = dist;
                 bestCols = n;
             }
         }
-        const snapped = columnSize(bestCols);
+        const snapped = threadPaneWidth(bestCols);
         if (Math.abs(raw - snapped) > 2) {
             const totalWidth = sizes.reduce((a, b) => a + b, 0);
             allotmentRef.current.resize([snapped, totalWidth - snapped]);
@@ -545,10 +543,10 @@ export const DataFormulatorFC = ({ }) => {
         let newSize: number | null = null;
         if (prev <= 1 && threadCount > 1) {
             // Case 1: was 1 thread, now 2+ → expand to 2 columns
-            newSize = columnSize(2);
+            newSize = threadPaneWidth(2);
         } else if (prev > 1 && threadCount <= 1) {
             // Case 2: was 2+ threads, now 1 → shrink to 1 column
-            newSize = columnSize(1);
+            newSize = threadPaneWidth(1);
         }
         // Case 3: was 2+ threads and still 2+ → don't change (respect user's manual setting)
 
@@ -581,7 +579,9 @@ export const DataFormulatorFC = ({ }) => {
                     position: 'relative'}}>
                 <Allotment ref={allotmentRef} onDragEnd={snapToColumns} proportionalLayout={false}>
                     {tables.length > 0 ? (
-                        <Allotment.Pane minSize={columnSize(1)} preferredSize={columnSize(preferredColumns)} maxSize={columnSize(3)} snap={false}>
+                        <Allotment.Pane minSize={threadPaneWidth(1)} 
+                                preferredSize={threadPaneWidth(preferredColumns)} 
+                                maxSize={threadPaneWidth(3)} snap={false}>
                             <DataThread sx={{
                                 display: 'flex', 
                                 flexDirection: 'column',
diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx
index c69a71a5..940cb4ef 100644
--- a/src/views/DataThread.tsx
+++ b/src/views/DataThread.tsx
@@ -97,6 +97,7 @@ import { ViewBorderStyle, ComponentBorderStyle, transition, radius, borderColor
 
 import { SimpleChartRecBox } from './SimpleChartRecBox';
 import { InteractionEntryCard, ResolvedConversationCard, getEntryGutterIcon, getDefaultGutterIcon, PlanStepsView } from './InteractionEntryCard';
+import { CARD_WIDTH, CARD_GAP, PANEL_PADDING, fittableThreadColumns } from './threadLayout';
 
 /** Pick the icon component for a step line based on known prefixes. */
 // Re-exported from InteractionEntryCard — kept here for backward compat with gutter icon logic
@@ -3120,16 +3121,9 @@ export const DataThread: FC<{sx?: SxProps}> = function ({ sx }) {
     // only one column fits, splitting a long thread into segments adds visual
     // overhead (continuation headers + ghost parents) without any layout
     // benefit, since the segments would just stack in the same single column.
-    const CARD_GAP = 12; // padding + spacing between cards in a column
-    const PANEL_PADDING = 16;
-    // 220 visual card width + 14px right gutter (CARD_CONTENT_PR) so cards
-    // keep their original size while gaining a right margin that balances
-    // the left timeline gutter.
-    const CARD_WIDTH = 234;
-    const COLUMN_WIDTH = CARD_WIDTH + CARD_GAP;
-    // n columns need: n*CARD_WIDTH + (n-1)*CARD_GAP + PANEL_PADDING
-    // Solving for n: n <= (containerWidth - PANEL_PADDING + CARD_GAP) / COLUMN_WIDTH
-    const fittableColumns = Math.max(1, Math.min(3, Math.floor((containerWidth - PANEL_PADDING + CARD_GAP) / COLUMN_WIDTH)));
+    // Column geometry (CARD_WIDTH / CARD_GAP / PANEL_PADDING) is defined once
+    // in ./threadLayout and shared with DataFormulator's pane snapping.
+    const fittableColumns = fittableThreadColumns(containerWidth);
 
     // Adaptively split long derivation chains so the resulting segments fill
     // the available columns evenly.  See `computeSplitExtraLeaves` for the
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index 5e7bd63b..23b30c7c 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -40,7 +40,7 @@ import StopIcon from '@mui/icons-material/Stop';
 
 import AutoGraphIcon from '@mui/icons-material/AutoGraph';
 import DescriptionOutlinedIcon from '@mui/icons-material/DescriptionOutlined';
-import { UnifiedDataUploadDialog } from './UnifiedDataUploadDialog';
+import InsertDriveFileOutlinedIcon from '@mui/icons-material/InsertDriveFileOutlined';
 import { borderColor, transition } from '../app/tokens';
 import { Theme } from '@mui/material/styles';
 import { useTranslation } from 'react-i18next';
@@ -151,7 +151,8 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
     const [mentionHighlightIdx, setMentionHighlightIdx] = useState(0);
     const [selectedAgent, setSelectedAgent] = useState<'explore' | 'report'>('explore');
     const [attachedImages, setAttachedImages] = useState<string[]>([]);
-    const [uploadDialogOpen, setUploadDialogOpen] = useState(false);
+    const [attachedFiles, setAttachedFiles] = useState<{ name: string; content: string }[]>([]);
+    const fileInputRef = useRef<HTMLInputElement | null>(null);
     const agentAbortRef = useRef<AbortController | null>(null);
     const userChartFocusLockedRef = useRef(false);
     const lastAutoFocusedChartIdRef = useRef<string | null>(null);
@@ -296,6 +297,31 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         }
     }, []);
 
+    // Attach files as conversation context. Images become reference images
+    // (sent to the model as attachments); text-like files are read as text
+    // and folded into the agent prompt as context.
+    const handleAttachFiles = React.useCallback((fileList: FileList | null) => {
+        if (!fileList) return;
+        const MAX_TEXT_CHARS = 50000;
+        Array.from(fileList).forEach(file => {
+            if (file.type.startsWith('image/')) {
+                const reader = new FileReader();
+                reader.onload = () => setAttachedImages(prev => [...prev, reader.result as string]);
+                reader.readAsDataURL(file);
+            } else {
+                const reader = new FileReader();
+                reader.onload = () => {
+                    let content = (reader.result as string) || '';
+                    if (content.length > MAX_TEXT_CHARS) {
+                        content = content.slice(0, MAX_TEXT_CHARS) + '\n…[truncated]';
+                    }
+                    setAttachedFiles(prev => [...prev, { name: file.name, content }]);
+                };
+                reader.readAsText(file);
+            }
+        });
+    }, []);
+
     // Collect table IDs from root up to (and including) the focused table for agent action matching
     const threadTableIds = React.useMemo(() => {
         if (!focusedTableId) return new Set<string>();
@@ -373,6 +399,14 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
     }, displayPrompt?: string) => {
         if (!focusedTableId || (!clarificationContext && prompt.trim() === "")) return;
 
+        // Fold attached reference files into the prompt the agent sees, while
+        // keeping the timeline bubble (displayContent) clean for the user.
+        const fileContext = attachedFiles.length > 0
+            ? '\n\n' + attachedFiles.map(f => `[Attached file: ${f.name}]\n${f.content}`).join('\n\n')
+            : '';
+        const agentPrompt = prompt + fileContext;
+        const cleanDisplay = displayPrompt ?? (fileContext ? prompt : undefined);
+
         const rootTables = tables.filter(t => t.derive === undefined || t.anchored);
         const currentTable = tables.find(t => t.id === focusedTableId);
         const priorityIds = (currentTable?.derive && !currentTable.anchored)
@@ -404,8 +438,8 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         // 'clarifying' status and pendingClarification storage.
         if (isResume && pendingClarification?.draftId) {
             dispatch(dfActions.appendDraftInteraction({ draftId: pendingClarification.draftId, entry: {
-                from: 'user', to: 'data-agent', role: 'prompt', content: prompt,
-                ...(displayPrompt ? { displayContent: displayPrompt } : {}),
+                from: 'user', to: 'data-agent', role: 'prompt', content: agentPrompt,
+                ...(cleanDisplay ? { displayContent: cleanDisplay } : {}),
                 timestamp: Date.now()
             }}));
             dispatch(dfActions.updateDraftClarification({ draftId: pendingClarification.draftId, pendingClarification: null }));
@@ -552,10 +586,10 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             // backend appends it to the trajectory as a normal user message.
             // No special clarification payload needed.
             requestBody.trajectory = clarificationContext!.trajectory;
-            requestBody.user_question = prompt;
+            requestBody.user_question = agentPrompt;
             requestBody.completed_step_count = clarificationContext!.completedStepCount;
         } else {
-            requestBody.user_question = prompt;
+            requestBody.user_question = agentPrompt;
             if (focusedThread) requestBody.focused_thread = focusedThread;
             if (otherThreads) requestBody.other_threads = otherThreads;
         }
@@ -603,13 +637,13 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             currentDraftParentTableId = existingDraft?.derive?.trigger?.tableId || null;
             currentDraftInteraction = [...(existingDraft?.derive?.trigger?.interaction || [])];
             // The user reply was already appended above, add to local accumulator too
-            currentDraftInteraction.push({ from: 'user', to: 'data-agent', role: 'prompt', content: prompt,
-                ...(displayPrompt ? { displayContent: displayPrompt } : {}),
+            currentDraftInteraction.push({ from: 'user', to: 'data-agent', role: 'prompt', content: agentPrompt,
+                ...(cleanDisplay ? { displayContent: cleanDisplay } : {}),
                 timestamp: Date.now() });
         } else {
             const initialEntries: InteractionEntry[] = [
-                { from: 'user', to: 'data-agent', role: 'prompt', content: prompt,
-                    ...(displayPrompt ? { displayContent: displayPrompt } : {}),
+                { from: 'user', to: 'data-agent', role: 'prompt', content: agentPrompt,
+                    ...(cleanDisplay ? { displayContent: cleanDisplay } : {}),
                     timestamp: Date.now() }
             ];
             createNextDraft(lastCreatedTableId || focusedTableId!, initialEntries);
@@ -940,6 +974,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 clearTimeout(timeoutId);
                 setChatPrompt("");
                 setAttachedImages([]);
+                setAttachedFiles([]);
                 isCompleted = true;
             }
 
@@ -988,6 +1023,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 clearTimeout(timeoutId);
                 setChatPrompt("");
                 setAttachedImages([]);
+                setAttachedFiles([]);
                 isCompleted = true;
             }
 
@@ -1028,6 +1064,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             if (completionResult) {
                 setChatPrompt("");
                 setAttachedImages([]);
+                setAttachedFiles([]);
             }
         };
 
@@ -1110,7 +1147,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 }
             }
         })();
-    }, [focusedTableId, tables, draftNodes, activeModel, config, conceptShelfItems, dispatch, t]);
+    }, [focusedTableId, tables, draftNodes, activeModel, config, conceptShelfItems, dispatch, t, attachedImages, attachedFiles]);
 
     // ── Report generation via report agent ──────────────────────────
 
@@ -1473,7 +1510,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             {/* @-mention table chips and image attachments.
                 Skip the table-chip row entirely when there's only one root table —
                 there's nothing else the user could @-mention, so the chip is noise. */}
-            {((primaryTableIds.length > 0 && rootTables.length > 1) || attachedImages.length > 0) && !isChatFormulating && (
+            {((primaryTableIds.length > 0 && rootTables.length > 1) || attachedImages.length > 0 || attachedFiles.length > 0) && !isChatFormulating && (
                 <Box sx={{ display: 'flex', flexWrap: 'wrap', alignItems: 'center', gap: '3px', px: 0.5, pb: '2px' }}>
                     {rootTables.length > 1 && primaryTableIds.map(id => {
                         const tbl = tables.find(t => t.id === id);
@@ -1517,6 +1554,27 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                             }}
                         />
                     ))}
+                    {attachedFiles.map((file, idx) => (
+                        <Chip
+                            key={`file-${idx}`}
+                            size="small"
+                            icon={<InsertDriveFileOutlinedIcon sx={{ fontSize: 14 }} />}
+                            label={file.name}
+                            onDelete={() => setAttachedFiles(prev => prev.filter((_, i) => i !== idx))}
+                            sx={{
+                                height: 20,
+                                fontSize: 10,
+                                maxWidth: 160,
+                                color: theme.palette.text.secondary,
+                                backgroundColor: 'rgba(0,0,0,0.04)',
+                                border: 'none',
+                                borderRadius: '4px',
+                                '& .MuiChip-label': { px: '4px', overflow: 'hidden', textOverflow: 'ellipsis' },
+                                '& .MuiChip-icon': { ml: '4px', mr: '-2px' },
+                                '& .MuiChip-deleteIcon': { fontSize: 12, color: theme.palette.text.disabled, mr: '2px' },
+                            }}
+                        />
+                    ))}
                 </Box>
             )}
             {/* @-mention dropdown */}
@@ -1645,10 +1703,17 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             <Box sx={{ display: 'flex', flexDirection: 'row', alignItems: 'center', justifyContent: 'space-between', gap: 0.5 }}>
                 {/* Action buttons */}
                 <Box sx={{ display: 'flex', flexDirection: 'row', alignItems: 'center', gap: 0.5, overflow: 'hidden', flex: 1 }}>
-                    <Tooltip title={t('chartRec.addMoreData')}>
+                    <input
+                        ref={fileInputRef}
+                        type="file"
+                        multiple
+                        style={{ display: 'none' }}
+                        onChange={(e) => { handleAttachFiles(e.target.files); if (e.target) e.target.value = ''; }}
+                    />
+                    <Tooltip title={t('chartRec.attachContext', { defaultValue: 'Attach context (image or file)' })}>
                         <IconButton
                             size="small"
-                            onClick={(e) => { e.stopPropagation(); setUploadDialogOpen(true); }}
+                            onClick={(e) => { e.stopPropagation(); fileInputRef.current?.click(); }}
                             sx={{
                                 p: 0.5,
                                 color: theme.palette.text.secondary,
@@ -1762,11 +1827,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         <Box>
             {/* The input box */}
             {inputBox}
-            <UnifiedDataUploadDialog
-                open={uploadDialogOpen}
-                onClose={() => setUploadDialogOpen(false)}
-                initialTab="menu"
-            />
         </Box>
     );
 };
diff --git a/src/views/threadLayout.ts b/src/views/threadLayout.ts
new file mode 100644
index 00000000..fa793f2c
--- /dev/null
+++ b/src/views/threadLayout.ts
@@ -0,0 +1,39 @@
+// Single source of truth for DataThread column geometry.
+//
+// Both the DataThread panel (which renders the thread columns) and
+// DataFormulator (which snaps the resizable Allotment pane to whole-column
+// widths) must agree on these values, otherwise the pane snap points won't
+// line up with the actual rendered columns.  Keep all width/padding tuning
+// here.
+
+/** Visual width of a single thread card / column (px). */
+export const CARD_WIDTH = 248;
+
+/** Horizontal gap between adjacent columns (px). */
+export const CARD_GAP = 8;
+
+/** Total horizontal padding inside the thread panel (left + right, px). */
+export const PANEL_PADDING = 32;
+
+/** Max number of columns the thread panel will ever lay out. */
+export const MAX_THREAD_COLUMNS = 3;
+
+/** Pixel width needed to lay out exactly `n` columns side by side. */
+export const threadPaneWidth = (n: number): number => {
+    const cardsWidth = n * CARD_WIDTH;
+    const gapsWidth = Math.max(0, n - 1) * CARD_GAP; // n columns share n-1 gaps
+    return cardsWidth + gapsWidth + PANEL_PADDING;
+};
+
+/**
+ * How many whole columns fit within `containerWidth`, clamped to
+ * [1, MAX_THREAD_COLUMNS] (1 if the width is NaN).  Inverse of `threadPaneWidth`.
+ */
+export const fittableThreadColumns = (containerWidth: number): number => {
+    // Solve threadPaneWidth(n) <= containerWidth for the largest whole n.
+    const fit = Math.floor((containerWidth - PANEL_PADDING + CARD_GAP) / (CARD_WIDTH + CARD_GAP));
+    // Math.min/Math.max propagate NaN, so a NaN width would otherwise
+    // escape the clamp; fall back to the minimum of one column.
+    if (Number.isNaN(fit)) return 1;
+    return Math.max(1, Math.min(MAX_THREAD_COLUMNS, fit));
+};

From 3a23dc3039f6299845f444f42b34180ecd2cf525 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 29 May 2026 16:28:43 -0700
Subject: [PATCH 06/29] workflow design

---
 py-src/data_formulator/agent_config.py        |   2 +-
 .../agents/agent_chart_insight.py             |   4 +-
 .../agents/agent_data_loading_chat.py         |   8 +-
 .../agents/agent_interactive_explore.py       |   4 +-
 ...e_distill.py => agent_workflow_distill.py} | 208 ++++++++++-----
 py-src/data_formulator/agents/data_agent.py   |  33 ++-
 py-src/data_formulator/app.py                 |   2 +-
 py-src/data_formulator/knowledge/store.py     | 136 ++++++----
 py-src/data_formulator/routes/knowledge.py    | 121 +++++----
 src/api/knowledgeApi.ts                       |  28 +-
 src/app/useKnowledgeStore.ts                  |  14 +-
 src/i18n/locales/en/common.json               |  46 ++--
 src/i18n/locales/zh/common.json               |  48 ++--
 src/views/DataFrameTable.tsx                  |  27 +-
 src/views/DataSourceSidebar.tsx               |   2 +-
 src/views/DataThread.tsx                      |  11 -
 src/views/InteractionEntryCard.tsx            |   5 +
 src/views/KnowledgePanel.tsx                  | 244 +++++++-----------
 src/views/SessionDistill.tsx                  |  56 ++--
 src/views/SimpleChartRecBox.tsx               |  46 +++-
 ...xperienceContext.ts => workflowContext.ts} |   4 +-
 .../test_agent_knowledge_integration.py       |   8 +-
 ...ce_distill.py => test_workflow_distill.py} | 162 +++++++-----
 .../backend/knowledge/test_knowledge_store.py |  69 +++--
 tests/backend/routes/test_knowledge_routes.py | 137 +++++-----
 25 files changed, 794 insertions(+), 631 deletions(-)
 rename py-src/data_formulator/agents/{agent_experience_distill.py => agent_workflow_distill.py} (58%)
 rename src/views/{experienceContext.ts => workflowContext.ts} (98%)
 rename tests/backend/agents/{test_experience_distill.py => test_workflow_distill.py} (74%)

diff --git a/py-src/data_formulator/agent_config.py b/py-src/data_formulator/agent_config.py
index bec4c670..67dbbe31 100644
--- a/py-src/data_formulator/agent_config.py
+++ b/py-src/data_formulator/agent_config.py
@@ -56,7 +56,7 @@
     # ── Light: single-turn extractors / classifiers / formatters ────────────
     "data_load":           "minimal",  # one-shot type inference
     "data_clean":          "minimal",  # extract tables from text
-    "experience_distill":  "minimal",  # summarise an analysis context
+    "workflow_distill":  "minimal",  # summarise an analysis context
     "chart_insight":       "minimal",  # title + 1–3 takeaways from a chart
     "chart_restyle":       "minimal",  # apply style edits to a Vega-Lite spec
     "code_explanation":    "minimal",  # describe derived fields
diff --git a/py-src/data_formulator/agents/agent_chart_insight.py b/py-src/data_formulator/agents/agent_chart_insight.py
index a3ae8aba..c280efc2 100644
--- a/py-src/data_formulator/agents/agent_chart_insight.py
+++ b/py-src/data_formulator/agents/agent_chart_insight.py
@@ -64,7 +64,7 @@ def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=
                 search_query = " ".join([chart_type] + field_names[:5]).strip()
                 if search_query:
                     relevant = self._knowledge_store.search(
-                        search_query, categories=["experiences"], max_results=3,
+                        search_query, categories=["workflows"], max_results=3,
                     )
                     if relevant:
                         kb_parts = ["Relevant analysis knowledge:"]
@@ -72,7 +72,7 @@ def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=
                             kb_parts.append(f"- {item['title']}: {item['snippet'][:200]}")
                         context_parts.append("\n".join(kb_parts))
             except Exception:
-                logger.warning("Failed to search knowledge experiences", exc_info=True)
+                logger.warning("Failed to search knowledge workflows", exc_info=True)
 
         context = "\n".join(context_parts)
 
diff --git a/py-src/data_formulator/agents/agent_data_loading_chat.py b/py-src/data_formulator/agents/agent_data_loading_chat.py
index 61d3a0e6..55f2640a 100644
--- a/py-src/data_formulator/agents/agent_data_loading_chat.py
+++ b/py-src/data_formulator/agents/agent_data_loading_chat.py
@@ -1292,7 +1292,7 @@ def _build_system_prompt(self, last_user_text: str = ""):
         """Build the system prompt with current workspace context.
 
         *last_user_text* is used to search the knowledge store for
-        experiences relevant to the user's current request.  Falls back
+        workflows relevant to the user's current request.  Falls back
         to a generic query when empty.
         """
         table_names = "none"
@@ -1324,7 +1324,7 @@ def _build_system_prompt(self, last_user_text: str = ""):
         if self._knowledge_store:
             prompt += self._knowledge_store.format_rules_block()
 
-        # Inject relevant experiences from knowledge store
+        # Inject relevant workflows from knowledge store
         if self._knowledge_store:
             try:
                 search_query = (
@@ -1334,7 +1334,7 @@ def _build_system_prompt(self, last_user_text: str = ""):
                 )
                 relevant = self._knowledge_store.search(
                     search_query,
-                    categories=["experiences"],
+                    categories=["workflows"],
                     max_results=3,
                 )
                 if relevant:
@@ -1343,7 +1343,7 @@ def _build_system_prompt(self, last_user_text: str = ""):
                         knowledge_block += f"\n### {item['title']}\n{item['snippet']}\n"
                     prompt += "\n\n" + knowledge_block
             except Exception:
-                logger.warning("Failed to search knowledge experiences", exc_info=True)
+                logger.warning("Failed to search knowledge workflows", exc_info=True)
 
         if self.language_instruction:
             prompt += "\n\n" + self.language_instruction
diff --git a/py-src/data_formulator/agents/agent_interactive_explore.py b/py-src/data_formulator/agents/agent_interactive_explore.py
index 67847ec2..0f5f90fb 100644
--- a/py-src/data_formulator/agents/agent_interactive_explore.py
+++ b/py-src/data_formulator/agents/agent_interactive_explore.py
@@ -162,7 +162,7 @@ def run(self, input_tables, start_question=None,
         if start_question:
             context += f"\n\n[START QUESTION]\n\n{start_question}"
 
-        # ── Inject relevant experiences from knowledge store ──────────
+        # ── Inject relevant workflows from knowledge store ──────────
         if self._knowledge_store:
             try:
                 query = start_question or ""
@@ -170,7 +170,7 @@ def run(self, input_tables, start_question=None,
                 search_query = " ".join([query] + table_names[:5]).strip()
                 if search_query:
                     relevant = self._knowledge_store.search(
-                        search_query, categories=["experiences"], max_results=3,
+                        search_query, categories=["workflows"], max_results=3,
                     )
                     if relevant:
                         knowledge_block = "[RELEVANT KNOWLEDGE]\n"
diff --git a/py-src/data_formulator/agents/agent_experience_distill.py b/py-src/data_formulator/agents/agent_workflow_distill.py
similarity index 58%
rename from py-src/data_formulator/agents/agent_experience_distill.py
rename to py-src/data_formulator/agents/agent_workflow_distill.py
index cc738495..3f3d9c6d 100644
--- a/py-src/data_formulator/agents/agent_experience_distill.py
+++ b/py-src/data_formulator/agents/agent_workflow_distill.py
@@ -1,17 +1,17 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-"""Experience distillation agent — extracts reusable knowledge from analysis context.
+"""Workflow distillation agent — extracts a replayable workflow from analysis context.
 
 Given a user-visible analysis context (timeline of events) plus an optional
 user instruction, this agent calls an LLM to produce a structured Markdown
-experience document with YAML front matter suitable for storage in the
+workflow document with YAML front matter suitable for storage in the
 knowledge base.
 
 Usage::
 
-    agent = ExperienceDistillAgent(client)
-    md_content = agent.run(experience_context, user_instruction="...")
+    agent = WorkflowDistillAgent(client)
+    md_content = agent.run(workflow_context, user_instruction="...")
 """
 
 from __future__ import annotations
@@ -25,18 +25,19 @@
 
 logger = logging.getLogger(__name__)
 
-_AGENT_ID = "experience_distill"
+_AGENT_ID = "workflow_distill"
 
 
 SYSTEM_PROMPT = """\
-You are a knowledge distiller. Given the chronological events of a data
-analysis session plus an optional user instruction, write a short reusable
-Markdown note that will help with similar future tasks.
+You are a workflow distiller. Given the chronological events of a data
+analysis session plus an optional user instruction, extract a short,
+**replayable workflow** that captures *what the user wanted and got* — so
+the same analysis can be reproduced later on a similarly-shaped dataset.
 
 The session contains one or more threads (separate analysis branches in
 the same session) each rendered under a `### Thread N` header. When
-multiple threads are provided, synthesise lessons that hold across them
-— do NOT enumerate per-thread.
+multiple threads are provided, merge them into one coherent ordered
+workflow — do NOT enumerate per-thread.
 
 The events use three types:
 - `message` — directed speech, formatted as `[<from>→<to>/<role>] <content>`.
@@ -46,56 +47,136 @@
   (followed by columns, row count, sample, and code).
 - `create_chart` — a chart emitted on a table (mark + encoding summary).
 
-If a user instruction is provided, focus the note on that instruction.
-Otherwise, distill the most transferable methodology from the events.
+Your job is to recover the **ordered list of requests** the user actually
+wanted, and the outputs (tables/charts) they ended up keeping. Beyond the
+concrete steps, also distill the analysis at TWO levels of abstraction so
+it can be reused later:
+- **Adapting to similar data** (concrete) — how to rerun essentially the
+  same analysis on a near-identical dataset, e.g. the business report for
+  a different month, region, or product line. Same shape and intent, only
+  the specific inputs/filters change.
+- **Generalizing to other data** (abstract, dataset-agnostic) — the
+  underlying analytical pattern, independent of this domain: the kinds of
+  questions, computations, and charts involved, phrased so they transfer
+  to a different domain or a differently-shaped dataset.
+
+CRITICAL extraction rules — keep only what the user wanted and got:
+- Each step = one user request, written in plain language. Say BOTH the
+  question being explored AND what was produced to answer it — including
+  the chart that was created and the key fields it uses (e.g. "Ask how
+  sales trend over time, and plot monthly total sales as a line chart";
+  "Compare regions by breaking revenue down per region as a sorted bar
+  chart"). Order them as the analysis progressed.
+- DROP corrective back-and-forth. If the user changed their mind
+  ("no, it should be…", "actually use median instead"), keep ONLY the
+  final resolved intent — not the wrong first attempt or the correction.
+- DROP abandoned work. If a chart or table was created and then deleted
+  or never kept, leave it out entirely.
+- DROP mechanics. Do NOT include error-repair loops, dtype fixes, tool
+  call noise, or low-level code. Describe intent, not implementation.
+- Do NOT lean on code or exact column names unless a name is essential to
+  the request's meaning. Keep steps dataset-agnostic where possible so
+  they replay on a new slice of similar data.
+- Capture genuine gotchas separately as short notes (advisory warnings to
+  carry forward), NOT as steps to re-perform.
+
+If a user instruction is provided, let it steer what to keep or emphasise.
 
 Output format (Markdown with YAML front matter, nothing else):
 
 ```
 ---
-subtitle: <short, scannable noun phrase, 3-8 words; no colons, dashes, or run-on lists>
-tags: [<broad search keywords: domain, chart type, key operations, technique>]
+subtitle: <plain-language description of what this workflow is about, up to ~25 words; a full sentence is fine; start with an action verb; no jargon, no colons, dashes, or run-on lists>
+filename: <short 2-5 word lowercase name for the file, e.g. "monthly sales trend"; no dates, no extension>
 created: <today YYYY-MM-DD>
 updated: <today YYYY-MM-DD>
 source: distill
 source_context: <context_id>
 ---
 
-## When to Use
-<general conditions where this method applies>
-
-## Method
-<concrete steps, abstracted; use generic placeholders like "the target column"
-instead of actual column names when names aren't universally meaningful>
-
-## Pitfalls & Tips
-<gotchas, workarounds, and things to watch out for — the most valuable section.
-If a repair was needed, explain *why* it failed and the general fix.>
+## Goal
+<one or two sentences: the overall question(s) this analysis answers and
+what it produces>
+
+## Steps
+1. <first question explored, and the table/chart created to answer it>
+2. <next question, and what was produced>
+3. <…>
+
+## Adapting to similar data
+<how to rerun essentially the same analysis on a near-identical dataset —
+e.g. the same kind of report for a different month, region, or product
+line. Keep the structure and outputs the same; call out which inputs,
+filters, or columns would change. 1-4 short sentences or bullets.>
+
+## Generalizing to other data
+<the dataset-agnostic analytical pattern behind this workflow: the kinds
+of questions, computations, and charts it represents, described in
+domain-neutral terms so it can transfer to a different domain or a
+differently-shaped dataset. Focus on the reasoning and technique, not the
+specific fields or values. 1-4 short sentences or bullets.>
+
+## Notes
+<optional short bullets: caveats/gotchas to watch for when reproducing this
+analysis on new data — e.g. "sort by time before computing deltas". Omit
+this section entirely if there is nothing worth warning about.>
 ```
 
 Rules:
-- Subtitle must be a short, scannable noun phrase (3-8 words) that captures
-  the technique or pattern. The hosting application prefixes it with the
-  session name to form the full title (e.g. "Experience from <session>: <subtitle>"),
-  so do NOT include the session name in the subtitle. Do NOT pack scenario,
-  takeaway, and steps into the subtitle — leave details for `## When to Use`
-  and `## Method`.
-  Good: "Year-over-year volatility comparison". "Repairing pandas dtype mismatches".
-  Bad:  "Time series analysis workflow: aggregate, visualize trends, quantify YoY spikes, and compare volatility across periods".
-- Focus on *transferable* methods and caveats, not case-specific details.
-- Keep the body under 500 words.
-- No raw data, PII, secrets, or specific values unless they show a universal pattern.
-- Write the subtitle, headings, body, and tags in {output_language}.
+- Subtitle must DESCRIBE what the workflow is about in PLAIN LANGUAGE that
+  a non-expert can fully understand at a glance, so they can decide
+  whether to replay it on new data. Favor clarity over brevity: it can be
+  a full sentence (up to ~25 words) if that makes the analysis genuinely
+  understandable. Write it like you would explain the analysis to a
+  colleague in one breath, covering the subject and the main thing you do
+  with it. The hosting application uses this subtitle directly as the
+  workflow's display title, so make it self-contained and do NOT prefix it
+  with the session name.
+  - Start with a concrete action verb (Plot, Compare, Break down, Rank,
+    Track, Summarize, Find…).
+  - Name the real-world subject in everyday words (sales, revenue,
+    customers, events), NOT the internal mechanics or derived-column
+    names you happened to create.
+  - AVOID abstract or technical jargon and invented noun-phrases
+    ("deltas", "composition", "window", "distribution shift"). If a
+    technique matters, phrase it plainly ("change from one period to the
+    next" instead of "deltas").
+  Good: "Plot monthly sales over time and compare each year against the
+         previous one to spot volatile periods".
+        "Break revenue down by region and show how each region
+         contributes to the total as a stacked area chart".
+        "Track how many events happen in each time window and what kinds
+         of events make up each window".
+  Bad:  "Time series analysis". "Data workflow". "Chart exploration".
+        "Event window deltas with composition". "Distribution shift inspection".
+- Filename must be a SHORT (2-5 word) lowercase name for the file — just
+  the core subject and action, e.g. "monthly sales trend", "region revenue
+  breakdown". No dates, no file extension, no session name. It is only
+  used to name the file on disk; the descriptive subtitle is what users see.
+- Steps must be ordered and reproducible. Each step should make clear the
+  question being explored and the chart/output produced to answer it.
+- "Adapting to similar data" stays close to this analysis (same domain,
+  same shape) — only the concrete inputs change. "Generalizing to other
+  data" must be domain-neutral: strip out this dataset's subject matter and
+  describe only the transferable analytical pattern (question types,
+  computations, chart kinds). Do NOT just repeat the steps in either
+  section; add genuine reuse guidance. Keep each section brief.
+- Be as long as the analysis needs — do not omit meaningful steps,
+  questions, or charts just to stay short. Stay focused, but completeness
+  matters more than brevity.
+- No raw data, PII, secrets, or specific values unless essential to a request.
+- Write the subtitle, headings, and body in {output_language}.
   YAML front-matter keys stay in English.
 
 {language_instruction}
 """
 
 
-class ExperienceDistillAgent:
-    """Distills analysis context into a reusable experience document."""
 
-    # Language display names for experience-specific prompts
+class WorkflowDistillAgent:
+    """Distills analysis context into a reusable workflow document."""
+
+    # Language display names for workflow-specific prompts
     _LANG_NAMES: dict[str, str] = {
         "zh": "Simplified Chinese (简体中文)",
         "ja": "Japanese (日本語)",
@@ -121,7 +202,7 @@ def __init__(
         self.timeout_seconds = int(timeout_seconds) if timeout_seconds else self.DEFAULT_TIMEOUT
 
     def run(self, context: dict[str, Any], user_instruction: str = "") -> str:
-        """Distill an experience document from user-visible session context."""
+        """Distill a workflow document from user-visible session context."""
         summary = self._extract_context_summary(context)
         today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
         context_id = str(context.get("context_id", "") or "")
@@ -130,7 +211,7 @@ def run(self, context: dict[str, Any], user_instruction: str = "") -> str:
 
         instruction_block = (
             f"\n[USER INSTRUCTION]\n{user_instruction.strip()}\n"
-            f"Focus the distilled experience on the above instruction.\n"
+            f"Focus the distilled workflow on the above instruction.\n"
         ) if user_instruction and user_instruction.strip() else ""
 
         workspace_block = (
@@ -158,9 +239,11 @@ def run(self, context: dict[str, Any], user_instruction: str = "") -> str:
             {"role": "user", "content": user_msg},
         ]
 
-        from data_formulator.knowledge.store import KNOWLEDGE_LIMITS
+        from data_formulator.knowledge.store import KNOWLEDGE_LIMITS, WORKFLOW_HARD_MAX
         content = self._call_with_length_retry(
-            messages, KNOWLEDGE_LIMITS.get("experiences", 2000),
+            messages,
+            KNOWLEDGE_LIMITS.get("workflows", 6000),
+            WORKFLOW_HARD_MAX,
         )
 
         if not content.strip().startswith("---"):
@@ -182,7 +265,7 @@ def _prompt_format_kwargs(self) -> dict[str, str]:
             lang_block = (
                 f"[LANGUAGE INSTRUCTION]\n"
                 f"The user's language is **{display_name}**.\n"
-                f"Write the title, all section headings, all body text, and tags "
+                f"Write the title, all section headings, and all body text "
                 f"in {display_name}. YAML front-matter keys stay in English."
             )
         return {
@@ -199,39 +282,43 @@ def _prompt_format_kwargs(self) -> dict[str, str]:
     def _call_with_length_retry(
         self,
         messages: list[dict],
-        body_limit: int,
+        soft_limit: int,
+        hard_limit: int,
     ) -> str:
-        """Call LLM and retry once if the body exceeds *body_limit* characters.
+        """Call the LLM, nudging it to stay near *soft_limit* characters.
 
-        If the retry *still* overshoots, hard-truncate the body so the
-        document is saved instead of the entire distillation being lost.
+        ``soft_limit`` is advisory guidance: if the first response overshoots
+        it we retry once asking the model to condense. We only ever
+        hard-truncate at ``hard_limit`` — a much larger safety ceiling — so
+        rich, multi-section workflows are kept intact while runaway output
+        is still bounded.
         """
         from data_formulator.knowledge.store import parse_front_matter
 
         content = self._call_llm(messages)
         _, body = parse_front_matter(content)
-        if len(body.strip()) <= body_limit:
+        if len(body.strip()) <= soft_limit:
             return content
 
-        retry_target = max(body_limit - self.RETRY_MARGIN, 1)
+        retry_target = max(soft_limit - self.RETRY_MARGIN, 1)
         logger.info(
-            "Distilled content too long (%d > %d), retrying with condensation prompt (target ≤ %d)",
-            len(body.strip()), body_limit, retry_target,
+            "Distilled content over soft target (%d > %d), retrying with condensation prompt (target ≤ %d)",
+            len(body.strip()), soft_limit, retry_target,
         )
         messages = messages + [
             {"role": "assistant", "content": content},
             {"role": "user", "content": (
-                f"Your output body is {len(body.strip())} characters, which exceeds "
-                f"the limit of {body_limit}. Please condense the document to fit "
-                f"within {retry_target} characters while keeping the most important "
-                f"insights. Output ONLY the revised Markdown document."
+                f"Your output body is {len(body.strip())} characters, which is "
+                f"longer than ideal. Please tighten the document to around "
+                f"{retry_target} characters while keeping the most important "
+                f"insights and all sections. Output ONLY the revised Markdown document."
             )},
         ]
         retried = self._call_llm(messages)
 
-        # Hard-trim if the retry still overshoots — better a slightly
-        # truncated experience than a save failure.
-        return self._truncate_body_to_limit(retried, body_limit)
+        # Hard-trim only if the retry blows past the absolute ceiling —
+        # better a slightly truncated workflow than a save failure.
+        return self._truncate_body_to_limit(retried, hard_limit)
 
     @classmethod
     def _truncate_body_to_limit(cls, content: str, body_limit: int) -> str:
@@ -385,7 +472,7 @@ def _render_events(cls, events: list[Any]) -> str:
         return "\n".join(parts) if parts else "(empty context)"
 
     def _call_llm(self, messages: list[dict]) -> str:
-        """Single LLM call to generate the experience document."""
+        """Single LLM call to generate the workflow document."""
         resp = self.client.get_completion(
             messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model), timeout=self.timeout_seconds,
         )
@@ -401,7 +488,6 @@ def _add_fallback_front_matter(
 
         header = (
             f"---\ntitle: {title}\n"
-            f"tags: []\n"
             f"created: {today}\n"
             f"updated: {today}\n"
             f"source: distill\n"
diff --git a/py-src/data_formulator/agents/data_agent.py b/py-src/data_formulator/agents/data_agent.py
index 8e9cd39a..9a9d10b2 100644
--- a/py-src/data_formulator/agents/data_agent.py
+++ b/py-src/data_formulator/agents/data_agent.py
@@ -153,7 +153,7 @@ def _rescue_validate_action(data: dict) -> list[str]:
         "function": {
             "name": "search_knowledge",
             "description": (
-                "Search the user's knowledge base (rules, experiences) "
+                "Search the user's knowledge base (rules, workflows) "
                 "for relevant entries. Returns title, category, snippet, and "
                 "path for each match. Use read_knowledge to get full content."
             ),
@@ -168,7 +168,7 @@ def _rescue_validate_action(data: dict) -> list[str]:
                         "type": "array",
                         "items": {
                             "type": "string",
-                            "enum": ["rules", "experiences"],
+                            "enum": ["rules", "workflows"],
                         },
                         "description": "Optional: limit search to specific categories.",
                     },
@@ -190,7 +190,7 @@ def _rescue_validate_action(data: dict) -> list[str]:
                 "properties": {
                     "category": {
                         "type": "string",
-                        "enum": ["rules", "experiences"],
+                        "enum": ["rules", "workflows"],
                         "description": "Knowledge category.",
                     },
                     "path": {
@@ -224,7 +224,7 @@ def _rescue_validate_action(data: dict) -> list[str]:
 - **inspect_source_data(table_names)** — get schema, stats, and sample rows
   for source tables (cheaper than explore for basic inspection).
 - **search_knowledge(query, categories?)** — search the user's knowledge base
-  (rules, experiences) for relevant entries.
+  (rules, workflows) for relevant entries.
 - **read_knowledge(category, path)** — read the full content of a knowledge entry.
 
 You analyse data that is **already in the workspace**.  If the user's
@@ -1379,14 +1379,14 @@ def _build_initial_messages(
         if peripheral_block:
             user_content += f"{peripheral_block}\n\n"
 
-        # Search and inject relevant knowledge (experiences + non-alwaysApply rules)
+        # Search and inject relevant knowledge (workflows + non-alwaysApply rules)
         table_names = [t.get("name", "") for t in input_tables if t.get("name")]
         relevant_knowledge = self._search_relevant_knowledge(user_question, table_names)
 
-        # Always include the experience distilled from the active workspace
+        # Always include the workflow distilled from the active workspace
         # (design-docs/24 §3.6) so the session has stable working memory
         # across turns regardless of search relevance.
-        session_exp = self._load_active_session_experience()
+        session_exp = self._load_active_session_workflow()
         if session_exp:
             existing_paths = {
                 (item["category"], item["path"]) for item in relevant_knowledge
@@ -1891,7 +1891,7 @@ def _search_relevant_knowledge(
         table_names: list[str],
         max_items: int = 5,
     ) -> list[dict[str, Any]]:
-        """Search experiences and non-alwaysApply rules relevant to the current session.
+        """Search workflows and non-alwaysApply rules relevant to the current session.
 
         Uses the user question as the search query and passes table names
         separately for tag-overlap boosting.  alwaysApply rules are
@@ -1904,7 +1904,7 @@ def _search_relevant_knowledge(
         try:
             results = self._knowledge_store.search(
                 user_question,
-                categories=["rules", "experiences"],
+                categories=["rules", "workflows"],
                 max_results=max_items,
                 table_names=table_names[:5],
             )
@@ -1913,11 +1913,11 @@ def _search_relevant_knowledge(
             logger.warning("Failed to search knowledge", exc_info=True)
             return []
 
-    def _load_active_session_experience(self) -> dict[str, Any] | None:
-        """Return the experience distilled from the active workspace, if any.
+    def _load_active_session_workflow(self) -> dict[str, Any] | None:
+        """Return the workflow distilled from the active workspace, if any.
 
         The session-scoped distillation flow (design-docs/24) writes one
-        experience per workspace, stamped with ``source_workspace_id``.
+        workflow per workspace, stamped with ``source_workspace_id``.
         We always inject that file into the agent's context so the agent
         has stable working memory for the active session in addition to
         whatever the relevance search picked.
@@ -1932,14 +1932,14 @@ def _load_active_session_experience(self) -> dict[str, Any] | None:
         if not ws_id:
             return None
         try:
-            entry = self._knowledge_store.find_experience_by_workspace_id(ws_id)
+            entry = self._knowledge_store.find_workflow_by_workspace_id(ws_id)
         except Exception:
-            logger.warning("find_experience_by_workspace_id failed", exc_info=True)
+            logger.warning("find_workflow_by_workspace_id failed", exc_info=True)
             return None
         if not entry:
             return None
         try:
-            content = self._knowledge_store.read("experiences", entry["path"])
+            content = self._knowledge_store.read("workflows", entry["path"])
         except Exception:
             return None
         from data_formulator.knowledge.store import parse_front_matter
@@ -1948,9 +1948,8 @@ def _load_active_session_experience(self) -> dict[str, Any] | None:
         if not snippet:
             return None
         return {
-            "category": "experiences",
+            "category": "workflows",
             "title": entry.get("title", entry.get("path", "")),
-            "tags": entry.get("tags", []),
             "path": entry["path"],
             "snippet": snippet,
             "source": entry.get("source", "distill"),
diff --git a/py-src/data_formulator/app.py b/py-src/data_formulator/app.py
index 47d2bda8..ef9dd4cb 100644
--- a/py-src/data_formulator/app.py
+++ b/py-src/data_formulator/app.py
@@ -219,7 +219,7 @@ def _register_blueprints():
     from data_formulator.routes.credentials import credential_bp
     app.register_blueprint(credential_bp)
 
-    # Register knowledge management API (rules, skills, experiences)
+    # Register knowledge management API (rules, skills, workflows)
     from data_formulator.routes.knowledge import knowledge_bp
     app.register_blueprint(knowledge_bp)
 
diff --git a/py-src/data_formulator/knowledge/store.py b/py-src/data_formulator/knowledge/store.py
index 0b290093..08463437 100644
--- a/py-src/data_formulator/knowledge/store.py
+++ b/py-src/data_formulator/knowledge/store.py
@@ -1,10 +1,10 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-"""Knowledge store — manages user knowledge files (rules, experiences).
+"""Knowledge store — manages user knowledge files (rules, workflows).
 
 Each user has a ``knowledge/`` directory under their home with two
-sub-directories: ``rules`` and ``experiences``.  Every knowledge entry is a
+sub-directories: ``rules`` and ``workflows``.  Every knowledge entry is a
 Markdown file with YAML front matter.
 
 All file I/O is routed through :class:`ConfinedDir` for path safety.
@@ -12,7 +12,7 @@
 Directory depth constraints:
 
 - ``rules``: flat — only files directly under ``rules/`` (1 path part)
-- ``experiences``: one level of sub-directories (up to 2 path parts)
+- ``workflows``: one level of sub-directories (up to 2 path parts)
 """
 
 from __future__ import annotations
@@ -27,19 +27,27 @@
 
 logger = logging.getLogger(__name__)
 
-VALID_CATEGORIES = frozenset({"rules", "experiences"})
+VALID_CATEGORIES = frozenset({"rules", "workflows"})
 
 _MAX_DEPTH = {
     "rules": 1,
-    "experiences": 2,   # one sub-dir: "category/file.md"
+    "workflows": 2,   # one sub-dir: "category/file.md"
 }
 
 KNOWLEDGE_LIMITS: dict[str, int] = {
     "rule_description_max": 100,
     "rules": 350,
-    "experiences": 2000,
+    # Soft length guidance for distilled workflows: the target the distill
+    # agent aims for, NOT a hard cap. Workflows may exceed it when an
+    # analysis genuinely needs the room (e.g. multiple abstraction levels).
+    # Writes are only rejected past WORKFLOW_HARD_MAX below.
+    "workflows": 6000,
 }
 
+# Absolute safety ceiling for a workflow body. Guards against runaway LLM
+# output while still letting rich, multi-section workflows through.
+WORKFLOW_HARD_MAX: int = 24000
+
 # ---------------------------------------------------------------------------
 # Tokenization helpers for improved search scoring
 # ---------------------------------------------------------------------------
@@ -151,14 +159,13 @@ class KnowledgeItemMeta:
     """
 
     __slots__ = (
-        "title", "tags", "source", "created", "description", "always_apply",
+        "title", "source", "created", "description", "always_apply",
         "source_workspace_id", "source_workspace_name",
     )
 
     def __init__(
         self,
         title: str,
-        tags: list[str],
         source: str,
         created: str,
         description: str,
@@ -167,7 +174,6 @@ def __init__(
         source_workspace_name: str = "",
     ):
         self.title = title
-        self.tags = tags
         self.source = source
         self.created = created
         self.description = description
@@ -181,14 +187,6 @@ def from_raw(cls, meta: dict[str, Any], fallback_stem: str = "") -> "KnowledgeIt
         title = meta.get("title", fallback_stem)
         title = str(title) if title is not None else fallback_stem
 
-        raw_tags = meta.get("tags", [])
-        if isinstance(raw_tags, list):
-            tags = [str(t) for t in raw_tags]
-        elif raw_tags is None:
-            tags = []
-        else:
-            tags = [str(raw_tags)]
-
         source = str(meta.get("source", "manual") or "manual")
         created = str(meta.get("created", "") or "")
         description = str(meta.get("description", "") or "")
@@ -198,7 +196,6 @@ def from_raw(cls, meta: dict[str, Any], fallback_stem: str = "") -> "KnowledgeIt
 
         return cls(
             title=title,
-            tags=tags,
             source=source,
             created=created,
             description=description,
@@ -246,26 +243,64 @@ class KnowledgeStore:
 
         store = KnowledgeStore(user_home)
         items = store.list_all("rules")
-        content = store.read("experiences", "data-cleaning/handle-missing.md")
+        content = store.read("workflows", "data-cleaning/handle-missing.md")
         store.write("rules", "date-format.md", md_content)
         store.delete("rules", "date-format.md")
-        results = store.search("ROI", categories=["rules", "experiences"])
+        results = store.search("ROI", categories=["rules", "workflows"])
     """
 
     def __init__(self, user_home: Path | str) -> None:
         user_home = Path(user_home)
         self._root = ConfinedDir(user_home / "knowledge", mkdir=True)
+        self._migrate_experiences_to_workflows()
         self._jails: dict[str, ConfinedDir] = {
             "rules": ConfinedDir(self._root.root / "rules", mkdir=True),
-            "experiences": ConfinedDir(self._root.root / "experiences", mkdir=True),
+            "workflows": ConfinedDir(self._root.root / "workflows", mkdir=True),
         }
         self._migrate_flat()
 
     # -- migration ---------------------------------------------------------
 
+    def _migrate_experiences_to_workflows(self) -> None:
+        """Move legacy ``experiences/`` files into ``workflows/`` (one-time).
+
+        The feature was renamed from "experiences" to "workflows"; existing
+        users have files under ``knowledge/experiences/``.  Move them so the
+        rename is transparent.
+        """
+        old_root = self._root.root / "experiences"
+        if not old_root.is_dir():
+            return
+        new_root = self._root.root / "workflows"
+        new_root.mkdir(parents=True, exist_ok=True)
+        for md_file in list(old_root.rglob("*.md")):
+            rel = md_file.relative_to(old_root)
+            dest = new_root / rel
+            dest.parent.mkdir(parents=True, exist_ok=True)
+            if dest.exists():
+                stem = rel.stem
+                suffix_n = 1
+                while dest.exists():
+                    dest = dest.parent / f"{stem}-{suffix_n}.md"
+                    suffix_n += 1
+            try:
+                md_file.rename(dest)
+                logger.info("Migrated experiences/%s → workflows/%s", rel, dest.name)
+            except Exception:
+                logger.warning("Failed to migrate experience file %s", md_file, exc_info=True)
+        # Remove the now-empty legacy tree (best effort)
+        try:
+            for sub in sorted(old_root.rglob("*"), reverse=True):
+                if sub.is_dir() and not any(sub.iterdir()):
+                    sub.rmdir()
+            if not any(old_root.iterdir()):
+                old_root.rmdir()
+        except Exception:
+            logger.warning("Failed to clean up legacy experiences dir", exc_info=True)
+
     def _migrate_flat(self) -> None:
-        """Move any experiences/subdir/file.md → experiences/file.md (one-time migration)."""
-        exp_root = self._jails["experiences"].root
+        """Move any workflows/subdir/file.md → workflows/file.md (one-time migration)."""
+        exp_root = self._jails["workflows"].root
         for md_file in list(exp_root.rglob("*.md")):
             rel = md_file.relative_to(exp_root)
             if len(rel.parts) <= 1:
@@ -285,9 +320,9 @@ def _migrate_flat(self) -> None:
                 parent = md_file.parent
                 if parent != exp_root and not any(parent.iterdir()):
                     parent.rmdir()
-                logger.info("Migrated knowledge experience %s → %s", rel, dest.name)
+                logger.info("Migrated knowledge workflow %s → %s", rel, dest.name)
             except Exception:
-                logger.warning("Failed to migrate experience file %s", md_file, exc_info=True)
+                logger.warning("Failed to migrate workflow file %s", md_file, exc_info=True)
 
     # -- path validation ---------------------------------------------------
 
@@ -326,7 +361,7 @@ def _jail(self, category: str) -> ConfinedDir:
     def list_all(self, category: str) -> list[dict[str, Any]]:
         """List all knowledge entries in *category*.
 
-        Returns a list of dicts with ``title``, ``tags``, ``path``,
+        Returns a list of dicts with ``title``, ``path``,
         ``source``, and ``created`` parsed from front matter.
         For rules, also includes ``description`` and ``alwaysApply``.
         """
@@ -345,7 +380,6 @@ def list_all(self, category: str) -> list[dict[str, Any]]:
             rel = str(md_file.relative_to(jail.root)).replace("\\", "/")
             item: dict[str, Any] = {
                 "title": km.title,
-                "tags": km.tags,
                 "path": rel,
                 "source": km.source,
                 "created": km.created,
@@ -353,9 +387,9 @@ def list_all(self, category: str) -> list[dict[str, Any]]:
             if category == "rules":
                 item["description"] = km.description
                 item["alwaysApply"] = km.always_apply
-            if category == "experiences":
+            if category == "workflows":
                 # Surface session-distillation provenance so the frontend can
-                # find an existing session experience by workspace id
+                # find an existing session workflow by workspace id
                 # without re-reading every file. See design-docs/24.
                 if km.source_workspace_id:
                     item["sourceWorkspaceId"] = km.source_workspace_id
@@ -394,7 +428,15 @@ def write(self, category: str, path: str, content: str) -> Path:
         body_limit = KNOWLEDGE_LIMITS.get(category)
         if body_limit is not None:
             body_len = len(body.strip())
-            if body_len > body_limit:
+            if category == "workflows":
+                # Soft guidance: body_limit is a target the distill agent aims
+                # for, not a hard cap. Reject only above WORKFLOW_HARD_MAX.
+                if body_len > WORKFLOW_HARD_MAX:
+                    raise ValueError(
+                        f"workflows body exceeds {WORKFLOW_HARD_MAX} characters "
+                        f"(got {body_len})"
+                    )
+            elif body_len > body_limit:
                 raise ValueError(
                     f"{category} body exceeds {body_limit} characters "
                     f"(got {body_len})"
@@ -407,12 +449,12 @@ def delete(self, category: str, path: str) -> None:
         self.validate_path(category, path)
         self._jail(category).unlink(path)
 
-    # -- session experience helpers ----------------------------------------
+    # -- session workflow helpers ----------------------------------------
 
-    def find_experience_by_workspace_id(
+    def find_workflow_by_workspace_id(
         self, workspace_id: str,
     ) -> dict[str, Any] | None:
-        """Return the experience entry whose front matter records this workspace id.
+        """Return the workflow entry whose front matter records this workspace id.
 
         Used by the session-scoped distillation flow (design-docs/24) to
         upsert: when re-distilling the same session, overwrite the same
@@ -421,11 +463,11 @@ def find_experience_by_workspace_id(
         if not workspace_id or not workspace_id.strip():
             return None
         try:
-            for item in self.list_all("experiences"):
+            for item in self.list_all("workflows"):
                 if item.get("sourceWorkspaceId") == workspace_id:
                     return item
         except Exception:
-            logger.warning("find_experience_by_workspace_id failed", exc_info=True)
+            logger.warning("find_workflow_by_workspace_id failed", exc_info=True)
         return None
 
     # -- alwaysApply rules helper ------------------------------------------
@@ -511,12 +553,13 @@ def search(
         """Search across knowledge categories.
 
         Tokenizes *query* into keywords and scores each entry using
-        multi-field weighted matching (title > tags > filename > body).
-        Whole-string exact matches and table-name / tag overlaps receive
+        multi-field weighted matching (title > filename > body).
+        Whole-string exact matches and table-name overlaps receive
         additional bonuses.  Non-manual sources are slightly discounted.
 
         *table_names* (optional) are table names from the current session;
-        when a table name appears in an entry's tags the entry is boosted.
+        when a table name appears in an entry's title or in the first 200
+        characters of its body, the entry is boosted.
         """
         if not query or not query.strip():
             return []
@@ -542,7 +585,7 @@ def search(
                     continue
 
                 score = self._match_score(
-                    q, km.title, km.tags, md_file.stem, body[:200],
+                    q, km.title, md_file.stem, body[:200],
                     source=km.source, table_names=table_names,
                 )
                 if score <= 0:
@@ -552,7 +595,6 @@ def search(
                 scored.append((score, {
                     "category": cat,
                     "title": km.title,
-                    "tags": km.tags,
                     "path": rel,
                     "snippet": body[:500].strip(),
                     "source": km.source,
@@ -565,7 +607,6 @@ def search(
     def _match_score(
         query: str,
         title: str,
-        tags: list[str],
         stem: str,
         body_prefix: str,
         *,
@@ -589,13 +630,10 @@ def _match_score(
             title_l = title.lower()
             stem_l = stem.lower()
             body_l = body_prefix.lower()
-            tags_l = [t.lower() for t in tags]
 
             for token in tokens:
                 if token in title_l:
                     score += 100 / n
-                if any(token in tl for tl in tags_l):
-                    score += 50 / n
                 if token in stem_l:
                     score += 30 / n
                 if token in body_l:
@@ -604,14 +642,14 @@ def _match_score(
         # Whole-string bonus (handles short queries like "ROI")
         if q and q in title.lower():
             score += 50
-        if q and any(q in t.lower() for t in tags):
-            score += 50
 
-        # Table-name → tag overlap bonus
+        # Table-name overlap bonus (title / body prefix)
         if table_names:
-            tags_l_set = {t.lower() for t in tags}
+            title_l = title.lower()
+            body_l = body_prefix.lower()
             for tn in table_names:
-                if any(tn.lower() in tl for tl in tags_l_set):
+                tnl = tn.lower()
+                if tnl in title_l or tnl in body_l:
                     score += 30
 
         # Non-manual source slight discount
diff --git a/py-src/data_formulator/routes/knowledge.py b/py-src/data_formulator/routes/knowledge.py
index 901024c1..1a458ba9 100644
--- a/py-src/data_formulator/routes/knowledge.py
+++ b/py-src/data_formulator/routes/knowledge.py
@@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-"""Knowledge management API — CRUD + search + experience distillation.
+"""Knowledge management API — CRUD + search + workflow distillation.
 
 All endpoints use ``POST`` with JSON body.  Access is scoped to the
 current user via ``get_identity_id()`` and confined via ``ConfinedDir``.
@@ -155,44 +155,44 @@ def knowledge_search():
     return json_ok({"results": results})
 
 
-# ── distill experience ────────────────────────────────────────────────────
+# ── distill workflow ────────────────────────────────────────────────────
 
 
-@knowledge_bp.route("/distill-experience", methods=["POST"])
-def distill_experience():
-    """Distill user-visible analysis context into a reusable experience.
+@knowledge_bp.route("/distill-workflow", methods=["POST"])
+def distill_workflow():
+    """Distill user-visible analysis context into a reusable workflow.
 
     Session-scoped payload (design-docs/24):
-    ``experience_context`` carries a list of ``threads`` (one per leaf
+    ``workflow_context`` carries a list of ``threads`` (one per leaf
     derived table the user has on screen), each with its own chronological
     ``events`` array. ``workspace_id`` + ``workspace_name`` bind the
     resulting file to the active session so re-distilling upserts the
     same file.
 
-    Required body fields: ``experience_context`` and ``model``.
+    Required body fields: ``workflow_context`` and ``model``.
     Optional: ``user_instruction`` (natural-language focus hint for the LLM),
-    ``category_hint`` (sub-directory under experiences/).
+    ``category_hint`` (sub-directory under workflows/).
     """
     data = request.get_json(silent=True) or {}
-    experience_context = data.get("experience_context")
-    if not isinstance(experience_context, dict):
-        raise AppError(ErrorCode.INVALID_REQUEST, "'experience_context' is required")
+    workflow_context = data.get("workflow_context")
+    if not isinstance(workflow_context, dict):
+        raise AppError(ErrorCode.INVALID_REQUEST, "'workflow_context' is required")
 
-    threads = experience_context.get("threads")
+    threads = workflow_context.get("threads")
     if not isinstance(threads, list) or not threads:
         raise AppError(
             ErrorCode.INVALID_REQUEST,
-            "'experience_context.threads' is required and must be a non-empty list",
+            "'workflow_context.threads' is required and must be a non-empty list",
         )
 
-    workspace_id_raw = experience_context.get("workspace_id", "")
+    workspace_id_raw = workflow_context.get("workspace_id", "")
     workspace_id = workspace_id_raw.strip() if isinstance(workspace_id_raw, str) else ""
-    workspace_name_raw = experience_context.get("workspace_name", "")
+    workspace_name_raw = workflow_context.get("workspace_name", "")
     workspace_name = workspace_name_raw.strip() if isinstance(workspace_name_raw, str) else ""
     if not workspace_id or not workspace_name:
         raise AppError(
             ErrorCode.INVALID_REQUEST,
-            "'experience_context.workspace_id' and 'workspace_name' are required",
+            "'workflow_context.workspace_id' and 'workspace_name' are required",
         )
 
     model_config = data.get("model")
@@ -215,53 +215,55 @@ def distill_experience():
 
     # Build client and run distillation
     from data_formulator.routes.agents import get_client, _get_ui_lang
-    from data_formulator.agents.agent_experience_distill import ExperienceDistillAgent
+    from data_formulator.agents.agent_workflow_distill import WorkflowDistillAgent
 
     client = get_client(model_config)
 
-    agent = ExperienceDistillAgent(
+    agent = WorkflowDistillAgent(
         client=client,
         language_code=_get_ui_lang(),
         timeout_seconds=timeout_seconds,
     )
     try:
-        md_content = agent.run(experience_context, user_instruction=user_instruction)
+        md_content = agent.run(workflow_context, user_instruction=user_instruction)
     except Exception as exc:
-        logger.warning("Experience distillation LLM call failed: %s", type(exc).__name__)
+        logger.warning("Workflow distillation LLM call failed: %s", type(exc).__name__)
         from data_formulator.error_handler import classify_and_wrap_llm_error
         raise classify_and_wrap_llm_error(exc) from exc
 
-    # Save to knowledge/experiences/
+    # Save to knowledge/workflows/
     store = KnowledgeStore(user_home)
 
-    # Bind the file to the workspace, override title to
-    # "Experience from <workspace name>: <subtitle>", and upsert below.
-    md_content = _apply_session_front_matter(md_content, workspace_id, workspace_name)
+    # Bind the file to the workspace, set the title to the agent-generated
+    # descriptive subtitle, and upsert below.
+    md_content, title_core, filename_hint = _apply_session_front_matter(
+        md_content, workspace_id, workspace_name,
+    )
 
-    filename = _experience_filename(workspace_name)
+    filename = _workflow_filename(filename_hint or title_core or workspace_name)
     rel_path = f"{category_hint}/{filename}" if category_hint else filename
 
-    # Upsert: if a previous experience exists for this workspace at a
+    # Upsert: if a previous workflow exists for this workspace at a
     # different path (e.g. user renamed the workspace), delete it after a
     # successful write so we keep one file per session.
-    existing = store.find_experience_by_workspace_id(workspace_id)
+    existing = store.find_workflow_by_workspace_id(workspace_id)
 
     try:
-        store.write("experiences", rel_path, md_content)
+        store.write("workflows", rel_path, md_content)
     except ValueError as exc:
         raise AppError(ErrorCode.INVALID_REQUEST, str(exc)) from exc
 
     if existing and existing.get("path") and existing["path"] != rel_path:
         try:
-            store.delete("experiences", existing["path"])
+            store.delete("workflows", existing["path"])
         except Exception:
             logger.warning(
-                "Failed to delete stale session experience at %s",
+                "Failed to delete stale session workflow at %s",
                 existing.get("path"),
                 exc_info=True,
             )
 
-    return json_ok({"path": rel_path, "category": "experiences"})
+    return json_ok({"path": rel_path, "category": "workflows"})
 
 
 # ── helpers for session-scoped distillation ───────────────────────────────
@@ -269,16 +271,21 @@ def distill_experience():
 
 def _apply_session_front_matter(
     content: str, workspace_id: str, workspace_name: str,
-) -> str:
-    """Override / inject session-binding fields in the experience front matter.
-
-    - Composes the visible ``title`` as ``Experience from <name>: <subtitle>``
-      using the LLM-emitted ``subtitle`` (preferred) or pre-existing
-      ``title``. The original ``subtitle`` field is removed from the
-      front matter once consumed.
+) -> tuple[str, str, str]:
+    """Override / inject session-binding fields in the workflow front matter.
+
+    - Sets the visible ``title`` to the agent-emitted descriptive
+      ``subtitle`` (preferred) or the pre-existing ``title``, with any
+      legacy ``Workflow from <name>: `` prefix stripped. The ``subtitle``
+      field is removed from the front matter once consumed.
+    - Consumes the agent-emitted short ``filename`` hint (removed from the
+      front matter) and returns it so the caller can name the file without
+      using the long descriptive title.
     - Stamps ``source_workspace_id`` and ``source_workspace_name`` so the
       file can be looked up on subsequent distillations.
     - Forces ``source: distill`` (idempotent if already set).
+
+    Returns ``(content_with_front_matter, title_core, filename_hint)``.
     """
     from data_formulator.knowledge.store import parse_front_matter
 
@@ -287,27 +294,31 @@ def _apply_session_front_matter(
         meta = {}
 
     subtitle = str(meta.pop("subtitle", "") or "").strip()
+    filename_hint = str(meta.pop("filename", "") or "").strip()
     existing_title = str(meta.get("title", "") or "").strip()
 
-    # Strip any "Experience from <prev name>: " prefix from a prior pass so
-    # update-mode runs don't double-prefix when the LLM echoes the title.
-    title_core = subtitle or _strip_experience_prefix(existing_title)
+    # Strip any legacy "Workflow from <prev name>: " (or "Experience from")
+    # prefix so update-mode runs don't carry it forward.
+    title_core = subtitle or _strip_workflow_prefix(existing_title)
     if not title_core:
         title_core = workspace_name
 
-    new_title = f"Experience from {workspace_name}: {title_core}"
-    meta["title"] = new_title
+    meta["title"] = title_core
     meta["source"] = "distill"
     meta["source_workspace_id"] = workspace_id
     meta["source_workspace_name"] = workspace_name
 
-    return _serialize_front_matter(meta, body)
+    return _serialize_front_matter(meta, body), title_core, filename_hint
+
 
+_EXP_PREFIX_RE = re.compile(r"^\s*(?:Workflow|Experience) from .+?:\s*", re.IGNORECASE)
 
-_EXP_PREFIX_RE = re.compile(r"^\s*Experience from .+?:\s*", re.IGNORECASE)
+# Path separators, Windows-reserved chars and control chars that must never
+# appear in a filename derived from untrusted LLM output.
+_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]+')
 
 
-def _strip_experience_prefix(title: str) -> str:
+def _strip_workflow_prefix(title: str) -> str:
     return _EXP_PREFIX_RE.sub("", title).strip()
 
 
@@ -323,16 +334,22 @@ def _serialize_front_matter(meta: dict, body: str) -> str:
     return f"---\n{yaml_text}\n---\n\n{body_text}"
 
 
-def _experience_filename(workspace_name: str) -> str:
-    """Derive a deterministic filename from the workspace name.
+def _workflow_filename(title: str) -> str:
+    """Slugify an LLM-supplied name into a clean, safe ``.md`` filename.
 
-    Re-distilling the same session always lands on the same file.
-    Falls back to a literal slug when sanitisation rejects the name.
+    Re-distilling a session upserts by ``source_workspace_id`` (see caller),
+    so the file is replaced even when the name changes. ``safe_data_filename``
+    enforces the security boundary (basename only, no ``.``/``..``); the slug
+    step just keeps separators and reserved chars out so the name is clean and
+    portable. Unicode (e.g. CJK) is preserved.
     """
     from data_formulator.datalake.parquet_utils import safe_data_filename
 
-    slug = workspace_name.strip().replace(" ", "-").lower()[:80] or "session-experience"
+    cleaned = re.sub(r"\s+", "-", _UNSAFE_FILENAME_CHARS.sub("-", title).strip())
+    cleaned = re.sub(r"-{2,}", "-", cleaned).strip(".-").lower()
+    # Re-trim after the 80-char cut: it can end on a separator ("foo-.md").
+    slug = cleaned[:80].rstrip(".-") or "session-workflow"
     try:
         return safe_data_filename(f"{slug}.md")
     except ValueError:
-        return "session-experience.md"
+        return "session-workflow.md"
diff --git a/src/api/knowledgeApi.ts b/src/api/knowledgeApi.ts
index 7c149f3a..a722c00f 100644
--- a/src/api/knowledgeApi.ts
+++ b/src/api/knowledgeApi.ts
@@ -2,7 +2,7 @@
 // Licensed under the MIT License.
 
 /**
- * Knowledge API client — CRUD, search, and experience distillation.
+ * Knowledge API client — CRUD, search, and workflow distillation.
  *
  * All endpoints use POST with JSON body.  Requests go through
  * {@link fetchWithIdentity} for identity headers and 401 retry.
@@ -14,11 +14,10 @@ import { apiRequest } from '../app/apiClient';
 
 // ── Types ────────────────────────────────────────────────────────────────
 
-export type KnowledgeCategory = 'rules' | 'experiences';
+export type KnowledgeCategory = 'rules' | 'workflows';
 
 export interface KnowledgeItem {
     title: string;
-    tags: string[];
     path: string;
     source: string;
     created: string;
@@ -27,25 +26,24 @@ export interface KnowledgeItem {
     /** Rules only: if true the rule is always injected into the agent prompt. */
     alwaysApply?: boolean;
     /**
-     * Experiences only: workspace id this experience was distilled from.
+     * Workflows only: workspace id this workflow was distilled from.
      * Set by the session-scoped distillation flow (design-docs/24); used
-     * by the KnowledgePanel to find the existing session experience.
+     * by the KnowledgePanel to find the existing session workflow.
      */
     sourceWorkspaceId?: string;
-    /** Experiences only: workspace display name at distillation time. */
+    /** Workflows only: workspace display name at distillation time. */
     sourceWorkspaceName?: string;
 }
 
 export interface KnowledgeLimits {
     rule_description_max: number;
     rules: number;
-    experiences: number;
+    workflows: number;
 }
 
 export interface KnowledgeSearchResult {
     category: KnowledgeCategory;
     title: string;
-    tags: string[];
     path: string;
     snippet: string;
     source: string;
@@ -122,7 +120,7 @@ export async function searchKnowledge(
     return data.results ?? [];
 }
 
-export interface DistillExperienceResult {
+export interface DistillWorkflowResult {
     path: string;
     category: string;
 }
@@ -134,7 +132,7 @@ export interface DistillExperienceResult {
  * a deterministic filename + title. `threads` carries one chronological
  * `events` list per leaf table on screen.
  */
-export interface SessionExperienceContext {
+export interface SessionWorkflowContext {
     context_id?: string;
     workspace_id: string;
     workspace_name: string;
@@ -146,18 +144,18 @@ export interface SessionExperienceContext {
     payload_notes?: string[];
 }
 
-export async function distillSessionExperience(
-    sessionContext: SessionExperienceContext,
+export async function distillSessionWorkflow(
+    sessionContext: SessionWorkflowContext,
     model: Record<string, any>,
     instruction?: string,
     timeoutSeconds?: number,
     signal?: AbortSignal,
-): Promise<DistillExperienceResult> {
-    const { data } = await apiRequest<{ path: string; category: string }>('/api/knowledge/distill-experience', {
+): Promise<DistillWorkflowResult> {
+    const { data } = await apiRequest<{ path: string; category: string }>('/api/knowledge/distill-workflow', {
         method: 'POST',
         headers: JSON_HEADERS,
         body: JSON.stringify({
-            experience_context: sessionContext,
+            workflow_context: sessionContext,
             model,
             user_instruction: instruction,
             timeout_seconds: timeoutSeconds,
diff --git a/src/app/useKnowledgeStore.ts b/src/app/useKnowledgeStore.ts
index 0a6ea65d..6adeb60c 100644
--- a/src/app/useKnowledgeStore.ts
+++ b/src/app/useKnowledgeStore.ts
@@ -5,7 +5,7 @@
  * Knowledge state management — React hooks for knowledge CRUD & search.
  *
  * Uses plain React state (not Redux) because knowledge data is server-side
- * and only needed by the KnowledgePanel and save-as-experience flows.
+ * and only needed by the KnowledgePanel and save-as-workflow flows.
  * Errors are dispatched to the global MessageSnackbar via dfActions.addMessages.
  */
 
@@ -40,16 +40,16 @@ export function useKnowledgeStore() {
     const { t } = useTranslation();
 
     const [rules, setRules] = useState<KnowledgeCategoryState>({ ...EMPTY_CATEGORY });
-    const [experiences, setExperiences] = useState<KnowledgeCategoryState>({ ...EMPTY_CATEGORY });
+    const [workflows, setWorkflows] = useState<KnowledgeCategoryState>({ ...EMPTY_CATEGORY });
 
     const [searchResults, setSearchResults] = useState<KnowledgeSearchResult[]>([]);
     const [searching, setSearching] = useState(false);
 
-    const DEFAULT_LIMITS: KnowledgeLimits = { rule_description_max: 100, rules: 350, experiences: 2000 };
+    const DEFAULT_LIMITS: KnowledgeLimits = { rule_description_max: 100, rules: 350, workflows: 2000 };
     const [limits, setLimits] = useState<KnowledgeLimits>(DEFAULT_LIMITS);
 
-    const stateMap = { rules, experiences };
-    const setterMap = useRef({ rules: setRules, experiences: setExperiences });
+    const stateMap = { rules, workflows };
+    const setterMap = useRef({ rules: setRules, workflows: setWorkflows });
 
     const fetchList = useCallback(async (category: KnowledgeCategory) => {
         const setter = setterMap.current[category];
@@ -71,7 +71,7 @@ export function useKnowledgeStore() {
     const fetchAll = useCallback(async () => {
         await Promise.all([
             fetchList('rules'),
-            fetchList('experiences'),
+            fetchList('workflows'),
             fetchKnowledgeLimits().then(setLimits).catch(() => { /* best-effort */ }),
         ]);
     }, [fetchList]);
@@ -184,7 +184,7 @@ export function useKnowledgeStore() {
 
     return {
         rules,
-        experiences,
+        workflows,
         stateMap,
         limits,
         searchResults,
diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index 7a52125b..8ef952df 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -875,9 +875,9 @@
   "knowledge": {
     "title": "Agent Knowledge",
     "rules": "Rules",
-    "experiences": "Experiences",
+    "workflows": "Workflows",
     "rulesDescription": "Constraints and standards that agents must follow",
-    "experiencesDescription": "Reusable methods, tips, and knowledge distilled from analyses",
+    "workflowsDescription": "Reusable analysis workflows distilled from past sessions that agents can save and replay",
     "newItem": "New",
     "search": "Search",
     "searchPlaceholder": "Search knowledge...",
@@ -902,29 +902,29 @@
     "failedToSave": "Failed to save knowledge",
     "failedToDelete": "Failed to delete knowledge",
     "failedToSearch": "Search failed",
-    "saveAsExperience": "Save as Experience",
-    "saveAsExperienceTitle": "Save as Experience",
-    "distillHint": "Distill experience from this analysis for agents to reuse in future analysis.",
+    "saveAsExperience": "Save as Workflow",
+    "saveAsExperienceTitle": "Save as Workflow",
+    "distillHint": "Distill a workflow from this analysis for agents to save and replay in future sessions.",
     "distillFromHeading": "Distill from",
     "distillFromCaption": "Threads below will be sent to the LLM. Click a thread to inspect its events.",
-    "distillingOverlay": "Distilling experience… this may take a moment.",
+    "distillingOverlay": "Distilling workflow… this may take a moment.",
     "userInstruction": "User instruction (optional)",
     "userInstructionPlaceholder": "what to focus on, what to skip…",
     "distillationInstructions": "Distillation instructions (optional)",
     "distillationInstructionsPlaceholder": "e.g. focus on the data cleaning steps; skip exploratory chart variations; emphasise pitfalls we hit when joining tables…",
-    "distillExperience": "Distill Experience",
-    "distillStarted": "Distilling experience...",
-    "distilling": "Distilling experience...",
-    "distilled": "Experience saved",
+    "distillWorkflow": "Distill Workflow",
+    "distillStarted": "Distilling workflow...",
+    "distilling": "Distilling workflow...",
+    "distilled": "Workflow saved",
     "distillFailedRetry": "Save failed, retry",
-    "failedToDistill": "Failed to distill experience",
-    "distillSessionTitle": "Distill Session Experience",
-    "updateSessionTitle": "Update Session Experience",
-    "distillSessionHint": "Distill lessons from this analysis into a reusable knowledge document.",
-    "distillSessionUpdateHint": "Re-distill lessons from this analysis into the existing knowledge document.",
+    "failedToDistill": "Failed to distill workflow",
+    "distillSessionTitle": "Distill Session Workflow",
+    "updateSessionTitle": "Update Session Workflow",
+    "distillSessionHint": "Distill this analysis into a reusable workflow document that agents can replay.",
+    "distillSessionUpdateHint": "Re-distill this analysis into the existing workflow document.",
     "distillSessionNothing": "No completed analysis threads in this session yet.",
     "distillFromSession": "Distill from this session",
-    "experiencePlaceholderHint": "Save lessons learned",
+    "workflowPlaceholderHint": "Save this analysis as a workflow",
     "updateFromSession": "Update from this session",
     "updateFromSessionHint": "Refresh with new lessons",
     "addNewRule": "Add new rule",
@@ -937,15 +937,21 @@
     "itemCount": "({{count}})",
     "collapse": "Collapse",
     "expand": "Expand",
-    "emptyState": "Add rules or experiences to help AI agents work better.",
-    "rulesHint": "Rules — constraints the agent always follows. Click + to add your own.",
-    "experiencesHint": "Experiences — lessons distilled from your past analyses. Click the placeholder below to distill one from this session.",
+    "emptyState": "Add rules or workflows to help AI agents work better.",
+    "rulesHint": "Constraints the agent always follows.",
+    "workflowsHint": "Analyses distilled from past sessions that the agent can save and replay.",
     "markdownEditor": "Markdown Editor",
     "description": "Description",
     "descriptionPlaceholder": "Short summary of this rule (max {{max}} chars)",
     "alwaysApply": "Always loaded into AI",
     "alwaysApplyHint": "When enabled, this rule is always injected into every AI agent prompt, regardless of context",
     "charCount": "{{current}} / {{max}}",
-    "charCountExceeded": "Exceeds {{max}} character limit ({{current}} / {{max}})"
+    "charCountExceeded": "Exceeds {{max}} character limit ({{current}} / {{max}})",
+    "replay": "Replay",
+    "replayTooltip": "Replay this analysis on the current data",
+    "replayBusy": "The agent is busy — wait for it to finish before replaying.",
+    "replayNoData": "Load and focus a dataset before replaying a workflow.",
+    "replayStarted": "Replaying workflow on the current data…",
+    "replayPrompt": "Reproduce the following analysis workflow on the currently loaded data. Follow the steps in order, adapting any column references to the columns available in the current dataset. It's fine if the result isn't identical — reproduce the same overall analysis.\n\nBefore making large assumptions, check whether the current data can actually support the workflow. If there is a major discrepancy — e.g. a required field or measure is missing, the granularity or shape is very different, or a step has no sensible equivalent on this data — pause and ask me to confirm how to proceed (or briefly explain the mismatch and your proposed adaptation) instead of guessing. Minor differences (renamed columns, extra columns) can be adapted silently.\n\n{{content}}"
   }
 }
diff --git a/src/i18n/locales/zh/common.json b/src/i18n/locales/zh/common.json
index 1740e92a..244f93da 100644
--- a/src/i18n/locales/zh/common.json
+++ b/src/i18n/locales/zh/common.json
@@ -875,9 +875,9 @@
   "knowledge": {
     "title": "Agent 知识",
     "rules": "规则",
-    "experiences": "经验",
+    "workflows": "工作流",
     "rulesDescription": "Agent 必须遵守的约束和编码规范",
-    "experiencesDescription": "从分析中提炼的可复用方法和技巧",
+    "workflowsDescription": "从过往会话中提炼、可供 Agent 保存与重放的可复用分析工作流",
     "newItem": "新建",
     "search": "搜索",
     "searchPlaceholder": "搜索知识...",
@@ -902,29 +902,29 @@
     "failedToSave": "保存知识失败",
     "failedToDelete": "删除知识失败",
     "failedToSearch": "搜索失败",
-    "saveAsExperience": "保存为经验",
-    "saveAsExperienceTitle": "保存为经验",
-    "distillHint": "从本次分析中提炼经验，供 Agent 在后续分析中复用。",
+    "saveAsExperience": "保存为工作流",
+    "saveAsExperienceTitle": "保存为工作流",
+    "distillHint": "从本次分析中提炼工作流，供 Agent 在后续会话中保存与重放。",
     "distillFromHeading": "提炼来源",
     "distillFromCaption": "以下线索将发送给 LLM。点击线索可查看其事件。",
-    "distillingOverlay": "正在提炼经验…请稍候。",
+    "distillingOverlay": "正在提炼工作流…请稍候。",
     "userInstruction": "用户指令（可选）",
     "userInstructionPlaceholder": "重点关注什么、跳过什么…",
     "distillationInstructions": "提炼指令（可选）",
     "distillationInstructionsPlaceholder": "例如：重点关注数据清洗步骤；跳过探索性图表变体；着重记录表连接时遇到的陷阱…",
-    "distillExperience": "提炼经验",
-    "distillStarted": "正在提炼经验...",
-    "distilling": "正在提炼经验...",
-    "distilled": "经验已保存",
-    "distillFailedRetry": "保存经验失败，重试",
-    "failedToDistill": "提炼经验失败",
-    "distillSessionTitle": "提炼会话经验",
-    "updateSessionTitle": "更新会话经验",
-    "distillSessionHint": "从本次分析中提炼经验，生成一篇可复用的知识文档。",
-    "distillSessionUpdateHint": "重新提炼本次分析的经验，覆盖现有的知识文档。",
+    "distillWorkflow": "提炼工作流",
+    "distillStarted": "正在提炼工作流...",
+    "distilling": "正在提炼工作流...",
+    "distilled": "工作流已保存",
+    "distillFailedRetry": "保存工作流失败，重试",
+    "failedToDistill": "提炼工作流失败",
+    "distillSessionTitle": "提炼会话工作流",
+    "updateSessionTitle": "更新会话工作流",
+    "distillSessionHint": "将本次分析提炼为一篇可供 Agent 重放的可复用工作流文档。",
+    "distillSessionUpdateHint": "将本次分析重新提炼到现有的工作流文档中。",
     "distillSessionNothing": "本会话还没有可提炼的分析线索。",
     "distillFromSession": "从本会话提炼",
-    "experiencePlaceholderHint": "保存分析中的经验",
+    "workflowPlaceholderHint": "将本次分析保存为工作流",
     "updateFromSession": "从本会话更新",
     "updateFromSessionHint": "用新经验刷新该条目",
     "addNewRule": "添加新规则",
@@ -937,15 +937,21 @@
     "itemCount": "（{{count}}）",
     "collapse": "收起",
     "expand": "展开",
-    "emptyState": "添加规则、技能或经验，帮助 AI Agent 更好地工作。",
-    "rulesHint": "规则 — Agent 始终遵守的约束。点击 + 添加你自己的规则。",
-    "experiencesHint": "经验 — 从你过往分析中提炼出的经验。点击下方占位项可从本会话提炼一条。",
+    "emptyState": "添加规则或工作流，帮助 AI Agent 更好地工作。",
+    "rulesHint": "Agent 始终遵守的约束。",
+    "workflowsHint": "从过往会话中提炼、Agent 可保存与重放的分析。",
     "markdownEditor": "Markdown 编辑器",
     "description": "描述",
     "descriptionPlaceholder": "规则的简短描述（最多 {{max}} 字符）",
     "alwaysApply": "始终加载到 AI",
     "alwaysApplyHint": "启用后，无论什么场景，此规则都会自动注入到每次 AI Agent 的提示词中",
     "charCount": "{{current}} / {{max}}",
-    "charCountExceeded": "超出 {{max}} 字符限制（{{current}} / {{max}}）"
+    "charCountExceeded": "超过 {{max}} 字符限制（{{current}} / {{max}}）",
+    "replay": "重放",
+    "replayTooltip": "在当前数据上重放此分析",
+    "replayBusy": "Agent 正忙——请等待其完成后再重放。",
+    "replayNoData": "请先加载并聚焦一个数据集，再重放工作流。",
+    "replayStarted": "正在当前数据上重放工作流…",
+    "replayPrompt": "在当前已加载的数据上复现以下分析流程。按顺序执行各步骤，并将其中的列引用调整为当前数据集中可用的列。结果不必完全一致——复现同样的整体分析即可。\n\n在做出较大假设之前，请先确认当前数据是否真的能支撑该流程。如果存在重大差异——例如缺少必需的字段或度量、数据粒度或结构差异很大、或某个步骤在当前数据上没有合理的对应方式——请暂停并向我确认如何继续（或简要说明不匹配之处及你建议的调整方案），而不要凭空猜测。对于细微差异（列被重命名、存在额外的列）可以直接静默调整。\n\n{{content}}"
   }
 }
diff --git a/src/views/DataFrameTable.tsx b/src/views/DataFrameTable.tsx
index dd32ecc9..afb03bbd 100644
--- a/src/views/DataFrameTable.tsx
+++ b/src/views/DataFrameTable.tsx
@@ -127,17 +127,24 @@ export const DataFrameTable: React.FC<DataFrameTableProps> = ({
                         )}
                         {displayCols.map((col, i) => {
                             const desc = col !== '\u2026' ? columnDescriptions?.[col] : undefined;
+                            if (desc) {
+                                return (
+                                    <Tooltip key={i} title={desc} placement="top" enterDelay={400}>
+                                        <Typography component="th" variant="caption"
+                                            sx={{ fontWeight: 600, fontSize: headerFontSize,
+                                                cursor: 'help', textDecoration: 'underline', textDecorationStyle: 'dotted', textUnderlineOffset: 2,
+                                            }}>
+                                            {col}
+                                        </Typography>
+                                    </Tooltip>
+                                );
+                            }
                             return (
-                                <Tooltip key={i} title={desc || ''} placement="top"
-                                    enterDelay={400} disableHoverListener={!desc}>
-                                    <Typography component="th" variant="caption"
-                                        title={desc ? undefined : col}
-                                        sx={{ fontWeight: 600, fontSize: headerFontSize,
-                                            ...(desc ? { cursor: 'help', textDecoration: 'underline', textDecorationStyle: 'dotted', textUnderlineOffset: 2 } : {}),
-                                        }}>
-                                        {col}
-                                    </Typography>
-                                </Tooltip>
+                                <Typography component="th" key={i} variant="caption"
+                                    title={col}
+                                    sx={{ fontWeight: 600, fontSize: headerFontSize }}>
+                                    {col}
+                                </Typography>
                             );
                         })}
                     </tr>
diff --git a/src/views/DataSourceSidebar.tsx b/src/views/DataSourceSidebar.tsx
index 7381a423..c0502fd0 100644
--- a/src/views/DataSourceSidebar.tsx
+++ b/src/views/DataSourceSidebar.tsx
@@ -157,7 +157,7 @@ export const DataSourceSidebar: React.FC<{
     // appears when they try to add a new connector or link a folder.
     const [initialTab, setInitialTab] = useState<'sources' | 'sessions' | 'knowledge'>('sources');
 
-    // External callers (e.g. SaveExperienceButton on success) can ask the
+    // External callers (e.g. workflow distill on success) can ask the
     // sidebar to open and switch to a specific tab.
     useEffect(() => {
         const handler = (e: Event) => {
diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx
index 940cb4ef..248dbe75 100644
--- a/src/views/DataThread.tsx
+++ b/src/views/DataThread.tsx
@@ -1333,17 +1333,6 @@ let SingleThreadGroupView: FC<{
             const mergeIds = derivedTable?.derive?.source as string[] | undefined;
             if (entry.role === 'instruction' && mergeNames && mergeNames.length > 0 && mergeIds && mergeIds.length > 0) {
                 const nextKey = sourceSetKey(mergeIds);
-                // eslint-disable-next-line no-console
-                console.log('[merge-node check]', {
-                    tableId,
-                    parentTableId: parentTable?.id,
-                    initialSourceIds,
-                    prevSourceKey,
-                    mergeIds,
-                    mergeNames,
-                    nextKey,
-                    fires: nextKey !== prevSourceKey,
-                });
                 if (nextKey !== prevSourceKey) {
                     const mergeColor = highlighted ? theme.palette.primary.main : theme.palette.text.secondary;
                     timelineItems.push({
diff --git a/src/views/InteractionEntryCard.tsx b/src/views/InteractionEntryCard.tsx
index 8cfd0000..79686ca2 100644
--- a/src/views/InteractionEntryCard.tsx
+++ b/src/views/InteractionEntryCard.tsx
@@ -242,6 +242,11 @@ export const InteractionEntryCard: React.FC<InteractionEntryCardProps> = memo(({
                 // so they should read stronger than the agent's bubbles.
                 backgroundColor: palette.bgcolor,
                 border: `1px solid ${borderColor.component}`,
+                // Cap very long instructions (e.g. a replayed workflow) so the
+                // card stays compact; the full text scrolls within the cap.
+                maxHeight: 160,
+                overflowY: 'auto',
+                overscrollBehavior: 'contain',
                 ...(highlighted ? { borderLeft: `2px solid ${palette.main}` } : {}),
                 ...clickSx,
             }}>
diff --git a/src/views/KnowledgePanel.tsx b/src/views/KnowledgePanel.tsx
index 8d8a111f..05343a0a 100644
--- a/src/views/KnowledgePanel.tsx
+++ b/src/views/KnowledgePanel.tsx
@@ -4,16 +4,16 @@
 /**
  * KnowledgePanel — panel for browsing and editing knowledge items.
  *
- * Shows two collapsible sections: Rules (flat) and Experiences (flat).
+ * Shows two collapsible sections: Rules (flat) and Workflows (flat).
  * Items are tagged for organization; no subdirectory grouping.
  * Supports search, edit, and delete. Rules can be created directly by
- * the user via the "+" affordance; experiences are produced by the
+ * the user via the "+" affordance; workflows are produced by the
  * agent's distillation flow (see SessionDistill).
  */
 
-import React, { useState, useCallback, useEffect, useRef } from 'react';
+import React, { useState, useCallback, useEffect } from 'react';
 import { useTranslation } from 'react-i18next';
-import { useSelector } from 'react-redux';
+import { useSelector, useDispatch } from 'react-redux';
 import {
     Box,
     Typography,
@@ -26,7 +26,6 @@ import {
     DialogContent,
     DialogActions,
     CircularProgress,
-    Chip,
     Divider,
 } from '@mui/material';
 import { alpha } from '@mui/material/styles';
@@ -34,6 +33,7 @@ import AddIcon from '@mui/icons-material/Add';
 import DeleteOutlineIcon from '@mui/icons-material/DeleteOutline';
 import DescriptionOutlinedIcon from '@mui/icons-material/DescriptionOutlined';
 import SmartToyOutlinedIcon from '@mui/icons-material/SmartToyOutlined';
+import PlayArrowIcon from '@mui/icons-material/PlayArrow';
 import RefreshIcon from '@mui/icons-material/Refresh';
 import Editor from 'react-simple-code-editor';
 
@@ -41,9 +41,9 @@ import { useKnowledgeStore } from '../app/useKnowledgeStore';
 import { deleteKnowledge, type KnowledgeCategory } from '../api/knowledgeApi';
 import type { KnowledgeItem } from '../api/knowledgeApi';
 import { borderColor, radius } from '../app/tokens';
-import { type DataFormulatorState } from '../app/dfSlice';
-import { isLeafDerivedTable, buildLeafEvents } from './experienceContext';
-import { SessionDistillDialog, findSessionExperience } from './SessionDistill';
+import { dfActions, type DataFormulatorState } from '../app/dfSlice';
+import { isLeafDerivedTable, buildLeafEvents } from './workflowContext';
+import { SessionDistillDialog, findSessionWorkflow } from './SessionDistill';
 
 // Default file name and seed body for a brand-new rule. Rules are plain
 // Markdown — the user just edits the body; no front matter is required.
@@ -58,19 +58,18 @@ Describe the constraints or conventions the agent should follow.
 interface ActionRowProps {
     icon: React.ReactNode;
     label: string;
-    hint: string;
     onClick: () => void;
 }
 
-const ActionRow: React.FC<ActionRowProps> = ({ icon, label, hint, onClick }) => (
+const ActionRow: React.FC<ActionRowProps> = ({ icon, label, onClick }) => (
     <Box
         onClick={onClick}
         role="button"
         tabIndex={0}
         sx={{
-            display: 'flex', alignItems: 'flex-start', gap: 0.75,
+            display: 'flex', alignItems: 'center', gap: 0.75,
             mx: 1.5, my: 0.5,
-            px: 1, py: 0.6,
+            px: 1, py: 0.5,
             cursor: 'pointer',
             color: 'primary.main',
             border: theme => `1px solid ${alpha(theme.palette.primary.main, 0.5)}`,
@@ -88,22 +87,12 @@ const ActionRow: React.FC<ActionRowProps> = ({ icon, label, hint, onClick }) =>
             userSelect: 'none',
         }}
     >
-        <Box sx={{ color: 'inherit', display: 'flex', mt: 0.125 }}>{icon}</Box>
-        <Box sx={{ flex: 1, minWidth: 0 }}>
-            <Typography sx={{
-                fontSize: 12, fontWeight: 500, color: 'inherit', wordBreak: 'break-word',
-            }}>
-                {label}
-            </Typography>
-            <Typography
-                className="placeholder-hint"
-                sx={{
-                    fontSize: 10.5, mt: 0.125, color: 'primary.main', opacity: 0.6, wordBreak: 'break-word',
-                }}
-            >
-                {hint}
-            </Typography>
-        </Box>
+        <Box sx={{ color: 'inherit', display: 'flex' }}>{icon}</Box>
+        <Typography sx={{
+            fontSize: 12, fontWeight: 500, color: 'inherit', wordBreak: 'break-word',
+        }}>
+            {label}
+        </Typography>
     </Box>
 );
 
@@ -112,8 +101,9 @@ const ActionRow: React.FC<ActionRowProps> = ({ icon, label, hint, onClick }) =>
 export const KnowledgePanel: React.FC = () => {
     const { t } = useTranslation();
     const store = useKnowledgeStore();
+    const dispatch = useDispatch();
 
-    // For the "distill from this session" placeholder under EXPERIENCES.
+    // For the "distill from this session" placeholder under WORKFLOWS.
     const tables = useSelector((s: DataFormulatorState) => s.tables);
     const charts = useSelector((s: DataFormulatorState) => s.charts);
     const conceptShelfItems = useSelector((s: DataFormulatorState) => s.conceptShelfItems);
@@ -183,35 +173,6 @@ export const KnowledgePanel: React.FC = () => {
         setEditorLoading(false);
     }, [store]);
 
-    // Pending request to auto-open an entry once it appears in the store
-    // (e.g. after the SessionDistillDialog finishes distilling).
-    const pendingOpenRef = useRef<{ category: KnowledgeCategory; path: string } | null>(null);
-
-    useEffect(() => {
-        const handler = (e: Event) => {
-            const detail = (e as CustomEvent).detail || {};
-            const category = (detail.category as KnowledgeCategory | undefined) ?? 'experiences';
-            const path = detail.path as string | undefined;
-            if (path) {
-                pendingOpenRef.current = { category, path };
-            }
-        };
-        window.addEventListener('open-knowledge-panel', handler);
-        return () => window.removeEventListener('open-knowledge-panel', handler);
-    }, []);
-
-    // When the requested entry shows up in the store, open its editor.
-    useEffect(() => {
-        const pending = pendingOpenRef.current;
-        if (!pending) return;
-        const cat = store.stateMap[pending.category];
-        if (!cat?.loaded) return;
-        const item = cat.items.find(i => i.path === pending.path);
-        if (!item) return;
-        pendingOpenRef.current = null;
-        openEditDialog(pending.category, item);
-    }, [store.stateMap, openEditDialog]);
-
     const handleSave = useCallback(async () => {
         if (!editorPath.trim() || !editorContent.trim()) return;
         setEditorSaving(true);
@@ -237,9 +198,9 @@ export const KnowledgePanel: React.FC = () => {
     }, [deleteTarget, store]);
 
     // ── Distill from current session ────────────────────────────────────
-    // The EXPERIENCES placeholder under EXPERIENCES is bound to the
+    // The WORKFLOWS placeholder is bound to the
     // active workspace. When the workspace already has a distilled
-    // experience (matched by `sourceWorkspaceId` in front matter) we
+    // workflow (matched by `sourceWorkspaceId` in front matter) we
     // expose an inline ⟳ Update affordance on the existing entry;
     // otherwise the placeholder opens the dialog in *create* mode.
     // See design-docs/24-session-scoped-distillation.md.
@@ -265,9 +226,9 @@ export const KnowledgePanel: React.FC = () => {
     const selectedModel = allModels.find(m => m.id === selectedModelId);
     const canDistillFromSession = hasDistillableSession && !!selectedModel && !!activeWorkspace;
 
-    const sessionExperience = React.useMemo(
-        () => findSessionExperience(
-            store.stateMap['experiences'].items,
+    const sessionWorkflow = React.useMemo(
+        () => findSessionWorkflow(
+            store.stateMap['workflows'].items,
             activeWorkspace?.id,
         ),
         [store.stateMap, activeWorkspace?.id],
@@ -278,6 +239,21 @@ export const KnowledgePanel: React.FC = () => {
         setSessionDialogOpen(true);
     }, []);
 
+    // ── Replay a workflow ────────────────────────────────────────────
+    // Reads the workflow body and asks the data agent (in SimpleChartRecBox)
+    // to reproduce the captured workflow on the currently loaded data. v1 is
+    // deliberately simple: we hand the whole workflow to the agent in one
+    // request via a window event and let it figure out the rest.
+    // See discussion/replayable-experience-workflow.md.
+    const handleReplay = useCallback(async (item: KnowledgeItem) => {
+        const content = await store.read('workflows', item.path); // workflow body, looked up by the item's path
+        if (content == null) return; // null/undefined body: nothing to replay, bail without notifying the user
+        const prompt = t('knowledge.replayPrompt', { content }); // body is interpolated into the localized prompt template
+        window.dispatchEvent(new CustomEvent('df-replay-workflow', { // consumed by the 'df-replay-workflow' listener in SimpleChartRecBox
+            detail: { prompt, title: item.title },
+        }));
+    }, [store, t]);
+
     // ── Render section ──────────────────────────────────────────────────
 
 
@@ -285,81 +261,85 @@ export const KnowledgePanel: React.FC = () => {
         category: KnowledgeCategory,
         item: KnowledgeItem,
     ) => {
-        const displayName = item.path || item.title;
+        const displayTitle = (item.title || '').replace(/^\s*(?:Workflow|Experience) from .+?:\s*/i, '').trim();
+        const primary = displayTitle || item.title || item.path;
         return (
             <Box
                 key={`${category}/${item.path}`}
                 onClick={() => openEditDialog(category, item)}
                 sx={{
                     display: 'flex', alignItems: 'flex-start', gap: 0.75,
-                    px: 1.5, py: 0.75,
+                    px: 1.5, py: 0.625,
                     cursor: 'pointer',
                     color: 'text.primary',
                     '&:hover': { bgcolor: 'action.hover' },
-                    '&:hover .item-actions': { visibility: 'visible' },
+                    '&:hover .item-actions': { display: 'inline-flex' },
                     userSelect: 'none',
                 }}
             >
-                <DescriptionOutlinedIcon sx={{ fontSize: 16, color: 'text.primary', mt: 0.125 }} />
+                <DescriptionOutlinedIcon sx={{ fontSize: 16, color: 'text.primary', mt: 0.25 }} />
                 <Box sx={{ flex: 1, minWidth: 0 }}>
-                    <Typography sx={{ fontSize: 12.5, fontWeight: 500, wordBreak: 'break-word', color: 'text.primary' }}>
-                        {displayName}
+                    <Typography sx={{ fontSize: 12, fontWeight: 500, lineHeight: 1.45, wordBreak: 'break-word', color: 'text.primary' }}>
+                        {primary}
                     </Typography>
-                    {item.tags.length > 0 && (
-                        <Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 0.25, mt: 0.25 }}>
-                            {item.tags.map(tag => (
-                                <Chip
-                                    key={tag}
-                                    label={tag}
-                                    size="small"
-                                    variant="outlined"
-                                    sx={{ fontSize: 9.5, height: 15, '& .MuiChip-label': { px: 0.5 } }}
-                                />
-                            ))}
-                        </Box>
-                    )}
                 </Box>
                 {item.source === 'agent_summarized' && (
                     <Tooltip title={t('knowledge.sourceAgent')}>
                         <SmartToyOutlinedIcon sx={{ fontSize: 13, color: 'text.secondary', mt: 0.25 }} />
                     </Tooltip>
                 )}
-                <Box className="item-actions" sx={{ display: 'flex', visibility: 'hidden', mt: 0.125 }}>
+                <Box sx={{ display: 'flex', flexDirection: 'column', alignItems: 'center', flexShrink: 0 }}>
+                    {category === 'workflows' && (
+                        <Tooltip title={t('knowledge.replayTooltip')}>
+                            <IconButton
+                                size="small"
+                                onClick={(e) => { e.stopPropagation(); handleReplay(item); }}
+                                sx={{
+                                    p: 0.25,
+                                    color: 'primary.main',
+                                    '&:hover': { bgcolor: theme => alpha(theme.palette.primary.main, 0.08) },
+                                }}
+                            >
+                                <PlayArrowIcon sx={{ fontSize: 17 }} />
+                            </IconButton>
+                        </Tooltip>
+                    )}
                     <IconButton
+                        className="item-actions"
                         size="small"
                         onClick={(e) => { e.stopPropagation(); setDeleteTarget({ category, path: item.path, title: item.title }); }}
-                        sx={{ p: 0.25, color: 'text.secondary', '&:hover': { color: 'error.main' } }}
+                        sx={{ p: 0.25, display: 'none', color: 'text.secondary', '&:hover': { color: 'error.main' } }}
                     >
-                        <DeleteOutlineIcon sx={{ fontSize: 14 }} />
+                        <DeleteOutlineIcon sx={{ fontSize: 16 }} />
                     </IconButton>
                 </Box>
             </Box>
         );
-    }, [openEditDialog, t]);
+    }, [openEditDialog, t, handleReplay]);
 
     const renderCategorySection = useCallback((
         category: KnowledgeCategory,
         label: string,
+        hint: string,
     ) => {
         const state = store.stateMap[category];
 
         // Persistent action row at the top of the section. Rules: opens
-        // the create dialog. Experiences: opens the session distill
+        // the create dialog. Workflows: opens the session distill
         // dialog in create or update mode depending on whether the active
-        // workspace already has a distilled experience.
+        // workspace already has a distilled workflow.
         // See design-docs/24-session-scoped-distillation.md.
         const renderActionRow = () => {
             if (category === 'rules') {
                 return (
                     <ActionRow
-                        icon={<AddIcon sx={{ fontSize: 18, mt: 0.125 }} />}
+                        icon={<AddIcon sx={{ fontSize: 18 }} />}
                         label={t('knowledge.addNewRule', { defaultValue: 'Add new rule' })}
-                        hint={t('knowledge.addNewRuleHint', { defaultValue: 'Set a convention for the agent' })}
                         onClick={() => openCreateDialog('rules')}
                     />
                 );
             }
-            // experiences
+            // workflows
             if (!canDistillFromSession) {
                 // No active workspace, no model, or no distillable thread
                 // yet — show a passive hint instead of a dead action.
@@ -370,15 +350,12 @@ export const KnowledgePanel: React.FC = () => {
                     </Typography>
                 );
             }
-            const updateMode = !!sessionExperience;
+            const updateMode = !!sessionWorkflow;
             if (sessionDistilling) {
                 return (
                     <ActionRow
-                        icon={<CircularProgress size={14} sx={{ mt: 0.25 }} />}
-                        label={t('knowledge.distilling', { defaultValue: 'Distilling experience…' })}
-                        hint={updateMode
-                            ? t('knowledge.updateFromSessionHint', { defaultValue: 'Refresh with new lessons' })
-                            : t('knowledge.experiencePlaceholderHint', { defaultValue: 'Save lessons learned' })}
+                        icon={<CircularProgress size={14} />}
+                        label={t('knowledge.distilling', { defaultValue: 'Distilling workflow…' })}
                         onClick={() => openSessionDistillDialog(updateMode)}
                     />
                 );
@@ -386,32 +363,44 @@ export const KnowledgePanel: React.FC = () => {
             return (
                 <ActionRow
                     icon={updateMode
-                        ? <RefreshIcon sx={{ fontSize: 18, mt: 0.125 }} />
-                        : <AddIcon sx={{ fontSize: 18, mt: 0.125 }} />}
+                        ? <RefreshIcon sx={{ fontSize: 18 }} />
+                        : <AddIcon sx={{ fontSize: 18 }} />}
                     label={updateMode
                         ? t('knowledge.updateFromSession', { defaultValue: 'Update from this session' })
                         : t('knowledge.distillFromSession', { defaultValue: 'Distill from this session' })}
-                    hint={updateMode
-                        ? t('knowledge.updateFromSessionHint', { defaultValue: 'Refresh with new lessons' })
-                        : t('knowledge.experiencePlaceholderHint', { defaultValue: 'Save lessons learned' })}
                     onClick={() => openSessionDistillDialog(updateMode)}
                 />
             );
         };
 
         return (
-            <Box key={category}>
+            <Box key={category} sx={{ pb: 1 }}>
                 <Box
                     sx={{
                         display: 'flex', alignItems: 'center',
-                        px: 1.5, pt: 1, pb: 0.25,
+                        px: 1.5, pt: 2, pb: 0.75,
                     }}
                 >
-                    <Typography sx={{ flex: 1, fontSize: 10.5, fontWeight: 600, color: 'text.primary', letterSpacing: 0.4, textTransform: 'uppercase' }}>
+                    <Typography sx={{ fontSize: 11, fontWeight: 700, color: 'text.secondary', letterSpacing: 0.6, textTransform: 'uppercase' }}>
                         {label}
                     </Typography>
                 </Box>
 
+                {/* Always-visible guidance for the section, set off by a
+                    subtle left accent line below the title. */}
+                <Box
+                    sx={{
+                        mx: 1.5, mb: 0.75,
+                        pl: 1, py: 0.25,
+                        borderLeft: '2px solid',
+                        borderColor: theme => alpha(theme.palette.primary.main, 0.25),
+                    }}
+                >
+                    <Typography sx={{ fontSize: 11, color: 'text.disabled', lineHeight: 1.55 }}>
+                        {hint}
+                    </Typography>
+                </Box>
+
                 {state.loading && (
                     <Box sx={{ display: 'flex', justifyContent: 'center', py: 1.5 }}>
                         <CircularProgress size={16} />
@@ -421,36 +410,18 @@ export const KnowledgePanel: React.FC = () => {
                 {state.items.map(item => renderItem(category, item))}
             </Box>
         );
-    }, [store.stateMap, renderItem, openCreateDialog, t, canDistillFromSession, sessionExperience, sessionDistilling, openSessionDistillDialog]);
+    }, [store.stateMap, renderItem, openCreateDialog, t, canDistillFromSession, sessionWorkflow, sessionDistilling, openSessionDistillDialog]);
 
     // ── Main render ─────────────────────────────────────────────────────
 
     return (
         <Box sx={{ flex: 1, display: 'flex', flexDirection: 'column', overflow: 'hidden' }}>
-            {/* Persistent hint — explains Rules vs Experiences without
-                requiring the user to scroll past empty-state messages. */}
-            <Box
-                sx={{
-                    mx: 1.5, mt: 1, mb: 0.75,
-                    px: 1, py: 0.75,
-                    bgcolor: 'action.hover',
-                    borderRadius: 1,
-                    flexShrink: 0,
-                }}
-            >
-                <Typography sx={{ fontSize: 11.5, color: 'text.primary', lineHeight: 1.5 }}>
-                    {t('knowledge.rulesHint')}
-                </Typography>
-                <Typography sx={{ fontSize: 11.5, color: 'text.primary', lineHeight: 1.5, mt: 0.5 }}>
-                    {t('knowledge.experiencesHint')}
-                </Typography>
-            </Box>
-
-            {/* Content area */}
+            {/* Content area. Rules vs Workflows guidance is rendered as an
+                always-visible hint under each section title (see renderCategorySection). */}
             <Box sx={{ flex: 1, overflowY: 'auto', overflowX: 'hidden', overscrollBehavior: 'contain' }}>
                 <Box>
-                    {renderCategorySection('rules', t('knowledge.rules'))}
-                    {renderCategorySection('experiences', t('knowledge.experiences'))}
+                    {renderCategorySection('rules', t('knowledge.rules'), t('knowledge.rulesHint'))}
+                    {renderCategorySection('workflows', t('knowledge.workflows'), t('knowledge.workflowsHint'))}
                 </Box>
             </Box>
 
@@ -515,22 +486,6 @@ export const KnowledgePanel: React.FC = () => {
                                     }}
                                 />
                             </Box>
-                            {(() => {
-                                const bodyLimit = store.limits[editorCategory as keyof typeof store.limits] as number | undefined;
-                                if (!bodyLimit) return null;
-                                const bodyLen = editorContent.trim().length;
-                                const exceeded = bodyLen > bodyLimit;
-                                return (
-                                    <Typography sx={{
-                                        fontSize: 10, textAlign: 'right',
-                                        color: exceeded ? 'error.main' : bodyLen > bodyLimit * 0.9 ? 'warning.main' : 'text.disabled',
-                                    }}>
-                                        {exceeded
-                                            ? t('knowledge.charCountExceeded', { max: bodyLimit, current: bodyLen })
-                                            : t('knowledge.charCount', { max: bodyLimit, current: bodyLen })}
-                                    </Typography>
-                                );
-                            })()}
                         </>
                     )}
                 </DialogContent>
@@ -555,7 +510,6 @@ export const KnowledgePanel: React.FC = () => {
                             editorSaving
                             || !editorContent.trim()
                             || !editorPath.trim()
-                            || editorContent.trim().length > (store.limits[editorCategory as keyof typeof store.limits] as number ?? Infinity)
                         }
                         variant="contained"
                         sx={{ textTransform: 'none', fontSize: 12 }}
diff --git a/src/views/SessionDistill.tsx b/src/views/SessionDistill.tsx
index fbff4f3b..fd9efeae 100644
--- a/src/views/SessionDistill.tsx
+++ b/src/views/SessionDistill.tsx
@@ -2,19 +2,19 @@
 // Licensed under the MIT License.
 
 /**
- * SessionDistill — session-scoped experience distillation.
+ * SessionDistill — session-scoped workflow distillation.
  *
  * Replaces the old per-result distillation flow with a single
  * session-bound entry. See design-docs/24-session-scoped-distillation.md.
  *
  * Exports:
- *   - buildSessionExperienceContext(workspace, threads): state-independent
+ *   - buildSessionWorkflowContext(workspace, threads): state-independent
  *     payload builder (with size budgeting, see §3.5 of the design doc).
  *   - collectSessionThreads(tables, charts, fields): leaf discovery + per-leaf
  *     event walk against live DataFormulator state.
  *   - SessionDistillDialog: the dialog used by KnowledgePanel for both
  *     create and update modes.
- *   - findSessionExperience: lookup an existing session experience by
+ *   - findSessionWorkflow: lookup an existing session workflow by
  *     workspace id.
  */
 
@@ -51,16 +51,16 @@ import {
 import { store, type AppDispatch } from '../app/store';
 import { handleApiError } from '../app/errorHandler';
 import {
-    distillSessionExperience,
+    distillSessionWorkflow,
     type KnowledgeItem,
-    type SessionExperienceContext,
+    type SessionWorkflowContext,
 } from '../api/knowledgeApi';
 import {
     buildLeafEvents,
     buildDistillModelConfig,
     isLeafDerivedTable,
     TOOL_USES_CODE_FONT,
-} from './experienceContext';
+} from './workflowContext';
 
 // ---------------------------------------------------------------------------
 // Payload size budget (design-docs/24 §3.5)
@@ -81,7 +81,7 @@ const SESSION_EVENT_BUDGET = 60_000;  // bytes of JSON-serialized events
 // ---------------------------------------------------------------------------
 
 /**
- * One pre-built thread, ready for `buildSessionExperienceContext`.
+ * One pre-built thread, ready for `buildSessionWorkflowContext`.
  *
  * Callers produce these by walking their own tables (see
  * `collectSessionThreads` for the in-app implementation) or with hand-built
@@ -97,7 +97,7 @@ export interface SessionThread {
 
 export interface BuildSessionResult {
     /** Payload as it will be sent (after trimming). */
-    payload: SessionExperienceContext;
+    payload: SessionWorkflowContext;
     /** Display threads with labels for the preview UI (post-trim). */
     threads: SessionThread[];
     /** Aggregate stats for the preview (post-trim). */
@@ -107,14 +107,14 @@ export interface BuildSessionResult {
 }
 
 // ---------------------------------------------------------------------------
-// findSessionExperience
+// findSessionWorkflow
 // ---------------------------------------------------------------------------
 
 /**
- * Find the experience entry distilled from the given workspace, if any.
+ * Find the workflow entry distilled from the given workspace, if any.
  * Returns the first match; the backend ensures at most one per workspace.
  */
-export function findSessionExperience(
+export function findSessionWorkflow(
     items: KnowledgeItem[] | undefined,
     workspaceId: string | undefined,
 ): KnowledgeItem | undefined {
@@ -132,7 +132,7 @@ export function findSessionExperience(
  *
  * Threads with no user message are filtered out. Returns `[]` when the
  * session has no distillable thread. Not used in tests — tests construct
- * `SessionThread[]` directly and call `buildSessionExperienceContext`.
+ * `SessionThread[]` directly and call `buildSessionWorkflowContext`.
  */
 export function collectSessionThreads(
     tables: DictTable[],
@@ -162,16 +162,16 @@ export function collectSessionThreads(
 }
 
 // ---------------------------------------------------------------------------
-// buildSessionExperienceContext — pure (workspace, threads) → payload
+// buildSessionWorkflowContext — pure (workspace, threads) → payload
 // ---------------------------------------------------------------------------
 
 /**
- * Assemble the multi-thread payload sent to `/api/knowledge/distill-experience`.
+ * Assemble the multi-thread payload sent to `/api/knowledge/distill-workflow`.
  *
  * State-independent: takes pre-built threads and a workspace identity.
  * Returns `null` when `threads` is empty.
  */
-export function buildSessionExperienceContext(
+export function buildSessionWorkflowContext(
     workspace: { id: string; displayName: string },
     threads: SessionThread[],
 ): BuildSessionResult | null {
@@ -179,7 +179,7 @@ export function buildSessionExperienceContext(
 
     const { trimmedThreads, notes } = trimToBudget(threads, SESSION_EVENT_BUDGET);
 
-    const payload: SessionExperienceContext = {
+    const payload: SessionWorkflowContext = {
         context_id: workspace.id,
         workspace_id: workspace.id,
         workspace_name: workspace.displayName,
@@ -308,7 +308,7 @@ export const SessionDistillDialog: React.FC<SessionDistillDialogProps> = ({
     const built = useMemo(() => {
         if (!open || !activeWorkspace) return null;
         const threads = collectSessionThreads(tables, charts, conceptShelfItems);
-        return buildSessionExperienceContext(activeWorkspace, threads);
+        return buildSessionWorkflowContext(activeWorkspace, threads);
     }, [open, activeWorkspace, tables, charts, conceptShelfItems]);
 
     const [userInstruction, setUserInstruction] = useState('');
@@ -330,6 +330,10 @@ export const SessionDistillDialog: React.FC<SessionDistillDialogProps> = ({
         setStatus('running');
         onRunningChange?.(true);
         const instruction = userInstruction.trim() || undefined;
+        // Close the dialog right away — distillation continues in the
+        // background and surfaces its result via the events/toast below.
+        setUserInstruction('');
+        onClose();
 
         try {
             const modelConfig = buildDistillModelConfig(selectedModel as ModelConfig);
@@ -338,7 +342,7 @@ export const SessionDistillDialog: React.FC<SessionDistillDialogProps> = ({
             const timeoutId = setTimeout(() => controller.abort(), timeoutSeconds * 1000);
             let result;
             try {
-                result = await distillSessionExperience(
+                result = await distillSessionWorkflow(
                     built.payload, modelConfig, instruction, timeoutSeconds, controller.signal,
                 );
             } finally {
@@ -351,14 +355,12 @@ export const SessionDistillDialog: React.FC<SessionDistillDialogProps> = ({
                 value: t('knowledge.distilled'),
             }));
             window.dispatchEvent(new CustomEvent('knowledge-changed', {
-                detail: { category: 'experiences' },
+                detail: { category: 'workflows' },
             }));
             window.dispatchEvent(new CustomEvent('open-knowledge-panel', {
-                detail: { category: 'experiences', path: result.path },
+                detail: { category: 'workflows', path: result.path },
             }));
             setStatus('idle');
-            setUserInstruction('');
-            onClose();
         } catch (e: unknown) {
             setStatus('failed');
             handleApiError(e, 'knowledge');
@@ -375,8 +377,8 @@ export const SessionDistillDialog: React.FC<SessionDistillDialogProps> = ({
         <Dialog open={open} onClose={handleClose} maxWidth="sm" fullWidth>
             <DialogTitle sx={{ fontSize: 15, pb: 0.5 }}>
                 {updateMode
-                    ? t('knowledge.updateSessionTitle', { defaultValue: 'Update Session Experience' })
-                    : t('knowledge.distillSessionTitle', { defaultValue: 'Distill Session Experience' })}
+                    ? t('knowledge.updateSessionTitle', { defaultValue: 'Update Session Workflow' })
+                    : t('knowledge.distillSessionTitle', { defaultValue: 'Distill Session Workflow' })}
             </DialogTitle>
             <DialogContent sx={{
                 display: 'flex', flexDirection: 'column', gap: 1.5,
@@ -387,10 +389,10 @@ export const SessionDistillDialog: React.FC<SessionDistillDialogProps> = ({
                     <Typography sx={{ fontSize: 11, color: 'text.secondary', lineHeight: 1.4 }}>
                         {updateMode
                             ? t('knowledge.distillSessionUpdateHint', {
-                                defaultValue: 'Re-distill lessons from this analysis into the existing knowledge document.',
+                                defaultValue: 'Re-distill this analysis into the existing workflow document.',
                             })
                             : t('knowledge.distillSessionHint', {
-                                defaultValue: 'Distill lessons from this analysis into a reusable knowledge document.',
+                                defaultValue: 'Distill this analysis into a reusable workflow document that agents can replay.',
                             })}
                     </Typography>
                 </Box>
@@ -479,7 +481,7 @@ export const SessionDistillDialog: React.FC<SessionDistillDialogProps> = ({
                         ? t('knowledge.distilling')
                         : updateMode
                             ? t('knowledge.updateSession', { defaultValue: 'Update' })
-                            : t('knowledge.distillExperience')}
+                            : t('knowledge.distillWorkflow')}
                 </Button>
             </DialogActions>
         </Dialog>
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index 23b30c7c..e94d2192 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -38,8 +38,6 @@ import AddIcon from '@mui/icons-material/Add';
 import TipsAndUpdatesIcon from '@mui/icons-material/TipsAndUpdates';
 import StopIcon from '@mui/icons-material/Stop';
 
-import AutoGraphIcon from '@mui/icons-material/AutoGraph';
-import DescriptionOutlinedIcon from '@mui/icons-material/DescriptionOutlined';
 import InsertDriveFileOutlinedIcon from '@mui/icons-material/InsertDriveFileOutlined';
 import { borderColor, transition } from '../app/tokens';
 import { Theme } from '@mui/material/styles';
@@ -70,7 +68,7 @@ const AgentWorkingOverlay: FC<{ message?: string; elapsed?: number; theme: Theme
         }}>
             <Box sx={{ display: 'flex', flexDirection: 'row', alignItems: 'center', gap: 0.75 }}>
                 <WritingPencil size={12} />
-                <Typography variant="body2" color="text.secondary" sx={{ fontWeight: 400, fontSize: 12, lineHeight: 1.5 }}>
+                <Typography variant="body2" color="text.secondary" sx={{ fontWeight: 500, fontSize: 11.5, lineHeight: 1.4 }}>
                     {t('chartRec.agentWorking')}
                 </Typography>
             </Box>
@@ -96,13 +94,13 @@ const AgentWorkingOverlay: FC<{ message?: string; elapsed?: number; theme: Theme
             )}
             <Typography variant="body2" sx={{
                 color: 'text.disabled',
-                fontSize: 12,
+                fontSize: 11,
                 textAlign: 'center',
                 display: '-webkit-box',
                 WebkitLineClamp: 3,
                 WebkitBoxOrient: 'vertical',
                 overflow: 'hidden',
-                lineHeight: 1.35,
+                lineHeight: 1.45,
                 wordBreak: 'break-word',
             }}>
                 {latestMessage}{elapsedSuffix}
@@ -1354,6 +1352,39 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         exploreFromChat(prompt, undefined, displayPrompt);
     }, [reportFromChat, exploreFromChat, selectedAgent, clarificationQuestions, clarifyAnswers]);
 
+    // Replay a workflow: the KnowledgePanel fires `df-replay-workflow`
+    // with a prompt describing the captured workflow; we hand it straight to
+    // the data agent on the currently focused dataset. v1 is deliberately
+    // simple — one request, let the agent reproduce the analysis on its own.
+    // See discussion/replayable-experience-workflow.md.
+    useEffect(() => {
+        const handler = (e: Event) => {
+            const prompt = (e as CustomEvent).detail?.prompt as string | undefined; // NOTE(review): only detail.prompt is read; detail.title from the dispatcher is unused here
+            if (!prompt) return; // missing or empty prompt: ignore the event
+            if (isChatFormulating) { // presumably an agent request is already running: report busy and drop this replay
+                dispatch(dfActions.addMessages({
+                    timestamp: Date.now(), type: 'error',
+                    component: 'data-agent', value: t('knowledge.replayBusy'),
+                }));
+                return;
+            }
+            if (!focusedTableId) { // no focused table to replay against: report and drop this replay
+                dispatch(dfActions.addMessages({
+                    timestamp: Date.now(), type: 'error',
+                    component: 'data-agent', value: t('knowledge.replayNoData'),
+                }));
+                return;
+            }
+            dispatch(dfActions.addMessages({ // announce the replay before handing the prompt to the agent
+                timestamp: Date.now(), type: 'info',
+                component: 'data-agent', value: t('knowledge.replayStarted'),
+            }));
+            exploreFromChat(prompt); // single-argument call: no separate display prompt is supplied
+        };
+        window.addEventListener('df-replay-workflow', handler);
+        return () => window.removeEventListener('df-replay-workflow', handler); // cleanup removes the same handler instance on re-run/unmount
+    }, [exploreFromChat, isChatFormulating, focusedTableId, dispatch, t]); // handler closes over all of these, so it is re-subscribed when any changes
+
     const resumeFromClarification = useCallback((responses: ClarificationResponse[]) => {
         if (!pendingClarification) return;
         // Pass the formatted display string as `prompt` — it powers both the
@@ -1744,9 +1775,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                                 '&:hover': { backgroundColor: alpha(isReportMode ? theme.palette.warning.main : theme.palette.primary.main, 0.08) },
                             }}
                         >
-                            {selectedAgent === 'explore'
-                                ? <AutoGraphIcon sx={{ fontSize: '14px !important' }} />
-                                : <DescriptionOutlinedIcon sx={{ fontSize: '14px !important' }} />}
                             {selectedAgent === 'explore' ? t('chartRec.modeExplore') : t('chartRec.modeReport')}
                         </Button>
                     </Tooltip>
@@ -1761,7 +1789,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                             <span>
                                 <IconButton
                                     size="small"
-                                    sx={{ p: 0.5, color: theme.palette.secondary.main }}
+                                    sx={{ p: 0.5, color: theme.palette.primary.main }}
                                     disabled={!focusedTableId || isChatFormulating || !!pendingClarification}
                                     onClick={() => submitChat(t('chartRec.exploreIdeasPrompt'), undefined, t('chartRec.askedForRecommendations'))}
                                 >
diff --git a/src/views/experienceContext.ts b/src/views/workflowContext.ts
similarity index 98%
rename from src/views/experienceContext.ts
rename to src/views/workflowContext.ts
index 98ec1c80..dac6e006 100644
--- a/src/views/experienceContext.ts
+++ b/src/views/workflowContext.ts
@@ -2,8 +2,8 @@
 // Licensed under the MIT License.
 
 /**
- * experienceContext — pure helpers that turn DataFormulator state into
- * the timeline payload sent to `/api/knowledge/distill-experience`.
+ * workflowContext — pure helpers that turn DataFormulator state into
+ * the timeline payload sent to `/api/knowledge/distill-workflow`.
  *
  * No React, no Redux. Used by:
  *   - SessionDistill.collectSessionThreads (live distillation)
diff --git a/tests/backend/agents/test_agent_knowledge_integration.py b/tests/backend/agents/test_agent_knowledge_integration.py
index 3efc65df..4d738635 100644
--- a/tests/backend/agents/test_agent_knowledge_integration.py
+++ b/tests/backend/agents/test_agent_knowledge_integration.py
@@ -62,7 +62,7 @@ def user_home(tmp_path):
     rules_dir.mkdir(parents=True)
     (rules_dir / "roi.md").write_text(RULE_MD, encoding="utf-8")
 
-    exp_dir = tmp_path / "knowledge" / "experiences" / "cleaning"
+    exp_dir = tmp_path / "knowledge" / "workflows" / "cleaning"
     exp_dir.mkdir(parents=True)
     (exp_dir / "missing.md").write_text(SKILL_MD, encoding="utf-8")
 
@@ -170,11 +170,11 @@ def test_no_match_no_injection(self, mock_client, mock_workspace, user_home):
     def test_max_five_items(self, mock_client, mock_workspace, tmp_path):
         rules_dir = tmp_path / "knowledge" / "rules"
         rules_dir.mkdir(parents=True)
-        exp_dir = tmp_path / "knowledge" / "experiences" / "common"
+        exp_dir = tmp_path / "knowledge" / "workflows" / "common"
         exp_dir.mkdir(parents=True)
         for i in range(10):
             (exp_dir / f"exp-{i}.md").write_text(
-                f"---\ntitle: Common Experience {i}\ntags: [common]\n"
+                f"---\ntitle: Common Workflow {i}\ntags: [common]\n"
                 f"created: 2026-04-26\nupdated: 2026-04-26\n---\n"
                 f"Content about common topic {i}.\n",
                 encoding="utf-8",
@@ -247,7 +247,7 @@ def test_agent_works_without_knowledge(self, mock_client, mock_workspace):
     def test_empty_knowledge_dir(self, mock_client, mock_workspace, tmp_path):
         """Agent with empty knowledge dir works normally."""
         (tmp_path / "knowledge" / "rules").mkdir(parents=True)
-        (tmp_path / "knowledge" / "experiences").mkdir(parents=True)
+        (tmp_path / "knowledge" / "workflows").mkdir(parents=True)
         agent = _make_agent(mock_client, mock_workspace, tmp_path)
         prompt = agent._build_system_prompt()
         assert "User Rules" not in prompt
diff --git a/tests/backend/agents/test_experience_distill.py b/tests/backend/agents/test_workflow_distill.py
similarity index 74%
rename from tests/backend/agents/test_experience_distill.py
rename to tests/backend/agents/test_workflow_distill.py
index a3b823c8..e44d4a86 100644
--- a/tests/backend/agents/test_experience_distill.py
+++ b/tests/backend/agents/test_workflow_distill.py
@@ -1,13 +1,13 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-"""Tests for ExperienceDistillAgent and the /api/knowledge/distill-experience endpoint.
+"""Tests for WorkflowDistillAgent and the /api/knowledge/distill-workflow endpoint.
 
 Covers:
-- _extract_context_summary correctly extracts experience context
+- _extract_context_summary correctly extracts workflow context
 - Output Markdown includes valid YAML front matter
 - front matter contains source: distill and source metadata
-- Generated experience file written to correct directory
+- Generated workflow file written to correct directory
 - category_hint controls sub-directory
 """
 
@@ -18,8 +18,14 @@
 import flask
 import pytest
 
-from data_formulator.agents.agent_experience_distill import ExperienceDistillAgent
-from data_formulator.knowledge.store import parse_front_matter
+from data_formulator.agents.agent_workflow_distill import WorkflowDistillAgent
+from data_formulator.knowledge.store import (
+    KNOWLEDGE_LIMITS,
+    WORKFLOW_HARD_MAX,
+    parse_front_matter,
+)
+
+WORKFLOW_SOFT_LIMIT = KNOWLEDGE_LIMITS["workflows"]
 
 pytestmark = [pytest.mark.backend]
 
@@ -73,7 +79,7 @@
     },
 ]
 
-SAMPLE_EXPERIENCE_CONTEXT = {
+SAMPLE_WORKFLOW_CONTEXT = {
     "context_id": "ws-1",
     "workspace_id": "ws-1",
     "workspace_name": "Sales Region Analysis",
@@ -86,7 +92,7 @@
 
 class TestExtractContextSummary:
     def test_renders_each_event_type(self):
-        summary = ExperienceDistillAgent._extract_context_summary(SAMPLE_EXPERIENCE_CONTEXT)
+        summary = WorkflowDistillAgent._extract_context_summary(SAMPLE_WORKFLOW_CONTEXT)
         # message events
         assert "[user→data-agent/prompt]" in summary
         assert "Show sales by region" in summary
@@ -113,7 +119,7 @@ def test_renders_each_event_type(self):
         assert "encoding: x=region(nominal)" in summary
 
     def test_empty_events_returns_marker(self):
-        summary = ExperienceDistillAgent._extract_context_summary({})
+        summary = WorkflowDistillAgent._extract_context_summary({})
         assert summary == "(empty context)"
 
     def test_user_content_is_not_displaycontent(self):
@@ -131,7 +137,7 @@ def test_user_content_is_not_displaycontent(self):
                 }],
             }],
         }
-        summary = ExperienceDistillAgent._extract_context_summary(ctx)
+        summary = WorkflowDistillAgent._extract_context_summary(ctx)
         assert "raw text" in summary
 
     def test_skips_non_dict_events(self):
@@ -140,7 +146,7 @@ def test_skips_non_dict_events(self):
             {"type": "message", "from": "user", "to": "data-agent",
              "role": "prompt", "content": "ok"},
         ]}]}
-        summary = ExperienceDistillAgent._extract_context_summary(ctx)
+        summary = WorkflowDistillAgent._extract_context_summary(ctx)
         assert "[user→data-agent/prompt]" in summary
         # No crashes; the bogus entries are silently dropped.
 
@@ -154,7 +160,7 @@ def test_create_table_basic(self):
             "sample_rows": [{"a": 1}],
             "code": "x = 1",
         }]}]}
-        summary = ExperienceDistillAgent._extract_context_summary(ctx)
+        summary = WorkflowDistillAgent._extract_context_summary(ctx)
         assert "[create_table] t1" in summary
 
     def test_create_chart_without_encoding(self):
@@ -163,7 +169,7 @@ def test_create_chart_without_encoding(self):
             "related_table_id": "t1",
             "mark_or_type": "line",
         }]}]}
-        summary = ExperienceDistillAgent._extract_context_summary(ctx)
+        summary = WorkflowDistillAgent._extract_context_summary(ctx)
         assert "[create_chart] line on t1" in summary
         assert "encoding:" not in summary
 
@@ -187,7 +193,7 @@ def test_renders_multi_thread_with_headers(self):
                 },
             ],
         }
-        summary = ExperienceDistillAgent._extract_context_summary(ctx)
+        summary = WorkflowDistillAgent._extract_context_summary(ctx)
         assert "### Thread 1 (id=leaf-a)" in summary
         assert "### Thread 2 (id=leaf-b)" in summary
         assert "load gas prices" in summary
@@ -236,10 +242,10 @@ def _mock_client(self):
 
     def test_produces_valid_markdown(self):
         client = self._mock_client()
-        agent = ExperienceDistillAgent(client=client)
+        agent = WorkflowDistillAgent(client=client)
 
         with patch.object(agent, "_call_llm", return_value=MOCK_CONTEXT_RESPONSE):
-            result = agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            result = agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         assert result.startswith("---")
         meta, body = parse_front_matter(result)
@@ -249,11 +255,11 @@ def test_produces_valid_markdown(self):
 
     def test_fallback_front_matter_added(self):
         client = self._mock_client()
-        agent = ExperienceDistillAgent(client=client)
+        agent = WorkflowDistillAgent(client=client)
 
         no_fm_response = "# Sales Analysis\n\nJust some content."
         with patch.object(agent, "_call_llm", return_value=no_fm_response):
-            result = agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            result = agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         assert result.startswith("---")
         meta, _ = parse_front_matter(result)
@@ -261,11 +267,11 @@ def test_fallback_front_matter_added(self):
         assert meta["source_context"] == "ws-1"
 
     def test_retries_once_when_body_too_long(self):
-        """If first LLM call produces body > limit, agent retries with condensation prompt."""
+        """If first LLM call produces body over the soft target, agent retries with condensation prompt."""
         client = self._mock_client()
-        agent = ExperienceDistillAgent(client=client)
+        agent = WorkflowDistillAgent(client=client)
 
-        long_body = "x" * 3000
+        long_body = "x" * (WORKFLOW_SOFT_LIMIT + 1000)
         long_response = (
             "---\ntitle: Long\ntags: []\ncreated: 2026-01-01\n"
             "updated: 2026-01-01\nsource: distill\nsource_context: t1\n---\n\n"
@@ -283,18 +289,18 @@ def fake_call_llm(messages):
             return short_response
 
         with patch.object(agent, "_call_llm", side_effect=fake_call_llm):
-            result = agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            result = agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         assert call_count == 2
         _, body = parse_front_matter(result)
-        assert len(body.strip()) <= 2000
+        assert len(body.strip()) <= WORKFLOW_SOFT_LIMIT
 
     def test_retry_asks_for_slack_under_limit(self):
-        """The retry prompt asks the model for less than the hard limit."""
+        """The retry prompt asks the model for less than the soft target."""
         client = self._mock_client()
-        agent = ExperienceDistillAgent(client=client)
+        agent = WorkflowDistillAgent(client=client)
 
-        long_body = "x" * 3000
+        long_body = "x" * (WORKFLOW_SOFT_LIMIT + 1000)
         long_response = (
             "---\ntitle: L\ntags: []\ncreated: 2026-01-01\n"
             "updated: 2026-01-01\nsource: distill\nsource_context: t1\n---\n\n"
@@ -309,21 +315,21 @@ def fake_call_llm(messages):
             return long_response if len(captured) == 1 else MOCK_CONTEXT_RESPONSE
 
         with patch.object(agent, "_call_llm", side_effect=fake_call_llm):
-            agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         assert len(captured) == 2
         retry_prompt = captured[1][-1]["content"]
-        # Must mention the slacked target (limit minus margin), not the raw limit.
-        expected_target = 2000 - agent.RETRY_MARGIN
-        assert f"within {expected_target} characters" in retry_prompt
+        # Must mention the slacked target (soft limit minus margin).
+        expected_target = WORKFLOW_SOFT_LIMIT - agent.RETRY_MARGIN
+        assert f"around {expected_target} characters" in retry_prompt
 
     def test_hard_trims_when_retry_still_over_limit(self):
-        """If the retry still overshoots, body is hard-trimmed to fit the limit."""
+        """If the retry still blows past the hard ceiling, body is hard-trimmed to fit it."""
         client = self._mock_client()
-        agent = ExperienceDistillAgent(client=client)
+        agent = WorkflowDistillAgent(client=client)
 
-        first_body = "x" * 3000
-        retry_body = "y" * 2014  # mimics the real-world failure: 14 over
+        first_body = "x" * (WORKFLOW_SOFT_LIMIT + 1000)
+        retry_body = "y" * (WORKFLOW_HARD_MAX + 14)  # mimics retry still over the ceiling
         front_matter = (
             "---\ntitle: T\ntags: []\ncreated: 2026-01-01\n"
             "updated: 2026-01-01\nsource: distill\nsource_context: t1\n---\n\n"
@@ -339,13 +345,13 @@ def fake_call_llm(messages):
             return resp
 
         with patch.object(agent, "_call_llm", side_effect=fake_call_llm):
-            result = agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            result = agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         # Both LLM calls happened.
         assert call_count == 2
-        # Final body fits the hard limit (no save failure).
+        # Final body fits the hard ceiling (no save failure).
         _, body = parse_front_matter(result)
-        assert len(body.strip()) <= 2000
+        assert len(body.strip()) <= WORKFLOW_HARD_MAX
         # Truncation marker is present so the user can see it was trimmed.
         assert "truncated" in body
         # Front matter preserved.
@@ -355,7 +361,7 @@ def fake_call_llm(messages):
     def test_no_retry_when_body_within_limit(self):
         """If first LLM call is within limit, no retry happens."""
         client = self._mock_client()
-        agent = ExperienceDistillAgent(client=client)
+        agent = WorkflowDistillAgent(client=client)
 
         call_count = 0
 
@@ -365,14 +371,14 @@ def fake_call_llm(messages):
             return MOCK_CONTEXT_RESPONSE
 
         with patch.object(agent, "_call_llm", side_effect=fake_call_llm):
-            agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         assert call_count == 1
 
     def test_language_instruction_injected_into_system_prompt(self):
         client = self._mock_client()
         zh_instruction = "[LANGUAGE INSTRUCTION]\nWrite in Simplified Chinese."
-        agent = ExperienceDistillAgent(client=client, language_instruction=zh_instruction)
+        agent = WorkflowDistillAgent(client=client, language_instruction=zh_instruction)
 
         captured_messages = []
 
@@ -381,7 +387,7 @@ def fake_call_llm(messages):
             return MOCK_CONTEXT_RESPONSE
 
         with patch.object(agent, "_call_llm", side_effect=fake_call_llm):
-            agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         system_content = captured_messages[0]["content"]
         assert "[LANGUAGE INSTRUCTION]" in system_content
@@ -389,7 +395,7 @@ def fake_call_llm(messages):
 
     def test_language_code_zh_injects_chinese_instruction(self):
         client = self._mock_client()
-        agent = ExperienceDistillAgent(client=client, language_code="zh")
+        agent = WorkflowDistillAgent(client=client, language_code="zh")
 
         captured_messages = []
 
@@ -398,7 +404,7 @@ def fake_call_llm(messages):
             return MOCK_CONTEXT_RESPONSE
 
         with patch.object(agent, "_call_llm", side_effect=fake_call_llm):
-            agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         system_content = captured_messages[0]["content"]
         assert "Simplified Chinese" in system_content
@@ -406,7 +412,7 @@ def fake_call_llm(messages):
 
     def test_language_code_en_no_extra_instruction(self):
         client = self._mock_client()
-        agent = ExperienceDistillAgent(client=client, language_code="en")
+        agent = WorkflowDistillAgent(client=client, language_code="en")
 
         captured_messages = []
 
@@ -415,27 +421,45 @@ def fake_call_llm(messages):
             return MOCK_CONTEXT_RESPONSE
 
         with patch.object(agent, "_call_llm", side_effect=fake_call_llm):
-            agent.run(SAMPLE_EXPERIENCE_CONTEXT)
+            agent.run(SAMPLE_WORKFLOW_CONTEXT)
 
         system_content = captured_messages[0]["content"]
         assert "in English" in system_content
         assert "[LANGUAGE INSTRUCTION]" not in system_content
 
 
-# ── _experience_filename ──────────────────────────────────────────────────
+# ── _workflow_filename ──────────────────────────────────────────────────
 
 
-class TestExperienceFilename:
-    def test_derives_from_workspace_name(self):
-        from data_formulator.routes.knowledge import _experience_filename
-        name = _experience_filename("Sales Analysis Pattern")
+class TestWorkflowFilename:
+    def test_derives_from_title(self):
+        from data_formulator.routes.knowledge import _workflow_filename
+        name = _workflow_filename("Sales Analysis Pattern")
         assert name.endswith(".md")
         assert "sales-analysis-pattern" in name.lower()
 
-    def test_fallback_when_workspace_name_blank(self):
-        from data_formulator.routes.knowledge import _experience_filename
-        name = _experience_filename("   ")
-        assert name == "session-experience.md"
+    def test_fallback_when_title_blank(self):
+        from data_formulator.routes.knowledge import _workflow_filename
+        name = _workflow_filename("   ")
+        assert name == "session-workflow.md"
+
+    def test_rejects_path_traversal(self):
+        from data_formulator.routes.knowledge import _workflow_filename
+        # An LLM-supplied name must never escape the workflows directory.
+        for evil in ("../../etc/passwd", "..\\..\\win", "/etc/shadow", "a/b/c"):
+            name = _workflow_filename(evil)
+            assert "/" not in name
+            assert "\\" not in name
+            assert ".." not in name
+            assert name.endswith(".md")
+
+    def test_strips_reserved_and_control_chars(self):
+        from data_formulator.routes.knowledge import _workflow_filename
+        name = _workflow_filename('sales:report*?"<>|\x00 v1')
+        assert name.endswith(".md")
+        for ch in ':*?"<>|\x00':
+            assert ch not in name
+        assert name == "sales-report-v1.md"
 
 
 # ── API endpoint ──────────────────────────────────────────────────────────
@@ -453,7 +477,7 @@ def app(self, tmp_path):
         _app.register_blueprint(knowledge_bp)
         register_error_handlers(_app)
 
-        (tmp_path / "knowledge" / "experiences").mkdir(parents=True)
+        (tmp_path / "knowledge" / "workflows").mkdir(parents=True)
 
         with patch("data_formulator.routes.knowledge.get_identity_id", return_value="test-user"), \
              patch("data_formulator.routes.knowledge.get_user_home", return_value=tmp_path):
@@ -464,14 +488,14 @@ def client(self, app):
         return app.test_client()
 
     def test_missing_context_returns_error(self, client):
-        resp = client.post("/api/knowledge/distill-experience",
+        resp = client.post("/api/knowledge/distill-workflow",
                            json={"model": {"endpoint": "openai", "model": "gpt-4o"}})
         data = resp.get_json()
         assert data["status"] == "error"
 
     def test_missing_model_returns_error(self, client):
-        resp = client.post("/api/knowledge/distill-experience",
-                           json={"experience_context": SAMPLE_EXPERIENCE_CONTEXT})
+        resp = client.post("/api/knowledge/distill-workflow",
+                           json={"workflow_context": SAMPLE_WORKFLOW_CONTEXT})
         data = resp.get_json()
         assert data["status"] == "error"
 
@@ -483,9 +507,9 @@ def test_missing_events_returns_error(self, client):
             "workspace_name": "Demo",
             "threads": [],
         }
-        resp = client.post("/api/knowledge/distill-experience",
+        resp = client.post("/api/knowledge/distill-workflow",
                            json={
-                               "experience_context": bad_context,
+                               "workflow_context": bad_context,
                                "model": {"endpoint": "openai", "model": "gpt-4o", "api_key": "test"},
                            })
         data = resp.get_json()
@@ -497,9 +521,9 @@ def test_missing_events_field_returns_error(self, client):
             "workspace_id": "ws-1",
             "workspace_name": "Demo",
         }  # no 'threads' key
-        resp = client.post("/api/knowledge/distill-experience",
+        resp = client.post("/api/knowledge/distill-workflow",
                            json={
-                               "experience_context": bad_context,
+                               "workflow_context": bad_context,
                                "model": {"endpoint": "openai", "model": "gpt-4o", "api_key": "test"},
                            })
         data = resp.get_json()
@@ -508,22 +532,22 @@ def test_missing_events_field_returns_error(self, client):
     def test_successful_distill(self, client, tmp_path):
         with patch("data_formulator.routes.agents.get_client") as mock_gc, \
              patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \
-             patch("data_formulator.agents.agent_experience_distill.ExperienceDistillAgent.run",
+             patch("data_formulator.agents.agent_workflow_distill.WorkflowDistillAgent.run",
                    return_value=MOCK_CONTEXT_RESPONSE):
 
             mock_gc.return_value = MagicMock()
-            resp = client.post("/api/knowledge/distill-experience",
+            resp = client.post("/api/knowledge/distill-workflow",
                                json={
-                                   "experience_context": SAMPLE_EXPERIENCE_CONTEXT,
+                                   "workflow_context": SAMPLE_WORKFLOW_CONTEXT,
                                    "model": {"endpoint": "openai", "model": "gpt-4o", "api_key": "test"},
                                })
             data = resp.get_json()
             assert data["status"] == "success"
-            assert data["data"]["category"] == "experiences"
+            assert data["data"]["category"] == "workflows"
             assert data["data"]["path"].endswith(".md")
 
             # Verify file was written
-            exp_dir = tmp_path / "knowledge" / "experiences"
+            exp_dir = tmp_path / "knowledge" / "workflows"
             md_files = list(exp_dir.rglob("*.md"))
             assert len(md_files) >= 1
             assert not (tmp_path / "agent-logs").exists()
@@ -531,13 +555,13 @@ def test_successful_distill(self, client, tmp_path):
     def test_category_hint_creates_subdir(self, client, tmp_path):
         with patch("data_formulator.routes.agents.get_client") as mock_gc, \
              patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \
-             patch("data_formulator.agents.agent_experience_distill.ExperienceDistillAgent.run",
+             patch("data_formulator.agents.agent_workflow_distill.WorkflowDistillAgent.run",
                    return_value=MOCK_CONTEXT_RESPONSE):
 
             mock_gc.return_value = MagicMock()
-            resp = client.post("/api/knowledge/distill-experience",
+            resp = client.post("/api/knowledge/distill-workflow",
                                json={
-                                   "experience_context": SAMPLE_EXPERIENCE_CONTEXT,
+                                   "workflow_context": SAMPLE_WORKFLOW_CONTEXT,
                                    "model": {"endpoint": "openai", "model": "gpt-4o", "api_key": "test"},
                                    "category_hint": "sales",
                                })
diff --git a/tests/backend/knowledge/test_knowledge_store.py b/tests/backend/knowledge/test_knowledge_store.py
index 2444195b..f69ce37c 100644
--- a/tests/backend/knowledge/test_knowledge_store.py
+++ b/tests/backend/knowledge/test_knowledge_store.py
@@ -5,11 +5,11 @@
 
 Covers:
 - list_all, read, write, delete for each category
-- path depth constraints (rules=flat, experiences=1 sub-dir)
+- path depth constraints (rules=flat, workflows=1 sub-dir)
 - .md extension enforcement
 - ConfinedDir traversal rejection
 - front matter parsing and graceful degradation
-- search: title, tags, filename, body matching + ranking + limit
+- search: title, filename, body matching + ranking + limit
 - search skips alwaysApply rules (they are injected via system prompt)
 - tokenization: English stopwords, CJK/ASCII mixed splitting
 - scoring: partial token match, source discount, table_names boost
@@ -73,21 +73,20 @@ def test_lists_rules(self, store, tmp_path):
         items = store.list_all("rules")
         assert len(items) == 1
         assert items[0]["title"] == "ROI Calculation"
-        assert items[0]["tags"] == ["finance", "computation"]
         assert items[0]["path"] == "roi.md"
         assert items[0]["source"] == "manual"
 
-    def test_lists_experiences_in_subdirs(self, store, tmp_path):
-        exp_dir = tmp_path / "knowledge" / "experiences" / "cleaning"
+    def test_lists_workflows_in_subdirs(self, store, tmp_path):
+        exp_dir = tmp_path / "knowledge" / "workflows" / "cleaning"
         exp_dir.mkdir(parents=True)
         (exp_dir / "missing.md").write_text(SAMPLE_MD_SKILL, encoding="utf-8")
 
-        items = store.list_all("experiences")
+        items = store.list_all("workflows")
         assert len(items) == 1
         assert items[0]["path"] == "cleaning/missing.md"
 
     def test_empty_category_returns_empty(self, store):
-        items = store.list_all("experiences")
+        items = store.list_all("workflows")
         assert items == []
 
     def test_front_matter_title_fallback_to_stem(self, store, tmp_path):
@@ -139,9 +138,9 @@ def test_preserves_existing_front_matter(self, store):
         content = store.read("rules", "fm.md")
         assert "title: ROI Calculation" in content
 
-    def test_writes_experiences_in_subdir(self, store, tmp_path):
-        store.write("experiences", "cleaning/handle-missing.md", SAMPLE_MD_SKILL)
-        assert (tmp_path / "knowledge" / "experiences" / "cleaning" / "handle-missing.md").exists()
+    def test_writes_workflows_in_subdir(self, store, tmp_path):
+        store.write("workflows", "cleaning/handle-missing.md", SAMPLE_MD_SKILL)
+        assert (tmp_path / "knowledge" / "workflows" / "cleaning" / "handle-missing.md").exists()
 
 
 # ── CRUD: delete ──────────────────────────────────────────────────────────
@@ -169,12 +168,12 @@ def test_rules_subdir_rejected(self):
         with pytest.raises(ValueError, match="sub-directories"):
             KnowledgeStore.validate_path("rules", "sub/file.md")
 
-    def test_experiences_one_subdir_ok(self):
-        KnowledgeStore.validate_path("experiences", "cat/file.md")
+    def test_workflows_one_subdir_ok(self):
+        KnowledgeStore.validate_path("workflows", "cat/file.md")
 
-    def test_experiences_two_subdirs_rejected(self):
+    def test_workflows_two_subdirs_rejected(self):
         with pytest.raises(ValueError, match="one level"):
-            KnowledgeStore.validate_path("experiences", "cat/sub/file.md")
+            KnowledgeStore.validate_path("workflows", "cat/sub/file.md")
 
     def test_skills_rejected_as_invalid(self):
         with pytest.raises(ValueError, match="Invalid category"):
@@ -228,7 +227,7 @@ def _setup_knowledge(self, store, tmp_path):
         rules_dir = tmp_path / "knowledge" / "rules"
         (rules_dir / "roi.md").write_text(SAMPLE_MD, encoding="utf-8")
 
-        exp_dir = tmp_path / "knowledge" / "experiences" / "cleaning"
+        exp_dir = tmp_path / "knowledge" / "workflows" / "cleaning"
         exp_dir.mkdir(parents=True)
         (exp_dir / "missing.md").write_text(SAMPLE_MD_SKILL, encoding="utf-8")
 
@@ -237,11 +236,6 @@ def test_search_by_title(self, store):
         assert len(results) >= 1
         assert results[0]["title"] == "Handle Missing Values"
 
-    def test_search_by_tags(self, store):
-        results = store.search("pandas")
-        assert len(results) >= 1
-        assert results[0]["title"] == "Handle Missing Values"
-
     def test_search_by_filename(self, store):
         results = store.search("missing")
         assert len(results) >= 1
@@ -269,7 +263,7 @@ def test_max_results_limit(self, store, tmp_path):
         assert len(results) <= 5
 
     def test_search_filters_by_category(self, store):
-        results = store.search("ROI", categories=["experiences"])
+        results = store.search("ROI", categories=["workflows"])
         assert len(results) == 0
 
     def test_search_skips_always_apply_rules(self, store, tmp_path):
@@ -304,13 +298,12 @@ def test_partial_token_match_finds_results(self, store):
         assert results[0]["title"] == "Handle Missing Values"
 
     def test_table_names_boost(self, store, tmp_path):
-        """Entries tagged with a session table name get boosted."""
-        exp_dir = tmp_path / "knowledge" / "experiences" / "analysis"
+        """Entries mentioning a session table name (title/body) get boosted."""
+        exp_dir = tmp_path / "knowledge" / "workflows" / "analysis"
         exp_dir.mkdir(parents=True)
         (exp_dir / "sales-tip.md").write_text(
-            "---\ntitle: Sales Analysis Tips\n"
-            "tags: [sales_data, revenue]\nsource: manual\n---\n"
-            "When analysing sales, check for seasonality.\n",
+            "---\ntitle: Sales Analysis Tips\nsource: manual\n---\n"
+            "When analysing sales_data, check for seasonality.\n",
             encoding="utf-8",
         )
         results = store.search("analysis tips", table_names=["sales_data"])
@@ -319,7 +312,7 @@ def test_table_names_boost(self, store, tmp_path):
 
     def test_non_manual_source_discounted(self, store, tmp_path):
         """Non-manual entries score lower than equivalent manual entries."""
-        exp_dir = tmp_path / "knowledge" / "experiences"
+        exp_dir = tmp_path / "knowledge" / "workflows"
         (exp_dir / "auto-tip.md").write_text(
             "---\ntitle: Tip One\ntags: [tip]\nsource: distill\n---\nSome tip.\n",
             encoding="utf-8",
@@ -328,7 +321,7 @@ def test_non_manual_source_discounted(self, store, tmp_path):
             "---\ntitle: Tip One\ntags: [tip]\nsource: manual\n---\nSome tip.\n",
             encoding="utf-8",
         )
-        results = store.search("Tip One", categories=["experiences"])
+        results = store.search("Tip One", categories=["workflows"])
         assert len(results) == 2
         assert results[0]["source"] == "manual"
         assert results[1]["source"] == "distill"
@@ -472,7 +465,7 @@ def test_all_stopwords_returns_empty(self):
 class TestMatchScore:
     def test_single_token_title_hit(self):
         score = KnowledgeStore._match_score(
-            "ROI", "ROI Calculation", [], "roi", "",
+            "ROI", "ROI Calculation", "roi", "",
         )
         assert score > 0
 
@@ -481,49 +474,49 @@ def test_partial_tokens_accumulate(self):
         score = KnowledgeStore._match_score(
             "quarterly sales trend",
             "Sales Trend Analysis",
-            [], "analysis", "",
+            "analysis", "",
         )
         assert score > 0
 
     def test_whole_string_bonus(self):
         full = KnowledgeStore._match_score(
-            "ROI", "ROI Calculation", [], "roi", "",
+            "ROI", "ROI Calculation", "roi", "",
         )
         no_title = KnowledgeStore._match_score(
-            "ROI", "Something Else", [], "roi", "",
+            "ROI", "Something Else", "roi", "",
         )
         assert full > no_title
 
     def test_source_discount(self):
         manual = KnowledgeStore._match_score(
-            "ROI", "ROI Guide", ["finance"], "roi", "",
+            "ROI", "ROI Guide", "roi", "",
             source="manual",
         )
         auto = KnowledgeStore._match_score(
-            "ROI", "ROI Guide", ["finance"], "roi", "",
+            "ROI", "ROI Guide", "roi", "",
             source="distill",
         )
         assert auto == pytest.approx(manual * 0.9)
 
     def test_table_names_boost(self):
         without = KnowledgeStore._match_score(
-            "analysis", "Analysis Tips", ["sales_data"], "tips", "",
+            "analysis", "Analysis Tips", "tips", "about sales_data",
         )
         with_tn = KnowledgeStore._match_score(
-            "analysis", "Analysis Tips", ["sales_data"], "tips", "",
+            "analysis", "Analysis Tips", "tips", "about sales_data",
             table_names=["sales_data"],
         )
         assert with_tn > without
 
     def test_no_match_returns_zero(self):
         score = KnowledgeStore._match_score(
-            "xyznonexistent", "ROI Calculation", ["finance"], "roi", "body text",
+            "xyznonexistent", "ROI Calculation", "roi", "body text",
         )
         assert score == 0
 
     def test_cjk_mixed_query_matches(self):
         """Chinese+English query should match via extracted ASCII tokens."""
         score = KnowledgeStore._match_score(
-            "帮我分析ROI", "ROI Calculation", ["finance"], "roi", "",
+            "帮我分析ROI", "ROI Calculation", "roi", "",
         )
         assert score > 0
diff --git a/tests/backend/routes/test_knowledge_routes.py b/tests/backend/routes/test_knowledge_routes.py
index ddc2b7ab..f5ac69ff 100644
--- a/tests/backend/routes/test_knowledge_routes.py
+++ b/tests/backend/routes/test_knowledge_routes.py
@@ -167,7 +167,7 @@ def test_delete_nonexistent(self, client):
 
 class TestKnowledgeSearch:
     def test_search_returns_results(self, client, tmp_path):
-        exp_dir = tmp_path / "knowledge" / "experiences" / "finance"
+        exp_dir = tmp_path / "knowledge" / "workflows" / "finance"
         exp_dir.mkdir(parents=True, exist_ok=True)
         (exp_dir / "roi.md").write_text(SAMPLE_MD, encoding="utf-8")
 
@@ -191,7 +191,7 @@ def test_search_invalid_category(self, client):
         assert data["status"] == "error"
 
     def test_search_filters_by_category(self, client, tmp_path):
-        exp_dir = tmp_path / "knowledge" / "experiences" / "finance"
+        exp_dir = tmp_path / "knowledge" / "workflows" / "finance"
         exp_dir.mkdir(parents=True, exist_ok=True)
         (exp_dir / "roi.md").write_text(SAMPLE_MD, encoding="utf-8")
 
@@ -202,7 +202,7 @@ def test_search_filters_by_category(self, client, tmp_path):
         assert len(data["data"]["results"]) == 0
 
 
-SESSION_EXPERIENCE_CONTEXT = {
+SESSION_WORKFLOW_CONTEXT = {
     "context_id": "ws-1",
     "workspace_id": "ws-1",
     "workspace_name": "Gasoline prices 2024",
@@ -233,6 +233,7 @@ def test_search_filters_by_category(self, client, tmp_path):
 DISTILLED_MD = """\
 ---
 subtitle: monthly sales aggregation
+filename: monthly sales
 tags: [sales, time-series]
 created: 2026-05-06
 updated: 2026-05-06
@@ -251,37 +252,37 @@ def test_search_filters_by_category(self, client, tmp_path):
 """
 
 
-class TestDistillExperience:
-    def test_distill_experience_from_context(self, client, tmp_path):
+class TestDistillWorkflow:
+    def test_distill_workflow_from_context(self, client, tmp_path):
         with patch("data_formulator.routes.agents.get_client", return_value=object()), \
              patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \
              patch(
-                 "data_formulator.agents.agent_experience_distill."
-                 "ExperienceDistillAgent.run",
+                 "data_formulator.agents.agent_workflow_distill."
+                 "WorkflowDistillAgent.run",
                  return_value=DISTILLED_MD,
              ) as run:
-            resp = client.post("/api/knowledge/distill-experience", json={
-                "experience_context": SESSION_EXPERIENCE_CONTEXT,
+            resp = client.post("/api/knowledge/distill-workflow", json={
+                "workflow_context": SESSION_WORKFLOW_CONTEXT,
                 "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
             })
 
         data = resp.get_json()
         assert data["status"] == "success"
-        assert data["data"]["category"] == "experiences"
-        assert (tmp_path / "knowledge" / "experiences" / data["data"]["path"]).exists()
+        assert data["data"]["category"] == "workflows"
+        assert (tmp_path / "knowledge" / "workflows" / data["data"]["path"]).exists()
         assert not (tmp_path / "agent-logs").exists()
         run.assert_called_once()
 
-    def test_distill_experience_llm_timeout_returns_structured_error(self, client):
+    def test_distill_workflow_llm_timeout_returns_structured_error(self, client):
         with patch("data_formulator.routes.agents.get_client", return_value=object()), \
              patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \
              patch(
-                 "data_formulator.agents.agent_experience_distill."
-                 "ExperienceDistillAgent.run",
+                 "data_formulator.agents.agent_workflow_distill."
+                 "WorkflowDistillAgent.run",
                  side_effect=TimeoutError("request timed out"),
              ):
-            resp = client.post("/api/knowledge/distill-experience", json={
-                "experience_context": SESSION_EXPERIENCE_CONTEXT,
+            resp = client.post("/api/knowledge/distill-workflow", json={
+                "workflow_context": SESSION_WORKFLOW_CONTEXT,
                 "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
             })
 
@@ -291,55 +292,60 @@ def test_distill_experience_llm_timeout_returns_structured_error(self, client):
         assert data["error"]["code"] == "LLM_TIMEOUT"
         assert data["error"]["retry"] is True
 
-    def test_distill_experience_missing_context(self, client):
-        resp = client.post("/api/knowledge/distill-experience", json={
+    def test_distill_workflow_missing_context(self, client):
+        resp = client.post("/api/knowledge/distill-workflow", json={
             "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
         })
         data = resp.get_json()
         assert data["status"] == "error"
 
-    def test_distill_experience_missing_threads(self, client):
-        bad_context = {k: v for k, v in SESSION_EXPERIENCE_CONTEXT.items() if k != "threads"}
-        resp = client.post("/api/knowledge/distill-experience", json={
-            "experience_context": bad_context,
+    def test_distill_workflow_missing_threads(self, client):
+        bad_context = {k: v for k, v in SESSION_WORKFLOW_CONTEXT.items() if k != "threads"}
+        resp = client.post("/api/knowledge/distill-workflow", json={
+            "workflow_context": bad_context,
             "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
         })
         data = resp.get_json()
         assert data["status"] == "error"
 
-    def test_distill_experience_missing_workspace(self, client):
-        bad_context = {k: v for k, v in SESSION_EXPERIENCE_CONTEXT.items()
+    def test_distill_workflow_missing_workspace(self, client):
+        bad_context = {k: v for k, v in SESSION_WORKFLOW_CONTEXT.items()
                        if k not in ("workspace_id", "workspace_name")}
-        resp = client.post("/api/knowledge/distill-experience", json={
-            "experience_context": bad_context,
+        resp = client.post("/api/knowledge/distill-workflow", json={
+            "workflow_context": bad_context,
             "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
         })
         data = resp.get_json()
         assert data["status"] == "error"
 
-    def test_distill_session_overrides_title_with_workspace_name(self, client, tmp_path):
-        """Session-scoped distillation composes 'Experience from <name>: <subtitle>'."""
+    def test_distill_session_uses_descriptive_title(self, client, tmp_path):
+        """Session-scoped distillation uses the agent subtitle as the title."""
         with patch("data_formulator.routes.agents.get_client", return_value=object()), \
              patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \
              patch(
-                 "data_formulator.agents.agent_experience_distill."
-                 "ExperienceDistillAgent.run",
+                 "data_formulator.agents.agent_workflow_distill."
+                 "WorkflowDistillAgent.run",
                  return_value=DISTILLED_MD,
              ):
-            resp = client.post("/api/knowledge/distill-experience", json={
-                "experience_context": SESSION_EXPERIENCE_CONTEXT,
+            resp = client.post("/api/knowledge/distill-workflow", json={
+                "workflow_context": SESSION_WORKFLOW_CONTEXT,
                 "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
             })
 
         data = resp.get_json()
         assert data["status"] == "success"
         path = data["data"]["path"]
-        # Filename is derived from the workspace name, not the LLM subtitle.
-        assert path == "gasoline-prices-2024.md"
-        saved = (tmp_path / "knowledge" / "experiences" / path).read_text(encoding="utf-8")
-        assert "title: 'Experience from Gasoline prices 2024: monthly sales aggregation'" in saved \
-            or "title: \"Experience from Gasoline prices 2024: monthly sales aggregation\"" in saved \
-            or "title: Experience from Gasoline prices 2024: monthly sales aggregation" in saved
+        # Filename is derived from the short agent-emitted `filename` hint,
+        # not the long descriptive title.
+        assert path == "monthly-sales.md"
+        saved = (tmp_path / "knowledge" / "workflows" / path).read_text(encoding="utf-8")
+        assert "title: monthly sales aggregation" in saved \
+            or "title: 'monthly sales aggregation'" in saved \
+            or "title: \"monthly sales aggregation\"" in saved
+        # No legacy "Workflow from <name>:" prefix on the title.
+        assert "Workflow from" not in saved
+        # The filename hint is consumed, not persisted in the front matter.
+        assert "filename:" not in saved
         # Workspace stamps are present so the file can be looked up later.
         assert "source_workspace_id: ws-1" in saved
         assert "source_workspace_name: Gasoline prices 2024" in saved
@@ -347,42 +353,46 @@ def test_distill_session_overrides_title_with_workspace_name(self, client, tmp_p
         assert "## Method" in saved
 
     def test_distill_session_upserts_existing_workspace_file(self, client, tmp_path):
-        """Re-distilling the same workspace overwrites the same file."""
+        """Re-distilling the same workspace replaces the prior file."""
+        second_md = DISTILLED_MD.replace(
+            "filename: monthly sales",
+            "filename: annual revenue",
+        )
         with patch("data_formulator.routes.agents.get_client", return_value=object()), \
              patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \
              patch(
-                 "data_formulator.agents.agent_experience_distill."
-                 "ExperienceDistillAgent.run",
-                 return_value=DISTILLED_MD,
+                 "data_formulator.agents.agent_workflow_distill."
+                 "WorkflowDistillAgent.run",
+                 side_effect=[DISTILLED_MD, second_md],
              ):
-            client.post("/api/knowledge/distill-experience", json={
-                "experience_context": SESSION_EXPERIENCE_CONTEXT,
+            client.post("/api/knowledge/distill-workflow", json={
+                "workflow_context": SESSION_WORKFLOW_CONTEXT,
                 "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
             })
-            # Re-distill: workspace renamed, so the slug changes — old file
-            # should be removed in favour of the new one.
-            renamed = {**SESSION_EXPERIENCE_CONTEXT, "workspace_name": "Diesel 2024"}
-            resp = client.post("/api/knowledge/distill-experience", json={
-                "experience_context": renamed,
+            # Re-distill: the filename hint changes, so the slug changes — old
+            # file should be removed in favour of the new one (matched by
+            # source_workspace_id).
+            resp = client.post("/api/knowledge/distill-workflow", json={
+                "workflow_context": SESSION_WORKFLOW_CONTEXT,
                 "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
             })
 
         data = resp.get_json()
         assert data["status"] == "success"
         new_path = data["data"]["path"]
-        exp_dir = tmp_path / "knowledge" / "experiences"
+        exp_dir = tmp_path / "knowledge" / "workflows"
         # Stale slug deleted, new slug present.
-        assert not (exp_dir / "gasoline-prices-2024.md").exists()
+        assert not (exp_dir / "monthly-sales.md").exists()
         assert (exp_dir / new_path).exists()
-        assert new_path == "diesel-2024.md"
+        assert new_path == "annual-revenue.md"
 
-    def test_distill_session_skips_subtitle_double_prefix(self, client, tmp_path):
-        """Update-mode runs that re-emit a prefixed title don't double-prefix."""
-        # Simulate a prior run where the LLM echoed an Experience-prefixed title
+    def test_distill_session_strips_legacy_title_prefix(self, client, tmp_path):
+        """Update-mode runs strip any legacy 'Workflow from <name>:' prefix."""
+        # Simulate a prior run where the LLM echoed a Workflow-prefixed title
         # without a subtitle.
         prior_md = (
             "---\n"
-            "title: 'Experience from Gasoline prices 2024: prior insight'\n"
+            "title: 'Workflow from Gasoline prices 2024: prior insight'\n"
             "tags: [a]\n"
             "created: 2026-05-06\n"
             "updated: 2026-05-06\n"
@@ -392,17 +402,18 @@ def test_distill_session_skips_subtitle_double_prefix(self, client, tmp_path):
         with patch("data_formulator.routes.agents.get_client", return_value=object()), \
              patch("data_formulator.routes.agents.get_language_instruction", return_value=""), \
              patch(
-                 "data_formulator.agents.agent_experience_distill."
-                 "ExperienceDistillAgent.run",
+                 "data_formulator.agents.agent_workflow_distill."
+                 "WorkflowDistillAgent.run",
                  return_value=prior_md,
              ):
-            resp = client.post("/api/knowledge/distill-experience", json={
-                "experience_context": SESSION_EXPERIENCE_CONTEXT,
+            resp = client.post("/api/knowledge/distill-workflow", json={
+                "workflow_context": SESSION_WORKFLOW_CONTEXT,
                 "model": {"endpoint": "openai", "key": "x", "model": "gpt"},
             })
 
         data = resp.get_json()
         assert data["status"] == "success"
-        saved = (tmp_path / "knowledge" / "experiences" / data["data"]["path"]).read_text(encoding="utf-8")
-        # The "Experience from ..." prefix is stripped before re-prefixing.
-        assert saved.count("Experience from") == 1
+        saved = (tmp_path / "knowledge" / "workflows" / data["data"]["path"]).read_text(encoding="utf-8")
+        # The legacy "Workflow from ..." prefix is fully stripped.
+        assert "Workflow from" not in saved
+        assert "prior insight" in saved

From 1571113ac7b74ce1a0c32b434a9576d6fdce2b40 Mon Sep 17 00:00:00 2001
From: y-agent-ai <zhb123@126.com>
Date: Sat, 30 May 2026 18:22:30 +0800
Subject: [PATCH 07/29] refactor(loading): Refactor AnvilLoader and add custom
 parameter support

1. Add support for custom `height`, `label`, and `sx` props to AnvilLoader
2. Replace globally hardcoded loading text with customizable label parameter
3. Optimize loading overlay styles with new frosted glass background effect
4. Unify loading state display in App.tsx and VisualizationView
---
 src/app/App.tsx                 |  2 +-
 src/components/AnvilLoader.tsx  | 46 +++++++++++++++++++++------------
 src/views/VisualizationView.tsx | 10 ++++---
 3 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/app/App.tsx b/src/app/App.tsx
index 17898f0c..22d76510 100644
--- a/src/app/App.tsx
+++ b/src/app/App.tsx
@@ -1253,7 +1253,7 @@ export const AppFC: FC<AppFCProps> = function AppFC(appProps) {
             {configLoaded && authChecked ? (
                 <RouterProvider router={router} />
             ) : (
-                <AnvilLoader />
+                <AnvilLoader label="loading data formulator..." />
             )}
             {migrationBrowserId && (
                 <IdentityMigrationDialog
diff --git a/src/components/AnvilLoader.tsx b/src/components/AnvilLoader.tsx
index 49471259..8e9c5f9e 100644
--- a/src/components/AnvilLoader.tsx
+++ b/src/components/AnvilLoader.tsx
@@ -2,7 +2,7 @@
 // Licensed under the MIT License.
 
 import React, { useEffect, useState } from 'react';
-import { Box, Typography } from '@mui/material';
+import { Box, SxProps, Theme, Typography } from '@mui/material';
 import { keyframes } from '@mui/system';
 
 const ROWS = 3;
@@ -69,28 +69,40 @@ function BinaryGrid() {
     );
 }
 
-export function AnvilLoader() {
+export interface AnvilLoaderProps {
+    /** Override container height. Defaults to `'100vh'` (full-screen). */
+    height?: string | number;
+    /** Optional text shown below the binary grid. When omitted no text is rendered. */
+    label?: React.ReactNode;
+    /** Extra sx applied to the outermost container. */
+    sx?: SxProps<Theme>;
+}
+
+export function AnvilLoader({ height = '100vh', label, sx }: AnvilLoaderProps) {
     return (
         <Box sx={{
             display: 'flex', flexDirection: 'column', alignItems: 'center',
-            justifyContent: 'center', height: '100vh', gap: 3,
+            justifyContent: 'center', height, gap: 3,
             userSelect: 'none',
+            ...sx as any,
         }}>
             <BinaryGrid />
-            <Typography
-                variant="body2"
-                sx={{
-                    color: 'text.secondary',
-                    fontFamily: '"Courier New", monospace',
-                    letterSpacing: 3,
-                    fontSize: '0.75rem',
-                    fontWeight: 400,
-                    animation: `${pulse} 2.5s ease-in-out infinite`,
-                    textTransform: 'uppercase',
-                }}
-            >
-                loading data formulator...
-            </Typography>
+            {label !== undefined && (
+                <Typography
+                    variant="body2"
+                    sx={{
+                        color: 'text.secondary',
+                        fontFamily: '"Courier New", monospace',
+                        letterSpacing: 3,
+                        fontSize: '0.75rem',
+                        fontWeight: 400,
+                        animation: `${pulse} 2.5s ease-in-out infinite`,
+                        textTransform: 'uppercase',
+                    }}
+                >
+                    {label}
+                </Typography>
+            )}
         </Box>
     );
 }
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 7b6d18b4..585eba79 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -15,7 +15,6 @@ import {
     ListItemIcon,
     ListItemText,
     MenuItem,
-    LinearProgress,
     Card,
     ListSubheader,
     Menu,
@@ -37,6 +36,7 @@ import _ from 'lodash';
 
 import { borderColor, transition } from '../app/tokens';
 import { WritingIndicator } from '../components/FunComponents';
+import { AnvilLoader } from '../components/AnvilLoader';
 
 import ButtonGroup from '@mui/material/ButtonGroup';
 
@@ -1099,10 +1099,12 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
 
     return <Box ref={componentRef} id="vis-view-canvas" sx={{overflow: "hidden", display: 'flex', flex: 1, position: 'relative'}}>
         {synthesisRunning ? <Box sx={{
-                position: "absolute", height: "calc(100%)", width: "calc(100%)", zIndex: 1001, 
-                backgroundColor: "rgba(243, 243, 243, 0.8)", display: "flex", alignItems: "center"
+                position: "absolute", top: 0, left: 0, right: 0, bottom: 0, zIndex: 1001, 
+                backgroundColor: "rgba(255, 255, 255, 0.82)",
+                backdropFilter: 'blur(2px)',
+                display: "flex", alignItems: "center", justifyContent: "center",
             }}>
-                <LinearProgress sx={{ width: "100%", height: "100%", opacity: 0.05 }} />
+                <AnvilLoader height="auto" />
             </Box> : ''}
         {chartUnavailable ? "" : chartResizer}
         {content}

From b9abafb163d59c8c4165075d8f573345b4d70235 Mon Sep 17 00:00:00 2001
From: cat0825 <1759138827@qq.com>
Date: Sun, 31 May 2026 14:02:02 +0800
Subject: [PATCH 08/29] test: keep zh locale keys aligned

---
 src/i18n/locales/zh/dataLoading.json        |  1 +
 tests/frontend/unit/app/i18nLocales.test.ts | 30 +++++++++++++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 tests/frontend/unit/app/i18nLocales.test.ts

diff --git a/src/i18n/locales/zh/dataLoading.json b/src/i18n/locales/zh/dataLoading.json
index 4eab6fcf..6ffe595d 100644
--- a/src/i18n/locales/zh/dataLoading.json
+++ b/src/i18n/locales/zh/dataLoading.json
@@ -39,6 +39,7 @@
       "rowLimit": "行数限制",
       "loadSelected": "加载选中的表",
       "loadedCount": "✓ 已加载 {{count}} 张表",
+      "loadedCount_plural": "✓ 已加载 {{count}} 张表",
       "preview": "预览",
       "hidePreview": "收起",
       "previewing": "正在预览...",
diff --git a/tests/frontend/unit/app/i18nLocales.test.ts b/tests/frontend/unit/app/i18nLocales.test.ts
new file mode 100644
index 00000000..dd6c9933
--- /dev/null
+++ b/tests/frontend/unit/app/i18nLocales.test.ts
@@ -0,0 +1,30 @@
+import { describe, expect, it } from "vitest";
+
+import en from "../../../../src/i18n/locales/en";
+import zh from "../../../../src/i18n/locales/zh";
+
+type TranslationValue = string | Record<string, TranslationValue>;
+type TranslationMap = Record<string, TranslationValue>;
+
+function collectKeys(value: TranslationMap, prefix = ""): Set<string> {
+  const keys = new Set<string>();
+
+  for (const [key, child] of Object.entries(value)) {
+    const nextPrefix = prefix ? `${prefix}.${key}` : key;
+    if (typeof child === "string") {
+      keys.add(nextPrefix);
+    } else {
+      for (const childKey of collectKeys(child, nextPrefix)) {
+        keys.add(childKey);
+      }
+    }
+  }
+
+  return keys;
+}
+
+describe("i18n locale bundles", () => {
+  it("keeps Simplified Chinese translation keys aligned with English", () => {
+    expect(collectKeys(zh)).toEqual(collectKeys(en));
+  });
+});

From 748a30ce45f8e01b95388aced0b2e27106d70b69 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Sun, 31 May 2026 12:26:36 -0700
Subject: [PATCH 09/29] bug fix and clean up

---
 .../agents/agent_workflow_distill.py          | 190 ++++++++++--------
 .../data_loader/sample_datasets_loader.py     |  13 +-
 src/app/dfSlice.tsx                           |  10 +
 src/components/LoadPlanCard.tsx               |  51 ++++-
 src/i18n/locales/en/common.json               |   4 +-
 src/i18n/locales/en/dataLoading.json          |   2 +
 src/i18n/locales/zh/dataLoading.json          |   2 +
 src/views/DataLoadingChat.tsx                 |  24 ++-
 src/views/DataSourceSidebar.tsx               |  28 ++-
 src/views/EncodingShelfCard.tsx               |  60 ++++--
 src/views/KnowledgePanel.tsx                  |   6 +-
 src/views/VisualizationView.tsx               |  15 +-
 12 files changed, 263 insertions(+), 142 deletions(-)

diff --git a/py-src/data_formulator/agents/agent_workflow_distill.py b/py-src/data_formulator/agents/agent_workflow_distill.py
index 3f3d9c6d..0d86aa78 100644
--- a/py-src/data_formulator/agents/agent_workflow_distill.py
+++ b/py-src/data_formulator/agents/agent_workflow_distill.py
@@ -30,9 +30,28 @@
 
 SYSTEM_PROMPT = """\
 You are a workflow distiller. Given the chronological events of a data
-analysis session plus an optional user instruction, extract a short,
-**replayable workflow** that captures *what the user wanted and got* — so
-the same analysis can be reproduced later on a similarly-shaped dataset.
+analysis session plus an optional user distillation instruction, extract a **replayable
+workflow** that captures *what the user wanted and got* — and write it at
+TWO levels so it can be reused in two different situations:
+
+1. An **Abstract workflow** — dataset-independent. The underlying analytical
+   pattern, stripped of this dataset's subject matter: the sequence of
+   questions, computations, and chart kinds, phrased in domain-neutral terms.
+   Following it on a *different and possibly very differently-shaped* dataset
+   should walk the same process and arrive at structurally similar
+   visualizations.
+2. A **Concrete workflow** — for *similar* data (same shape, only minor
+   differences — a different period, region, or filter). It names the real
+   fields, aggregations, filters, and chart encodings used here, so the
+   analysis can be replayed closely with minimal thought.
+
+Both describe the SAME analysis at different distances. They should be
+consistent, but they do NOT need an exact 1:1 step mapping — let each be as
+long as it needs (typically 3-7 steps each).
+
+Where the analysis hinges on a few choices a user might change on replay (a
+period, a filter, a top-N), surface them as named **parameters** with
+`{{token}}` placeholders in the steps — see the `## Parameters` section below.
 
 The session contains one or more threads (separate analysis branches in
 the same session) each rendered under a `### Thread N` header. When
@@ -47,38 +66,20 @@
   (followed by columns, row count, sample, and code).
 - `create_chart` — a chart emitted on a table (mark + encoding summary).
 
-Your job is to recover the **ordered list of requests** the user actually
-wanted, and the outputs (tables/charts) they ended up keeping. Beyond the
-concrete steps, also distill the analysis at TWO levels of abstraction so
-it can be reused later:
-- **Adapting to similar data** (concrete) — how to rerun essentially the
-  same analysis on a near-identical dataset, e.g. the business report for
-  a different month, region, or product line. Same shape and intent, only
-  the specific inputs/filters change.
-- **Generalizing to other data** (abstract, dataset-agnostic) — the
-  underlying analytical pattern, independent of this domain: the kinds of
-  questions, computations, and charts involved, phrased so they transfer
-  to a different domain or a differently-shaped dataset.
-
 CRITICAL extraction rules — keep only what the user wanted and got:
-- Each step = one user request, written in plain language. Say BOTH the
-  question being explored AND what was produced to answer it — including
-  the chart that was created and the key fields it uses (e.g. "Ask how
-  sales trend over time, and plot monthly total sales as a line chart";
-  "Compare regions by breaking revenue down per region as a sorted bar
-  chart"). Order them as the analysis progressed.
+- Recover the ORDERED list of requests the user actually wanted, and the
+  outputs (tables/charts) they kept. Each step states BOTH the question
+  explored AND what was produced to answer it — including the chart and the
+  key fields it uses.
 - DROP corrective back-and-forth. If the user changed their mind
   ("no, it should be…", "actually use median instead"), keep ONLY the
   final resolved intent — not the wrong first attempt or the correction.
 - DROP abandoned work. If a chart or table was created and then deleted
   or never kept, leave it out entirely.
 - DROP mechanics. Do NOT include error-repair loops, dtype fixes, tool
-  call noise, or low-level code. Describe intent, not implementation.
-- Do NOT lean on code or exact column names unless a name is essential to
-  the request's meaning. Keep steps dataset-agnostic where possible so
-  they replay on a new slice of similar data.
-- Capture genuine gotchas separately as short notes (advisory warnings to
-  carry forward), NOT as steps to re-perform.
+  call noise, or low-level code dumps. Describe intent, not implementation.
+- Capture genuine gotchas as short Notes (advisory warnings to carry
+  forward), NOT as steps to re-perform.
 
 If a user instruction is provided, let it steer what to keep or emphasise.
 
@@ -86,8 +87,8 @@
 
 ```
 ---
-subtitle: <plain-language description of what this workflow is about, up to ~25 words; a full sentence is fine; start with an action verb; no jargon, no colons, dashes, or run-on lists>
-filename: <short 2-5 word lowercase name for the file, e.g. "monthly sales trend"; no dates, no extension>
+subtitle: <abstract, library-friendly TITLE naming the KIND of analysis, not this dataset — see rules below; a few words, e.g. "Year-over-year KPI volatility analysis">
+filename: <short 2-5 word lowercase name for the file, e.g. "kpi volatility analysis"; no dates, no extension>
 created: <today YYYY-MM-DD>
 updated: <today YYYY-MM-DD>
 source: distill
@@ -96,74 +97,85 @@
 
 ## Goal
 <one or two sentences: the overall question(s) this analysis answers and
-what it produces>
-
-## Steps
-1. <first question explored, and the table/chart created to answer it>
-2. <next question, and what was produced>
+what it produces. This is where the dataset-grounded explanation belongs —
+you MAY name the real subject here (e.g. "originally distilled from a
+monthly gasoline-price session").>
+
+## Parameters
+<the FEW analysis-specific choices a replay may want to change or re-confirm
+— your judgment about which knobs genuinely matter (often 0-4; omit the
+section entirely if none). Knobs may be run-specific (a period, region, top-N
+the user repicks each run) or dataset-specific (a domain value or column tied
+to this data). List each as a named parameter using a short `{{token}}`
+matching the placeholders in the steps. Give what it controls, the value used
+in THIS session, and a replay hint: `ask` (prompt the user to confirm/fill)
+or `keep` (a safe default unless told otherwise).>
+- `{{period}}` — the time range analysed; used here: 2024; on replay: ask.
+- `{{top_n}}` — how many top categories to keep; used here: 10; on replay: keep.
+- `{{region}}` — geographic filter applied; used here: National; on replay: ask.
+
+## Abstract workflow
+<dataset-INDEPENDENT. An ordered list of moves, each phrased as the question
+explored, the computation, and the chart kind — in domain-neutral terms
+(metric, category, period, cohort, event), with NO column names or this
+dataset's subject matter. Reference parameters by their `{{token}}` where a
+choice is analysis-specific. Following this on a different dataset should
+reproduce a structurally similar set of visualizations.>
+1. <e.g. "Aggregate a metric over a `{{time_grain}}` and plot it as a line to establish the baseline trend.">
+2. <e.g. "Compare each period against the prior comparable period to surface change, shown as a diverging bar.">
 3. <…>
 
-## Adapting to similar data
-<how to rerun essentially the same analysis on a near-identical dataset —
-e.g. the same kind of report for a different month, region, or product
-line. Keep the structure and outputs the same; call out which inputs,
-filters, or columns would change. 1-4 short sentences or bullets.>
-
-## Generalizing to other data
-<the dataset-agnostic analytical pattern behind this workflow: the kinds
-of questions, computations, and charts it represents, described in
-domain-neutral terms so it can transfer to a different domain or a
-differently-shaped dataset. Focus on the reasoning and technique, not the
-specific fields or values. 1-4 short sentences or bullets.>
+## Concrete workflow
+<for SIMILAR data (same shape, only minor differences). Follows the same
+analysis but names the real fields, aggregations, filters, and chart
+encodings used here, referencing the same `{{token}}` parameters where a value
+should be swapped on replay. A short code/encoding snippet is fine where it
+guards an easy-to-make mistake, but don't over-rely on code — keep it mostly
+plain language.>
+1. <e.g. "Filter to `{{region}}`, group `sales` by month, sum it; line chart x=month y=total. Swap `{{period}}` for the target run.">
+2. <…>
 
 ## Notes
 <optional short bullets: caveats/gotchas to watch for when reproducing this
-analysis on new data — e.g. "sort by time before computing deltas". Omit
-this section entirely if there is nothing worth warning about.>
+analysis on new data — e.g. "sort by time before computing period-over-period
+change". Omit this section entirely if there is nothing worth warning about.>
 ```
 
 Rules:
-- Subtitle must DESCRIBE what the workflow is about in PLAIN LANGUAGE that
-  a non-expert can fully understand at a glance, so they can decide
-  whether to replay it on new data. Favor clarity over brevity: it can be
-  a full sentence (up to ~25 words) if that makes the analysis genuinely
-  understandable. Write it like you would explain the analysis to a
-  colleague in one breath, covering the subject and the main thing you do
-  with it. The hosting application uses this subtitle directly as the
-  workflow's display title, so make it self-contained and do NOT prefix it
-  with the session name.
-  - Start with a concrete action verb (Plot, Compare, Break down, Rank,
-    Track, Summarize, Find…).
-  - Name the real-world subject in everyday words (sales, revenue,
-    customers, events), NOT the internal mechanics or derived-column
-    names you happened to create.
-  - AVOID abstract or technical jargon and invented noun-phrases
-    ("deltas", "composition", "window", "distribution shift"). If a
-    technique matters, phrase it plainly ("change from one period to the
-    next" instead of "deltas").
-  Good: "Plot monthly sales over time and compare each year against the
-         previous one to spot volatile periods".
-        "Break revenue down by region and show how each region
-         contributes to the total as a stacked area chart".
-        "Track how many events happen in each time window and what kinds
-         of events make up each window".
-  Bad:  "Time series analysis". "Data workflow". "Chart exploration".
-        "Event window deltas with composition". "Distribution shift inspection".
+- The subtitle is the workflow's display TITLE. Make it ABSTRACT and
+  library-friendly: name the *kind of analysis* — a technique plus a GENERIC
+  subject (KPI, metric, category, event, cohort) — so someone browsing the
+  workflow library can tell whether this is the KIND of analysis they want to
+  reuse. Do NOT pin it to this dataset's specific subject, period, or column
+  names, and do NOT prefix it with the session name.
+  - Pair a real technique with a generic subject; avoid bare category words.
+  Good: "Year-over-year KPI volatility analysis".
+        "Category contribution-to-total breakdown".
+        "Time-windowed event composition analysis".
+  Bad:  "Plot monthly gasoline prices in 2024 and compare each year".  (too specific)
+        "Time series analysis". "Data workflow". "Chart exploration".    (too vague)
+  The dataset-grounded, full-sentence explanation goes in `## Goal`, NOT the title.
 - Filename must be a SHORT (2-5 word) lowercase name for the file — just
-  the core subject and action, e.g. "monthly sales trend", "region revenue
-  breakdown". No dates, no file extension, no session name. It is only
-  used to name the file on disk; the descriptive subtitle is what users see.
-- Steps must be ordered and reproducible. Each step should make clear the
-  question being explored and the chart/output produced to answer it.
-- "Adapting to similar data" stays close to this analysis (same domain,
-  same shape) — only the concrete inputs change. "Generalizing to other
-  data" must be domain-neutral: strip out this dataset's subject matter and
-  describe only the transferable analytical pattern (question types,
-  computations, chart kinds). Do NOT just repeat the steps in either
-  section; add genuine reuse guidance. Keep each section brief.
-- Be as long as the analysis needs — do not omit meaningful steps,
-  questions, or charts just to stay short. Stay focused, but completeness
-  matters more than brevity.
+  the technique/subject, e.g. "kpi volatility analysis", "region revenue
+  breakdown". No dates, no file extension, no session name. It only names the
+  file on disk; the subtitle is what users see.
+- Abstract workflow must be domain-neutral — strip this dataset's subject
+  matter and column names; describe only the transferable pattern (question
+  types, computations, chart kinds). Concrete workflow must be runnable on a
+  near-identical dataset: real field names, the aggregation, the filter to
+  vary, the chart mark + key encodings. Do NOT have the two sections merely
+  repeat each other — each adds its own grain of reuse guidance.
+- Parameters are optional and a judgment call: surface only the FEW knobs
+  that materially change the outcome and that a user would revisit on replay
+  (often 0-4). When in doubt, leave the value inline — a spurious `{{token}}`
+  is worse than none. Knobs may be run-specific (period, region, top-N —
+  usually `ask`) or dataset-specific (a domain value/column — usually `keep`,
+  and may be skipped in the Abstract workflow). Every `{{token}}` in the steps
+  must be listed in `## Parameters` and vice versa.
+- Steps in both sections must be ordered and reproducible.
+- Be as long as the analysis needs — do not omit meaningful steps, questions,
+  or charts just to stay short. Stay focused, but completeness matters more
+  than brevity.
 - No raw data, PII, secrets, or specific values unless essential to a request.
 - Write the subtitle, headings, and body in {output_language}.
   YAML front-matter keys stay in English.
diff --git a/py-src/data_formulator/data_loader/sample_datasets_loader.py b/py-src/data_formulator/data_loader/sample_datasets_loader.py
index 6c3267cf..a84b8302 100644
--- a/py-src/data_formulator/data_loader/sample_datasets_loader.py
+++ b/py-src/data_formulator/data_loader/sample_datasets_loader.py
@@ -25,7 +25,10 @@
 import pyarrow as pa
 
 from data_formulator.data_loader.external_data_loader import ExternalDataLoader
-from data_formulator.datalake.parquet_utils import df_to_safe_records
+from data_formulator.datalake.parquet_utils import (
+    df_to_safe_records,
+    sanitize_dataframe_for_arrow,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -231,7 +234,13 @@ def fetch_data_as_arrow(
 
         logger.info("Returning %d / %d rows from sample dataset: %s",
                     len(df), self._last_total_rows, source_table)
-        return pa.Table.from_pandas(df, preserve_index=False)
+        # Public sample JSON/CSV files frequently contain mixed-type object
+        # columns (e.g. movies.json's ``Title`` holds both strings and
+        # numeric values), which makes ``pa.Table.from_pandas`` raise
+        # ArrowTypeError. Coerce such columns to a consistent type first.
+        return pa.Table.from_pandas(
+            sanitize_dataframe_for_arrow(df), preserve_index=False
+        )
 
     # ------------------------------------------------------------------
     # Internal: cached full-dataset fetch
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index 89b075ab..2ceb55aa 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -245,6 +245,9 @@ export interface DataFormulatorState {
     /** Whether the data source sidebar is expanded (true) or collapsed to rail (false) */
     dataSourceSidebarOpen: boolean;
 
+    /** Which data source sidebar tab is active. Persisted so it survives session refresh. */
+    dataSourceSidebarTab: 'sources' | 'sessions' | 'knowledge';
+
     /**
      * One-shot signal asking the sidebar to focus a specific connector
      * (open the sidebar, switch to sources tab, expand + scroll-into-view
@@ -322,6 +325,8 @@ const initialState: DataFormulatorState = {
 
     dataSourceSidebarOpen: false,
 
+    dataSourceSidebarTab: 'sources',
+
     focusedConnectorId: undefined,
 }
 
@@ -762,12 +767,16 @@ export const dataFormulatorSlice = createSlice({
                 viewMode: state.viewMode,
                 dataLoaderConnectParams: state.dataLoaderConnectParams,
                 dataSourceSidebarOpen: state.dataSourceSidebarOpen,
+                dataSourceSidebarTab: state.dataSourceSidebarTab,
                 activeWorkspace: action.payload,
             };
         },
         setDataSourceSidebarOpen: (state, action: PayloadAction<boolean>) => {
             state.dataSourceSidebarOpen = action.payload;
         },
+        setDataSourceSidebarTab: (state, action: PayloadAction<'sources' | 'sessions' | 'knowledge'>) => {
+            state.dataSourceSidebarTab = action.payload;
+        },
         /**
          * Ask the data-source sidebar to focus a specific connector.
          * Opens the sidebar (if collapsed) and stores the target id; the
@@ -870,6 +879,7 @@ export const dataFormulatorSlice = createSlice({
                 activeWorkspace: saved.activeWorkspace ?? state.activeWorkspace ?? null,
 
                 dataSourceSidebarOpen: state.dataSourceSidebarOpen,
+                dataSourceSidebarTab: state.dataSourceSidebarTab,
 
                 // Reset display-rows tick so dependent components re-fetch.
                 displayRowsTick: 0,
diff --git a/src/components/LoadPlanCard.tsx b/src/components/LoadPlanCard.tsx
index 9b60effd..b91e54ab 100644
--- a/src/components/LoadPlanCard.tsx
+++ b/src/components/LoadPlanCard.tsx
@@ -16,8 +16,14 @@ import type { LoadPlan, LoadPlanCandidate, PendingTableLoad } from './ComponentT
 
 interface LoadPlanCardProps {
     plan: LoadPlan;
-    onConfirm: (selected: LoadPlanCandidate[]) => void;
+    onConfirm: (selected: LoadPlanCandidate[], opts?: { newWorkspace?: boolean }) => void;
     confirmed?: boolean;
+    /** When true, a workspace with existing data is already open, so the
+     *  destination of the load is ambiguous. We then offer two explicit
+     *  actions: add to the current workspace, or load into a fresh one.
+     *  When false (empty/new workspace), a single "Load selected" button
+     *  loads directly with no ambiguity. */
+    canLoadInNewWorkspace?: boolean;
 }
 
 // Plans this small auto-expand each row's preview on first render so the
@@ -48,7 +54,7 @@ const formatFilterValue = (value: any) => {
     return Array.isArray(value) ? value.join(', ') : String(value);
 };
 
-export const LoadPlanCard: React.FC<LoadPlanCardProps> = ({ plan, onConfirm, confirmed }) => {
+export const LoadPlanCard: React.FC<LoadPlanCardProps> = ({ plan, onConfirm, confirmed, canLoadInNewWorkspace }) => {
     const theme = useTheme();
     const { t } = useTranslation();
     const [selection, setSelection] = useState<Record<number, boolean>>(
@@ -143,12 +149,12 @@ export const LoadPlanCard: React.FC<LoadPlanCardProps> = ({ plan, onConfirm, con
         fetchPreview(candidate, idx);
     };
 
-    const handleConfirm = async () => {
+    const handleConfirm = async (newWorkspace = false) => {
         const selected = plan.candidates.filter((c, i) => selection[i] && !c.resolutionError);
         if (selected.length === 0) return;
         setLoading(true);
         try {
-            await onConfirm(selected);
+            await onConfirm(selected, { newWorkspace });
         } finally {
             setLoading(false);
         }
@@ -257,12 +263,47 @@ export const LoadPlanCard: React.FC<LoadPlanCardProps> = ({ plan, onConfirm, con
                             defaultValue: '✓ Loaded',
                         })}
                     </Typography>
+                ) : canLoadInNewWorkspace ? (
+                    // A workspace with data is already open — make the load
+                    // destination explicit rather than silently appending.
+                    <>
+                        <Button
+                            size="small"
+                            variant="outlined"
+                            disabled={selectedCount === 0 || loading}
+                            onClick={() => handleConfirm(true)}
+                            sx={{
+                                textTransform: 'none', fontSize: 12,
+                                py: 0.5, px: 1.5, minHeight: 0,
+                                borderRadius: 1.5,
+                            }}
+                        >
+                            {loading
+                                ? '...'
+                                : t('dataLoading.loadPlan.loadInNewWorkspace', { defaultValue: 'Load in new workspace' })}
+                        </Button>
+                        <Button
+                            size="small"
+                            variant="contained"
+                            disabled={selectedCount === 0 || loading}
+                            onClick={() => handleConfirm(false)}
+                            sx={{
+                                textTransform: 'none', fontSize: 12,
+                                py: 0.5, px: 2, minHeight: 0,
+                                borderRadius: 1.5, boxShadow: 'none',
+                            }}
+                        >
+                            {loading
+                                ? '...'
+                                : `${t('dataLoading.loadPlan.addToCurrent', { defaultValue: 'Add to current workspace' })} (${selectedCount})`}
+                        </Button>
+                    </>
                 ) : (
                     <Button
                         size="small"
                         variant="contained"
                         disabled={selectedCount === 0 || loading}
-                        onClick={handleConfirm}
+                        onClick={() => handleConfirm(false)}
                         sx={{
                             textTransform: 'none', fontSize: 12,
                             py: 0.5, px: 2, minHeight: 0,
diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index 8ef952df..537629c0 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -938,8 +938,8 @@
     "collapse": "Collapse",
     "expand": "Expand",
     "emptyState": "Add rules or workflows to help AI agents work better.",
-    "rulesHint": "Constraints the agent always follows.",
-    "workflowsHint": "Analyses distilled from past sessions that the agent can save and replay.",
+    "rulesHint": "Provide rules that agents should follow.",
+    "workflowsHint": "Distill an analysis into reusable workflow. Replay it in a new context.",
     "markdownEditor": "Markdown Editor",
     "description": "Description",
     "descriptionPlaceholder": "Short summary of this rule (max {{max}} chars)",
diff --git a/src/i18n/locales/en/dataLoading.json b/src/i18n/locales/en/dataLoading.json
index 20394e7f..a10094e1 100644
--- a/src/i18n/locales/en/dataLoading.json
+++ b/src/i18n/locales/en/dataLoading.json
@@ -38,6 +38,8 @@
       "filters": "Filters",
       "rowLimit": "Row limit",
       "loadSelected": "Load selected",
+      "loadInNewWorkspace": "Load in new workspace",
+      "addToCurrent": "Add to current workspace",
       "loadedCount": "✓ Loaded {{count}} table",
       "loadedCount_plural": "✓ Loaded {{count}} tables",
       "preview": "Preview",
diff --git a/src/i18n/locales/zh/dataLoading.json b/src/i18n/locales/zh/dataLoading.json
index 4eab6fcf..be1468ce 100644
--- a/src/i18n/locales/zh/dataLoading.json
+++ b/src/i18n/locales/zh/dataLoading.json
@@ -38,6 +38,8 @@
       "filters": "筛选条件",
       "rowLimit": "行数限制",
       "loadSelected": "加载选中的表",
+      "loadInNewWorkspace": "载入新工作区",
+      "addToCurrent": "添加到当前工作区",
       "loadedCount": "✓ 已加载 {{count}} 张表",
       "preview": "预览",
       "hidePreview": "收起",
diff --git a/src/views/DataLoadingChat.tsx b/src/views/DataLoadingChat.tsx
index 9fe59000..16255865 100644
--- a/src/views/DataLoadingChat.tsx
+++ b/src/views/DataLoadingChat.tsx
@@ -38,6 +38,18 @@ import { loadTable } from '../app/tableThunks';
 import { LoadPlanCard, PendingLoadsCard } from '../components/LoadPlanCard';
 import { TablePreviewRow, TablePreviewData } from '../components/TablePreviewRow';
 import { AgentChatInput } from './AgentChatInput';
+import { generateUUID } from '../app/identity';
+
+// ---------------------------------------------------------------------------
+// Helper: fresh workspace session id, `session_<YYYYMMDD>_<HHMMSS>_<first 4 uuid chars>` in local time (mirrors DataSourceSidebar's scheme)
+// ---------------------------------------------------------------------------
+
+const newWorkspaceSessionId = (): string => {
+    const now = new Date();
+    const date = `${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, '0')}${String(now.getDate()).padStart(2, '0')}`;
+    const time = `${String(now.getHours()).padStart(2, '0')}${String(now.getMinutes()).padStart(2, '0')}${String(now.getSeconds()).padStart(2, '0')}`;
+    return `session_${date}_${time}_${generateUUID().slice(0, 4)}`;
+};
 
 // ---------------------------------------------------------------------------
 // Helper: generate table name
@@ -448,7 +460,17 @@ const ChatBubble = React.memo<{
                     <LoadPlanCard
                         plan={message.loadPlan}
                         confirmed={message.loadPlan.candidates.every(c => c.selected === false)}
-                        onConfirm={async (selected: LoadPlanCandidate[]) => {
+                        canLoadInNewWorkspace={existingNames.size > 0}
+                        onConfirm={async (selected: LoadPlanCandidate[], opts?: { newWorkspace?: boolean }) => {
+                            // When data already exists, the user may choose to
+                            // start a fresh workspace instead of appending. We
+                            // reset *before* loading so the X-Workspace-Id
+                            // header (read live from the store at fetch time)
+                            // targets the new session.
+                            if (opts?.newWorkspace) {
+                                const displayName = selected[0]?.displayName || 'Untitled Session';
+                                dispatch(dfActions.resetForNewWorkspace({ id: newWorkspaceSessionId(), displayName }));
+                            }
                             for (const item of selected) {
                                 const sourceTableName = item.sourceTableName || item.displayName;
                                 const table = {
diff --git a/src/views/DataSourceSidebar.tsx b/src/views/DataSourceSidebar.tsx
index c0502fd0..e9464806 100644
--- a/src/views/DataSourceSidebar.tsx
+++ b/src/views/DataSourceSidebar.tsx
@@ -155,7 +155,13 @@ export const DataSourceSidebar: React.FC<{
     // built-in sample_datasets connector is shown there, giving users
     // something useful to explore immediately. The upgrade message only
     // appears when they try to add a new connector or link a folder.
-    const [initialTab, setInitialTab] = useState<'sources' | 'sessions' | 'knowledge'>('sources');
+    // Stored in Redux so the active tab survives a session refresh.
+    // Fall back to 'sources' for older persisted state that predates this field.
+    const initialTab = useSelector((state: DataFormulatorState) => state.dataSourceSidebarTab ?? 'sources');
+    const setInitialTab = useCallback(
+        (tab: 'sources' | 'sessions' | 'knowledge') => dispatch(dfActions.setDataSourceSidebarTab(tab)),
+        [dispatch],
+    );
 
     // External callers (e.g. workflow distill on success) can ask the
     // sidebar to open and switch to a specific tab.
@@ -322,7 +328,6 @@ export const DataSourceSidebar: React.FC<{
                     panelWidth={panelWidth}
                     onOpenUploadDialog={onOpenUploadDialog}
                     onCollapse={toggle}
-                    initialTab={initialTab}
                     connectorRefreshKey={connectorRefreshKey}
                     disableConnectors={disableConnectors}
                 />
@@ -346,10 +351,9 @@ const DataSourceSidebarPanel: React.FC<{
     panelWidth: number;
     onOpenUploadDialog?: (tab?: string) => void;
     onCollapse: () => void;
-    initialTab?: 'sources' | 'sessions' | 'knowledge';
     connectorRefreshKey?: number;
     disableConnectors?: boolean;
-}> = ({ panelWidth, onOpenUploadDialog, onCollapse, initialTab = 'sources', connectorRefreshKey = 0, disableConnectors = false }) => {
+}> = ({ panelWidth, onOpenUploadDialog, onCollapse, connectorRefreshKey = 0, disableConnectors = false }) => {
     const { t } = useTranslation();
     const dispatch = useDispatch<AppDispatch>();
 
@@ -417,13 +421,15 @@ const DataSourceSidebarPanel: React.FC<{
     const [searchCatalogCache, setSearchCatalogCache] = useState<Record<string, CatalogCache>>({});
     const [searchingCatalog, setSearchingCatalog] = useState<Record<string, boolean>>({});
 
-    // Sidebar tab: 'sources' or 'sessions' or 'knowledge'
-    const [activeTab, setActiveTab] = useState<'sources' | 'sessions' | 'knowledge'>(initialTab);
-
-    // Sync tab when rail icon switches it
-    useEffect(() => {
-        setActiveTab(initialTab);
-    }, [initialTab]);
+    // Sidebar tab: 'sources' or 'sessions' or 'knowledge'.
+    // Stored in Redux so the active tab survives a session refresh; the
+    // parent DataSourceSidebar reads and writes the same Redux value.
+    // Fall back to 'sources' for older persisted state that predates this field.
+    const activeTab = useSelector((state: DataFormulatorState) => state.dataSourceSidebarTab ?? 'sources');
+    const setActiveTab = useCallback(
+        (tab: 'sources' | 'sessions' | 'knowledge') => dispatch(dfActions.setDataSourceSidebarTab(tab)),
+        [dispatch],
+    );
 
     // ── Sessions ─────────────────────────────────────────────────────────────
 
diff --git a/src/views/EncodingShelfCard.tsx b/src/views/EncodingShelfCard.tsx
index 39faebd2..1386e614 100644
--- a/src/views/EncodingShelfCard.tsx
+++ b/src/views/EncodingShelfCard.tsx
@@ -32,6 +32,7 @@ import {
     Theme,
     Slider,
     CircularProgress,
+    LinearProgress,
     Button,
     Collapse,
     Dialog,
@@ -43,7 +44,7 @@ import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
 
 import React from 'react';
 import { useDragLayer } from 'react-dnd';
-import { ThinkingBufferEffect } from '../components/FunComponents';
+import { ThinkingBufferEffect, WritingPencil } from '../components/FunComponents';
 import { Channel, Chart, FieldItem, Trigger, duplicateChart, ChartStyleVariant, computeEncodingFingerprint, isVariantStale } from "../components/ComponentType";
 
 import _ from 'lodash';
@@ -1658,31 +1659,56 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
             </Box>
             {variantChipStrip}
             {formulateInputBox}
-            {/* Inline status banner — shown right under the input bubble so
-                the user always knows what stage the agent is in. Covers the
-                three submit phases (classify → restyle/formulate). The data
-                agent has its own progress indicators elsewhere (running spinner
-                on the chart, status messages in the data thread); we keep this
-                line short and focused on telling the user *which* path was
-                chosen so the routing decision feels visible. */}
-            {submitPhase !== 'idle' && (
-                <Box sx={{ px: 1, py: 0.25, ml: '8px' }}>
-                    {ThinkingBanner(
-                        submitPhase === 'classifying' ? 'thinking…'
-                          : submitPhase === 'restyling' ? 'updating the chart…'
-                          : 'preparing data for the chart…'
-                    )}
-                </Box>
-            )}
         </Box>);
 
+    // Whether any agent work is in flight (intent classify, restyle, or the
+    // data agent) and the matching status line shown in the overlay below.
+    const isAgentWorking = submitPhase !== 'idle' || isDataAgentRunning;
+    const agentStatusText =
+        submitPhase === 'classifying' ? 'thinking…'
+          : submitPhase === 'restyling' ? 'updating the chart…'
+          : (submitPhase === 'formulating' || isDataAgentRunning) ? 'preparing data for the chart…'
+          : 'thinking…';
+
     const encodingShelfCard = (
         <Box sx={{ 
+            position: 'relative',
             padding: '4px 6px', 
             maxWidth: "400px", 
             display: 'flex', 
             flexDirection: 'column', 
         }}>
+            {/* Frosted (88% alpha + blur) agent-working overlay — blocks the
+                encoding shelf + chat box while any agent phase runs (intent
+                classify, restyle, or the data agent), showing the live status
+                text, instead of dimming the chart canvas. */}
+            {isAgentWorking && (
+                <Box sx={{
+                    position: 'absolute',
+                    top: 0, left: 0, right: 0, bottom: 0,
+                    backgroundColor: alpha(theme.palette.background.paper, 0.88),
+                    backdropFilter: 'blur(3px)',
+                    display: 'flex',
+                    flexDirection: 'column',
+                    alignItems: 'center',
+                    justifyContent: 'center',
+                    gap: 0.5,
+                    zIndex: 2,
+                    px: 2,
+                }}>
+                    <Box sx={{ display: 'flex', flexDirection: 'row', alignItems: 'center', gap: 0.75 }}>
+                        <WritingPencil size={12} />
+                        <Typography variant="body2" color="text.secondary" sx={{ fontWeight: 500, fontSize: 11.5, lineHeight: 1.4 }}>
+                            {agentStatusText}
+                        </Typography>
+                    </Box>
+                    <LinearProgress sx={{
+                        position: 'absolute', bottom: 0, left: 0, right: 0, height: 2,
+                        backgroundColor: alpha(theme.palette.primary.main, 0.15),
+                        '& .MuiLinearProgress-bar': { backgroundColor: theme.palette.primary.main },
+                    }} />
+                </Box>
+            )}
             <Box sx={{ padding: '4px 0px' }}>
                 {channelComponent}
             </Box>
diff --git a/src/views/KnowledgePanel.tsx b/src/views/KnowledgePanel.tsx
index 05343a0a..3c54888b 100644
--- a/src/views/KnowledgePanel.tsx
+++ b/src/views/KnowledgePanel.tsx
@@ -288,7 +288,7 @@ export const KnowledgePanel: React.FC = () => {
                         <SmartToyOutlinedIcon sx={{ fontSize: 13, color: 'text.secondary', mt: 0.25 }} />
                     </Tooltip>
                 )}
-                <Box sx={{ display: 'flex', flexDirection: 'column', alignItems: 'center', flexShrink: 0 }}>
+                <Box sx={{ display: 'flex', flexDirection: 'column', alignItems: 'center', alignSelf: 'stretch', flexShrink: 0 }}>
                     {category === 'workflows' && (
                         <Tooltip title={t('knowledge.replayTooltip')}>
                             <IconButton
@@ -300,7 +300,7 @@ export const KnowledgePanel: React.FC = () => {
                                     '&:hover': { bgcolor: theme => alpha(theme.palette.primary.main, 0.08) },
                                 }}
                             >
-                                <PlayArrowIcon sx={{ fontSize: 17 }} />
+                                <PlayArrowIcon sx={{ fontSize: 18 }} />
                             </IconButton>
                         </Tooltip>
                     )}
@@ -308,7 +308,7 @@ export const KnowledgePanel: React.FC = () => {
                         className="item-actions"
                         size="small"
                         onClick={(e) => { e.stopPropagation(); setDeleteTarget({ category, path: item.path, title: item.title }); }}
-                        sx={{ p: 0.25, display: 'none', color: 'text.secondary', '&:hover': { color: 'error.main' } }}
+                        sx={{ p: 0.25, mt: 'auto', display: 'none', color: 'text.secondary', '&:hover': { color: 'error.main' } }}
                     >
                         <DeleteOutlineIcon sx={{ fontSize: 16 }} />
                     </IconButton>
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 585eba79..78af7812 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -36,7 +36,6 @@ import _ from 'lodash';
 
 import { borderColor, transition } from '../app/tokens';
 import { WritingIndicator } from '../components/FunComponents';
-import { AnvilLoader } from '../components/AnvilLoader';
 
 import ButtonGroup from '@mui/material/ButtonGroup';
 
@@ -51,9 +50,7 @@ import { buildEmbeddedDataForChart } from '../app/restyle';
 import { apiRequest } from '../app/apiClient';
 import embed from 'vega-embed';
 import { Chart, EncodingItem, EncodingMap, FieldItem, computeInsightKey } from '../components/ComponentType';
-import { DictTable } from "../components/ComponentType";
 
-import AddchartIcon from '@mui/icons-material/Addchart';
 import DeleteIcon from '@mui/icons-material/Delete';
 import TerminalIcon from '@mui/icons-material/Terminal';
 import QuestionAnswerIcon from '@mui/icons-material/QuestionAnswer';
@@ -431,7 +428,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
     let focusedChartId = focusedId?.type === 'chart' ? focusedId.chartId : undefined;
     let chartSynthesisInProgress = useSelector((state: DataFormulatorState) => state.chartSynthesisInProgress) || [];
 
-    let synthesisRunning = focusedChartId ? chartSynthesisInProgress.includes(focusedChartId) : false;
     let handleDeleteChart = () => { focusedChartId && dispatch(dfActions.deleteChartById(focusedChartId)) }
 
     // Track the assembled Vega-Lite spec from the renderer so we can open it in the Vega Editor
@@ -1098,14 +1094,9 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
     </Stack>, [localScaleFactor, t]);
 
     return <Box ref={componentRef} id="vis-view-canvas" sx={{overflow: "hidden", display: 'flex', flex: 1, position: 'relative'}}>
-        {synthesisRunning ? <Box sx={{
-                position: "absolute", top: 0, left: 0, right: 0, bottom: 0, zIndex: 1001, 
-                backgroundColor: "rgba(255, 255, 255, 0.82)",
-                backdropFilter: 'blur(2px)',
-                display: "flex", alignItems: "center", justifyContent: "center",
-            }}>
-                <AnvilLoader height="auto" />
-            </Box> : ''}
+        {/* No full-screen block while the agent works: the previous chart
+            stays visible, and progress is signaled non-intrusively on the
+            chat box + encoding shelf (see EncodingShelfCard). */}
         {chartUnavailable ? "" : chartResizer}
         {content}
     </Box>

From 1c73e626b14dbf0e2afe5f4e44bed492a3697592 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Mon, 1 Jun 2026 22:33:43 -0700
Subject: [PATCH 10/29] minor

---
 py-src/data_formulator/agents/data_agent.py | 270 +++-----------------
 src/i18n/locales/en/common.json             |   2 +-
 src/i18n/locales/zh/common.json             |   2 +-
 src/views/KnowledgePanel.tsx                |  32 ++-
 4 files changed, 61 insertions(+), 245 deletions(-)

diff --git a/py-src/data_formulator/agents/data_agent.py b/py-src/data_formulator/agents/data_agent.py
index 9a9d10b2..56de9037 100644
--- a/py-src/data_formulator/agents/data_agent.py
+++ b/py-src/data_formulator/agents/data_agent.py
@@ -148,60 +148,11 @@ def _rescue_validate_action(data: dict) -> list[str]:
             },
         },
     },
-    {
-        "type": "function",
-        "function": {
-            "name": "search_knowledge",
-            "description": (
-                "Search the user's knowledge base (rules, workflows) "
-                "for relevant entries. Returns title, category, snippet, and "
-                "path for each match. Use read_knowledge to get full content."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "query": {
-                        "type": "string",
-                        "description": "Search keywords.",
-                    },
-                    "categories": {
-                        "type": "array",
-                        "items": {
-                            "type": "string",
-                            "enum": ["rules", "workflows"],
-                        },
-                        "description": "Optional: limit search to specific categories.",
-                    },
-                },
-                "required": ["query"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "read_knowledge",
-            "description": (
-                "Read the full content of a knowledge entry. Use the category "
-                "and path from search_knowledge results."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "category": {
-                        "type": "string",
-                        "enum": ["rules", "workflows"],
-                        "description": "Knowledge category.",
-                    },
-                    "path": {
-                        "type": "string",
-                        "description": "Relative path to the knowledge file (from search_knowledge).",
-                    },
-                },
-                "required": ["category", "path"],
-            },
-        },
-    },
+    # TODO(knowledge): The agent-callable knowledge tools (`search_knowledge`,
+    # `read_knowledge`) were removed along with the automatic up-front
+    # injection (see _build_initial_messages). Reintroduce a single, unified
+    # knowledge-access design here when we revisit it — see the TODO block in
+    # _build_initial_messages for the intended shape.
 ]
 
 
@@ -223,9 +174,6 @@ def _rescue_validate_action(data: dict) -> list[str]:
   transforming, printing) into a single explore() call.
 - **inspect_source_data(table_names)** — get schema, stats, and sample rows
   for source tables (cheaper than explore for basic inspection).
-- **search_knowledge(query, categories?)** — search the user's knowledge base
-  (rules, workflows) for relevant entries.
-- **read_knowledge(category, path)** — read the full content of a knowledge entry.
 
 You analyse data that is **already in the workspace**.  If the user's
 question requires data that isn't present, do NOT try to find it yourself —
@@ -247,8 +195,7 @@ def _rescue_validate_action(data: dict) -> list[str]:
 `delegate` are **actions**, NOT tools.  Never call them via
 function/tool calling — they MUST appear as a JSON object in your **text
 reply**.  Only the items listed in the Tools section above (`explore`,
-`inspect_source_data`, `search_knowledge`, `read_knowledge`) may be
-invoked as tool calls.
+`inspect_source_data`) may be invoked as tool calls.
 
 ### `visualize`
 ```json
@@ -1380,48 +1327,29 @@ def _build_initial_messages(
             user_content += f"{peripheral_block}\n\n"
 
         # Search and inject relevant knowledge (workflows + non-alwaysApply rules)
-        table_names = [t.get("name", "") for t in input_tables if t.get("name")]
-        relevant_knowledge = self._search_relevant_knowledge(user_question, table_names)
-
-        # Always include the workflow distilled from the active workspace
-        # (design-docs/24 §3.6) so the session has stable working memory
-        # across turns regardless of search relevance.
-        session_exp = self._load_active_session_workflow()
-        if session_exp:
-            existing_paths = {
-                (item["category"], item["path"]) for item in relevant_knowledge
-            }
-            if (session_exp["category"], session_exp["path"]) not in existing_paths:
-                relevant_knowledge = [session_exp] + relevant_knowledge
-
-        if relevant_knowledge:
-            knowledge_block = "[RELEVANT KNOWLEDGE]\n"
-            for item in relevant_knowledge:
-                label = "rule" if item["category"] == "rules" else "knowledge"
-                knowledge_block += (
-                    f"\n### [{label}] {item['title']}\n"
-                    f"{item['snippet']}\n"
-                )
-            user_content += f"{knowledge_block}\n\n"
-            self._injected_knowledge = [
-                {"category": item["category"], "title": item["title"], "path": item["path"]}
-                for item in relevant_knowledge
-            ]
-        else:
-            self._injected_knowledge = []
-
-        self._reasoning_log.log(
-            "knowledge_search",
-            source="auto_inject",
-            query=user_question,
-            table_names=table_names,
-            results_count=len(relevant_knowledge),
-            results=[
-                {"category": item["category"], "title": item["title"]}
-                for item in relevant_knowledge
-            ],
-        )
-
+        #
+        # TODO(knowledge): Both knowledge-access paths are intentionally
+        # disabled for now:
+        #   1. (controlled) the automatic up-front injection that used to call
+        #      `_search_relevant_knowledge()` / `_load_active_session_workflow()`
+        #      here and append a `[RELEVANT KNOWLEDGE]` block to the user msg.
+        #   2. (uncontrolled) the agent-callable `search_knowledge` /
+        #      `read_knowledge` tools (definitions + dispatch + handlers).
+        # Reason: lexical keyword search over rules/workflows injected unclear,
+        # often-irrelevant context and added agent burden without a clear win.
+        #
+        # When we revisit this, design ONE coherent retrieval path rather than
+        # two competing ones. Open questions to settle first:
+        #   - relevance: replace the keyword `_match_score` with semantic /
+        #     embedding search (or a hybrid) so matches are actually on-topic;
+        #   - trigger: decide controlled (deterministic pre-inject, bounded)
+        #     vs. tool-driven (agent asks on demand) — pick one, not both;
+        #   - budget: hard cap how many entries + tokens land in the prompt;
+        #   - scope: keep alwaysApply rules (injected below) separate — those
+        #     are an explicit user opt-in, not search.
+        # `_injected_knowledge` stays an empty list so the reasoning log and
+        # context_info payloads keep their shape.
+        self._injected_knowledge = []
         # Inject alwaysApply rules into user message for better visibility
         # (rules in system prompt are often ignored; rules in user message have higher impact)
         if self._knowledge_store:
@@ -1644,7 +1572,6 @@ def _tool_loop(
                         "purpose": tool_args.get("purpose") if tool_name == "explore" else None,
                         "code": tool_args.get("code") if tool_name == "explore" else None,
                         "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None,
-                        "query": tool_args.get("query") if tool_name == "search_knowledge" else None,
                     }
 
                     tool_t0 = time.time()
@@ -1677,25 +1604,6 @@ def _tool_loop(
                             "status": "ok",
                             "stdout": tool_content,
                         }
-                    elif tool_name == "search_knowledge":
-                        tool_content = self._handle_search_knowledge(tool_args)
-                        rlog.log("knowledge_search",
-                                 query=tool_args.get("query", ""),
-                                 results_count=tool_content.count("- [") if tool_content else 0)
-                        yield {
-                            "type": "tool_result",
-                            "tool": tool_name,
-                            "status": "ok",
-                            "stdout": tool_content,
-                        }
-                    elif tool_name == "read_knowledge":
-                        tool_content = self._handle_read_knowledge(tool_args)
-                        yield {
-                            "type": "tool_result",
-                            "tool": tool_name,
-                            "status": "ok",
-                            "stdout": tool_content,
-                        }
                     elif tool_name in ("visualize", "clarify", "explain", "summary", "delegate", "action"):
                         action_data = dict(tool_args)
                         if "action" not in action_data:
@@ -1884,115 +1792,17 @@ def _format_observation(
     # ------------------------------------------------------------------
     # Knowledge helpers
     # ------------------------------------------------------------------
-
-    def _search_relevant_knowledge(
-        self,
-        user_question: str,
-        table_names: list[str],
-        max_items: int = 5,
-    ) -> list[dict[str, Any]]:
-        """Search workflows and non-alwaysApply rules relevant to the current session.
-
-        Uses the user question as the search query and passes table names
-        separately for tag-overlap boosting.  alwaysApply rules are
-        excluded by KnowledgeStore.search() since they are already
-        injected via system prompt.
-        Graceful degradation: returns empty list on failure.
-        """
-        if not self._knowledge_store:
-            return []
-        try:
-            results = self._knowledge_store.search(
-                user_question,
-                categories=["rules", "workflows"],
-                max_results=max_items,
-                table_names=table_names[:5],
-            )
-            return results
-        except Exception:
-            logger.warning("Failed to search knowledge", exc_info=True)
-            return []
-
-    def _load_active_session_workflow(self) -> dict[str, Any] | None:
-        """Return the workflow distilled from the active workspace, if any.
-
-        The session-scoped distillation flow (design-docs/24) writes one
-        workflow per workspace, stamped with ``source_workspace_id``.
-        We always inject that file into the agent's context so the agent
-        has stable working memory for the active session in addition to
-        whatever the relevance search picked.
-        """
-        if not self._knowledge_store:
-            return None
-        try:
-            from data_formulator.workspace_factory import get_active_workspace_id
-            ws_id = get_active_workspace_id()
-        except Exception:
-            ws_id = None
-        if not ws_id:
-            return None
-        try:
-            entry = self._knowledge_store.find_workflow_by_workspace_id(ws_id)
-        except Exception:
-            logger.warning("find_workflow_by_workspace_id failed", exc_info=True)
-            return None
-        if not entry:
-            return None
-        try:
-            content = self._knowledge_store.read("workflows", entry["path"])
-        except Exception:
-            return None
-        from data_formulator.knowledge.store import parse_front_matter
-        _, body = parse_front_matter(content)
-        snippet = body[:500].strip()
-        if not snippet:
-            return None
-        return {
-            "category": "workflows",
-            "title": entry.get("title", entry.get("path", "")),
-            "path": entry["path"],
-            "snippet": snippet,
-            "source": entry.get("source", "distill"),
-        }
-
-    def _handle_search_knowledge(self, tool_args: dict) -> str:
-        """Handle the ``search_knowledge`` tool call."""
-        if not self._knowledge_store:
-            return "Knowledge base is not available."
-
-        query = tool_args.get("query", "")
-        categories = tool_args.get("categories")
-        try:
-            results = self._knowledge_store.search(query, categories=categories)
-            if not results:
-                return "No matching knowledge entries found."
-            lines = []
-            for r in results:
-                lines.append(
-                    f"- [{r['category']}] **{r['title']}** ({r['path']})\n"
-                    f"  {r['snippet'][:200]}"
-                )
-            return "\n".join(lines)
-        except Exception as exc:
-            logger.warning("search_knowledge tool error: %s", type(exc).__name__)
-            return f"Error searching knowledge: {type(exc).__name__}"
-
-    def _handle_read_knowledge(self, tool_args: dict) -> str:
-        """Handle the ``read_knowledge`` tool call."""
-        if not self._knowledge_store:
-            return "Knowledge base is not available."
-
-        category = tool_args.get("category", "")
-        path = tool_args.get("path", "")
-        try:
-            return self._knowledge_store.read(category, path)
-        except ValueError as exc:
-            return f"Invalid path: {exc}"
-        except FileNotFoundError:
-            return "Knowledge file not found."
-        except Exception as exc:
-            logger.warning("read_knowledge tool error: %s", type(exc).__name__)
-            return f"Error reading knowledge: {type(exc).__name__}"
+    #
+    # TODO(knowledge): The data agent's knowledge access is disabled for now.
+    # Removed together:
+    #   - _search_relevant_knowledge()     (controlled up-front auto-injection)
+    #   - _load_active_session_workflow()  (always-on session-workflow injection)
+    #   - _handle_search_knowledge() / _handle_read_knowledge()  (uncontrolled tools)
+    # KnowledgeStore.search()/read() still exist and are used elsewhere
+    # (e.g. the Knowledge panel + alwaysApply rule injection). When we bring
+    # agent knowledge back, add a single unified retrieval entry point here
+    # rather than re-adding both competing paths. See the TODO block in
+    # _build_initial_messages for the design questions to settle first.
 
     # ------------------------------------------------------------------
     # Helpers
diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index 537629c0..784e6123 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -950,7 +950,7 @@
     "replay": "Replay",
     "replayTooltip": "Replay this analysis on the current data",
     "replayBusy": "The agent is busy — wait for it to finish before replaying.",
-    "replayNoData": "Load and focus a dataset before replaying a workflow.",
+    "replayNoData": "Load a dataset before replaying a workflow.",
     "replayStarted": "Replaying workflow on the current data…",
     "replayPrompt": "Reproduce the following analysis workflow on the currently loaded data. Follow the steps in order, adapting any column references to the columns available in the current dataset. It's fine if the result isn't identical — reproduce the same overall analysis.\n\nBefore making large assumptions, check whether the current data can actually support the workflow. If there is a major discrepancy — e.g. a required field or measure is missing, the granularity or shape is very different, or a step has no sensible equivalent on this data — pause and ask me to confirm how to proceed (or briefly explain the mismatch and your proposed adaptation) instead of guessing. Minor differences (renamed columns, extra columns) can be adapted silently.\n\n{{content}}"
   }
diff --git a/src/i18n/locales/zh/common.json b/src/i18n/locales/zh/common.json
index 244f93da..ee0ff177 100644
--- a/src/i18n/locales/zh/common.json
+++ b/src/i18n/locales/zh/common.json
@@ -950,7 +950,7 @@
     "replay": "重放",
     "replayTooltip": "在当前数据上重放此分析",
     "replayBusy": "Agent 正忙——请等待其完成后再重放。",
-    "replayNoData": "请先加载并聚焦一个数据集，再重放工作流。",
+    "replayNoData": "请先加载一个数据集，再重放工作流。",
     "replayStarted": "正在当前数据上重放工作流…",
     "replayPrompt": "在当前已加载的数据上复现以下分析流程。按顺序执行各步骤，并将其中的列引用调整为当前数据集中可用的列。结果不必完全一致——复现同样的整体分析即可。\n\n在做出较大假设之前，请先确认当前数据是否真的能支撑该流程。如果存在重大差异——例如缺少必需的字段或度量、数据粒度或结构差异很大、或某个步骤在当前数据上没有合理的对应方式——请暂停并向我确认如何继续（或简要说明不匹配之处及你建议的调整方案），而不要凭空猜测。对于细微差异（列被重命名、存在额外的列）可以直接静默调整。\n\n{{content}}"
   }
diff --git a/src/views/KnowledgePanel.tsx b/src/views/KnowledgePanel.tsx
index 3c54888b..9fb5c974 100644
--- a/src/views/KnowledgePanel.tsx
+++ b/src/views/KnowledgePanel.tsx
@@ -105,6 +105,9 @@ export const KnowledgePanel: React.FC = () => {
 
     // For the "distill from this session" placeholder under WORKFLOWS.
     const tables = useSelector((s: DataFormulatorState) => s.tables);
+    // Workflow replay needs data to run on — disable replay when the
+    // workspace has no tables loaded.
+    const hasTables = tables.length > 0;
     const charts = useSelector((s: DataFormulatorState) => s.charts);
     const conceptShelfItems = useSelector((s: DataFormulatorState) => s.conceptShelfItems);
     const selectedModelId = useSelector((s: DataFormulatorState) => s.selectedModelId);
@@ -290,18 +293,21 @@ export const KnowledgePanel: React.FC = () => {
                 )}
                 <Box sx={{ display: 'flex', flexDirection: 'column', alignItems: 'center', alignSelf: 'stretch', flexShrink: 0 }}>
                     {category === 'workflows' && (
-                        <Tooltip title={t('knowledge.replayTooltip')}>
-                            <IconButton
-                                size="small"
-                                onClick={(e) => { e.stopPropagation(); handleReplay(item); }}
-                                sx={{
-                                    p: 0.25,
-                                    color: 'primary.main',
-                                    '&:hover': { bgcolor: theme => alpha(theme.palette.primary.main, 0.08) },
-                                }}
-                            >
-                                <PlayArrowIcon sx={{ fontSize: 18 }} />
-                            </IconButton>
+                        <Tooltip title={hasTables ? t('knowledge.replayTooltip') : t('knowledge.replayNoData')}>
+                            <span>
+                                <IconButton
+                                    size="small"
+                                    disabled={!hasTables}
+                                    onClick={(e) => { e.stopPropagation(); handleReplay(item); }}
+                                    sx={{
+                                        p: 0.25,
+                                        color: 'primary.main',
+                                        '&:hover': { bgcolor: theme => alpha(theme.palette.primary.main, 0.08) },
+                                    }}
+                                >
+                                    <PlayArrowIcon sx={{ fontSize: 18 }} />
+                                </IconButton>
+                            </span>
                         </Tooltip>
                     )}
                     <IconButton
@@ -315,7 +321,7 @@ export const KnowledgePanel: React.FC = () => {
                 </Box>
             </Box>
         );
-    }, [openEditDialog, t, handleReplay]);
+    }, [openEditDialog, t, handleReplay, hasTables]);
 
     const renderCategorySection = useCallback((
         category: KnowledgeCategory,

From e14ce6648c0998ccd9af0bb344185f8059a4f593 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Tue, 2 Jun 2026 09:28:35 -0700
Subject: [PATCH 11/29] minor fix

---
 py-src/data_formulator/data_connector.py              | 10 ++++++++++
 .../data_loader/sample_datasets_loader.py             | 11 +++++++++++
 2 files changed, 21 insertions(+)

diff --git a/py-src/data_formulator/data_connector.py b/py-src/data_formulator/data_connector.py
index f56f4aca..c82b9bfb 100644
--- a/py-src/data_formulator/data_connector.py
+++ b/py-src/data_formulator/data_connector.py
@@ -657,6 +657,16 @@ def _require_loader(self) -> ExternalDataLoader:
         loader = self._loaders.get(identity)
         if loader is not None:
             return loader
+        # No-auth connectors (e.g. built-in example datasets) are always
+        # available — there's nothing to connect, so lazily instantiate and
+        # cache the loader on first use. This mirrors the ``auth_mode == "none"``
+        # special-casing in the connect/get-status/preview/import endpoints and
+        # keeps no-auth sources working for catalog/preview/import even when
+        # external data connectors are disabled (e.g. ephemeral/demo mode).
+        if _loader_auth_mode(self._loader_class) == "none":
+            loader = self._loader_class()
+            self._loaders[identity] = loader
+            return loader
         # Try auto-reconnect from vault
         loader = self._try_auto_reconnect(identity)
         if loader is not None:
diff --git a/py-src/data_formulator/data_loader/sample_datasets_loader.py b/py-src/data_formulator/data_loader/sample_datasets_loader.py
index a84b8302..d74b6fa4 100644
--- a/py-src/data_formulator/data_loader/sample_datasets_loader.py
+++ b/py-src/data_formulator/data_loader/sample_datasets_loader.py
@@ -69,6 +69,17 @@ def auth_mode() -> str:
         # credentials UI, and are always reported as ``connected: true``.
         return "none"
 
+    @staticmethod
+    def auth_config() -> dict:
+        # Mirror :meth:`auth_mode` for the modern auth interface. The base
+        # class defaults ``auth_config`` to ``{"mode": "credentials"}``
+        # independently of ``auth_mode``, and ``_loader_auth_mode`` prefers
+        # ``auth_config``. Without this override the no-auth loader would be
+        # mis-classified as credential-based, breaking catalog/preview/import
+        # (which require a connection) whenever no loader was eagerly cached
+        # — e.g. in ephemeral / ``--disable-data-connectors`` deployments.
+        return {"mode": "none"}
+
     @staticmethod
     def catalog_hierarchy() -> list[dict[str, str]]:
         return [

From b1eafdff20629257f0a0725df1117183d76d060a Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 5 Jun 2026 10:41:27 -0700
Subject: [PATCH 12/29] fix issues learned from build

---
 .../agents/agent_report_gen.py                | 135 +++++++--
 src/app/chartCache.ts                         |   2 +
 src/app/tableThunks.ts                        |  40 +++
 src/app/useFormulateData.ts                   |  27 +-
 src/lib/agents-chart/vegalite/assemble.ts     |  29 +-
 src/views/ChartRenderService.tsx              |  27 +-
 src/views/DataThread.tsx                      |  14 +-
 src/views/ReportView.tsx                      |  33 ++-
 src/views/SimpleChartRecBox.tsx               | 272 ++++++++++--------
 9 files changed, 416 insertions(+), 163 deletions(-)

diff --git a/py-src/data_formulator/agents/agent_report_gen.py b/py-src/data_formulator/agents/agent_report_gen.py
index 9d936b28..80d4c31b 100644
--- a/py-src/data_formulator/agents/agent_report_gen.py
+++ b/py-src/data_formulator/agents/agent_report_gen.py
@@ -104,14 +104,32 @@
 
 The user message contains context about the workspace:
 - **[PRIMARY TABLE(S)]** / **[OTHER AVAILABLE TABLES]**: Lightweight schema of datasets.
-- **[FOCUSED THREAD]** (optional): The exploration thread the user is continuing.
-- **[OTHER THREADS]** (optional): Brief summaries of other exploration threads.
+- **[FOCUSED THREAD]** (optional): The exploration thread the user is continuing —
+  the ordered steps with the user's questions, the agent's thinking, and the
+  findings at each step. This is the spine of the story you are telling.
+- **[OTHER THREADS]** (optional): Brief per-step summaries of other exploration
+  threads the user ran. These are additional findings worth weaving in.
 - **[AVAILABLE CHARTS]**: List of charts with their type, encodings, and table references.
 
+## Ground the report in the exploration
+The thread context is your most important input. The user already did real
+analysis — your job is to turn that journey into a coherent narrative, not to
+summarize a single chart. Before writing:
+- Read the FOCUSED THREAD and OTHER THREADS to understand the full set of
+  questions asked and findings reached.
+- Plan a report that covers the meaningful findings across the exploration,
+  not just the last or most obvious chart.
+
 ## Phase 1 — Inspect
-Before writing, use `inspect_chart` and `inspect_source_data` to gather information
-about the charts and data you want to include. Inspect only what you actually need
-to ground your narrative — don't fetch everything.
+Use `inspect_chart` and `inspect_source_data` to gather what you need before
+writing. `inspect_chart` returns the chart's rendered image, a data sample, and
+the transformation code — so you can see exactly what each chart shows and write
+accurate captions and insights.
+- Inspect the charts that correspond to the key findings you plan to present.
+  For a multi-section report or dashboard, that usually means several charts.
+- You can inspect multiple charts in one call (pass several chart_ids).
+- Don't fetch charts you have no intention of discussing, but don't under-inspect
+  either — a report that ignores most of the exploration is a poor report.
 
 ## Phase 2 — Write the report
 
@@ -133,7 +151,7 @@
 
 ### Style & structure — adapt to the user's request
 The user may ask for any of:
-- a short note or social-style summary (a few sentences, maybe one chart),
+- a short note or social-style summary (a few sentences, one or two charts),
 - a blog post / narrative report (intro → findings → takeaway),
 - an executive summary (key numbers up top, then context),
 - a KPI dashboard / multi-section overview (headings per topic, multiple charts
@@ -141,10 +159,14 @@
 - a slide-style brief (compact sections with bullet points and embedded charts),
 - a deeper analytical report with sub-sections, methodology notes, and caveats.
 
-Pick the structure that fits the request and the available material. Reasonable
-defaults if the user is vague:
+Pick the structure that fits the request and the available material. Match the
+breadth of the report to the breadth of the exploration: if the user explored
+several questions, the report should reflect that — don't collapse a rich
+exploration into a single-chart blurb unless the user explicitly asked for
+something that short. Reasonable defaults if the user is vague:
 - Start with a `# Title` that reflects the topic.
-- Group related findings under `##` (and `###` if useful) headings.
+- Group related findings under `##` (and `###` if useful) headings, typically
+  one section per key finding / thread.
 - Around each embedded chart, briefly explain what it shows and the key insight.
 - Use bullets / short paragraphs / tables where they help; don't pad.
 - Close with a brief takeaway or summary section if the report is more than a
@@ -284,7 +306,10 @@ def _run_inspect_phase(
             attach_reasoning_content(assistant_msg, choice.message)
             messages.append(assistant_msg)
 
-            # Execute each tool
+            # Execute each tool. Chart images can't ride along in tool-result
+            # messages on most providers, so we collect them and attach them as
+            # a single follow-up vision message after all tool results.
+            pending_images: list[str] = []
             for tc in tool_calls:
                 tool_name = tc.function.name
                 try:
@@ -293,9 +318,10 @@ def _run_inspect_phase(
                     tool_args = {}
 
                 if tool_name == "inspect_chart":
-                    tool_content = self._handle_inspect_chart(
+                    tool_content, image_urls = self._handle_inspect_chart(
                         tool_args.get("chart_ids", []), charts
                     )
+                    pending_images.extend(image_urls)
                 elif tool_name == "inspect_source_data":
                     tool_content = handle_inspect_source_data(
                         tool_args.get("table_names", []),
@@ -311,6 +337,23 @@ def _run_inspect_phase(
                     "content": tool_content,
                 })
 
+            # Attach rendered chart images so the agent can visually inspect
+            # them before deciding what to embed.
+            if pending_images:
+                image_blocks: list[dict[str, Any]] = [{
+                    "type": "text",
+                    "text": (
+                        "[INSPECTED CHART IMAGE(S)] Rendered images for the "
+                        "charts you just inspected, in request order:"
+                    ),
+                }]
+                for url in pending_images:
+                    image_blocks.append({
+                        "type": "image_url",
+                        "image_url": {"url": url, "detail": "high"},
+                    })
+                messages.append({"role": "user", "content": image_blocks})
+
             logger.info(f"[ReportAgent] Inspect phase: executed {len(tool_calls)} tool call(s)")
 
         return messages
@@ -331,8 +374,12 @@ def _run_generate_phase(
         messages.append({
             "role": "user",
             "content": (
-                "Now write the report in markdown. "
-                "Use ![caption](chart://chart_id) to embed charts."
+                "Now write the report in markdown, grounded in the exploration "
+                "threads and the charts/data you inspected. Cover the key "
+                "findings across the exploration — don't reduce it to a single "
+                "chart unless the request explicitly calls for something that "
+                "brief. Embed each chart you discuss with "
+                "![caption](chart://chart_id)."
             ),
         })
 
@@ -358,9 +405,17 @@ def _handle_inspect_chart(
         self,
         chart_ids: list[str],
         charts: list[dict[str, Any]],
-    ) -> str:
-        """Return chart details as text + image content for inspection."""
+    ) -> tuple[str, list[str]]:
+        """Inspect charts: return a text summary plus rendered chart images.
+
+        Returns ``(text_summary, image_urls)`` where ``image_urls`` is a list of
+        base64 PNG data URLs (one per chart that could be rendered). Images are
+        returned separately so the caller can attach them as a follow-up vision
+        message — tool-result messages cannot carry image content on most
+        providers.
+        """
         results = []
+        image_urls: list[str] = []
         for chart_id in chart_ids:
             chart = next((c for c in charts if c["chart_id"] == chart_id), None)
             if not chart:
@@ -386,13 +441,55 @@ def _handle_inspect_chart(
                 parts.append(f"  Columns: {', '.join(df.columns.tolist())}")
                 parts.append(f"  Sample:\n{df.head(5).to_string()}")
 
-            # Chart image — return as base64 reference
-            if chart.get("chart_image"):
-                parts.append("  [Chart image available — shown below]")
+            # Get the chart image on demand. We prefer a frontend-supplied
+            # thumbnail; otherwise we render one server-side from the chart
+            # data + encodings so the agent can actually see what it embeds.
+            image = chart.get("chart_image") or self._render_chart_image(chart)
+            if image:
+                image_urls.append(image)
+                parts.append("  [Chart image attached below for visual inspection]")
+            else:
+                parts.append("  [Chart image unavailable — reason about it from data + encodings]")
 
             results.append("\n".join(parts))
 
-        return "\n\n".join(results)
+        return "\n\n".join(results), image_urls
+
+    def _render_chart_image(self, chart: dict[str, Any]) -> str | None:
+        """Render a chart to a base64 PNG data URL from its data + encodings.
+
+        Mirrors the DataAgent thumbnail path: resolve field types from the
+        chart's sample data, assemble a Vega-Lite spec, and rasterize it.
+        Returns ``None`` if there is not enough information to render.
+        """
+        chart_data = chart.get("chart_data") or {}
+        rows = chart_data.get("rows")
+        if not rows:
+            return None
+
+        chart_type = chart.get("chart_type", "Bar Chart")
+        raw_encodings = chart.get("encodings", {}) or {}
+        try:
+            df = pd.DataFrame(rows)
+            if df.empty:
+                return None
+
+            encodings: dict[str, dict[str, str]] = {}
+            for channel, field in raw_encodings.items():
+                if field and field in df.columns:
+                    field_type = resolve_field_type(df[field], field)
+                    field_type = coerce_field_type(chart_type, channel, field_type)
+                    encodings[channel] = {"field": field, "type": field_type}
+
+            if not encodings:
+                return None
+
+            spec = assemble_vegailte_chart(df, chart_type, encodings)
+            return spec_to_base64(spec) if spec else None
+        except Exception as e:
+            logger.warning(f"[ReportAgent] Chart render error for {chart.get('chart_id')}: {e}")
+            return None
+
 
     def _resolve_table_data(
         self,
diff --git a/src/app/chartCache.ts b/src/app/chartCache.ts
index 6fc56d75..3872c76b 100644
--- a/src/app/chartCache.ts
+++ b/src/app/chartCache.ts
@@ -19,6 +19,8 @@ export interface ChartCacheEntry {
     thumbnailDataUrl: string;   // PNG data URL (for DataThread thumbnails)
     fullPngDataUrl: string;     // Full-size PNG data URL (for agent/report use)
     specKey: string;            // Deterministic key of the inputs that produced this render
+    naturalWidth: number;       // Intrinsic width (CSS px) the rendering engine chose for the chart
+    naturalHeight: number;      // Intrinsic height (CSS px) the rendering engine chose for the chart
 }
 
 const cache = new Map<string, ChartCacheEntry>();
diff --git a/src/app/tableThunks.ts b/src/app/tableThunks.ts
index 72760816..c148dbd9 100644
--- a/src/app/tableThunks.ts
+++ b/src/app/tableThunks.ts
@@ -22,6 +22,46 @@ import { DataFormulatorState, dfActions, fetchColumnStats, fetchFieldSemanticTyp
 import { tableDataDB } from './workspaceDB';
 import i18n from '../i18n';
 
+/**
+ * Persist a derived / agent-generated table's full rows to IndexedDB for
+ * **ephemeral mode**, returning a copy that keeps only a sample + a `virtual`
+ * marker in Redux (mirroring how the `loadTable` thunk handles ephemeral data).
+ *
+ * In ephemeral mode the IndexedDB `table_data` store is the only durable source
+ * of truth: every API call ships those rows back to the server as
+ * `_workspace_tables`. Tables inserted straight into Redux (via
+ * `insertDerivedTables` / `overrideDerivedTables`) would otherwise never reach
+ * IndexedDB, leaving the server's scratch workspace — and the grid's pagination
+ * — with an empty data body.
+ *
+ * Callers must invoke this only when in ephemeral mode (they own that check).
+ * On save failure the original table is returned unchanged so the session keeps
+ * working with the full rows in Redux.
+ */
+export async function persistEphemeralDerivedTable(workspaceId: string, table: DictTable): Promise<DictTable> {
+    if (table.rows.length === 0) {  // nothing to persist; hand the table back untouched
+        return table;
+    }
+
+    const tableId = table.virtual?.tableId || table.id;  // reuse an existing virtual id as the storage key
+    const fullRows = table.rows;
+    const fullRowCount = Math.max(table.virtual?.rowCount ?? 0, fullRows.length);  // never report fewer rows than already recorded
+
+    try {
+        await tableDataDB.save(workspaceId, tableId, fullRows);
+    } catch (e) {
+        console.warn('[persistEphemeralDerivedTable] IndexedDB save failed; keeping full rows in Redux:', e);
+        return table;
+    }
+
+    const sampleSize = Math.min(1000, fullRows.length);  // the returned copy carries at most the first 1000 rows
+    return {
+        ...table,
+        rows: fullRows.slice(0, sampleSize),
+        virtual: { tableId, rowCount: fullRowCount },
+    };
+}
+
 /** Gzip-compress a string into a Blob using the browser's CompressionStream API. */
 async function compressBlob(data: string): Promise<Blob> {
     const blob = new Blob([new TextEncoder().encode(data)]);
diff --git a/src/app/useFormulateData.ts b/src/app/useFormulateData.ts
index dd128b50..160f8a85 100644
--- a/src/app/useFormulateData.ts
+++ b/src/app/useFormulateData.ts
@@ -9,6 +9,7 @@ import { Chart, FieldItem, Trigger, createDictTable, DictTable } from '../compon
 import { getUrls, getTriggers, translateBackend } from './utils';
 import { apiRequest, streamRequest } from './apiClient';
 import { getErrorMessage } from './errorCodes';
+import { persistEphemeralDerivedTable } from './tableThunks';
 
 export type IdeaItem = {
     text: string;
@@ -84,6 +85,8 @@ export function useFormulateData() {
     const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems);
     const charts = useSelector(dfSelectors.getAllCharts);
     const activeModel = useSelector(dfSelectors.getActiveModel);
+    const workspaceBackend = useSelector((state: DataFormulatorState) => state.serverConfig.WORKSPACE_BACKEND);
+    const activeWorkspaceId = useSelector((state: DataFormulatorState) => state.activeWorkspace?.id);
 
     /**
      * Resolve the actual chart that's rendered for a derived table. The
@@ -413,7 +416,7 @@ export function useFormulateData() {
             body: JSON.stringify(messageBody),
             signal: controller.signal,
         })
-        .then(({ data }) => {
+        .then(async ({ data }) => {
             if (!data.results || data.results.length === 0) {
                 dispatch(dfActions.addMessages({
                     "timestamp": Date.now(),
@@ -541,15 +544,21 @@ export function useFormulateData() {
                 }
             }
 
+            // Ephemeral mode: persist full rows to IndexedDB (keeps only a
+            // sample + virtual marker in Redux). Other backends store on the server.
+            const persistedTable = (workspaceBackend === 'ephemeral' && activeWorkspaceId)
+                ? await persistEphemeralDerivedTable(activeWorkspaceId, candidateTable)
+                : candidateTable;
+
             // Insert or override table
             if (overrideTableId) {
-                dispatch(dfActions.overrideDerivedTables(candidateTable));
+                dispatch(dfActions.overrideDerivedTables(persistedTable));
             } else {
-                dispatch(dfActions.insertDerivedTables(candidateTable));
+                dispatch(dfActions.insertDerivedTables(persistedTable));
             }
 
             // Add missing concepts
-            const names = candidateTable.names;
+            const names = persistedTable.names;
             const missingNames = names.filter((name: string) => !conceptShelfItems.some(field => field.name === name));
             const conceptsToAdd = missingNames.map((name: string) => ({
                 id: `concept-${name}-${Date.now()}`,
@@ -559,20 +568,20 @@ export function useFormulateData() {
             } as FieldItem));
 
             dispatch(dfActions.addConceptItems(conceptsToAdd));
-            dispatch(fetchFieldSemanticType(candidateTable));
-            dispatch(fetchCodeExpl(candidateTable));
+            dispatch(fetchFieldSemanticType(persistedTable));
+            dispatch(fetchCodeExpl(persistedTable));
 
             // Compute current concepts for chart creation
             const currentConcepts = [...conceptShelfItems.filter(c => names.includes(c.name)), ...conceptsToAdd];
 
             // Delegate chart creation to the caller
-            const focusedChartId = createChart({ candidateTable, refinedGoal, currentConcepts });
+            const focusedChartId = createChart({ candidateTable: persistedTable, refinedGoal, currentConcepts });
 
             if (focusedChartId) {
-                dispatch(fetchChartInsight({ chartId: focusedChartId, tableId: candidateTable.id }) as any);
+                dispatch(fetchChartInsight({ chartId: focusedChartId, tableId: persistedTable.id }) as any);
             }
 
-            onSuccess?.({ displayInstruction, candidateTable, focusedChartId });
+            onSuccess?.({ displayInstruction, candidateTable: persistedTable, focusedChartId });
         })
         .catch((error) => {
             if (error.name === 'AbortError') {
diff --git a/src/lib/agents-chart/vegalite/assemble.ts b/src/lib/agents-chart/vegalite/assemble.ts
index 8ba2e559..5b9af52d 100644
--- a/src/lib/agents-chart/vegalite/assemble.ts
+++ b/src/lib/agents-chart/vegalite/assemble.ts
@@ -61,6 +61,25 @@ import { filterOverflow } from '../core/filter-overflow';
 import { computeLayout, computeChannelBudgets, computeMinSubplotDimensions } from '../core/compute-layout';
 import { vlApplyLayoutToSpec, vlApplyTooltips } from './instantiate-spec';
 
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Backslash-escape the characters Vega-Lite treats as field path syntax.
+ *
+ * A Vega-Lite `field` value is parsed as an accessor path: `"a.b"` becomes the
+ * nested lookup `datum.a.b` and `"a[0]"` becomes an array index. A column whose
+ * name literally contains `.`, `[`, or `]` (e.g. `"Oranges, Navel, per lb."`)
+ * would therefore resolve to an undefined nested path and silently render empty
+ * marks; prefixing each such character with `\` makes it resolve the flat key.
+ *
+ * Apply this only to the `field` accessor string. Direct JS access such as
+ * `row[fieldName]` must keep using the raw, unescaped column name.
+ */
+const escapeVlFieldName = (name: string): string =>
+    name.replace(/[.[\]]/g, '\\$&');
+
 // ---------------------------------------------------------------------------
 // Public API
 // ---------------------------------------------------------------------------
@@ -490,7 +509,13 @@ function buildVLEncodings(
         }
 
         if (fieldName) {
-            encodingObj.field = fieldName;
+            const escapedFieldName = escapeVlFieldName(fieldName);
+            encodingObj.field = escapedFieldName;
+            // Preserve a readable axis/legend title when the raw name had to be
+            // escaped (VL would otherwise display the backslash-escaped string).
+            if (escapedFieldName !== fieldName) {
+                encodingObj.title = fieldName;
+            }
 
             // Use Phase 0's resolved type
             encodingObj.type = cs?.type || 'nominal';
@@ -511,7 +536,7 @@ function buildVLEncodings(
                     encodingObj.title = "Count";
                     encodingObj.type = "quantitative";
                 } else {
-                    encodingObj.field = `${fieldName}_${encoding.aggregate}`;
+                    encodingObj.field = escapeVlFieldName(`${fieldName}_${encoding.aggregate}`);
                     encodingObj.type = "quantitative";
                 }
             }
diff --git a/src/views/ChartRenderService.tsx b/src/views/ChartRenderService.tsx
index efdf4d38..3a836369 100644
--- a/src/views/ChartRenderService.tsx
+++ b/src/views/ChartRenderService.tsx
@@ -53,7 +53,7 @@ interface RenderJob {
  */
 async function renderHeadless(
     vlSpec: any,
-): Promise<{ svg: string; pngDataUrl: string }> {
+): Promise<{ svg: string; pngDataUrl: string; width: number; height: number }> {
     // Compile Vega-Lite → Vega spec
     const vgSpec = compile(vlSpec as any).spec;
 
@@ -72,10 +72,31 @@ async function renderHeadless(
         view.toImageURL('png', 2),  // scale factor 2 for retina
     ]);
 
+    // Capture the intrinsic size the engine laid the chart out at (includes
+    // axes / legends / titles), so consumers can preserve its true aspect
+    // ratio instead of forcing a fixed box.
+    const { width, height } = extractSvgSize(svg);
+
     // Finalize the view to free resources
     view.finalize();
 
-    return { svg, pngDataUrl };
+    return { svg, pngDataUrl, width, height };
+}
+
+/**
+ * Read the intrinsic pixel size from the root `<svg ... width="W" height="H">`
+ * tag of a Vega-rendered SVG so the engine-decided aspect ratio is preserved
+ * downstream. Names must follow whitespace so hyphenated attributes such as
+ * `stroke-width` never match. Falls back to the base render size if unparsable.
+ */
+function extractSvgSize(svg: string): { width: number; height: number } {
+    const tag = svg.match(/<svg\b[^>]*>/i)?.[0] ?? '';
+    const w = tag.match(/\swidth="([\d.]+)"/);
+    const h = tag.match(/\sheight="([\d.]+)"/);
+    return {
+        width: w ? Math.round(parseFloat(w[1])) : FULL_WIDTH,
+        height: h ? Math.round(parseFloat(h[1])) : FULL_HEIGHT,
+    };
 }
 
 /**
@@ -214,6 +235,8 @@ export const ChartRenderService: FC = () => {
                 thumbnailDataUrl: thumbnailPng,
                 fullPngDataUrl: fullResult.pngDataUrl,
                 specKey: cacheKey,
+                naturalWidth: fullResult.width,
+                naturalHeight: fullResult.height,
             };
             setCachedChart(chart.id, entry);
 
diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx
index 248dbe75..90280098 100644
--- a/src/views/DataThread.tsx
+++ b/src/views/DataThread.tsx
@@ -1686,15 +1686,16 @@ let SingleThreadGroupView: FC<{
         // Add table card and its charts
         pushTableAndChartItems(tableId, tableElementList[i], 'table', isHighlighted);
 
-        // Add report cards anchored to charts of this table
-        pushReportItems(tableId, isHighlighted);
-
         // After-table entries (e.g. summary)
         const afterTable = afterTableMap.get(tableId);
         if (afterTable && afterTable.length > 0) {
             pushInteractionEntries(afterTable, tableId, 'trigger', isHighlighted, 'interaction-after');
         }
 
+        // Add report cards anchored to charts of this table — placed after the
+        // summary block so the report/chat node follows the agent's summary.
+        pushReportItems(tableId, isHighlighted);
+
         // Running or clarifying agent state
         pushAgentDraftItems(tableId, 'trigger', isHighlighted);
     });
@@ -1723,15 +1724,16 @@ let SingleThreadGroupView: FC<{
 
         pushTableAndChartItems(lt.id, _buildTableCard(lt.id), 'leaf-table', isHL);
 
-        // Add report cards anchored to charts of this leaf table
-        pushReportItems(lt.id, isHL);
-
         // After-table entries (e.g. summary)
         const leafAfterEntries = leafAfterTableMap.get(lt.id);
         if (leafAfterEntries && leafAfterEntries.length > 0) {
             pushInteractionEntries(leafAfterEntries, lt.id, 'leaf-trigger', isHL, 'leaf-after');
         }
 
+        // Add report cards anchored to charts of this leaf table — placed after
+        // the summary block so the report/chat node follows the agent's summary.
+        pushReportItems(lt.id, isHL);
+
         // Running or clarifying agent state
         pushAgentDraftItems(lt.id, 'leaf-trigger', isHL);
     });
diff --git a/src/views/ReportView.tsx b/src/views/ReportView.tsx
index a01f9b1a..77eb55b2 100644
--- a/src/views/ReportView.tsx
+++ b/src/views/ReportView.tsx
@@ -58,6 +58,27 @@ export const ReportView: FC = () => {
         }));
     };
 
+    // The report content column is capped at 816px (see render below); leave a
+    // little breathing room so embedded charts never butt against the edge.
+    const REPORT_MAX_CHART_WIDTH = 720;
+
+    // Compute the embed size for a chart from the intrinsic size recorded on
+    // its cache entry (what the rendering engine actually laid out). The aspect
+    // ratio is kept, and the chart is only ever shrunk — never enlarged — to fit
+    // the report column, so wide time series stay wide and tall charts stay
+    // tall rather than being squeezed into a fixed box. When no intrinsic size
+    // is cached yet, the configured default dimensions are used instead.
+    const embedDimsFor = (chartId: string): { width: number; height: number } => {
+        const entry = getCachedChart(chartId);
+        const engineW = entry?.naturalWidth;
+        const engineH = entry?.naturalHeight;
+        if (!engineW || !engineH) {
+            return { width: config.defaultChartWidth, height: config.defaultChartHeight };
+        }
+        const shrink = Math.min(1, REPORT_MAX_CHART_WIDTH / engineW);
+        return { width: Math.round(engineW * shrink), height: Math.round(engineH * shrink) };
+    };
+
     // Helper function to show messages using dfSlice
     const showMessage = (message: string, type: 'success' | 'error' | 'info' | 'warning' = 'success') => {
         const msg: Message = {
@@ -463,10 +484,12 @@ ${styles}
                 if (cached?.svg) {
                     const blob = new Blob([cached.svg], { type: 'image/svg+xml;charset=utf-8' });
                     const blobUrl = URL.createObjectURL(blob);
-                    updateCachedReportImages(chartId, blobUrl, config.defaultChartWidth, config.defaultChartHeight);
+                    const { width, height } = embedDimsFor(chartId);
+                    updateCachedReportImages(chartId, blobUrl, width, height);
                 } else if (chartThumbnails[chartId]) {
                     // Fall back to thumbnail
-                    updateCachedReportImages(chartId, chartThumbnails[chartId], config.defaultChartWidth, config.defaultChartHeight);
+                    const { width, height } = embedDimsFor(chartId);
+                    updateCachedReportImages(chartId, chartThumbnails[chartId], width, height);
                 }
             });
         }
@@ -562,9 +585,11 @@ ${styles}
                 const cached = getCachedChart(chart.id);
                 if (cached?.svg) {
                     const blob = new Blob([cached.svg], { type: 'image/svg+xml;charset=utf-8' });
-                    updateCachedReportImages(chart.id, URL.createObjectURL(blob), config.defaultChartWidth, config.defaultChartHeight);
+                    const { width, height } = embedDimsFor(chart.id);
+                    updateCachedReportImages(chart.id, URL.createObjectURL(blob), width, height);
                 } else if (chartThumbnails[chart.id]) {
-                    updateCachedReportImages(chart.id, chartThumbnails[chart.id], config.defaultChartWidth, config.defaultChartHeight);
+                    const { width, height } = embedDimsFor(chart.id);
+                    updateCachedReportImages(chart.id, chartThumbnails[chart.id], width, height);
                 }
             });
         }
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index e94d2192..debf6548 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -27,6 +27,7 @@ import { AppDispatch } from '../app/store';
 import { resolveRecommendedChart, getUrls, getTriggers, translateBackend } from '../app/utils';
 import { streamRequest } from '../app/apiClient';
 import { getErrorMessage } from '../app/errorCodes';
+import { persistEphemeralDerivedTable } from '../app/tableThunks';
 import { Chart, ClarificationResponse, DictTable, FieldItem, createDictTable, InteractionEntry } from "../components/ComponentType";
 import { normalizeClarifyEvent, formatClarificationResponses } from '../app/clarification';
 
@@ -122,6 +123,8 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
     const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems);
     const config = useSelector((state: DataFormulatorState) => state.config);
     const activeModel = useSelector(dfSelectors.getActiveModel);
+    const workspaceBackend = useSelector((state: DataFormulatorState) => state.serverConfig.WORKSPACE_BACKEND);
+    const activeWorkspaceId = useSelector((state: DataFormulatorState) => state.activeWorkspace?.id);
     const draftNodes = useSelector((state: DataFormulatorState) => state.draftNodes);
     const chartThumbnails = useSelector((state: DataFormulatorState) => state.chartThumbnails) || {};
 
@@ -389,6 +392,132 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         return null;
     }, [pendingClarification, draftNodes]);
 
+    // ── Shared structured thread context builder (Tier 2 + Tier 3) ──
+    // Produces the same focused/peripheral thread context used by both the
+    // data agent (exploreFromChat) and the report agent (reportFromChat), so
+    // the report has the actual exploration narrative — user questions, agent
+    // thinking, findings — instead of just a flat list of charts.
+    const buildThreadContext = useCallback((targetTableId: string): {
+        focusedThread: any[] | undefined;
+        otherThreads: any[] | undefined;
+    } => {
+        // Tier 2: Focused thread — detailed per-step info
+        const focusedSteps: any[] = [];
+        let walkTable = tables.find(t => t.id === targetTableId);
+        const visited = new Set<string>();
+        const focusedChainIds = new Set<string>();
+        while (walkTable?.derive?.trigger) {
+            if (visited.has(walkTable.id)) break;  // cycle guard: stop if the chain revisits a table
+            visited.add(walkTable.id);
+            focusedChainIds.add(walkTable.id);
+            const trigger = walkTable.derive.trigger;
+            const interaction = trigger.interaction || [];
+            const userPrompt = interaction.find(e => e.role === 'prompt')?.content;
+            const instruction = interaction.find(e => e.role === 'instruction');
+            const summary = interaction.find(e => e.role === 'summary');
+
+            // Find the actual resolved chart (not the trigger's "Auto" stub)
+            const resolvedChart = charts.find(c => c.tableRef === walkTable!.id && c.source === 'trigger')
+                || charts.find(c => c.tableRef === walkTable!.id);
+            const chartType = resolvedChart?.chartType || '';
+            // Map field IDs to field names for readable context
+            const encodings = resolvedChart?.encodingMap
+                ? Object.fromEntries(
+                    Object.entries(resolvedChart.encodingMap)
+                        .filter(([, v]: [string, any]) => v?.fieldID)
+                        .map(([k, v]: [string, any]) => {
+                            const field = conceptShelfItems.find(f => f.id === v.fieldID);
+                            return [k, field?.name || v.fieldID];
+                        })
+                  )
+                : {};
+
+            const step: any = {
+                table_name: walkTable.virtual?.tableId || walkTable.id,
+                columns: walkTable.names,
+                row_count: walkTable.virtual?.rowCount ?? walkTable.rows.length,
+                user_question: userPrompt || '',
+                agent_thinking: instruction?.plan || '',
+                display_instruction: instruction?.displayContent || instruction?.content || '',
+                chart_type: chartType,
+                encodings,
+                agent_summary: summary?.content || '',
+            };
+
+            // Include chart thumbnail for the focused leaf table (the one the user is looking at)
+            if (walkTable.id === targetTableId && resolvedChart && chartThumbnails[resolvedChart.id]) {
+                step.chart_thumbnail = chartThumbnails[resolvedChart.id];
+            }
+
+            focusedSteps.unshift(step);  // prepend: the walk runs from the target table back toward its source
+
+            walkTable = tables.find(t => t.id === trigger.tableId);
+        }
+        const focusedThread = focusedSteps.length > 0 ? focusedSteps : undefined;  // undefined (not []) when there are no steps
+
+        // Tier 3: Peripheral threads — one-line summary per step
+        // Find all leaf tables (no children or all children are anchored)
+        const leafTables = tables.filter(t => {
+            const children = tables.filter(c => c.derive?.trigger.tableId === t.id);
+            return children.length === 0 || children.every(c => c.anchored);
+        });
+
+        const peripheralThreads: any[] = [];
+        for (const leaf of leafTables) {
+            // Skip the focused thread's leaf
+            if (focusedChainIds.has(leaf.id)) continue;
+            // Skip root/source tables
+            if (!leaf.derive) continue;
+
+            const triggers = getTriggers(leaf, tables);
+            if (triggers.length === 0) continue;
+
+            const STEP_FINDING_CHAR_LIMIT = 200;
+            const steps: string[] = [];
+            for (const trig of triggers) {
+                const instr = trig.interaction?.find((e: InteractionEntry) => e.role === 'instruction');
+                const label = instr?.displayContent || instr?.content || '';
+                // Look up the actual resolved chart from state, not the trigger's "Auto" stub
+                const chartForStep = charts.find(c => c.tableRef === trig.resultTableId && c.source === 'trigger')
+                    || charts.find(c => c.tableRef === trig.resultTableId);
+                const chartType = chartForStep?.chartType || '';
+                const encStr = chartForStep?.encodingMap
+                    ? Object.entries(chartForStep.encodingMap)
+                        .filter(([, v]: [string, any]) => v?.fieldID)
+                        .map(([k, v]: [string, any]) => {
+                            const field = conceptShelfItems.find(f => f.id === v.fieldID);
+                            return `${k}: ${field?.name || v.fieldID}`;
+                        })
+                        .join(', ')
+                    : '';
+                // Per-step agent commentary: the `summary` entry that the
+                // visualize action emits after running this step.
+                let finding = trig.interaction?.find(
+                    (e: InteractionEntry) => e.role === 'summary',
+                )?.content?.trim() || '';
+                if (finding.length > STEP_FINDING_CHAR_LIMIT) {
+                    finding = finding.slice(0, STEP_FINDING_CHAR_LIMIT - 1).trimEnd() + '…';  // leave room for the ellipsis
+                }
+                const head = `${label}${chartType ? ` → ${chartType}` : ''}${encStr ? ` (${encStr})` : ''}`;
+                steps.push(finding ? `${head} — finding: ${finding}` : head);
+            }
+
+            if (steps.length > 0) {
+                const sourceTableId = triggers[0].tableId;
+                const sourceTable = tables.find(t => t.id === sourceTableId);
+                peripheralThreads.push({
+                    source_table: sourceTable?.virtual?.tableId || sourceTableId,
+                    leaf_table: leaf.virtual?.tableId || leaf.id,
+                    step_count: steps.length,
+                    steps,
+                });
+            }
+        }
+        const otherThreads = peripheralThreads.length > 0 ? peripheralThreads : undefined;  // undefined (not []) when nothing qualifies
+
+        return { focusedThread, otherThreads };
+    }, [tables, charts, conceptShelfItems, chartThumbnails]);
+
     const exploreFromChat = useCallback((prompt: string, clarificationContext?: {
         trajectory: any[];
         completedStepCount: number;
@@ -445,124 +574,10 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         }
 
         // ── Build structured thread context (Tier 2 + Tier 3) ──
-        let focusedThread: any[] | undefined = undefined;
-        let otherThreads: any[] | undefined = undefined;
-        if (!isResume) {
-            // Tier 2: Focused thread — detailed per-step info
-            const focusedSteps: any[] = [];
-            let walkTable = tables.find(t => t.id === focusedTableId);
-            const visited = new Set<string>();
-            const focusedChainIds = new Set<string>();
-            while (walkTable?.derive?.trigger) {
-                if (visited.has(walkTable.id)) break;
-                visited.add(walkTable.id);
-                focusedChainIds.add(walkTable.id);
-                const trigger = walkTable.derive.trigger;
-                const interaction = trigger.interaction || [];
-                const userPrompt = interaction.find(e => e.role === 'prompt')?.content;
-                const instruction = interaction.find(e => e.role === 'instruction');
-                const summary = interaction.find(e => e.role === 'summary');
-
-                // Find the actual resolved chart (not the trigger's "Auto" stub)
-                const resolvedChart = charts.find(c => c.tableRef === walkTable!.id && c.source === 'trigger')
-                    || charts.find(c => c.tableRef === walkTable!.id);
-                const chartType = resolvedChart?.chartType || '';
-                // Map field IDs to field names for readable context
-                const encodings = resolvedChart?.encodingMap
-                    ? Object.fromEntries(
-                        Object.entries(resolvedChart.encodingMap)
-                            .filter(([, v]: [string, any]) => v?.fieldID)
-                            .map(([k, v]: [string, any]) => {
-                                const field = conceptShelfItems.find(f => f.id === v.fieldID);
-                                return [k, field?.name || v.fieldID];
-                            })
-                      )
-                    : {};
-
-                const step: any = {
-                    table_name: walkTable.virtual?.tableId || walkTable.id,
-                    columns: walkTable.names,
-                    row_count: walkTable.virtual?.rowCount ?? walkTable.rows.length,
-                    user_question: userPrompt || '',
-                    agent_thinking: instruction?.plan || '',
-                    display_instruction: instruction?.displayContent || instruction?.content || '',
-                    chart_type: chartType,
-                    encodings,
-                    agent_summary: summary?.content || '',
-                };
-
-                // Include chart thumbnail for the focused leaf table (the one the user is looking at)
-                if (walkTable.id === focusedTableId && resolvedChart && chartThumbnails[resolvedChart.id]) {
-                    step.chart_thumbnail = chartThumbnails[resolvedChart.id];
-                }
-
-                focusedSteps.unshift(step);
-
-                walkTable = tables.find(t => t.id === trigger.tableId);
-            }
-            if (focusedSteps.length > 0) focusedThread = focusedSteps;
-
-            // Tier 3: Peripheral threads — one-line summary per step
-            // Find all leaf tables (no children or all children are anchored)
-            const leafTables = tables.filter(t => {
-                const children = tables.filter(c => c.derive?.trigger.tableId === t.id);
-                return children.length === 0 || children.every(c => c.anchored);
-            });
-
-            const peripheralThreads: any[] = [];
-            for (const leaf of leafTables) {
-                // Skip the focused thread's leaf
-                if (focusedChainIds.has(leaf.id)) continue;
-                // Skip root/source tables
-                if (!leaf.derive) continue;
-
-                const triggers = getTriggers(leaf, tables);
-                if (triggers.length === 0) continue;
-
-                const STEP_FINDING_CHAR_LIMIT = 200;
-                const steps: string[] = [];
-                for (const trig of triggers) {
-                    const tt = tables.find(t2 => t2.id === trig.resultTableId);
-                    const instr = trig.interaction?.find((e: InteractionEntry) => e.role === 'instruction');
-                    const label = instr?.displayContent || instr?.content || '';
-                    // Look up the actual resolved chart from state, not the trigger's "Auto" stub
-                    const chartForStep = charts.find(c => c.tableRef === trig.resultTableId && c.source === 'trigger')
-                        || charts.find(c => c.tableRef === trig.resultTableId);
-                    const chartType = chartForStep?.chartType || '';
-                    const encStr = chartForStep?.encodingMap
-                        ? Object.entries(chartForStep.encodingMap)
-                            .filter(([, v]: [string, any]) => v?.fieldID)
-                            .map(([k, v]: [string, any]) => {
-                                const field = conceptShelfItems.find(f => f.id === v.fieldID);
-                                return `${k}: ${field?.name || v.fieldID}`;
-                            })
-                            .join(', ')
-                        : '';
-                    // Per-step agent commentary: the `summary` entry that the
-                    // visualize action emits after running this step.
-                    let finding = trig.interaction?.find(
-                        (e: InteractionEntry) => e.role === 'summary',
-                    )?.content?.trim() || '';
-                    if (finding.length > STEP_FINDING_CHAR_LIMIT) {
-                        finding = finding.slice(0, STEP_FINDING_CHAR_LIMIT - 1).trimEnd() + '…';
-                    }
-                    const head = `${label}${chartType ? ` → ${chartType}` : ''}${encStr ? ` (${encStr})` : ''}`;
-                    steps.push(finding ? `${head} — finding: ${finding}` : head);
-                }
-
-                if (steps.length > 0) {
-                    const sourceTableId = triggers[0].tableId;
-                    const sourceTable = tables.find(t => t.id === sourceTableId);
-                    peripheralThreads.push({
-                        source_table: sourceTable?.virtual?.tableId || sourceTableId,
-                        leaf_table: leaf.virtual?.tableId || leaf.id,
-                        step_count: steps.length,
-                        steps,
-                    });
-                }
-            }
-            if (peripheralThreads.length > 0) otherThreads = peripheralThreads;
-        }
+        // Skip on resume — the trajectory already carries the prior context.
+        const { focusedThread, otherThreads } = isResume
+            ? { focusedThread: undefined, otherThreads: undefined }
+            : buildThreadContext(focusedTableId);
 
         // Resolve primary table names from primaryTableIds (includes defaults + @-mentioned)
         const primaryTableNames = primaryTableIds.map(id => {
@@ -668,7 +683,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         let thinkingSteps: string[] = [];
         let pendingThought: string = '';
 
-        const processStreamingResult = (result: any) => {
+        const processStreamingResult = async (result: any) => {
             // ── context_info: show injected rules/knowledge at the top ──
             // Rendered as already-completed tool-style steps (✓ prefix) so they
             // visually match the rest of the agent's tool-call timeline.
@@ -863,6 +878,14 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 createdTables.push(candidateTable);
                 lastCreatedTableId = candidateTableId;
 
+                // Ephemeral mode: persist full rows to IndexedDB (keeps only a
+                // sample + virtual marker in Redux). Other backends store on the server.
+                if (workspaceBackend === 'ephemeral' && activeWorkspaceId) {
+                    const persisted = await persistEphemeralDerivedTable(activeWorkspaceId, candidateTable);
+                    candidateTable.rows = persisted.rows;
+                    candidateTable.virtual = persisted.virtual;
+                }
+
                 const names = candidateTable.names;
                 const missingNames = names.filter(name =>
                     !conceptShelfItems.some(field => field.name === name) &&
@@ -1106,7 +1129,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     }
 
                     allResults.push(data);
-                    processStreamingResult(data);
+                    await processStreamingResult(data);
                     if (data.type === "completion" || data.type === "clarify" || data.type === "explain" || data.type === "delegate") {
                         handleCompletion();
                         return;
@@ -1204,6 +1227,11 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
 
         const actionTables = selectedTableIds.map(id => tables.find(t => t.id === id) as DictTable).filter(Boolean);
 
+        // Send the same structured exploration narrative the data agent gets,
+        // so the report is grounded in the actual thread (user questions, agent
+        // thinking, findings) rather than a flat list of charts.
+        const { focusedThread, otherThreads } = buildThreadContext(focusedTableId);
+
         const body = JSON.stringify({
             model: activeModel,
             input_tables: actionTables.map(t => ({
@@ -1215,6 +1243,8 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             }),
             charts: availableCharts,
             user_prompt: cleanPrompt,
+            ...(focusedThread ? { focused_thread: focusedThread } : {}),
+            ...(otherThreads ? { other_threads: otherThreads } : {}),
         });
 
         const controller = new AbortController();
@@ -1302,7 +1332,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             agentAbortRef.current = null;
             setIsChatFormulating(false);
         }
-    }, [focusedTableId, charts, tables, selectedTableIds, primaryTableIds, conceptShelfItems, activeModel, dispatch]);
+    }, [focusedTableId, charts, tables, selectedTableIds, primaryTableIds, conceptShelfItems, activeModel, dispatch, buildThreadContext]);
 
     // Honor cross-component handoff requests targeting the Report Gen
     // agent (e.g. Data Agent's `delegate` card with target='report_gen').

From 7deaed78e0b419caea98987bd277987388440520 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 5 Jun 2026 17:44:37 -0700
Subject: [PATCH 13/29] some redesign

---
 .../agents/agent_chart_restyle.py             | 121 +++-
 src/app/dfSlice.tsx                           |  34 +-
 src/app/restyle.ts                            | 129 ++++-
 src/components/ComponentType.tsx              |  39 ++
 src/i18n/locales/en/chart.json                |   2 +
 src/i18n/locales/zh/chart.json                |   2 +
 .../agents-chart/vegalite/instantiate-spec.ts |  75 ++-
 src/views/ChartQuickConfig.tsx                | 197 +++++++
 src/views/ChartVariantStrip.tsx               | 533 ++++++++++++++++++
 src/views/EncodingBox.tsx                     |  66 ++-
 src/views/EncodingShelfCard.tsx               |  85 +--
 src/views/VisualizationView.tsx               | 225 ++++++--
 12 files changed, 1354 insertions(+), 154 deletions(-)
 create mode 100644 src/views/ChartQuickConfig.tsx
 create mode 100644 src/views/ChartVariantStrip.tsx

diff --git a/py-src/data_formulator/agents/agent_chart_restyle.py b/py-src/data_formulator/agents/agent_chart_restyle.py
index 61edc4d6..657d0ec5 100644
--- a/py-src/data_formulator/agents/agent_chart_restyle.py
+++ b/py-src/data_formulator/agents/agent_chart_restyle.py
@@ -50,17 +50,46 @@
 Hard rules:
 1. Do not include a `data` block in your output. The caller re-attaches live rows.
 2. Only reference columns that exist in the data sample.
+3. Preserve field-name escaping EXACTLY. Column names containing `.`, `[`, or `]` are escaped with a backslash in `field` references (e.g. a column literally named `Tomatoes, per lb.` appears as `"field": "Tomatoes, per lb\\."`). Keep those backslashes intact — do not drop or add them. An unescaped `.` makes Vega-Lite read it as a nested-object path, which breaks the chart (empty plot).
 
-Out-of-scope: only refuse if the request genuinely needs data that isn't in the table — e.g. joining another dataset, a column that doesn't exist and can't be derived from existing ones. In that case return:
+Out-of-scope: refuse if the request genuinely needs data that simply isn't there and can't be derived in Vega-Lite — e.g. joining a separate dataset, or a column that doesn't exist and can't be computed from the existing ones. Anything expressible with a Vega-Lite `transform` (aggregations, calculated fields, filters, folds, window/joinaggregate, etc.) is in scope — add the transforms you need. If you do refuse, return:
 {"out_of_scope": true, "rationale": "<one sentence on what data is missing>"}
 
 Otherwise return:
 {
   "vlSpec": <the new Vega-Lite spec, with no `data` block>,
   "label": "<two-word lowercase label, e.g. \"dark theme\", \"rotated labels\", \"percent of total\">",
-  "rationale": "<one short sentence on what you changed>"
+  "rationale": "<one short sentence on what you changed>",
+  "configUI": <a SHORT list (2-4) of simple follow-up controls — see below>
 }
 
+[configUI — generative follow-up controls]
+After you produce the new spec, design 2-4 small UI controls that let the user keep tweaking THIS specific variant without re-prompting. Pick knobs that are meaningful for the chart you just made (e.g. mark opacity, corner radius, point size, font size, gridlines on/off, label angle, color scheme, legend position).
+
+Each control declares WHERE in the spec it writes and the allowed VALUES — there is NO code, just a `path` (the location in the spec) plus the value the chosen option writes there. Shapes:
+- "key": short unique id, lowercase no spaces, e.g. "opacity"
+- "label": short human label, e.g. "opacity"
+- "path": array describing the location in the vlSpec to write the value to, e.g. ["mark","opacity"] or ["encoding","x","axis","labelAngle"] or ["config","legend","orient"]. Use array indices (numbers) for arrays, e.g. ["layer",0,"mark","color"]. Intermediate objects are created if missing.
+- "type": one of "continuous" | "binary" | "discrete"
+- for "continuous": "min", "max", optional "step", and "defaultValue" (number) — the value written at `path` is the number itself
+- for "binary": "defaultValue" (true/false) — the boolean is written at `path`
+- for "discrete": "options" (array of {"value": <any>, "label": "<text>"}) and "defaultValue" — the chosen option's `value` is written at `path`. The `value` may be a scalar OR a whole object (e.g. a full mark sub-spec or a color array), which the app sets wholesale at `path`.
+
+Rules for configUI:
+- `defaultValue` MUST equal what the spec you returned already encodes at that `path`, so the controls start in sync with the chart.
+- Make sure `path` points at a real location in the spec you returned (so toggling actually changes the visible chart).
+- Never use "__proto__", "prototype", or "constructor" as a path segment.
+
+Example configUI:
+[
+  {"key": "opacity", "label": "opacity", "type": "continuous", "min": 0.2, "max": 1, "step": 0.05, "defaultValue": 0.9, "path": ["mark", "opacity"]},
+  {"key": "grid", "label": "gridlines", "type": "binary", "defaultValue": true, "path": ["encoding", "y", "axis", "grid"]},
+  {"key": "scheme", "label": "palette", "type": "discrete", "defaultValue": "tableau10", "path": ["encoding", "color", "scale", "scheme"],
+   "options": [{"value": "tableau10", "label": "tableau"}, {"value": "category10", "label": "category"}, {"value": "set2", "label": "set2"}]}
+]
+
+If no meaningful per-variant control fits, return "configUI": [].
+
 Return ONLY the JSON object — no markdown fences, no commentary.
 '''
 
@@ -167,6 +196,7 @@ def run(
                             "vlSpec": cleaned,
                             "rationale": str(parsed.get("rationale", "")).strip(),
                             "label": str(parsed.get("label", "")).strip(),
+                            "configUI": self._sanitize_config_ui(parsed.get("configUI")),
                         }
 
         # No usable response.
@@ -180,6 +210,93 @@ def run(
     # Guardrails
     # ------------------------------------------------------------------
 
+    _FORBIDDEN_PATH_SEGMENTS = {"__proto__", "prototype", "constructor"}
+
+    def _sanitize_config_ui(self, raw: Any) -> list[dict]:
+        """Validate the LLM-authored configUI array into a clean list.
+
+        Each control is a declarative "write value at path" knob — there is no
+        code. We validate the path (non-empty, no prototype-polluting segments)
+        and the per-type params, dropping anything malformed. Returns [] when
+        nothing is usable. The frontend re-validates as well.
+        """
+        if not isinstance(raw, list):
+            return []
+        out: list[dict] = []
+        seen: set[str] = set()
+        for c in raw:
+            if not isinstance(c, dict):
+                continue
+            key = str(c.get("key", "")).strip()
+            label = str(c.get("label", "")).strip()
+            ctype = c.get("type")
+            if not key or not label or key in seen:
+                continue
+
+            # Validate path: non-empty list of str / non-negative int, no
+            # prototype-polluting segments.
+            raw_path = c.get("path")
+            if not isinstance(raw_path, list) or len(raw_path) == 0:
+                continue
+            path: list = []
+            path_ok = True
+            for seg in raw_path:
+                if isinstance(seg, bool):
+                    path_ok = False
+                    break
+                if isinstance(seg, int) and seg >= 0:
+                    path.append(seg)
+                elif isinstance(seg, str) and seg and seg not in self._FORBIDDEN_PATH_SEGMENTS:
+                    path.append(seg)
+                else:
+                    path_ok = False
+                    break
+            if not path_ok:
+                continue
+
+            if ctype == "binary":
+                out.append({"key": key, "label": label, "type": "binary",
+                            "path": path, "defaultValue": bool(c.get("defaultValue"))})
+            elif ctype == "continuous":
+                try:
+                    cmin = float(c.get("min"))
+                    cmax = float(c.get("max"))
+                except (TypeError, ValueError):
+                    continue
+                if not (cmax > cmin):
+                    continue
+                entry = {"key": key, "label": label, "type": "continuous",
+                         "path": path, "min": cmin, "max": cmax}
+                try:
+                    step = float(c.get("step"))
+                    if step > 0:
+                        entry["step"] = step
+                except (TypeError, ValueError):
+                    pass
+                try:
+                    entry["defaultValue"] = float(c.get("defaultValue"))
+                except (TypeError, ValueError):
+                    entry["defaultValue"] = cmin
+                out.append(entry)
+            elif ctype == "discrete":
+                opts_raw = c.get("options")
+                if not isinstance(opts_raw, list):
+                    continue
+                options = [
+                    {"value": o.get("value"), "label": str(o.get("label", "")).strip()}
+                    for o in opts_raw
+                    if isinstance(o, dict) and str(o.get("label", "")).strip()
+                ]
+                if not options:
+                    continue
+                default = c.get("defaultValue", options[0]["value"])
+                out.append({"key": key, "label": label, "type": "discrete",
+                            "path": path, "options": options, "defaultValue": default})
+            else:
+                continue
+            seen.add(key)
+        return out
+
     def _enforce_guardrails(self, original: dict, candidate: dict) -> dict | None:
         """Apply post-hoc guardrails to a candidate spec.
 
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index 2ceb55aa..cf630d78 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -1275,6 +1275,15 @@ export const dataFormulatorSlice = createSlice({
                 chart.insight = action.payload.insight;
             }
         },
+        // Zoom level applied by the resizer. Stored on the Chart (not in
+        // config, which is for template-defined properties) so it persists
+        // with the chart across focus changes and session save/load.
+        updateChartScaleFactor: (state, action: PayloadAction<{chartId: string, scaleFactor: number}>) => {
+            let chart = collectAllCharts(state).find(c => c.id == action.payload.chartId);
+            if (chart) {
+                chart.scaleFactor = action.payload.scaleFactor === 1 ? undefined : action.payload.scaleFactor;
+            }
+        },
         // --- Style variants (see design-docs/28-chart-style-refinement-agent.md) ---
         // Variants are user-authored "skins" of a chart's Vega-Lite spec. They live
         // on Chart, persist with the session, and drive both the focused canvas
@@ -1317,14 +1326,35 @@ export const dataFormulatorSlice = createSlice({
         // Replace a variant's spec in place — used by the "refresh stale variant"
         // flow (overlay in VisualizationView). The variant id stays the same so
         // the chip doesn't visibly disappear and re-appear.
-        updateStyleVariant: (state, action: PayloadAction<{chartId: string, variantId: string, vlSpec: any, rationale?: string, encodingFingerprint?: string}>) => {
-            const { chartId, variantId, vlSpec, rationale, encodingFingerprint } = action.payload;
+        updateStyleVariant: (state, action: PayloadAction<{chartId: string, variantId: string, vlSpec: any, rationale?: string, encodingFingerprint?: string, configUI?: ChartStyleVariant['configUI']}>) => {
+            const { chartId, variantId, vlSpec, rationale, encodingFingerprint, configUI } = action.payload;
             const chart = collectAllCharts(state).find(c => c.id === chartId);
             const v = chart?.styleVariants?.find(v => v.id === variantId);
             if (!v) return;
             v.vlSpec = vlSpec;
             if (rationale !== undefined) v.rationale = rationale;
             if (encodingFingerprint !== undefined) v.encodingFingerprint = encodingFingerprint;
+            // The agent re-authored the controls; replace them and reset values
+            // so stale keys don't linger.
+            if (configUI !== undefined) {
+                v.configUI = configUI && configUI.length > 0 ? configUI : undefined;
+                v.configValues = undefined;
+            }
+        },
+        // Set the value of a single generative-UI control on a style variant.
+        // value === undefined removes the override (falls back to the control's
+        // defaultValue at render time).
+        updateVariantConfigValue: (state, action: PayloadAction<{chartId: string, variantId: string, key: string, value: any}>) => {
+            const { chartId, variantId, key, value } = action.payload;
+            const chart = collectAllCharts(state).find(c => c.id === chartId);
+            const v = chart?.styleVariants?.find(v => v.id === variantId);
+            if (!v) return;
+            if (value === undefined) {
+                if (v.configValues) delete v.configValues[key];
+            } else {
+                if (!v.configValues) v.configValues = {};
+                v.configValues[key] = value;
+            }
         },
         updateChartEncoding: (state, action: PayloadAction<{chartId: string, channel: Channel, encoding: EncodingItem}>) => {
             let chartId = action.payload.chartId;
diff --git a/src/app/restyle.ts b/src/app/restyle.ts
index 4e6d4f8c..70c75c62 100644
--- a/src/app/restyle.ts
+++ b/src/app/restyle.ts
@@ -11,7 +11,7 @@
  * See design-docs/28-chart-style-refinement-agent.md.
  */
 
-import { Chart, ChartStyleVariant, FieldItem, DictTable, computeEncodingFingerprint } from '../components/ComponentType';
+import { Chart, ChartStyleVariant, VariantConfigControl, FieldItem, DictTable, computeEncodingFingerprint } from '../components/ComponentType';
 import { assembleVegaChart, getUrls } from './utils';
 import { apiRequest } from './apiClient';
 import { checkChartAvailability } from '../views/ChartUtils';
@@ -122,7 +122,7 @@ export function buildDataContext(
 }
 
 export type RestyleResult =
-    | { kind: 'spec'; vlSpec: any; rationale?: string; label?: string }
+    | { kind: 'spec'; vlSpec: any; rationale?: string; label?: string; configUI?: VariantConfigControl[] }
     | { kind: 'out_of_scope'; rationale?: string };
 
 /**
@@ -172,6 +172,7 @@ export async function callRestyleAgent(args: {
         vlSpec: newSpec,
         rationale: typeof data.rationale === 'string' ? data.rationale : undefined,
         label: typeof data.label === 'string' ? data.label : undefined,
+        configUI: sanitizeConfigUI(data.configUI),
     };
 }
 
@@ -183,6 +184,7 @@ export function makeVariant(args: {
     rationale?: string;
     label: string;
     basedOnVariantId?: string;
+    configUI?: VariantConfigControl[];
 }): ChartStyleVariant {
     return {
         id: `v-${Date.now()}`,
@@ -193,5 +195,128 @@ export function makeVariant(args: {
         encodingFingerprint: computeEncodingFingerprint(args.chart),
         createdAt: Date.now(),
         rationale: args.rationale,
+        configUI: args.configUI && args.configUI.length > 0 ? args.configUI : undefined,
+        configValues: undefined,
     };
 }
+
+// ---------------------------------------------------------------------------
+// Variant generative-UI controls (path-based, no code execution)
+// ---------------------------------------------------------------------------
+
+// Object keys that must never be used as a path segment — writing to these can
+// pollute Object.prototype and is a classic prototype-pollution sink.
+const FORBIDDEN_PATH_SEGMENTS = new Set(['__proto__', 'prototype', 'constructor']);
+
+/**
+ * Validate and normalize the `configUI` array returned by the restyle agent.
+ * Drops anything malformed (bad path, missing params, prototype-polluting
+ * segments) so a bad LLM payload can't produce broken or unsafe controls.
+ * Returns undefined when there are no usable controls.
+ */
+export function sanitizeConfigUI(raw: any): VariantConfigControl[] | undefined {
+    if (!Array.isArray(raw)) return undefined;
+    const out: VariantConfigControl[] = [];
+    const seenKeys = new Set<string>();
+    for (const c of raw) {
+        if (!c || typeof c !== 'object') continue;
+        const key = typeof c.key === 'string' ? c.key.trim() : '';
+        const label = typeof c.label === 'string' ? c.label.trim() : '';
+        if (!key || !label || seenKeys.has(key)) continue;
+
+        // Path must be a non-empty array of non-empty strings / non-negative
+        // integers, with no prototype-polluting segments.
+        if (!Array.isArray(c.path) || c.path.length === 0) continue;
+        const path: (string | number)[] = [];
+        let pathOk = true;
+        for (const seg of c.path) {
+            if (typeof seg === 'number' && Number.isInteger(seg) && seg >= 0) {
+                path.push(seg);
+            } else if (typeof seg === 'string' && seg.length > 0 && !FORBIDDEN_PATH_SEGMENTS.has(seg)) {
+                path.push(seg);
+            } else {
+                pathOk = false;
+                break;
+            }
+        }
+        if (!pathOk) continue;
+
+        if (c.type === 'binary') {
+            out.push({ key, label, path, type: 'binary', defaultValue: !!c.defaultValue });
+        } else if (c.type === 'continuous') {
+            const min = Number(c.min), max = Number(c.max);
+            if (!isFinite(min) || !isFinite(max) || max <= min) continue;
+            const step = isFinite(Number(c.step)) && Number(c.step) > 0 ? Number(c.step) : undefined;
+            const dv = isFinite(Number(c.defaultValue)) ? Number(c.defaultValue) : min;
+            out.push({ key, label, path, type: 'continuous', min, max, step, defaultValue: dv });
+        } else if (c.type === 'discrete') {
+            if (!Array.isArray(c.options) || c.options.length === 0) continue;
+            const options = c.options
+                .filter((o: any) => o && typeof o === 'object' && typeof o.label === 'string')
+                .map((o: any) => ({ value: o.value, label: o.label }));
+            if (options.length === 0) continue;
+            const dv = c.defaultValue !== undefined ? c.defaultValue : options[0].value;
+            out.push({ key, label, path, type: 'discrete', options, defaultValue: dv });
+        } else {
+            continue;
+        }
+        seenKeys.add(key);
+    }
+    return out.length > 0 ? out : undefined;
+}
+
+/**
+ * Write `value` into `obj` at `path`, creating intermediate objects/arrays as
+ * needed. Pure data operation — no code execution. Returns true on success.
+ *
+ * Safety: refuses prototype-polluting segments. A missing, null, or non-object
+ * intermediate (e.g. a string shorthand) is replaced by a fresh object/array.
+ */
+function setAtPath(obj: any, path: (string | number)[], value: any): boolean {
+    if (!obj || typeof obj !== 'object' || path.length === 0) return false;
+    let node = obj;
+    for (let i = 0; i < path.length - 1; i++) {
+        const seg = path[i];
+        if (typeof seg === 'string' && FORBIDDEN_PATH_SEGMENTS.has(seg)) return false;
+        let next = node[seg as any];
+        if (next === null || typeof next !== 'object') {
+            // Create the right container based on the next segment's type.
+            next = typeof path[i + 1] === 'number' ? [] : {};
+            node[seg as any] = next;
+        }
+        node = next;
+    }
+    const last = path[path.length - 1];
+    if (typeof last === 'string' && FORBIDDEN_PATH_SEGMENTS.has(last)) return false;
+    node[last as any] = value;
+    return true;
+}
+
+/**
+ * Apply a variant's generative-UI controls to its Vega-Lite spec.
+ *
+ * For each control we write the current value (from `configValues`, falling
+ * back to `defaultValue`) into the spec at the control's `path`. The value may
+ * be a scalar or a whole object. This is a pure, declarative transform — no
+ * model-authored code runs. Never mutates the input; returns a copy, or `spec` itself when there are no controls.
+ */
+export function applyVariantConfigUI(
+    spec: any,
+    configUI: VariantConfigControl[] | undefined,
+    configValues: Record<string, any> | undefined,
+): any {
+    if (!configUI || configUI.length === 0) return spec;
+    let working: any;
+    try { working = structuredClone(spec); } catch { working = JSON.parse(JSON.stringify(spec)); }
+    for (const control of configUI) {
+        const value = configValues && control.key in configValues
+            ? configValues[control.key]
+            : control.defaultValue;
+        try {
+            setAtPath(working, control.path, value);
+        } catch (err) {
+            console.warn(`[variant-config] control "${control.key}" failed to apply`, err);
+        }
+    }
+    return working;
+}
diff --git a/src/components/ComponentType.tsx b/src/components/ComponentType.tsx
index 3746db42..5a64b420 100644
--- a/src/components/ComponentType.tsx
+++ b/src/components/ComponentType.tsx
@@ -359,8 +359,45 @@ export interface ChartStyleVariant {
     encodingFingerprint: string,  // see computeEncodingFingerprint(); used to detect staleness
     createdAt: number,
     rationale?: string,           // optional one-line explanation from the agent
+    // Generative UI: a few simple knobs the restyle agent attaches to the
+    // variant so the user can keep tweaking the agent-authored spec without
+    // re-prompting. While a variant is active these replace the chart-template
+    // config. See VariantConfigControl and applyVariantConfigUI in app/restyle.ts.
+    configUI?: VariantConfigControl[],
+    // Current value for each control, keyed by control.key. Missing key → use
+    // the control's defaultValue.
+    configValues?: Record<string, any>,
 }
 
+/**
+ * A single generative-UI control authored by the restyle agent for a style
+ * variant. Mirrors the shape of ChartPropertyDef (so it can reuse the same
+ * renderers) but instead of arbitrary code it carries a `path`: the location
+ * inside the Vega-Lite spec to write the chosen value to.
+ *
+ * Applying a control is a pure, declarative "set value at path" operation
+ * (see applyVariantConfigUI / setAtPath). There is NO code execution — the
+ * agent only chooses which knob, where it writes, and the allowed values.
+ * The written value may be a scalar OR a whole object (e.g. a full mark/axis
+ * sub-spec), which keeps the door open for richer restyle edits while staying
+ * safe.
+ */
+export type VariantConfigControl = {
+    key: string;
+    label: string;
+    /**
+     * Path into the vlSpec where the chosen value is written, as an array of
+     * object keys / array indices, e.g. ["mark","opacity"] or
+     * ["encoding","x","axis","labelAngle"]. Intermediate objects are created
+     * as needed. Prototype-polluting segments are rejected at apply time.
+     */
+    path: (string | number)[];
+} & (
+    | { type: 'continuous'; min: number; max: number; step?: number; defaultValue: number }
+    | { type: 'discrete';  options: { value: any; label: string }[]; defaultValue: any }
+    | { type: 'binary';    defaultValue: boolean }
+);
+
 export type Chart = { 
     id: string, 
     chartType: string, 
@@ -371,6 +408,7 @@ export type Chart = {
     insight?: ChartInsight,  // AI-generated insight about the visualization
     styleVariants?: ChartStyleVariant[],  // user-authored style refinements (see ChartStyleVariant)
     activeVariantId?: string,  // id of the variant currently rendered in the focused canvas; undefined = default
+    scaleFactor?: number,  // zoom level applied by the resizer; undefined = 1 (no zoom)
     unread?: boolean,  // true for agent-generated charts the user hasn't focused yet; cleared on focus
 }
 
@@ -409,6 +447,7 @@ export let duplicateChart = (chart: Chart) : Chart => {
         tableRef: chart.tableRef,
         source: chart.source,
         config: chart.config ? JSON.parse(JSON.stringify(chart.config)) : undefined,
+        scaleFactor: chart.scaleFactor,
         // styleVariants are intentionally NOT copied: they are user-authored
         // refinements tied to the chart they were created on. A duplicate is a
         // fresh canvas. (See design-docs/28-chart-style-refinement-agent.md.)
diff --git a/src/i18n/locales/en/chart.json b/src/i18n/locales/en/chart.json
index a942ab03..cd113617 100644
--- a/src/i18n/locales/en/chart.json
+++ b/src/i18n/locales/en/chart.json
@@ -47,6 +47,8 @@
     "log": "log",
     "insight": "insight",
     "openInVegaEditor": "Open in Vega Editor",
+    "viewChartSpec": "View chart spec",
+    "editChart": "Edit chart",
     "chartInsight": "Chart insight",
     "analyzingChart": "Analyzing chart...",
     "regenerate": "regenerate",
diff --git a/src/i18n/locales/zh/chart.json b/src/i18n/locales/zh/chart.json
index 45e10832..fb54428d 100644
--- a/src/i18n/locales/zh/chart.json
+++ b/src/i18n/locales/zh/chart.json
@@ -47,6 +47,8 @@
     "log": "日志",
     "insight": "洞察",
     "openInVegaEditor": "在 Vega 编辑器中打开",
+    "viewChartSpec": "查看图表规格",
+    "editChart": "编辑图表",
     "chartInsight": "图表洞察",
     "analyzingChart": "分析图表中...",
     "regenerate": "重新生成",
diff --git a/src/lib/agents-chart/vegalite/instantiate-spec.ts b/src/lib/agents-chart/vegalite/instantiate-spec.ts
index 20a0e0e3..b95a5c88 100644
--- a/src/lib/agents-chart/vegalite/instantiate-spec.ts
+++ b/src/lib/agents-chart/vegalite/instantiate-spec.ts
@@ -316,12 +316,29 @@ export function vlApplyLayoutToSpec(
     const facetRows = layout.facet?.rows ?? 1;
     const facetCols = layout.facet?.columns ?? 1;
     if (facetRows > 1 || facetCols > 1) {
-        const limit = Math.max(80, layout.subplotWidth + 20);
-        const headerCfg: Record<string, any> = { labelLimit: limit };
-        if (totalFacets > 6) {
-            headerCfg.labelFontSize = 9;
+        // Constrain each header's labels to the subplot it belongs to:
+        //   • column / wrap-facet headers run horizontally on top → bound by WIDTH
+        //   • row headers run rotated down the side → bound by HEIGHT
+        // Only inject the configs for the facet channels that actually exist.
+        const enc = vgObj.encoding || vgObj.spec?.encoding;
+        const facetDef = vgObj.facet || {};
+        const hasRow = !!(enc?.row || facetDef.row);
+        const hasColumn = !!(enc?.column || facetDef.column);
+        const hasWrap = !!(enc?.facet || (vgObj.facet && !facetDef.row && !facetDef.column));
+
+        const fontCfg: Record<string, any> = totalFacets > 6 ? { labelFontSize: 9 } : {};
+        const colLimit = Math.max(80, layout.subplotWidth + 20);
+        const rowLimit = Math.max(30, layout.subplotHeight);
+
+        if (hasColumn) {
+            vgObj.config.headerColumn = { ...(vgObj.config.headerColumn || {}), ...fontCfg, labelLimit: colLimit };
+        }
+        if (hasRow) {
+            vgObj.config.headerRow = { ...(vgObj.config.headerRow || {}), ...fontCfg, labelLimit: rowLimit };
+        }
+        if (hasWrap) {
+            vgObj.config.headerFacet = { ...(vgObj.config.headerFacet || {}), ...fontCfg, labelLimit: colLimit };
         }
-        vgObj.config.header = { ...(vgObj.config.header || {}), ...headerCfg };
     }
     const encTarget = vgObj.spec?.encoding || vgObj.encoding;
 
@@ -332,14 +349,50 @@ export function vlApplyLayoutToSpec(
         vgObj.config.axisY = { ...(vgObj.config.axisY || {}), ...lightTitle };
     }
 
-    // When row faceting is used, use lighter y axis title styling;
-    // hide it entirely if y is nominal (the labels speak for themselves).
-    if (encTarget?.row || (facetRows > 1 && encTarget?.y)) {
-        if (encTarget?.y?.type === 'nominal') {
+    // Row-faceted y-axis title handling.
+    // Vega-Lite draws the y-axis title once PER facet row, so on a stack of
+    // short subplots the same label repeats down the left edge until it
+    // collapses into an unreadable vertical smear right next to the row header.
+    //
+    //   • Nominal y — the category labels are self-describing, so just drop the
+    //     repeated title.
+    //   • Quantitative/temporal y on a SHARED scale — the measure is identical
+    //     in every subplot, so fold it into the row header title (e.g. the
+    //     rotated left label becomes "Product: Price Index (Start = 100)") and
+    //     suppress the per-subplot title. With an INDEPENDENT y scale each
+    //     subplot can differ, so we leave the per-subplot titles in place.
+    const rowEnc = encTarget?.row || vgObj.facet?.row;
+    const yEnc = encTarget?.y;
+    if (yEnc && (rowEnc || (facetRows > 1 && encTarget?.y))) {
+        if (yEnc.type === 'nominal') {
             if (!vgObj.config) vgObj.config = {};
             vgObj.config.axisY = { ...(vgObj.config.axisY || {}), title: null };
-            if (!encTarget.y.axis) encTarget.y.axis = {};
-            encTarget.y.axis.title = null;
+            if (!yEnc.axis) yEnc.axis = {};
+            yEnc.axis.title = null;
+        } else if (rowEnc && vgObj.resolve?.scale?.y !== 'independent') {
+            const yTitle = (yEnc.axis && yEnc.axis.title) || yEnc.title || yEnc.field;
+            const rowTitle = (rowEnc.header && rowEnc.header.title) || rowEnc.title || rowEnc.field;
+            if (yTitle && rowTitle) {
+                if (!rowEnc.header) rowEnc.header = {};
+                rowEnc.header.title = `${rowTitle}: ${yTitle}`;
+                if (!vgObj.config) vgObj.config = {};
+                vgObj.config.axisY = { ...(vgObj.config.axisY || {}), title: null };
+                if (!yEnc.axis) yEnc.axis = {};
+                yEnc.axis.title = null;
+            }
+        } else {
+            // Wrap-facet (single facet field, no shared side band) — the y-axis
+            // title repeats once per wrap-row down the left edge. There's no row
+            // header to fold it into, so keep it but stop it smearing: pull it
+            // off the tick labels with titlePadding, shrink the font, and cap its
+            // length to the subplot height so it can't overrun a short subplot.
+            if (!vgObj.config) vgObj.config = {};
+            vgObj.config.axisY = {
+                ...(vgObj.config.axisY || {}),
+                titlePadding: 8,
+                titleFontSize: 10,
+                titleLimit: Math.max(30, layout.subplotHeight),
+            };
         }
     }
 
diff --git a/src/views/ChartQuickConfig.tsx b/src/views/ChartQuickConfig.tsx
new file mode 100644
index 00000000..dbcff0d6
--- /dev/null
+++ b/src/views/ChartQuickConfig.tsx
@@ -0,0 +1,197 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+// Compact, horizontal chart-config bar surfaced directly below the chart for
+// quick edits (toggles, sliders, discrete option selects). It mirrors the
+// template-driven config properties from the encoding shelf but in a single
+// wrapping row so users can tweak the chart without opening the full encoding
+// popover. See VisualizationView for placement.
+
+import { FC } from 'react';
+import React from 'react';
+import { useSelector, useDispatch } from 'react-redux';
+
+import { Box, Typography, Select, MenuItem, useTheme } from '@mui/material';
+
+import { DataFormulatorState, dfActions, dfSelectors } from '../app/dfSlice';
+import { AppDispatch } from '../app/store';
+import { Channel, Chart, VariantConfigControl } from '../components/ComponentType';
+import { getChartTemplate } from '../components/ChartTemplates';
+import { ConfigSlider } from './EncodingShelfCard';
+
+export interface ChartQuickConfigProps {
+    chartId: string; // id of the chart whose quick-config controls are rendered
+}
+
+/**
+ * Normalized control shape that both chart-template properties (ChartPropertyDef)
+ * and variant generative-UI controls (VariantConfigControl) map onto, so the
+ * same renderers handle either source.
+ */
+type QuickControl = {
+    key: string; // config key: used to read the current value and passed to the commit action
+    label: string; // caption text rendered next to the control
+    type: 'continuous' | 'binary' | 'discrete'; // selects the renderer: slider, toggle, or select
+    min?: number; // fallback current value for a continuous control when none is set; presumably the slider's lower bound
+    max?: number;
+    step?: number;
+    options?: { value: any; label: string }[]; // discrete choices; matched to the current value via JSON.stringify
+    defaultValue?: any; // used when no stored value exists for `key`
+};
+
+/**
+ * Quick-config bar for a single chart. Renders the active style variant's own
+ * controls, or else the chart template's visible properties; returns null when
+ * the chart is not found or there is nothing to configure.
+ */
+export const ChartQuickConfig: FC<ChartQuickConfigProps> = function ({ chartId }) {
+    const theme = useTheme();
+    const dispatch = useDispatch<AppDispatch>();
+    const allCharts = useSelector(dfSelectors.getAllCharts);
+    const chart = allCharts.find((c: Chart) => c.id === chartId) as Chart | undefined;
+
+    if (!chart) return null;
+
+    // When a style variant is active, the chart-template config no longer maps
+    // to what's rendered (variants are agent-authored specs that bypass the
+    // compiler). Show the variant's own generative-UI controls instead — or
+    // nothing if the variant didn't ship any.
+    const activeVariant = chart.activeVariantId
+        ? chart.styleVariants?.find(v => v.id === chart.activeVariantId)
+        : undefined;
+
+    let controls: QuickControl[];
+    let getValue: (control: QuickControl) => any;
+    let commit: (control: QuickControl, value: any) => void;
+
+    if (activeVariant) {
+        const configUI = activeVariant.configUI;
+        if (!configUI || configUI.length === 0) return null;
+        controls = configUI.map((c: VariantConfigControl) => ({
+            key: c.key,
+            label: c.label,
+            type: c.type,
+            min: c.type === 'continuous' ? c.min : undefined,
+            max: c.type === 'continuous' ? c.max : undefined,
+            step: c.type === 'continuous' ? c.step : undefined,
+            options: c.type === 'discrete' ? c.options : undefined,
+            defaultValue: c.defaultValue,
+        }));
+        getValue = (control) => {
+            const cv = activeVariant.configValues;
+            return cv && control.key in cv ? cv[control.key] : control.defaultValue;
+        };
+        commit = (control, value) => dispatch(dfActions.updateVariantConfigValue({
+            chartId, variantId: activeVariant.id, key: control.key, value,
+        }));
+    } else {
+        const template = getChartTemplate(chart.chartType);
+        const configProps = template?.properties;
+        if (!configProps || configProps.length === 0) return null;
+
+        // Filter to visible properties (respecting visibleWhen channel predicates).
+        const visibleProps = configProps.filter((propDef) => {
+            if (propDef.visibleWhen?.channels) {
+                return propDef.visibleWhen.channels.some(
+                    ch => chart.encodingMap[ch as Channel]?.fieldID != null
+                );
+            }
+            return true;
+        });
+        if (visibleProps.length === 0) return null;
+        controls = visibleProps as QuickControl[];
+        getValue = (control) => chart.config?.[control.key] ?? control.defaultValue;
+        commit = (control, value) => dispatch(dfActions.updateChartConfig({ chartId, key: control.key, value }));
+    }
+
+    return (
+        <Box sx={{ display: 'flex', justifyContent: 'center', width: '100%', pt: 2, pb: 1 }}>
+        <Box sx={{
+            display: 'inline-flex', flexWrap: 'wrap', alignItems: 'center', justifyContent: 'center',
+            gap: '4px 18px', px: 1.5, py: 0.5, maxWidth: 900,
+            borderRadius: '8px', backgroundColor: 'rgba(0,0,0,0.025)',
+        }}>
+            {controls.map((propDef) => {
+                if (propDef.type === 'continuous') {
+                    const currentValue = getValue(propDef) ?? propDef.min ?? 0;
+                    return (
+                        <Box key={`qc-${propDef.key}`} sx={{ display: 'flex', alignItems: 'center', minWidth: 150 }}>
+                            <Typography variant="caption" sx={{ pr: 0.75, color: 'text.secondary', fontSize: 10, whiteSpace: 'nowrap', fontWeight: 500, userSelect: 'none' }}>
+                                {propDef.label}
+                            </Typography>
+                            <ConfigSlider
+                                value={currentValue}
+                                propDef={propDef}
+                                onCommit={(newValue) => commit(propDef, newValue)}
+                            />
+                        </Box>
+                    );
+                }
+                if (propDef.type === 'binary') {
+                    const currentValue = getValue(propDef) ?? false;
+                    // Hand-built toggle: expose switch semantics and Enter/Space activation,
+                    // because a click-only Box cannot be operated from the keyboard.
+                    return (
+                        <Box key={`qc-${propDef.key}`}
+                            role="switch" aria-checked={Boolean(currentValue)} tabIndex={0}
+                            sx={{ display: 'flex', alignItems: 'center', minHeight: '22px', cursor: 'pointer' }}
+                            onKeyDown={(e: React.KeyboardEvent) => { if (e.key === 'Enter' || e.key === ' ') { e.preventDefault(); commit(propDef, !currentValue); } }}
+                            onClick={() => commit(propDef, !currentValue)}>
+                            <Typography variant="caption" sx={{ color: 'text.secondary', fontSize: 10, whiteSpace: 'nowrap', fontWeight: 500, userSelect: 'none', mr: 0.75 }}>
+                                {propDef.label}
+                            </Typography>
+                            <Box sx={{
+                                width: 28, height: 14, borderRadius: '7px',
+                                backgroundColor: currentValue ? theme.palette.primary.main : 'rgba(0,0,0,0.2)',
+                                position: 'relative', transition: 'background-color 0.2s', flexShrink: 0,
+                            }}>
+                                <Box sx={{
+                                    width: 10, height: 10, borderRadius: '50%', backgroundColor: 'white',
+                                    position: 'absolute', top: 2, left: currentValue ? 16 : 2, transition: 'left 0.2s',
+                                }} />
+                            </Box>
+                        </Box>
+                    );
+                }
+                if (propDef.type !== 'discrete' || !propDef.options || propDef.options.length === 0) return null;
+                const currentValue = getValue(propDef);
+                const options = propDef.options;
+                const currentSerialized = JSON.stringify(currentValue);
+                let selectedIndex = options.findIndex(o => JSON.stringify(o.value) === currentSerialized);
+                if (selectedIndex < 0) selectedIndex = 0;
+                return (
+                    <Box key={`qc-${propDef.key}`} sx={{ display: 'flex', alignItems: 'center', minHeight: '22px' }}>
+                        <Typography variant="caption" sx={{ pr: 0.75, color: 'text.secondary', fontSize: 10, whiteSpace: 'nowrap', fontWeight: 500, userSelect: 'none' }}>
+                            {propDef.label}
+                        </Typography>
+                        <Select
+                            variant="standard"
+                            id={`qc-${propDef.key}-select`}
+                            value={selectedIndex}
+                            onChange={(event) => {
+                                const picked = options[Number(event.target.value)];
+                                if (picked) commit(propDef, picked.value);
+                            }}
+                            disableUnderline
+                            sx={{
+                                fontSize: 11, height: '22px', minWidth: 60,
+                                backgroundColor: 'rgba(0,0,0,0.05)', borderRadius: '6px',
+                                '&:hover': { backgroundColor: 'rgba(0,0,0,0.08)' },
+                                '& .MuiSelect-select': { padding: '1px 20px 1px 6px !important', fontSize: 11 },
+                                '& .MuiSvgIcon-root': { fontSize: 14, right: 2 },
+                            }}
+                            renderValue={(idx: number) => <span style={{ fontSize: 11 }}>{options[idx]?.label || 'Default'}</span>}
+                        >
+                            {options.map((opt, i) => (
+                                <MenuItem value={i} key={`qc-${propDef.key}-${i}`} sx={{ fontSize: 11, minHeight: '28px' }}>
+                                    {opt.label}
+                                </MenuItem>
+                            ))}
+                        </Select>
+                    </Box>
+                );
+            })}
+        </Box>
+        </Box>
+    );
+};
diff --git a/src/views/ChartVariantStrip.tsx b/src/views/ChartVariantStrip.tsx
new file mode 100644
index 00000000..8caca950
--- /dev/null
+++ b/src/views/ChartVariantStrip.tsx
@@ -0,0 +1,533 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+// Horizontal strip of chart style variants (created by the restyle agent).
+// Surfaced at the top of the chart canvas so that when multiple versions of a
+// chart exist, the user can switch between them right above the chart. The
+// "default" chip renders the chart from its current encoding (no style
+// refinement); each variant chip activates / refreshes its saved spec.
+//
+// This is a self-contained extraction of the variant logic that used to live
+// inside EncodingShelfCard, so it can be rendered independently of the
+// encoding popover. See dfActions.setActiveVariant / updateStyleVariant /
+// deleteStyleVariant and src/app/restyle.ts.
+
+import { FC, useState } from 'react';
+import React from 'react';
+import { useSelector, useDispatch } from 'react-redux';
+
+import { Box, Typography, CircularProgress, alpha, useTheme, IconButton, Tooltip, Popover, TextField, Card, Divider } from '@mui/material';
+import CloseIcon from '@mui/icons-material/Close';
+import PaletteOutlinedIcon from '@mui/icons-material/PaletteOutlined';
+import SendIcon from '@mui/icons-material/Send';
+
+import { DataFormulatorState, dfActions, dfSelectors } from '../app/dfSlice';
+import { AppDispatch } from '../app/store';
+import { transition } from '../app/tokens';
+import {
+    Chart,
+    ChartStyleVariant,
+    computeEncodingFingerprint,
+    isVariantStale,
+} from '../components/ComponentType';
+import { buildSpecForRestyle, buildDataContext, callRestyleAgent, makeVariant } from '../app/restyle';
+import { STYLE_PRESETS } from './EncodingShelfCard';
+import { getDataTable } from './ChartUtils';
+
+export interface ChartVariantStripProps {
+    chartId: string; // id of the chart whose style variants are listed
+}
+
+// Quick actions surfaced in the design popover. Each chip sends a
+// self-contained instruction straight to the agent. Grouped into two
+// subsections (restyle / annotate) under a single "Quick actions" heading.
+interface QuickAction { key: string; label: string; description: string; instruction: string } // description is the chip's tooltip; instruction is the text submitted to the restyle agent
+
+// One quick action per style preset, keeping only the QuickAction fields.
+const RESTYLE_ACTIONS: QuickAction[] = STYLE_PRESETS.map(
+    ({ key, label, description, instruction }) => (
+        { key, label, description, instruction }
+    ),
+);
+
+const ANNOTATE_ACTIONS: QuickAction[] = [ // listed under the 'annotate' group of the design popover
+    {
+        key: 'annotate-peak',
+        label: 'highest point',
+        description: 'Mark the highest value',
+        instruction: 'Annotate the highest value in the chart with a label.',
+    },
+    {
+        key: 'avg-line',
+        label: 'average line',
+        description: 'Add a reference line at the mean',
+        instruction: 'Add a reference line at the average value.',
+    },
+    {
+        key: 'data-labels',
+        label: 'data labels',
+        description: 'Label each data point with its value',
+        instruction: 'Add data labels showing the value of each mark.',
+    },
+];
+
+export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId }) {
+    const theme = useTheme();
+    const dispatch = useDispatch<AppDispatch>();
+
+    const tables = useSelector((state: DataFormulatorState) => state.tables);
+    const allCharts = useSelector(dfSelectors.getAllCharts);
+    const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems);
+    const activeModel = useSelector(dfSelectors.getActiveModel);
+
+    const [refreshingVariantId, setRefreshingVariantId] = useState<string | null>(null);
+    const [restyleAnchor, setRestyleAnchor] = useState<HTMLElement | null>(null);
+    const [restylePrompt, setRestylePrompt] = useState('');
+    const [isRestyling, setIsRestyling] = useState(false);
+    const [pendingPrompt, setPendingPrompt] = useState<string | null>(null);
+
+    const chart = allCharts.find((c: Chart) => c.id == chartId) as Chart | undefined;
+
+    if (!chart) return null;
+    // Restyling only applies to rendered Vega charts, not the raw table or
+    // the not-yet-chosen "Auto" placeholder.
+    if (chart.chartType === 'Table' || chart.chartType === 'Auto') return null;
+
+    const variants: ChartStyleVariant[] = chart.styleVariants ?? [];
+    const activeVariantId = chart.activeVariantId;
+
+    const currentTable = getDataTable(chart, tables, allCharts, conceptShelfItems);
+
+    const pickVariantLabel = (suggested: string | undefined): string => {
+        const taken = new Set(variants.map(v => (v.label || v.id).toLowerCase()));
+        const cleaned = (suggested || '').trim().replace(/^["']+|["']+$/g, '').slice(0, 24);
+        const base = cleaned || `v${variants.length + 1}`;
+        if (!taken.has(base.toLowerCase())) return base;
+        for (let i = 2; i < 100; i++) {
+            const candidate = `${base} ${i}`;
+            if (!taken.has(candidate.toLowerCase())) return candidate;
+        }
+        return base;
+    };
+
+    const handleRestyleSubmit = async (instruction: string) => {
+        const text = instruction.trim();
+        if (!text || isRestyling) return;
+        if (!activeModel) {
+            dispatch(dfActions.addMessages({
+                timestamp: Date.now(),
+                component: 'chart restyle',
+                type: 'error',
+                value: 'No model is configured. Please select a model before restyling.',
+            }));
+            return;
+        }
+        const activeVariant = activeVariantId
+            ? variants.find(v => v.id === activeVariantId)
+            : undefined;
+        const prepared = buildSpecForRestyle(chart, currentTable, conceptShelfItems, activeVariant);
+        if (!prepared) {
+            dispatch(dfActions.addMessages({
+                timestamp: Date.now(),
+                component: 'chart restyle',
+                type: 'error',
+                value: 'Cannot restyle this chart yet — make sure all required fields are encoded first.',
+            }));
+            return;
+        }
+        const { dataSample } = buildDataContext(currentTable, prepared.embeddedData);
+
+        setIsRestyling(true);
+        setPendingPrompt(text);
+        setRestylePrompt('');
+        setRestyleAnchor(null);
+        dispatch(dfActions.changeChartRunningStatus({ chartId, status: true }));
+        try {
+            const result = await callRestyleAgent({
+                instruction: text,
+                vlSpec: prepared.spec,
+                chartType: chart.chartType,
+                dataSample,
+                model: activeModel,
+            });
+            if (result.kind === 'out_of_scope') {
+                dispatch(dfActions.addMessages({
+                    timestamp: Date.now(),
+                    component: 'chart restyle',
+                    type: 'info',
+                    value: result.rationale
+                        ? `Style agent: "${result.rationale}" — this looks like a data change, not a style change.`
+                        : 'This looks like a data change, not a style change.',
+                }));
+                return;
+            }
+            const variant = makeVariant({
+                chart,
+                prompt: text,
+                vlSpec: result.vlSpec,
+                rationale: result.rationale,
+                label: pickVariantLabel(result.label),
+                basedOnVariantId: prepared.basedOnVariantId,
+                configUI: result.configUI,
+            });
+            dispatch(dfActions.addStyleVariant({ chartId, variant, activate: true }));
+        } catch (err: any) {
+            console.warn('[chart-restyle] failed', err);
+            dispatch(dfActions.addMessages({
+                timestamp: Date.now(),
+                component: 'chart restyle',
+                type: 'error',
+                value: `Restyle failed: ${err?.message || String(err)}`,
+            }));
+        } finally {
+            setIsRestyling(false);
+            setPendingPrompt(null);
+            dispatch(dfActions.changeChartRunningStatus({ chartId, status: false }));
+        }
+    };
+
+    const handleRefreshVariant = async (variant: ChartStyleVariant) => {
+        if (refreshingVariantId) return;
+        if (!activeModel) {
+            dispatch(dfActions.addMessages({
+                timestamp: Date.now(),
+                component: 'chart restyle',
+                type: 'error',
+                value: 'No model is configured. Please select a model before refreshing.',
+            }));
+            return;
+        }
+        const prepared = buildSpecForRestyle(chart, currentTable, conceptShelfItems);
+        if (!prepared) {
+            dispatch(dfActions.addMessages({
+                timestamp: Date.now(),
+                component: 'chart restyle',
+                type: 'error',
+                value: 'Cannot refresh — chart is not currently renderable.',
+            }));
+            return;
+        }
+        const { dataSample } = buildDataContext(currentTable, prepared.embeddedData);
+
+        setRefreshingVariantId(variant.id);
+        dispatch(dfActions.changeChartRunningStatus({ chartId, status: true }));
+        try {
+            const result = await callRestyleAgent({
+                instruction: variant.prompt,
+                vlSpec: prepared.spec,
+                chartType: chart.chartType,
+                dataSample,
+                model: activeModel,
+                styleReferenceSpec: variant.vlSpec,
+            });
+            if (result.kind === 'out_of_scope') {
+                dispatch(dfActions.addMessages({
+                    timestamp: Date.now(),
+                    component: 'chart restyle',
+                    type: 'info',
+                    value: result.rationale
+                        ? `Style agent: "${result.rationale}"`
+                        : 'Could not refresh this variant against the current encoding.',
+                }));
+                return;
+            }
+            dispatch(dfActions.updateStyleVariant({
+                chartId,
+                variantId: variant.id,
+                vlSpec: result.vlSpec,
+                rationale: result.rationale,
+                encodingFingerprint: computeEncodingFingerprint(chart),
+                configUI: result.configUI,
+            }));
+        } catch (err: any) {
+            console.warn('[chart-restyle] refresh failed', err);
+            dispatch(dfActions.addMessages({
+                timestamp: Date.now(),
+                component: 'chart restyle',
+                type: 'error',
+                value: `Refresh failed: ${err?.message || String(err)}`,
+            }));
+        } finally {
+            setRefreshingVariantId(null);
+            dispatch(dfActions.changeChartRunningStatus({ chartId, status: false }));
+        }
+    };
+
+    const renderVariantChip = (label: string, opts: {
+        active: boolean,
+        stale?: boolean,
+        refreshing?: boolean,
+        tooltip?: string,
+        onClick: () => void,
+        onDelete?: () => void,
+    }) => {
+        const accent = theme.palette.text.primary;
+        return (
+            <Box
+                key={label}
+                component="span"
+                onClick={opts.onClick}
+                title={opts.tooltip}
+                sx={{
+                    display: 'inline-flex',
+                    alignItems: 'center',
+                    gap: '4px',
+                    height: 20,
+                    px: '6px',
+                    fontSize: 11,
+                    fontWeight: 400,
+                    lineHeight: 1.4,
+                    color: accent,
+                    fontFamily: theme.typography.fontFamily,
+                    borderRadius: '6px',
+                    border: `1px solid ${alpha(accent, opts.active ? 0.45 : 0.12)}`,
+                    borderStyle: opts.stale ? 'dashed' : 'solid',
+                    backgroundColor: opts.active ? alpha(accent, 0.1) : theme.palette.background.paper,
+                    cursor: 'pointer',
+                    opacity: opts.stale ? 0.65 : 1,
+                    transition: transition.fast,
+                    '&:hover': {
+                        backgroundColor: alpha(accent, opts.active ? 0.13 : 0.04),
+                    },
+                }}
+            >
+                {opts.refreshing && (
+                    <CircularProgress size={10} sx={{ color: alpha(accent, 0.5), mr: '-1px' }} />
+                )}
+                <span>{label}</span>
+                {opts.onDelete && (
+                    <Box
+                        component="span"
+                        role="button"
+                        aria-label="delete variant"
+                        onClick={(e) => { e.stopPropagation(); opts.onDelete?.(); }}
+                        sx={{
+                            display: 'inline-flex',
+                            alignItems: 'center',
+                            justifyContent: 'center',
+                            width: 12,
+                            height: 12,
+                            borderRadius: '50%',
+                            color: alpha(accent, 0.4),
+                            cursor: 'pointer',
+                            '&:hover': {
+                                color: accent,
+                                backgroundColor: alpha(accent, 0.08),
+                            },
+                        }}
+                    >
+                        <CloseIcon sx={{ fontSize: 11 }} />
+                    </Box>
+                )}
+            </Box>
+        );
+    };
+
+    return (
+        <Box key='variant-chip-strip' sx={{
+            display: 'flex', flexWrap: 'wrap', alignItems: 'center', justifyContent: 'flex-start', gap: 0.5,
+            px: 1,
+            // Rendered inside the floating top toolbar (see VisualizationView
+            // vis-view-canvas), directly after the zoom resizer. A leading
+            // divider separates the two groups; a min height keeps the row
+            // vertically centered with the resizer controls.
+            minHeight: 34,
+        }}>
+            <Divider orientation="vertical" flexItem sx={{ my: 0.5, mr: 1, borderColor: alpha(theme.palette.text.primary, 0.12) }} />
+            <Typography sx={{ fontSize: 12, color: 'text.secondary', mr: 0.25 }}>
+                style:
+            </Typography>
+            {renderVariantChip('default', {
+                active: !activeVariantId,
+                tooltip: 'Render the chart from its current encoding (no style refinement applied).',
+                onClick: () => dispatch(dfActions.setActiveVariant({ chartId, variantId: undefined })),
+            })}
+            {variants.map(v => {
+                const stale = isVariantStale(chart, v);
+                const refreshing = refreshingVariantId === v.id;
+                return renderVariantChip(v.label || v.id, {
+                    active: v.id === activeVariantId,
+                    stale,
+                    refreshing,
+                    tooltip: stale
+                        ? `Encoding has changed since this variant was created. Clicking will re-run the style agent against the current encoding.\n\nPrompt: ${v.prompt}`
+                        : (v.rationale ? `${v.rationale}\n\nPrompt: ${v.prompt}` : `Prompt: ${v.prompt}`),
+                    onClick: () => {
+                        if (v.id !== activeVariantId) {
+                            dispatch(dfActions.setActiveVariant({ chartId, variantId: v.id }));
+                        }
+                        if (stale && !refreshing) {
+                            handleRefreshVariant(v);
+                        }
+                    },
+                    onDelete: () => dispatch(dfActions.deleteStyleVariant({ chartId, variantId: v.id })),
+                });
+            })}
+            {isRestyling && (
+                <Box
+                    component="span"
+                    title={pendingPrompt ? `Restyling: ${pendingPrompt}` : 'Restyling…'}
+                    sx={{
+                        display: 'inline-flex',
+                        alignItems: 'center',
+                        gap: '4px',
+                        height: 20,
+                        px: '6px',
+                        maxWidth: 160,
+                        fontSize: 11,
+                        fontFamily: theme.typography.fontFamily,
+                        color: 'text.secondary',
+                        borderRadius: '6px',
+                        border: `1px dashed ${alpha(theme.palette.text.primary, 0.2)}`,
+                        backgroundColor: alpha(theme.palette.text.primary, 0.03),
+                    }}
+                >
+                    <CircularProgress size={10} sx={{ color: alpha(theme.palette.text.primary, 0.4) }} />
+                    <Box component="span" sx={{ overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>
+                        Restyling
+                    </Box>
+                </Box>
+            )}
+            <Tooltip title="Restyle chart…">
+                <Box
+                    component="button"
+                    onClick={(e: React.MouseEvent<HTMLElement>) => setRestyleAnchor(e.currentTarget)}
+                    sx={{
+                        display: 'inline-flex',
+                        alignItems: 'center',
+                        gap: 0.5,
+                        height: 24,
+                        ml: 0.5,
+                        px: 1,
+                        cursor: 'pointer',
+                        border: 'none',
+                        borderRadius: '7px',
+                        fontSize: 12,
+                        fontWeight: 600,
+                        lineHeight: 1,
+                        color: theme.palette.primary.main,
+                        backgroundColor: alpha(theme.palette.primary.main, restyleAnchor ? 0.22 : 0.12),
+                        transition: 'background-color 0.15s',
+                        '&:hover': { backgroundColor: alpha(theme.palette.primary.main, 0.22) },
+                    }}
+                >
+                    <PaletteOutlinedIcon sx={{ fontSize: 15 }} />
+                    Design
+                </Box>
+            </Tooltip>
+            <Popover
+                open={Boolean(restyleAnchor)}
+                anchorEl={restyleAnchor}
+                onClose={() => setRestyleAnchor(null)}
+                anchorOrigin={{ vertical: 'bottom', horizontal: 'left' }}
+                transformOrigin={{ vertical: 'top', horizontal: 'left' }}
+                slotProps={{ paper: { sx: { width: 320, p: 1.25, borderRadius: 2 } } }}
+            >
+                <Typography sx={{ fontSize: 11, fontWeight: 600, color: 'text.secondary', mb: 0.75 }}>
+                    Quick actions
+                </Typography>
+                {[
+                    { label: 'restyle', actions: RESTYLE_ACTIONS },
+                    { label: 'annotate', actions: ANNOTATE_ACTIONS },
+                ].map(group => (
+                    <Box key={group.label} sx={{ display: 'flex', flexWrap: 'wrap', alignItems: 'center', gap: 0.5, mb: 0.75 }}>
+                        <Typography sx={{ fontSize: 11, color: 'text.disabled', lineHeight: '22px', flexShrink: 0, width: 48 }}>
+                            {group.label}
+                        </Typography>
+                        {group.actions.map(action => (
+                            <Tooltip key={action.key} title={action.description}>
+                                <Box
+                                    component="span"
+                                    onClick={() => { if (!isRestyling) handleRestyleSubmit(action.instruction); }}
+                                    sx={{
+                                        display: 'inline-flex',
+                                        alignItems: 'center',
+                                        height: 22,
+                                        px: '8px',
+                                        fontSize: 11,
+                                        fontFamily: theme.typography.fontFamily,
+                                        color: 'text.primary',
+                                        borderRadius: '6px',
+                                        border: `1px solid ${alpha(theme.palette.text.primary, 0.15)}`,
+                                        cursor: isRestyling ? 'default' : 'pointer',
+                                        opacity: isRestyling ? 0.5 : 1,
+                                        transition: transition.fast,
+                                        '&:hover': { backgroundColor: alpha(theme.palette.text.primary, 0.06) },
+                                    }}
+                                >
+                                    {action.label}
+                                </Box>
+                            </Tooltip>
+                        ))}
+                    </Box>
+                ))}
+                <Typography sx={{ fontSize: 11, fontWeight: 600, color: 'text.secondary', mb: 0.75 }}>
+                    Design yourself
+                </Typography>
+                <Card
+                    variant='outlined'
+                    sx={{
+                        position: 'relative',
+                        display: 'flex', flexDirection: 'column',
+                        px: 1, pt: 0.5, pb: 0.25,
+                        borderWidth: 1,
+                        borderColor: alpha(theme.palette.text.primary, 0.2),
+                        borderRadius: '8px',
+                        overflow: 'visible',
+                        transition: transition.fast,
+                        '&:hover': {
+                            borderColor: alpha(theme.palette.primary.main, 0.6),
+                        },
+                        '&:focus-within': {
+                            borderColor: alpha(theme.palette.primary.main, 0.8),
+                        },
+                    }}
+                >
+                    <TextField
+                        variant="standard"
+                        autoFocus
+                        sx={{
+                            flex: 1,
+                            "& .MuiInput-input": { fontSize: '12px', lineHeight: 1.5 },
+                            "& .MuiInput-underline:before": { borderBottom: 'none' },
+                            "& .MuiInput-underline:hover:not(.Mui-disabled):before": { borderBottom: 'none' },
+                            "& .MuiInput-underline:after": { borderBottom: 'none' },
+                        }}
+                        placeholder="Describe a style, e.g. “use a muted pastel palette”"
+                        value={restylePrompt}
+                        disabled={isRestyling}
+                        onChange={(e) => setRestylePrompt(e.target.value)}
+                        onKeyDown={(e) => {
+                            if (e.key === 'Enter' && !e.shiftKey) {
+                                e.preventDefault();
+                                handleRestyleSubmit(restylePrompt);
+                            }
+                        }}
+                        slotProps={{ inputLabel: { shrink: true } }}
+                        fullWidth
+                        multiline
+                        minRows={2}
+                        maxRows={5}
+                    />
+                    <Box sx={{ display: 'flex', flexDirection: 'row', alignItems: 'center', justifyContent: 'flex-end' }}>
+                        <Tooltip title="Restyle">
+                            <span>
+                                <IconButton
+                                    size="small"
+                                    color="primary"
+                                    sx={{ p: 0.5 }}
+                                    disabled={isRestyling || !restylePrompt.trim()}
+                                    onClick={() => handleRestyleSubmit(restylePrompt)}
+                                >
+                                    {isRestyling
+                                        ? <CircularProgress size={18} sx={{ color: theme.palette.primary.main }} />
+                                        : <SendIcon sx={{ fontSize: 18 }} />}
+                                </IconButton>
+                            </span>
+                        </Tooltip>
+                    </Box>
+                </Card>
+            </Popover>
+        </Box>
+    );
+};
diff --git a/src/views/EncodingBox.tsx b/src/views/EncodingBox.tsx
index bf1abf4f..26a929be 100644
--- a/src/views/EncodingBox.tsx
+++ b/src/views/EncodingBox.tsx
@@ -543,27 +543,27 @@ export const EncodingBox: FC<EncodingBoxProps> = function EncodingBox({ channel,
     let normalizedDisplay = "";
     
     let handleSelectOption = (option: string) => {
-        if (conceptShelfItems.map(f => f.name).includes(option)) {
-            //console.log(`yah-haha: ${option}`);
-            updateEncProp("fieldID", (conceptShelfItems.find(f => f.name == option) as FieldItem).id);
-        } else {
-            if (option == "") {
-                console.log("nothing happens")
-            } else {
-                let newConept = {
-                    id: `concept-${Date.now()}`, name: option,
-                    source: "custom", tableRef: "custom",
-                } as FieldItem;
-                dispatch(dfActions.updateConceptItems(newConept));
-                updateEncProp("fieldID", newConept.id);
-            }
-            
+        // The encoding shelf only accepts fields that already exist in the
+        // current table. Selecting anything else (a stale concept from another
+        // table, or a typed-but-nonexistent name) is ignored — creating new
+        // fields here is not allowed, since that would require re-deriving data.
+        const fieldItem = conceptShelfItems.find(f => f.name === option);
+        const inActiveTable = !activeTable || activeTable.names.includes(option);
+        if (fieldItem && inActiveTable) {
+            updateEncProp("fieldID", fieldItem.id);
         }
     }
 
 
     let conceptGroups = groupConceptItems(conceptShelfItems, tables);
 
+    // Field names selectable in this encoding shelf: only fields that exist in
+    // the current table. Anything else cannot be assigned here.
+    let availableFieldNames = conceptGroups
+        .filter(g => activeTable ? activeTable.names.includes(g.field.name) : true)
+        .map(g => g.field.name)
+        .filter(name => name != "");
+
     let groupNames = [...new Set(conceptGroups.map(g => g.group))];
     conceptGroups.sort((a, b) => {
         if (groupNames.indexOf(a.group) < groupNames.indexOf(b.group)) {
@@ -623,15 +623,10 @@ export const EncodingBox: FC<EncodingBoxProps> = function EncodingBox({ channel,
         }}
         // value={tempValue}
         filterOptions={(options, params) => {
-            const filtered = filter(options, params);
-            const { inputValue } = params;
-            // Suggest the creation of a new value
-            const isExisting = options.some((option) => inputValue === option);
-            if (!isExisting) {
-                return [`${inputValue}`, ...filtered,  ]
-            } else {
-                return [...filtered];
-            }
+            // The encoding shelf only accepts fields that already exist in the
+            // current table — creating brand-new fields (which would require
+            // re-deriving data) is not allowed here.
+            return filter(options, params);
         }}
         sx={{ 
             flexGrow: 1, 
@@ -647,7 +642,7 @@ export const EncodingBox: FC<EncodingBoxProps> = function EncodingBox({ channel,
         handleHomeEndKeys
         autoHighlight
         id={`autocomplete-${chartId}-${channel}`}
-        options={conceptGroups.map(g => g.field.name).filter(name => name != "")}
+        options={availableFieldNames}
         getOptionLabel={(option) => {
             // Value selected with enter, right from the input
             return option;
@@ -776,9 +771,24 @@ export const EncodingBox: FC<EncodingBoxProps> = function EncodingBox({ channel,
                 );
             }
         }}
-        freeSolo
         renderInput={(params) => (
             <TextField {...params} variant="standard" autoComplete='off' placeholder={t('encoding.fieldPlaceholder')}
+                onKeyDownCapture={(event) => {
+                    // The MUI Autocomplete handles Enter on the input itself,
+                    // and `autoHighlight` makes it auto-select the first option
+                    // even when the typed text doesn't match. Intercept Enter in
+                    // the capture phase: only let it through when the current
+                    // input is an exact available field; otherwise neutralize it
+                    // so a stray Enter never assigns a field or bubbles up to
+                    // trigger an unrelated refresh/formulate.
+                    if (event.key === 'Enter') {
+                        const value = (event.target as HTMLInputElement).value?.trim();
+                        if (!value || !availableFieldNames.includes(value)) {
+                            event.preventDefault();
+                            event.stopPropagation();
+                        }
+                    }
+                }}
                 sx={{height: "24px", "& .MuiInput-root": {height: "24px", fontSize: "small"}}} />
         )}
         slotProps={{
@@ -789,6 +799,10 @@ export const EncodingBox: FC<EncodingBoxProps> = function EncodingBox({ channel,
                     '& .MuiAutocomplete-listbox': {
                         maxHeight: '600px !important'
                     },
+                    '& .MuiAutocomplete-noOptions': {
+                        fontSize: '11px',
+                        padding: '6px 12px',
+                    },
                 }
             }
         }}
diff --git a/src/views/EncodingShelfCard.tsx b/src/views/EncodingShelfCard.tsx
index 1386e614..7e042cf5 100644
--- a/src/views/EncodingShelfCard.tsx
+++ b/src/views/EncodingShelfCard.tsx
@@ -49,7 +49,7 @@ import { Channel, Chart, FieldItem, Trigger, duplicateChart, ChartStyleVariant,
 
 import _ from 'lodash';
 
-const ConfigSlider: FC<{
+export const ConfigSlider: FC<{
     value: number;
     propDef: { label: string; min?: number; max?: number; step?: number };
     onCommit: (value: number) => void;
@@ -143,6 +143,12 @@ export interface EncodingShelfCardProps {
     chartId: string;
     trigger?: Trigger;
     noBorder?: boolean;
+    // Render only the chat / follow-up box (+ ideas). Used by the floating
+    // chat FAB so the chat lives off-canvas.
+    chatOnly?: boolean;
+    // Render the encoding shelf without the chat box (+ no ideas). Used by the
+    // floating encoding popover at the top-right of the chart.
+    hideChat?: boolean;
 }
 
 
@@ -306,14 +312,14 @@ export const TriggerCard: FC<{
  * them to specific Vega-Lite config blocks (typography, color, gridlines,
  * background, title alignment, etc.).
  */
-interface StylePreset {
+export interface StylePreset {
     key: string;
     label: string;
     description: string;
     instruction: string;
 }
 
-const STYLE_PRESETS: StylePreset[] = [
+export const STYLE_PRESETS: StylePreset[] = [
     {
         key: 'nyt',
         label: 'New York Times',
@@ -335,13 +341,6 @@ const STYLE_PRESETS: StylePreset[] = [
         instruction:
             'Restyle this chart in the FiveThirtyEight (538) blog style.',
     },
-    {
-        key: 'dark',
-        label: 'Dark Mode',
-        description: 'Dark theme',
-        instruction:
-            'Restyle this chart for a dark theme.',
-    },
     {
         key: 'presentation',
         label: 'Presentation',
@@ -359,7 +358,7 @@ const STYLE_PRESETS: StylePreset[] = [
 ];
 
 
-export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId }) {
+export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId, chatOnly, hideChat }) {
     const { t } = useTranslation();
     const theme = useTheme();
 
@@ -436,7 +435,6 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
     const { streamIdeas, formulateData } = useFormulateData();
 
     const [chartTypeMenuOpen, setChartTypeMenuOpen] = useState<boolean>(false);
-    const [encodingHovered, setEncodingHovered] = useState<boolean>(false);
 
     // Anchor for the bottom-left "style presets" menu in the follow-up
     // speech bubble. A preset click sends a detailed style instruction
@@ -444,17 +442,8 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
     // these are guaranteed style-only changes by construction).
     const [stylePresetAnchor, setStylePresetAnchor] = useState<HTMLElement | null>(null);
 
-    // Auto-expand encoding shelf when dragging a concept or operator card
-    const { isDraggingField } = useDragLayer((monitor) => ({
-        isDraggingField: monitor.isDragging() && 
-            (monitor.getItemType() === 'concept-card' || monitor.getItemType() === 'operator-card'),
-    }));
-
-    const shouldExpand = encodingHovered || isDraggingField;
-
-    // When no fields are assigned to any channel, show all channels expanded
-    const hasAnyField = Object.values(encodingMap).some(enc => enc?.fieldID);
-    const shouldExpandAll = !hasAnyField || shouldExpand;
+    // Encoding channels are always shown (no auto hide/expand on hover/drag).
+    const shouldExpandAll = true;
     
 
     let handleUpdateChartType = (newChartType: string) => {
@@ -1423,7 +1412,8 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
 
 
     let channelComponent = (
-        <Box sx={{ width: "100%", minWidth: "256px", height: '100%', display: "flex", flexDirection: "column", gap: '4px' }}>
+        <Box sx={{ width: "100%", minWidth: "220px", height: '100%', display: "flex", flexDirection: "column", gap: '4px' }}>
+            {!chatOnly && (<>
             <Box key='mark-selector-box' sx={{ ml: 1, flex: '0 0 auto', display: 'flex', alignItems: 'center' }}>
                 <FormControl sx={{ m: 1, minWidth: 120, flex: 1, margin: "0px 0"}} size="small">
                     <Select
@@ -1525,9 +1515,7 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
             <Box key='encoding-and-config' sx={{
                     ml: 1,
                     flex: '1 1 auto',
-                }} style={{ height: "calc(100% - 100px)" }} className="encoding-list"
-                onMouseEnter={() => setEncodingHovered(true)}
-                onMouseLeave={() => setEncodingHovered(false)}>
+                }} style={{ height: "calc(100% - 100px)" }} className="encoding-list">
             {(() => {
                     const template = getChartTemplate(chart.chartType);
                     const configProps = template?.properties;
@@ -1657,8 +1645,7 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
                 })()}
                 {encodingBoxGroups}
             </Box>
-            {variantChipStrip}
-            {formulateInputBox}
+            </>)}
         </Box>);
 
     // Whether any agent work is in flight (intent classify, restyle, or the
@@ -1712,46 +1699,6 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
             <Box sx={{ padding: '4px 0px' }}>
                 {channelComponent}
             </Box>
-            {/* Ideas chips shown inline below the formulate box */}
-            {(currentChartIdeas.length > 0 || (isLoadingIdeas && thinkingBuffer)) && (
-                <Box sx={{
-                    display: 'flex',
-                    flexDirection: 'column',
-                    gap: 0.5,
-                    pt: 0.5,
-                }}>
-                    {currentChartIdeas.length > 0 && (
-                        <Typography sx={{
-                            fontSize: 11,
-                            color: 'text.secondary',
-                        }}>
-                            {t('encoding.ideasHeading')}
-                        </Typography>
-                    )}
-                    <Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 0.5 }}>
-                        {currentChartIdeas.map((idea, index) => (
-                            <IdeaChip
-                                mini={true}
-                                key={index}
-                                idea={idea}
-                                theme={theme}
-                                onClick={() => handleIdeaClick(idea.text)}
-                            />
-                        ))}
-                        {isLoadingIdeas && thinkingBuffer && <ThinkingBufferEffect text={thinkingBuffer.slice(-40)} sx={{ width: '100%' }} />}
-                    </Box>
-                </Box>
-            )}
-            {isLoadingIdeas && !thinkingBuffer && (
-                <Box sx={{ padding: '2px 0' }}>
-                    {ThinkingBanner(
-                        (ideaPhase === 'building_context' ? t('chartRec.progressBuildingContext')
-                           : ideaPhase === 'generating' ? t('chartRec.progressGenerating')
-                           : t('encoding.ideating'))
-                        + (ideaElapsed > 0 ? ` (${ideaElapsed}s)` : '')
-                    )}
-                </Box>
-            )}
         </Box>
     );
 
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 78af7812..21f87dbd 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -46,7 +46,7 @@ import { useDispatch, useSelector } from 'react-redux';
 import { DataFormulatorState, dfActions, fetchChartInsight } from '../app/dfSlice';
 import { assembleVegaChart, extractFieldsFromEncodingMap, getUrls, prepVisTable, fetchWithIdentity } from '../app/utils';
 import { displayRowsCache } from '../app/displayRowsCache';
-import { buildEmbeddedDataForChart } from '../app/restyle';
+import { buildEmbeddedDataForChart, applyVariantConfigUI } from '../app/restyle';
 import { apiRequest } from '../app/apiClient';
 import embed from 'vega-embed';
 import { Chart, EncodingItem, EncodingMap, FieldItem, computeInsightKey } from '../components/ComponentType';
@@ -54,6 +54,7 @@ import { Chart, EncodingItem, EncodingMap, FieldItem, computeInsightKey } from '
 import DeleteIcon from '@mui/icons-material/Delete';
 import TerminalIcon from '@mui/icons-material/Terminal';
 import QuestionAnswerIcon from '@mui/icons-material/QuestionAnswer';
+import TuneIcon from '@mui/icons-material/Tune';
 import ContentCopyIcon from '@mui/icons-material/ContentCopy';
 import ZoomInIcon from '@mui/icons-material/ZoomIn';
 import ZoomOutIcon from '@mui/icons-material/ZoomOut';
@@ -76,7 +77,9 @@ import { useTranslation } from 'react-i18next';
 
 import { ChatDialog } from './ChatDialog';
 import { PlanStepsView } from './InteractionEntryCard';
-import { EncodingShelfThread } from './EncodingShelfThread';
+import { EncodingShelfCard } from './EncodingShelfCard';
+import { ChartQuickConfig } from './ChartQuickConfig';
+import { ChartVariantStrip } from './ChartVariantStrip';
 import { CustomReactTable } from './ReactTable';
 import { InsightIcon } from '../icons';
 import TableChartOutlinedIcon from '@mui/icons-material/TableChartOutlined';
@@ -236,6 +239,32 @@ export let SampleSizeEditor: FC<{
     </Box>
 }
 
+/**
+ * Recursively scale every width/height in a Vega-Lite spec by `factor`, in place.
+ * Used to apply the zoom resizer to style-variant specs, which bypass the
+ * compiler's canvas sizing. Handles numeric sizes and `{step: N}` band sizes,
+ * descending through spec / layer / concat / hconcat / vconcat / facet children.
+ * Scaled sizes are clamped to >= 1 so a small zoom can never round a size to 0.
+ */
+const scaleSpecSize = (node: any, factor: number): void => {
+    if (!node || typeof node !== 'object') return;
+    const scale = (size: number): number => Math.max(1, Math.round(size * factor));
+    for (const dim of ['width', 'height'] as const) {
+        const v = node[dim];
+        if (typeof v === 'number') {
+            node[dim] = scale(v);
+        } else if (v && typeof v === 'object' && typeof v.step === 'number') {
+            node[dim] = { ...v, step: scale(v.step) };
+        }
+    }
+    for (const key of ['spec', 'layer', 'concat', 'hconcat', 'vconcat', 'facet'] as const) {
+        // A child is either a list of sub-specs or a single nested object; the
+        // guard at the top makes recursing into non-object entries a no-op.
+        const child = node[key];
+        for (const c of Array.isArray(child) ? child : [child]) scaleSpecSize(c, factor);
+    }
+};
+
 /** Main chart uses vega-embed (interactive tooltips). Static toSVG() removes hover behavior. */
 const VegaChartRenderer: FC<{
     chart: Chart;
@@ -288,6 +317,22 @@ const VegaChartRenderer: FC<{
             );
             spec.data = { values: variantValues };
 
+            // Apply the variant's generative-UI controls (agent-authored simple
+            // knobs) onto the spec using the user's current values. This is a
+            // pure "set value at path" transform (no code execution) and runs
+            // before size scaling so a control that touches width/height is
+            // still scaled by the resizer. See applyVariantConfigUI.
+            spec = applyVariantConfigUI(spec, activeVariant.configUI, activeVariant.configValues);
+
+            // Variants bypass assembleVegaChart, so the zoom resizer's
+            // scaleFactor (which normally flows through the compiler's canvas
+            // sizing) wouldn't affect them. Apply it directly by scaling every
+            // width/height in the stored spec — numeric sizes and {step: N}
+            // band sizes alike — so the resizer works on restyled charts too.
+            if (scaleFactor !== 1) {
+                scaleSpecSize(spec, scaleFactor);
+            }
+
         } else {
             spec = assembleVegaChart(
                 chart.chartType,
@@ -348,7 +393,7 @@ const VegaChartRenderer: FC<{
         const embedResult: { current?: Awaited<ReturnType<typeof embed>> } = {};
 
         el.innerHTML = '';
-        embed(el, { ...spec }, { actions: true, renderer: 'canvas' })
+        embed(el, { ...spec }, { actions: false, renderer: 'canvas' })
             .then((result) => {
                 if (cancelled) {
                     result.finalize();
@@ -402,8 +447,19 @@ const VegaChartRenderer: FC<{
                 id={elementId}
                 sx={{
                     maxWidth: '100%',
-                    overflow: 'visible',
-                    '& .vega-embed': { margin: 'auto', overflow: 'visible' },
+                    overflow: 'hidden',
+                    // vega-embed adds its `.vega-embed` class to THIS element (the
+                    // div we pass to embed()) and renders the <canvas>/<svg> as a
+                    // direct child. Vega writes explicit inline width/height (in CSS
+                    // px) on that canvas/svg, so we must override them with
+                    // !important to let the chart shrink to the panel width while
+                    // keeping its aspect ratio (height: auto). A descendant
+                    // `.vega-embed` selector would NOT match — the class is on this
+                    // element itself, not a child.
+                    '& > canvas, & > svg': {
+                        maxWidth: '100%',
+                        height: 'auto !important',
+                    },
                 }}
             />
         </Box>
@@ -469,12 +525,21 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
     const [bottomTab, setBottomTab] = useState<string>('data');
     const [localScaleFactor, setLocalScaleFactor] = useState<number>(1);
     const [chatDialogOpen, setChatDialogOpen] = useState<boolean>(false);
+    // Floating encoding-shelf popover. The button lives in the stable outer
+    // panel (not inside the chart's <Fade>), so it never remounts or shifts
+    // when the chart re-renders. We anchor the popover to that button via a ref.
+    const [encodingOpen, setEncodingOpen] = useState<boolean>(false);
+    const editButtonRef = useRef<HTMLButtonElement | null>(null);
 
     // Reset local UI state when focused chart changes
     useEffect(() => {
         setBottomTab('data');
-        setLocalScaleFactor(1);
+        // Restore the persisted zoom for the newly focused chart (stored on
+        // the Chart object so it survives switching charts and session
+        // save/load). Falls back to 1 for charts that have never been zoomed.
+        setLocalScaleFactor(focusedChart?.scaleFactor ?? 1);
         setChatDialogOpen(false);
+        setEncodingOpen(false);
     }, [focusedChartId]);
 
 
@@ -804,7 +869,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
     } else if (table.derive) {
         chartMessage = t('chart.msgWarning');
     }
-
     let chartActionItems = isDataStale ? [] : (
         <Box sx={{display: "flex", flexDirection: "column", flex: 1, my: 1}}>
             {(table.virtual ? activeVisTableTotalRowCount > serverConfig.MAX_DISPLAY_ROWS : table.rows.length > serverConfig.MAX_DISPLAY_ROWS) && !(chartUnavailable || encodingShelfEmpty) ? (
@@ -840,8 +904,8 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
     
     let focusedComponent = [];
 
-    let focusedElement = <Fade key={`fade-${focusedChart.id}-${dataVersion}-${focusedChart.chartType}-${JSON.stringify(focusedChart.encodingMap)}`} 
-                            in={!isDataStale} timeout={600}>    
+    let focusedElement = <Fade key={`fade-${focusedChart.id}-${dataVersion}-${focusedChart.chartType}-${JSON.stringify(focusedChart.encodingMap)}`}
+                            in={!isDataStale} timeout={600}>
                             <Box sx={{display: "flex", flexDirection: "column", flexShrink: 0, justifyContent: 'center', justifyItems: 'center', maxWidth: '100%', mt: 'max(120px, 4vh)', mb: 'max(120px, 4vh)'}} className="chart-box">
                                 {/*
                                   Chart container chrome
@@ -850,12 +914,11 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                                     positioned zoom-slider overlay (chartResizer, ~32px tall
                                     anchored top-left) never covers chart content. Without this,
                                     full-width charts like KPI grids run right up under the slider.
-                                  - pr: 28  → reserves a strip on the right for vega-embed's
-                                    actions menu ("..."), which floats at the top-right of the
-                                    Vega canvas and can otherwise hug / extend past the panel edge.
-                                  - minHeight: 280 → guarantees the Vega actions menu and its
-                                    dropdown have vertical room to render even when a chart's
-                                    intrinsic height is very small (e.g. one row of compact cards).
+                                  - pr: 28  → reserves a strip on the right for the floating
+                                    "edit chart" button overlay (see the focused-box in `content`).
+                                  - minHeight: 280 → guarantees the chart has vertical room to
+                                    render even when a chart's intrinsic height is very small
+                                    (e.g. one row of compact cards).
                                   These are view-level concerns and intentionally NOT solved per
                                   chart template.
                                 */}
@@ -875,12 +938,26 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                                         onSpecReady={handleSpecReady}
                                     />
                                 </Box>
+                                {/* Quick chart-config controls (toggles/sliders/selects) for
+                                    fast in-place tweaks without opening the full encoding
+                                    popover. Kept INSIDE the chart-box so it reads as part of
+                                    the same chart component rather than drifting down toward
+                                    the data panel below. Placed ABOVE the action items so the
+                                    options sit directly under the chart, before the AI hint.
+                                    Hidden while synthesis is running — the chart is being
+                                    regenerated, so config controls would be premature. */}
+                                {!chartUnavailable && !chartSynthesisInProgress.includes(focusedChart.id) && focusedChart.chartType !== "Table" && focusedChart.chartType !== "Auto" && (
+                                    <ChartQuickConfig chartId={focusedChart.id} />
+                                )}
                                 {chartActionItems}
-                            </Box>                        
+                            </Box>
                         </Fade>;
 
     focusedComponent = [
         <Box key="chart-focused-element" className="chart-focused-box"  sx={{ minHeight: 'min(75vh, 800px)', width: "100%", display: "flex", flexDirection: "column", flexShrink: 0}}>
+            {/* Style-variant switcher now lives in the floating top toolbar
+                (see vis-view-canvas return) so it stays pinned alongside the
+                zoom resizer instead of scrolling with the chart content. */}
             <Box sx={{ my: 'auto' }}>
                 {focusedElement}
             </Box>
@@ -932,7 +1009,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                         const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16)) + 34;
 
                         return (
-                            <Box sx={{ margin: '8px auto 24px auto', padding: 0, height: adaptiveHeight, width: adaptiveWidth, overflow: 'hidden', flexShrink: 0 }}>
+                            <Box sx={{ margin: '8px auto 24px auto', padding: 0, height: adaptiveHeight, width: '100%', maxWidth: adaptiveWidth, minWidth: 0, overflow: 'hidden' }}>
                                 <FreeDataViewFC maximizable />
                             </Box>
                         );
@@ -1040,64 +1117,128 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
             dialog={triggerTable?.derive?.dialog || table.derive?.dialog as any[]} /> : null,
     ]
     
-    const ENCODING_SHELF_WIDTH = 240;
-
     let content = [
-        <Box key='focused-box' className="vega-focused vis-scroll" sx={{ display: "flex", overflowY: 'auto', overflowX: 'hidden', flexDirection: 'column', position: 'relative', flex: 1, pr: `${ENCODING_SHELF_WIDTH}px` }}>
+        <Box key='focused-box' className="vega-focused vis-scroll" sx={{ display: "flex", overflowY: 'auto', overflowX: 'hidden', flexDirection: 'column', position: 'relative', flex: 1 }}>
             {focusedComponent}
         </Box>,
-        /* Floating encoding shelf panel */
-        <Box key='encoding-shelf' sx={{
-            position: 'absolute',
-            top: 0,
-            right: 0,
-            zIndex: 10,
-            height: '100%',
-            pointerEvents: 'none',
-        }}>
-            <Box sx={{ pointerEvents: 'auto' }}>
-                <EncodingShelfThread chartId={focusedChart.id} />
+        /* Encoding shelf popover, anchored to the floating "edit chart" button. */
+        <Popover
+            key='encoding-popover'
+            open={encodingOpen && Boolean(editButtonRef.current)}
+            anchorEl={editButtonRef.current}
+            onClose={() => setEncodingOpen(false)}
+            anchorOrigin={{ vertical: 'bottom', horizontal: 'right' }}
+            transformOrigin={{ vertical: 'top', horizontal: 'right' }}
+            slotProps={{ paper: { sx: { width: 320, maxHeight: '78vh', overflowY: 'auto', mt: 0.5, borderRadius: '10px', overflowX: 'visible' } } }}
+        >
+            <EncodingShelfCard chartId={focusedChart.id} />
+            {/* Small, low-emphasis footer for advanced users to inspect the
+                assembled Vega-Lite spec in the external Vega editor. */}
+            <Box sx={{ display: 'flex', justifyContent: 'flex-end', px: 1.5, pt: 0.5, pb: 1 }}>
+                <Button
+                    size="small"
+                    startIcon={<OpenInNewIcon sx={{ fontSize: 13 }} />}
+                    disabled={!renderedSpec || focusedChart.chartType === "Table" || focusedChart.chartType === "Auto"}
+                    onClick={handleOpenInVegaEditor}
+                    sx={{ textTransform: 'none', fontSize: '0.65rem', color: 'text.disabled', minWidth: 'auto', py: 0, '&:hover': { color: 'text.secondary', backgroundColor: 'transparent' } }}
+                >
+                    {t('chart.openInVegaEditor')}
+                </Button>
             </Box>
-        </Box>
+        </Popover>
     ]
 
     let [scaleMin, scaleMax] = [0.2, 2.4]
 
+    // Persist the zoom onto the chart so it survives switching charts.
+    // Called on commit (button click / slider release) rather than on every
+    // drag tick, to avoid churning the charts array ref mid-drag.
+    const persistScaleFactor = React.useCallback((value: number) => {
+        if (!focusedChartId) return;
+        dispatch(dfActions.updateChartScaleFactor({
+            chartId: focusedChartId,
+            scaleFactor: value,
+        }));
+    }, [dispatch, focusedChartId]);
+
     // Memoize chart resizer to avoid re-creating Material-UI components on every render
     let chartResizer = useMemo(() => <Stack spacing={1} direction="row" sx={{ 
-        margin: 1, width: 160, position: "absolute", zIndex: 10, 
-        backgroundColor: 'rgba(255, 255, 255, 0.9)',
-        borderRadius: '4px',
+        width: 160, flexShrink: 0,
     }} alignItems="center">
         <Tooltip key="zoom-out-tooltip" title={t('chart.zoomOut')}>
             <span>
                 <IconButton color="primary" size='small' disabled={localScaleFactor <= scaleMin} onClick={() => {
-                    setLocalScaleFactor(s => Math.max(scaleMin, Math.round((s - 0.1) * 10) / 10));
+                    const next = Math.max(scaleMin, Math.round((localScaleFactor - 0.1) * 10) / 10);
+                    setLocalScaleFactor(next);
+                    persistScaleFactor(next);
                 }}>
                     <ZoomOutIcon fontSize="small" />
                 </IconButton>
             </span>
         </Tooltip>
         <Slider aria-label={t('chart.resizeSliderAria')} size='small' defaultValue={1} step={0.1} min={scaleMin} max={scaleMax} 
-                value={localScaleFactor} onChange={(event: Event, newValue: number | number[]) => {
-            setLocalScaleFactor(newValue as number);
-        }} />
+                value={localScaleFactor}
+                onChange={(event: Event, newValue: number | number[]) => {
+                    setLocalScaleFactor(newValue as number);
+                }}
+                onChangeCommitted={(event, newValue) => {
+                    persistScaleFactor(newValue as number);
+                }} />
         <Tooltip key="zoom-in-tooltip" title={t('chart.zoomIn')}>
             <span>
                 <IconButton color="primary" size='small' disabled={localScaleFactor >= scaleMax} onClick={() => {
-                    setLocalScaleFactor(s => Math.min(scaleMax, Math.round((s + 0.1) * 10) / 10));
+                    const next = Math.min(scaleMax, Math.round((localScaleFactor + 0.1) * 10) / 10);
+                    setLocalScaleFactor(next);
+                    persistScaleFactor(next);
                 }}>
                     <ZoomInIcon fontSize="small" />
                 </IconButton>
             </span>
         </Tooltip>
-    </Stack>, [localScaleFactor, t]);
+    </Stack>, [localScaleFactor, t, persistScaleFactor]);
 
     return <Box ref={componentRef} id="vis-view-canvas" sx={{overflow: "hidden", display: 'flex', flex: 1, position: 'relative'}}>
         {/* No full-screen block while the agent works: the previous chart
             stays visible, and progress is signaled non-intrusively on the
             chat box + encoding shelf (see EncodingShelfCard). */}
-        {chartUnavailable ? "" : chartResizer}
+        {/* Floating top toolbar: zoom resizer + style-variant strip live
+            together here (NOT inside the scrolling chart content), so every
+            control stays pinned to the top of the panel instead of some
+            floating and some scrolling away. pointerEvents are disabled on the
+            empty bar area so it never blocks chart interaction underneath. */}
+        <Box sx={{
+            position: 'absolute', top: 0, left: 0, right: 0, zIndex: 10,
+            display: 'flex', alignItems: 'center', gap: 0.5, px: 1, py: '8px',
+            backgroundColor: '#fff',
+            pointerEvents: 'none', '& > *': { pointerEvents: 'auto' },
+        }}>
+            {chartResizer}
+            {focusedChart && focusedChart.chartType !== 'Table' && focusedChart.chartType !== 'Auto' && (
+                <ChartVariantStrip chartId={focusedChart.id} />
+            )}
+            {/* Edit-chart (encoding shelf) button — right-aligned in the same
+                floating toolbar so all top controls sit on one pinned row.
+                Opens the encoding shelf popover; stays available even when the
+                chart can't render yet, so users can fix the encoding. */}
+            {focusedChart && focusedChart.chartType !== 'Table' && focusedChart.chartType !== 'Auto' && (
+                <Tooltip title={t('chart.editChart')} placement="left">
+                    <IconButton
+                        ref={editButtonRef}
+                        size="small"
+                        onClick={() => setEncodingOpen(o => !o)}
+                        sx={{
+                            ml: 'auto', mr: '8px',
+                            backgroundColor: encodingOpen ? 'primary.main' : 'rgba(255,255,255,0.92)',
+                            color: encodingOpen ? 'primary.contrastText' : 'text.secondary',
+                            border: '1px solid', borderColor: 'divider',
+                            boxShadow: '0 1px 4px rgba(0,0,0,0.12)',
+                            '&:hover': { backgroundColor: encodingOpen ? 'primary.dark' : 'rgba(255,255,255,1)' },
+                        }}>
+                        <TuneIcon sx={{ fontSize: 18 }} />
+                    </IconButton>
+                </Tooltip>
+            )}
+        </Box>
         {content}
     </Box>
 }

From c27217bbc7317ffc03825153f24547f3af0a9f61 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 5 Jun 2026 17:51:29 -0700
Subject: [PATCH 14/29] cleanup

---
 src/views/ChartVariantStrip.tsx | 30 ++++++++++++++----------------
 src/views/VisualizationView.tsx |  2 +-
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/src/views/ChartVariantStrip.tsx b/src/views/ChartVariantStrip.tsx
index 8caca950..a125a593 100644
--- a/src/views/ChartVariantStrip.tsx
+++ b/src/views/ChartVariantStrip.tsx
@@ -334,10 +334,7 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
             minHeight: 34,
         }}>
             <Divider orientation="vertical" flexItem sx={{ my: 0.5, mr: 1, borderColor: alpha(theme.palette.text.primary, 0.12) }} />
-            <Typography sx={{ fontSize: 12, color: 'text.secondary', mr: 0.25 }}>
-                style:
-            </Typography>
-            {renderVariantChip('default', {
+            {variants.length > 0 && renderVariantChip('default', {
                 active: !activeVariantId,
                 tooltip: 'Render the chart from its current encoding (no style refinement applied).',
                 onClick: () => dispatch(dfActions.setActiveVariant({ chartId, variantId: undefined })),
@@ -395,23 +392,24 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
                     sx={{
                         display: 'inline-flex',
                         alignItems: 'center',
-                        gap: 0.5,
-                        height: 24,
+                        gap: '4px',
+                        height: 20,
                         ml: 0.5,
-                        px: 1,
+                        px: '6px',
                         cursor: 'pointer',
-                        border: 'none',
-                        borderRadius: '7px',
-                        fontSize: 12,
-                        fontWeight: 600,
-                        lineHeight: 1,
+                        fontSize: 11,
+                        fontWeight: 400,
+                        lineHeight: 1.4,
+                        fontFamily: theme.typography.fontFamily,
+                        borderRadius: '6px',
+                        border: `1px solid ${alpha(theme.palette.primary.main, restyleAnchor ? 0.45 : 0.25)}`,
                         color: theme.palette.primary.main,
-                        backgroundColor: alpha(theme.palette.primary.main, restyleAnchor ? 0.22 : 0.12),
-                        transition: 'background-color 0.15s',
-                        '&:hover': { backgroundColor: alpha(theme.palette.primary.main, 0.22) },
+                        backgroundColor: restyleAnchor ? alpha(theme.palette.primary.main, 0.1) : theme.palette.background.paper,
+                        transition: transition.fast,
+                        '&:hover': { backgroundColor: alpha(theme.palette.primary.main, 0.06) },
                     }}
                 >
-                    <PaletteOutlinedIcon sx={{ fontSize: 15 }} />
+                    <PaletteOutlinedIcon sx={{ fontSize: 13 }} />
                     Design
                 </Box>
             </Tooltip>
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 21f87dbd..0efc6e91 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -1009,7 +1009,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                         const adaptiveWidth = Math.max(MIN_TABLE_WIDTH, Math.min(MAX_TABLE_WIDTH, totalColWidth + SCROLLBAR_WIDTH + 16)) + 34;
 
                         return (
-                            <Box sx={{ margin: '8px auto 24px auto', padding: 0, height: adaptiveHeight, width: '100%', maxWidth: adaptiveWidth, minWidth: 0, overflow: 'hidden' }}>
+                            <Box sx={{ margin: '8px auto 24px auto', padding: 0, height: adaptiveHeight, width: '100%', minWidth: '80%', maxWidth: adaptiveWidth, overflow: 'hidden' }}>
                                 <FreeDataViewFC maximizable />
                             </Box>
                         );

From 3f0312a9ec69824f99ac517dedd4fac669e73e97 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 5 Jun 2026 23:01:55 -0700
Subject: [PATCH 15/29] cleanup

---
 src/views/ChartQuickConfig.tsx  | 11 +++--
 src/views/ChartVariantStrip.tsx | 82 +++++++++++++++------------------
 src/views/VisualizationView.tsx | 16 ++++++-
 3 files changed, 59 insertions(+), 50 deletions(-)

diff --git a/src/views/ChartQuickConfig.tsx b/src/views/ChartQuickConfig.tsx
index dbcff0d6..3a3a0b59 100644
--- a/src/views/ChartQuickConfig.tsx
+++ b/src/views/ChartQuickConfig.tsx
@@ -172,13 +172,14 @@ export const ChartQuickConfig: FC<ChartQuickConfigProps> = function ({ chartId }
                                 const idx = event.target.value as number;
                                 commit(propDef, options[idx].value);
                             }}
-                            disableUnderline
                             sx={{
                                 fontSize: 11, height: '22px', minWidth: 60,
-                                backgroundColor: 'rgba(0,0,0,0.05)', borderRadius: '6px',
-                                '&:hover': { backgroundColor: 'rgba(0,0,0,0.08)' },
-                                '& .MuiSelect-select': { padding: '1px 20px 1px 6px !important', fontSize: 11 },
-                                '& .MuiSvgIcon-root': { fontSize: 14, right: 2 },
+                                color: 'text.secondary',
+                                '&:before': { borderBottomColor: 'rgba(0,0,0,0.2)' },
+                                '&:hover:not(.Mui-disabled):before': { borderBottomColor: 'rgba(0,0,0,0.42)' },
+                                '&:after': { borderBottomColor: 'rgba(0,0,0,0.42)' },
+                                '& .MuiSelect-select': { padding: '1px 18px 1px 2px !important', fontSize: 11 },
+                                '& .MuiSvgIcon-root': { fontSize: 14, right: 0, color: 'rgba(0,0,0,0.4)' },
                             }}
                             renderValue={(idx: number) => <span style={{ fontSize: 11 }}>{options[idx]?.label || 'Default'}</span>}
                         >
diff --git a/src/views/ChartVariantStrip.tsx b/src/views/ChartVariantStrip.tsx
index a125a593..1cf4ab80 100644
--- a/src/views/ChartVariantStrip.tsx
+++ b/src/views/ChartVariantStrip.tsx
@@ -16,7 +16,7 @@ import { FC, useState } from 'react';
 import React from 'react';
 import { useSelector, useDispatch } from 'react-redux';
 
-import { Box, Typography, CircularProgress, alpha, useTheme, IconButton, Tooltip, Popover, TextField, Card, Divider } from '@mui/material';
+import { Box, Typography, CircularProgress, alpha, useTheme, IconButton, Tooltip, Popover, TextField, Card, Divider, Button } from '@mui/material';
 import CloseIcon from '@mui/icons-material/Close';
 import PaletteOutlinedIcon from '@mui/icons-material/PaletteOutlined';
 import SendIcon from '@mui/icons-material/Send';
@@ -43,12 +43,14 @@ export interface ChartVariantStripProps {
 // subsections (restyle / annotate) under a single "Quick actions" heading.
 interface QuickAction { key: string; label: string; description: string; instruction: string }
 
-const RESTYLE_ACTIONS: QuickAction[] = STYLE_PRESETS.map(p => ({
-    key: p.key,
-    label: p.label,
-    description: p.description,
-    instruction: p.instruction,
-}));
+const RESTYLE_ACTIONS: QuickAction[] = STYLE_PRESETS
+    .filter(p => ['nyt', 'economist', 'comic'].includes(p.key))
+    .map(p => ({
+        key: p.key,
+        label: p.label,
+        description: p.description,
+        instruction: p.instruction,
+    }));
 
 const ANNOTATE_ACTIONS: QuickAction[] = [
     {
@@ -261,7 +263,7 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
         onClick: () => void,
         onDelete?: () => void,
     }) => {
-        const accent = theme.palette.text.primary;
+        const accent = opts.active ? theme.palette.primary.main : theme.palette.text.primary;
         return (
             <Box
                 key={label}
@@ -272,22 +274,22 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
                     display: 'inline-flex',
                     alignItems: 'center',
                     gap: '4px',
-                    height: 20,
-                    px: '6px',
+                    height: 22,
+                    px: '7px',
                     fontSize: 11,
-                    fontWeight: 400,
+                    fontWeight: opts.active ? 500 : 400,
                     lineHeight: 1.4,
                     color: accent,
                     fontFamily: theme.typography.fontFamily,
                     borderRadius: '6px',
-                    border: `1px solid ${alpha(accent, opts.active ? 0.45 : 0.12)}`,
+                    border: `1px solid ${alpha(accent, opts.active ? 0.5 : 0.2)}`,
                     borderStyle: opts.stale ? 'dashed' : 'solid',
-                    backgroundColor: opts.active ? alpha(accent, 0.1) : theme.palette.background.paper,
+                    backgroundColor: opts.active ? alpha(accent, 0.08) : theme.palette.background.paper,
                     cursor: 'pointer',
                     opacity: opts.stale ? 0.65 : 1,
                     transition: transition.fast,
                     '&:hover': {
-                        backgroundColor: alpha(accent, opts.active ? 0.13 : 0.04),
+                        backgroundColor: alpha(accent, opts.active ? 0.12 : 0.04),
                     },
                 }}
             >
@@ -334,7 +336,10 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
             minHeight: 34,
         }}>
             <Divider orientation="vertical" flexItem sx={{ my: 0.5, mr: 1, borderColor: alpha(theme.palette.text.primary, 0.12) }} />
-            {variants.length > 0 && renderVariantChip('default', {
+            <Typography sx={{ fontSize: 12, color: 'text.secondary', mr: 0.25 }}>
+                style:
+            </Typography>
+            {renderVariantChip('default', {
                 active: !activeVariantId,
                 tooltip: 'Render the chart from its current encoding (no style refinement applied).',
                 onClick: () => dispatch(dfActions.setActiveVariant({ chartId, variantId: undefined })),
@@ -386,32 +391,17 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
                 </Box>
             )}
             <Tooltip title="Restyle chart…">
-                <Box
-                    component="button"
+                <IconButton
+                    color="primary"
+                    size="small"
                     onClick={(e: React.MouseEvent<HTMLElement>) => setRestyleAnchor(e.currentTarget)}
                     sx={{
-                        display: 'inline-flex',
-                        alignItems: 'center',
-                        gap: '4px',
-                        height: 20,
-                        ml: 0.5,
-                        px: '6px',
-                        cursor: 'pointer',
-                        fontSize: 11,
-                        fontWeight: 400,
-                        lineHeight: 1.4,
-                        fontFamily: theme.typography.fontFamily,
-                        borderRadius: '6px',
-                        border: `1px solid ${alpha(theme.palette.primary.main, restyleAnchor ? 0.45 : 0.25)}`,
-                        color: theme.palette.primary.main,
-                        backgroundColor: restyleAnchor ? alpha(theme.palette.primary.main, 0.1) : theme.palette.background.paper,
-                        transition: transition.fast,
-                        '&:hover': { backgroundColor: alpha(theme.palette.primary.main, 0.06) },
+                        ml: 0.25,
+                        backgroundColor: restyleAnchor ? alpha(theme.palette.primary.main, 0.1) : 'transparent',
                     }}
                 >
-                    <PaletteOutlinedIcon sx={{ fontSize: 13 }} />
-                    Design
-                </Box>
+                    <PaletteOutlinedIcon fontSize="small" />
+                </IconButton>
             </Tooltip>
             <Popover
                 open={Boolean(restyleAnchor)}
@@ -419,17 +409,17 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
                 onClose={() => setRestyleAnchor(null)}
                 anchorOrigin={{ vertical: 'bottom', horizontal: 'left' }}
                 transformOrigin={{ vertical: 'top', horizontal: 'left' }}
-                slotProps={{ paper: { sx: { width: 320, p: 1.25, borderRadius: 2 } } }}
+                slotProps={{ paper: { sx: { width: 340, p: 2, borderRadius: 2 } } }}
             >
-                <Typography sx={{ fontSize: 11, fontWeight: 600, color: 'text.secondary', mb: 0.75 }}>
+                <Typography sx={{ fontSize: 11, fontWeight: 600, color: 'text.secondary', textTransform: 'uppercase', letterSpacing: 0.5, mb: 1.25 }}>
                     Quick actions
                 </Typography>
                 {[
                     { label: 'restyle', actions: RESTYLE_ACTIONS },
                     { label: 'annotate', actions: ANNOTATE_ACTIONS },
                 ].map(group => (
-                    <Box key={group.label} sx={{ display: 'flex', flexWrap: 'wrap', alignItems: 'center', gap: 0.5, mb: 0.75 }}>
-                        <Typography sx={{ fontSize: 11, color: 'text.disabled', lineHeight: '22px', flexShrink: 0, width: 48 }}>
+                    <Box key={group.label} sx={{ display: 'flex', flexWrap: 'wrap', alignItems: 'flex-start', rowGap: 0.5, columnGap: 0.5, mb: 1.5 }}>
+                        <Typography sx={{ fontSize: 11, color: 'text.disabled', height: 20, display: 'flex', alignItems: 'center', flexShrink: 0, width: 44 }}>
                             {group.label}
                         </Typography>
                         {group.actions.map(action => (
@@ -440,7 +430,7 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
                                     sx={{
                                         display: 'inline-flex',
                                         alignItems: 'center',
-                                        height: 22,
+                                        height: 20,
                                         px: '8px',
                                         fontSize: 11,
                                         fontFamily: theme.typography.fontFamily,
@@ -450,7 +440,10 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
                                         cursor: isRestyling ? 'default' : 'pointer',
                                         opacity: isRestyling ? 0.5 : 1,
                                         transition: transition.fast,
-                                        '&:hover': { backgroundColor: alpha(theme.palette.text.primary, 0.06) },
+                                        '&:hover': {
+                                            backgroundColor: alpha(theme.palette.primary.main, 0.06),
+                                            borderColor: alpha(theme.palette.primary.main, 0.4),
+                                        },
                                     }}
                                 >
                                     {action.label}
@@ -459,7 +452,8 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
                         ))}
                     </Box>
                 ))}
-                <Typography sx={{ fontSize: 11, fontWeight: 600, color: 'text.secondary', mb: 0.75 }}>
+                <Divider sx={{ my: 1.5 }} />
+                <Typography sx={{ fontSize: 11, fontWeight: 600, color: 'text.secondary', textTransform: 'uppercase', letterSpacing: 0.5, mb: 1 }}>
                     Design yourself
                 </Typography>
                 <Card
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 0efc6e91..06d2732c 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -243,7 +243,9 @@ export let SampleSizeEditor: FC<{
  * Recursively scale every width/height in a Vega-Lite spec by `factor`.
  * Used to apply the zoom resizer to style-variant specs, which bypass the
  * compiler's canvas sizing. Handles numeric sizes, `{step: N}` band sizes,
- * and nested view-composition specs (spec / layer / concat / facet).
+ * `config.view.continuousWidth/Height` (how continuous-scale charts encode
+ * their plot size), and nested view-composition specs (spec / layer /
+ * concat / facet).
  */
 const scaleSpecSize = (node: any, factor: number): void => {
     if (!node || typeof node !== 'object') return;
@@ -255,6 +257,18 @@ const scaleSpecSize = (node: any, factor: number): void => {
             node[dim] = { ...v, step: Math.round(v.step * factor) };
         }
     }
+    // Continuous-scale charts (e.g. line/area with quantitative or temporal
+    // axes) carry no top-level numeric width/height; their plot size lives in
+    // config.view.continuousWidth / continuousHeight. Scale those too so the
+    // zoom resizer affects continuous variant charts, not just discrete ones.
+    const view = node.config?.view;
+    if (view && typeof view === 'object') {
+        for (const dim of ['continuousWidth', 'continuousHeight'] as const) {
+            if (typeof view[dim] === 'number') {
+                view[dim] = Math.round(view[dim] * factor);
+            }
+        }
+    }
     for (const key of ['spec', 'layer', 'concat', 'hconcat', 'vconcat', 'facet'] as const) {
         const child = node[key];
         if (Array.isArray(child)) {

From 21863b40f3da7976d55ebde35bb5c364847c0ceb Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Mon, 8 Jun 2026 18:41:46 -0700
Subject: [PATCH 16/29] semantic ui test

---
 .../agents/agent_code_explanation.py          |  28 +-
 .../agents/agent_report_gen.py                | 270 ++++----
 py-src/data_formulator/agents/data_agent.py   |  23 +-
 src/app/dfSlice.tsx                           |  71 ++
 src/app/restyle.ts                            |   2 +-
 src/app/tokens.ts                             |  18 +
 src/components/ChartTemplates.tsx             |  24 +-
 src/i18n/locales/en/chart.json                |   1 +
 src/i18n/locales/en/common.json               |  12 +-
 src/i18n/locales/zh/chart.json                |   1 +
 src/i18n/locales/zh/common.json               |  12 +-
 src/lib/agents-chart/chartjs/assemble.ts      |   9 +-
 src/lib/agents-chart/core/compute-layout.ts   |  93 ++-
 src/lib/agents-chart/core/encoding-actions.ts | 110 ++++
 .../agents-chart/core/encoding-overrides.ts   |  44 ++
 src/lib/agents-chart/core/field-semantics.ts  |  13 +-
 src/lib/agents-chart/core/index.ts            |   9 +
 src/lib/agents-chart/core/semantic-types.ts   | 105 ++-
 src/lib/agents-chart/core/types.ts            | 192 +++++-
 src/lib/agents-chart/echarts/assemble.ts      |   9 +-
 .../agents-chart/echarts/templates/heatmap.ts |  59 +-
 src/lib/agents-chart/gofish/assemble.ts       |   9 +-
 src/lib/agents-chart/vegalite/assemble.ts     | 169 ++++-
 src/lib/agents-chart/vegalite/index.ts        |   2 +-
 .../agents-chart/vegalite/instantiate-spec.ts | 224 +++----
 .../agents-chart/vegalite/templates/area.ts   |   6 +-
 .../vegalite/templates/bar-table.ts           |  25 +-
 .../agents-chart/vegalite/templates/bar.ts    | 226 ++++++-
 .../agents-chart/vegalite/templates/index.ts  | 218 +++++-
 .../vegalite/templates/kpi-card.ts            |   2 +-
 .../vegalite/templates/lollipop.ts            |   4 +-
 src/views/ChartQuickConfig.tsx                | 192 +++++-
 src/views/ChatDialog.tsx                      |  23 +-
 src/views/EncodingBox.tsx                     | 170 +----
 src/views/EncodingShelfCard.tsx               |  28 +-
 src/views/ExplComponents.tsx                  |  98 ++-
 src/views/ReportView.tsx                      | 207 +++---
 src/views/SimpleChartRecBox.tsx               |  72 +-
 src/views/TiptapReportEditor.tsx              | 526 +++++++++------
 src/views/ViewUtils.tsx                       |   9 +-
 src/views/VisualizationView.tsx               | 419 +++++-------
 .../lib/agents-chart/flint_py_extract.test.ts | 240 +++++++
 .../unit/lib/agents-chart/sortAction.test.ts  | 210 ++++++
 .../vegalite/bandedLabelAngle.test.ts         |  65 ++
 .../vegalite/chartOptionApplicability.test.ts | 189 ++++++
 .../vegalite/closedDomainStacking.test.ts     |  92 +++
 .../agents-chart/vegalite/logScale.test.ts    | 157 +++++
 .../vegalite/zeroBaseline.test.ts             | 161 +++++
 .../unit/views/formatCellValue.test.ts        |  10 +-
 yarn.lock                                     | 618 +++++++-----------
 50 files changed, 3961 insertions(+), 1515 deletions(-)
 create mode 100644 src/lib/agents-chart/core/encoding-actions.ts
 create mode 100644 src/lib/agents-chart/core/encoding-overrides.ts
 create mode 100644 tests/frontend/unit/lib/agents-chart/flint_py_extract.test.ts
 create mode 100644 tests/frontend/unit/lib/agents-chart/sortAction.test.ts
 create mode 100644 tests/frontend/unit/lib/agents-chart/vegalite/bandedLabelAngle.test.ts
 create mode 100644 tests/frontend/unit/lib/agents-chart/vegalite/chartOptionApplicability.test.ts
 create mode 100644 tests/frontend/unit/lib/agents-chart/vegalite/closedDomainStacking.test.ts
 create mode 100644 tests/frontend/unit/lib/agents-chart/vegalite/logScale.test.ts
 create mode 100644 tests/frontend/unit/lib/agents-chart/vegalite/zeroBaseline.test.ts

diff --git a/py-src/data_formulator/agents/agent_code_explanation.py b/py-src/data_formulator/agents/agent_code_explanation.py
index 43d78535..d783845a 100644
--- a/py-src/data_formulator/agents/agent_code_explanation.py
+++ b/py-src/data_formulator/agents/agent_code_explanation.py
@@ -17,12 +17,23 @@
 
 For each non-trivial derived field, output:
   1. the field name(s)
-  2. a short formula — use actual field names (e.g. `Profit = Revenue - Cost`),
-     and reach for formal math (\sum, \frac, etc.) only when it's the clearest
-     way to express the computation.
+  2. a short formula explaining the computation.
+
+Pick ONE format per formula — never mix the two:
+
+- Code span (default, use this for almost everything): wrap the formula in
+  single backticks and write field names exactly as they appear in the data.
+  Underscores stay literal — never add backslashes.
+  e.g. `basket_cost = Bananas + Bread + Milk`  (NOT `basket\_cost`)
+
+- LaTeX (only when you genuinely need math notation such as a summation,
+  fraction, square root, or a statistical model's defining equation): inline
+  `\( ... \)` or block `\[ ... \]`. Use short abstract variables (x, n, ...)
+  so you never need underscores or escaping inside the math.
+  e.g. \[ \text{Normalized} = \frac{x - \min(x)}{\max(x) - \min(x)} \]
 
 A brief one-line description before the formula is allowed when it adds clarity
-(e.g. "Within each Major\_category:"). Otherwise keep it to just the formula.
+(e.g. "Within each category:"). Otherwise keep it to just the formula.
 
 Skip fields whose computation is trivial or obvious from the name
 (count/min/max/avg/sum, year/decade extraction, simple rename, etc.).
@@ -31,10 +42,7 @@
 
 For statistical-analysis code (regression, clustering, hypothesis tests),
 emit a single entry with `"field": "Statistical Analysis"` containing the
-model's defining equation(s).
-
-LaTeX: inline `\( ... \)`, block `\[ ... \]`, escape underscores as `\_`.
-Prefer inline for short formulas, block when there's vertical structure.
+model's defining equation(s) in LaTeX.
 
 If nothing is worth showing, return an empty list.
 
@@ -131,11 +139,11 @@ def extract_decade(date_str):
 [
     {
         "field": "Norm_Rating, Norm_Gross",
-        "explanation": "-BSLASH-[ -BSLASH-text{Normalized} = -BSLASH-frac{x - -BSLASH-min(x)}{-BSLASH-max(x) - -BSLASH-min(x)} -BSLASH-]"
+        "explanation": "\\[ \\text{Normalized} = \\frac{x - \\min(x)}{\\max(x) - \\min(x)} \\]"
     },
     {
         "field": "Critical_Commercial_Score",
-        "explanation": "-BSLASH-[ -BSLASH-text{Critical-BSLASH-_Commercial-BSLASH-_Score} = -BSLASH-text{Norm-BSLASH-_Rating} -BSLASH-times -BSLASH-text{Norm-BSLASH-_Gross} -BSLASH-]"
+        "explanation": "`Critical_Commercial_Score = Norm_Rating * Norm_Gross`"
     }
 ]
 '''
diff --git a/py-src/data_formulator/agents/agent_report_gen.py b/py-src/data_formulator/agents/agent_report_gen.py
index 80d4c31b..540c5a0c 100644
--- a/py-src/data_formulator/agents/agent_report_gen.py
+++ b/py-src/data_formulator/agents/agent_report_gen.py
@@ -3,25 +3,28 @@
 
 """Report generation agent with tool-calling for inspect + embed.
 
-Two-phase architecture:
-  - **Phase 1 (Inspect)**: Non-streaming LLM call with inspection tools.
-    Agent calls inspect_chart / inspect_source_data to gather information.
-    Results are fed back as context. Invisible to the user.
-  - **Phase 2 (Generate)**: Streaming LLM call with embedding tools.
-    Agent writes the report narrative token-by-token.
-    embed_chart / embed_table tool calls produce structured blocks
-    in the output stream — rendered by the frontend as inline content.
+Single agentic loop:
+  - Each round is a streaming LLM call with the inspection tools available.
+    The agent calls inspect_chart / inspect_source_data to gather information
+    whenever it needs it; the results (and rendered chart images) are fed back
+    as context and the loop continues.
+  - When the agent stops calling tools and starts writing prose, that prose IS
+    the report — it streams token-by-token to the user, with charts embedded
+    inline via ![caption](chart://chart_id) markdown links.
+  - Because the tool channel stays available throughout, the agent uses real
+    tool calls instead of leaking tool-call syntax into the report text.
 """
 
 import json
 import logging
+import re
 from typing import Any, Generator
 
 import pandas as pd
 
 from data_formulator.agent_config import reasoning_effort_for
 from data_formulator.agents.agent_utils import (
-    attach_reasoning_content,
+    accumulate_reasoning_content,
     generate_data_summary,
 )
 from data_formulator.agents.agent_language import inject_language_instruction
@@ -120,18 +123,16 @@
 - Plan a report that covers the meaningful findings across the exploration,
   not just the last or most obvious chart.
 
-## Phase 1 — Inspect
-Use `inspect_chart` and `inspect_source_data` to gather what you need before
-writing. `inspect_chart` returns the chart's rendered image, a data sample, and
-the transformation code — so you can see exactly what each chart shows and write
-accurate captions and insights.
-- Inspect the charts that correspond to the key findings you plan to present.
-  For a multi-section report or dashboard, that usually means several charts.
-- You can inspect multiple charts in one call (pass several chart_ids).
-- Don't fetch charts you have no intention of discussing, but don't under-inspect
-  either — a report that ignores most of the exploration is a poor report.
+## Inspecting charts and data
+You have two tools available the whole time: `inspect_chart` and
+`inspect_source_data`. Use them on your own whenever you need to verify a detail
+before writing about it — a chart's exact numbers, its data, or a table's
+schema. `inspect_chart` returns the chart's rendered image, a data sample, and
+the code that produced it. Check the charts behind the key findings you present.
 
-## Phase 2 — Write the report
+## Write the report
+Write the report directly in markdown — your prose streams straight to the
+reader. Inspect whatever you need as you go.
 
 ### Embedding charts (REQUIRED FORMAT — do not change this)
 To embed a chart image, use markdown image syntax with a `chart://` URL:
@@ -182,8 +183,30 @@
 """
 
 
+# Defense-in-depth: keeping the tool channel available across the whole loop
+# means the model normally uses real tool calls instead of writing tool-call
+# syntax as text. Some harmony / gpt-oss style models still occasionally leak
+# their tool-call channel into the text stream (e.g. "to=functions.inspect_chart
+# ... json {\"chart_ids\": [...]}"), sometimes with degenerate spam tokens. As a
+# cheap last line of defense we strip the obvious leak markers from each streamed
+# delta on its own; a marker split across two deltas is not matched here.
+_LEAK_SPECIAL_TOKEN = re.compile(r"<\|[^|>]*\|>")  # e.g. <|channel|>
+_LEAK_TOOLCALL = re.compile(
+    r"(?:\bcommentary\b\s*)?\bto\s*=\s*functions\.[A-Za-z0-9_]+"  # optional "commentary", then "to=functions.<name>"
+    r"[\s\S]*?\{[\s\S]*?\}",  # lazily up to the first "{" and on to the first "}" after it
+)
+
+
+def _strip_leaked_tool_syntax(text: str) -> str:
+    """Return `text` with leaked tool-call headers (and their trailing JSON
+    args) and harmony special tokens removed. Clean prose is untouched."""
+    text = _LEAK_TOOLCALL.sub("", text)  # drop tool-call headers together with their JSON args
+    text = _LEAK_SPECIAL_TOKEN.sub("", text)  # then drop any remaining <|...|> tokens
+    return text
+
+
 class ReportGenAgent:
-    """Tool-calling report generation agent with two-phase streaming."""
+    """Tool-calling report generation agent with a single streaming loop."""
 
     def __init__(self, client, workspace, language_instruction=""):
         self.client = client
@@ -199,7 +222,7 @@ def run(
         other_threads: list[dict[str, Any]] | None = None,
         primary_tables: list[str] | None = None,
     ) -> Generator[dict[str, Any], None, None]:
-        """Generate a report via two-phase tool-calling.
+        """Generate a report via a single tool-calling loop.
 
         Yields SSE-style dicts:
             {"type": "text_delta", "content": "..."}
@@ -238,85 +261,136 @@ def run(
         system_prompt = SYSTEM_PROMPT
         system_prompt = inject_language_instruction(system_prompt, self.language_instruction)
 
+        write_instruction = (
+            "Write a report in markdown that covers the key findings across the "
+            "exploration — don't reduce it to a single chart unless the request "
+            "explicitly asks for something that brief. Pull up whatever charts or "
+            "data you need to look at as you go (this happens automatically and "
+            "is invisible to the reader), and embed each chart you discuss with "
+            "![caption](chart://chart_id)."
+        )
         messages: list[dict] = [
             {"role": "system", "content": system_prompt},
-            {"role": "user", "content": f"{context}\n\n[USER REQUEST]\n\n{user_prompt}"},
+            {
+                "role": "user",
+                "content": f"{context}\n\n[USER REQUEST]\n\n{user_prompt}\n\n{write_instruction}",
+            },
         ]
 
-        # ── Phase 1: Inspect (non-streaming) ──────────────────────────
-        messages = self._run_inspect_phase(messages, input_tables, charts)
-
-        # ── Phase 2: Generate (streaming with embed tools) ────────────
-        yield from self._run_generate_phase(messages, charts, input_tables)
+        # Single agentic loop: the model inspects via tool calls as needed, then
+        # streams the report. Tools stay available throughout, so it uses the
+        # real tool channel instead of leaking tool-call syntax as text.
+        yield from self._run_agent_loop(messages, charts, input_tables)
 
     # ------------------------------------------------------------------
-    # Phase 1: Inspection loop
+    # Agentic loop: inspect-as-needed, then stream the report
     # ------------------------------------------------------------------
 
-    def _run_inspect_phase(
+    def _run_agent_loop(
         self,
         messages: list[dict],
-        input_tables: list[dict[str, Any]],
         charts: list[dict[str, Any]],
-    ) -> list[dict]:
-        """Run non-streaming inspect calls. Returns updated messages."""
-        max_rounds = 5
+        input_tables: list[dict[str, Any]],
+    ) -> Generator[dict[str, Any], None, None]:
+        """Single streaming tool-calling loop.
+
+        Each round is a streaming LLM call with the inspect tools available. If
+        the model emits tool calls, we execute them (attaching rendered chart
+        images) and loop. When the model stops calling tools and just writes
+        prose, that prose IS the report and streams straight to the user.
+        Because the tool channel stays available the whole time, the model never
+        has to fall back to writing tool-call syntax as text.
+        """
+        max_rounds = 6
 
-        for _ in range(max_rounds):
+        for round_idx in range(max_rounds):
             try:
-                response = self._call_llm(messages, tools=INSPECT_TOOLS)
+                stream = self._call_llm_streaming(messages, tools=INSPECT_TOOLS)
             except Exception as e:
-                logger.warning(f"[ReportAgent] Inspect phase error: {e}")
-                from data_formulator.error_handler import collect_stream_warning
-                collect_stream_warning(
-                    "Report data inspection failed — report may be incomplete",
-                    detail=str(e),
-                    message_code="INSPECT_PHASE_FAILED",
-                )
-                break
-
-            if not response or not response.choices:
-                break
-
-            choice = response.choices[0]
-            content = choice.message.content or ""
-            tool_calls = getattr(choice.message, "tool_calls", None)
-
-            if not tool_calls:
-                # Agent is ready to write — don't append its text yet,
-                # Phase 2 will re-prompt with embed tools
-                break
+                logger.error(f"[ReportAgent] LLM call failed: {e}")
+                yield {"type": "text_delta", "content": f"Error generating report: {e}"}
+                return
+
+            text_parts: list[str] = []
+            reasoning_acc: str | None = None
+            tool_calls_acc: dict[int, dict[str, Any]] = {}
+
+            for chunk in stream:
+                if not chunk.choices:
+                    continue
+                delta = chunk.choices[0].delta
+                reasoning_acc = accumulate_reasoning_content(reasoning_acc, delta)
+
+                content = getattr(delta, "content", None)
+                if content:
+                    text_parts.append(content)
+                    cleaned = _strip_leaked_tool_syntax(content)
+                    if cleaned:
+                        yield {"type": "text_delta", "content": cleaned}
+
+                for tcd in getattr(delta, "tool_calls", None) or []:
+                    idx = getattr(tcd, "index", 0) or 0
+                    slot = tool_calls_acc.setdefault(
+                        idx, {"id": None, "name": "", "arguments": ""}
+                    )
+                    if getattr(tcd, "id", None):
+                        slot["id"] = tcd.id
+                    fn = getattr(tcd, "function", None)
+                    if fn is not None:
+                        if getattr(fn, "name", None):
+                            slot["name"] = fn.name
+                        if getattr(fn, "arguments", None):
+                            slot["arguments"] += fn.arguments
+
+            # No tool calls this round → the model wrote the report. Done.
+            if not tool_calls_acc:
+                return
+
+            # Inspection round: record the tool calls, execute them, then loop.
+            ordered = [tool_calls_acc[i] for i in sorted(tool_calls_acc)]
+            for i, tc in enumerate(ordered):
+                if not tc["id"]:
+                    tc["id"] = f"call_{round_idx}_{i}"
 
-            # Append assistant message
             assistant_msg: dict[str, Any] = {
                 "role": "assistant",
-                "content": content or None,
+                "content": "".join(text_parts) or None,
                 "tool_calls": [
                     {
-                        "id": tc.id,
+                        "id": tc["id"],
                         "type": "function",
                         "function": {
-                            "name": tc.function.name,
-                            "arguments": tc.function.arguments,
+                            "name": tc["name"],
+                            "arguments": tc["arguments"] or "{}",
                         },
                     }
-                    for tc in tool_calls
+                    for tc in ordered
                 ],
             }
-            attach_reasoning_content(assistant_msg, choice.message)
+            if reasoning_acc:
+                assistant_msg["reasoning_content"] = reasoning_acc
             messages.append(assistant_msg)
 
-            # Execute each tool. Chart images can't ride along in tool-result
-            # messages on most providers, so we collect them and attach them as
-            # a single follow-up vision message after all tool results.
+            # Chart images can't ride along in tool-result messages on most
+            # providers, so we collect them and attach them as a single
+            # follow-up vision message after all tool results.
             pending_images: list[str] = []
-            for tc in tool_calls:
-                tool_name = tc.function.name
+            for tc in ordered:
+                tool_name = tc["name"]
                 try:
-                    tool_args = json.loads(tc.function.arguments)
+                    tool_args = json.loads(tc["arguments"] or "{}")
                 except json.JSONDecodeError:
                     tool_args = {}
 
+                # Tell the frontend what the agent is doing (start/end), the
+                # same way the data agent streams tool_start / tool_result.
+                yield {
+                    "type": "tool_start",
+                    "tool": tool_name,
+                    "chart_ids": tool_args.get("chart_ids") if tool_name == "inspect_chart" else None,
+                    "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None,
+                }
+
                 if tool_name == "inspect_chart":
                     tool_content, image_urls = self._handle_inspect_chart(
                         tool_args.get("chart_ids", []), charts
@@ -331,9 +405,11 @@ def _run_inspect_phase(
                 else:
                     tool_content = f"Unknown tool: {tool_name}"
 
+                yield {"type": "tool_result", "tool": tool_name, "status": "ok"}
+
                 messages.append({
                     "role": "tool",
-                    "tool_call_id": tc.id,
+                    "tool_call_id": tc["id"],
                     "content": tool_content,
                 })
 
@@ -354,48 +430,12 @@ def _run_inspect_phase(
                     })
                 messages.append({"role": "user", "content": image_blocks})
 
-            logger.info(f"[ReportAgent] Inspect phase: executed {len(tool_calls)} tool call(s)")
-
-        return messages
-
-    # ------------------------------------------------------------------
-    # Phase 2: Streaming generation with embed tools
-    # ------------------------------------------------------------------
-
-    def _run_generate_phase(
-        self,
-        messages: list[dict],
-        charts: list[dict[str, Any]],
-        input_tables: list[dict[str, Any]],
-    ) -> Generator[dict[str, Any], None, None]:
-        """Stream the report as plain text with [IMAGE()] placeholders."""
-
-        # Add a nudge to start writing
-        messages.append({
-            "role": "user",
-            "content": (
-                "Now write the report in markdown, grounded in the exploration "
-                "threads and the charts/data you inspected. Cover the key "
-                "findings across the exploration — don't reduce it to a single "
-                "chart unless the request explicitly calls for something that "
-                "brief. Embed each chart you discuss with "
-                "![caption](chart://chart_id)."
-            ),
-        })
-
-        try:
-            stream = self._call_llm_streaming(messages, tools=None)
-        except Exception as e:
-            logger.error(f"[ReportAgent] Generate phase error: {e}")
-            yield {"type": "text_delta", "content": f"Error generating report: {e}"}
-            return
+            logger.info(
+                f"[ReportAgent] Round {round_idx + 1}: executed "
+                f"{len(ordered)} tool call(s)"
+            )
 
-        for chunk in stream:
-            if not chunk.choices:
-                continue
-            delta = chunk.choices[0].delta
-            if hasattr(delta, "content") and delta.content:
-                yield {"type": "text_delta", "content": delta.content}
+        logger.warning("[ReportAgent] Tool-call rounds exhausted without a report")
 
     # ------------------------------------------------------------------
     # Tool handlers
@@ -536,14 +576,6 @@ def _resolve_table_data(
     # LLM call helpers
     # ------------------------------------------------------------------
 
-    def _call_llm(self, messages: list[dict], tools: list[dict] | None = None):
-        """Non-streaming LLM call with optional tool definitions."""
-        if tools:
-            return self.client.get_completion_with_tools(
-                messages, tools=tools, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model),
-            )
-        return self.client.get_completion(messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-
     def _call_llm_streaming(self, messages: list[dict], tools: list[dict] | None = None):
         """Streaming LLM call with optional tool definitions."""
         if tools:
diff --git a/py-src/data_formulator/agents/data_agent.py b/py-src/data_formulator/agents/data_agent.py
index 56de9037..8c35f783 100644
--- a/py-src/data_formulator/agents/data_agent.py
+++ b/py-src/data_formulator/agents/data_agent.py
@@ -280,18 +280,25 @@ def _rescue_validate_action(data: dict) -> list[str]:
 }}
 ```
 
-Use `delegate` to hand off to a peer agent. Each option becomes a one-click
-button (the string is both the button label and the seed prompt). Provide
-1–2 options; if two, make them meaningfully distinct (e.g. different search
-angles, or executive summary vs. deep-dive).
+Use `delegate` to hand off to a peer agent. Each option is a seed prompt for
+the target agent.
 
 Valid `target` values:
 - **`data_loading`** — the user's question needs data that isn't in the
-  workspace. Options are short search phrases (e.g. `'monthly orders 2024'`).
-  Prefer `clarify` if the workspace tables might already cover the question.
+  workspace. Each option becomes a one-click button (the string is both the
+  button label and the seed prompt). Provide 1–2 short search phrases (e.g.
+  `'monthly orders 2024'`); if two, make them meaningfully distinct (e.g.
+  different search angles). Prefer `clarify` if the workspace tables might
+  already cover the question.
 - **`report_gen`** — the user wants a narrative report or write-up over
-  the charts already produced. Options restate the report style in one
-  short sentence.
+  the charts already produced. This hand-off is **automatic** (no button —
+  the user is not asked to choose), so provide **exactly one** option: a
+  single, well-formed seed prompt for the report agent. 
+  Elaborate that one prompt from the conversation context —
+  name the subject, the angle/focus the user asked for, and which findings or
+  charts it should cover (e.g. `'Write a report on 2024 regional sales,
+  focusing on why the West region outperformed, covering the revenue-by-region
+  and monthly-trend charts'`).
 
 ## Understanding your context
 
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index cf630d78..f08fec27 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -131,6 +131,19 @@ export interface GeneratedReport {
     contentSnapshotHash?: string;
     prompt?: string;
     status?: 'generating' | 'completed' | 'error';
+    generatingPhase?: 'inspecting' | 'writing';  // transient: which phase the agent is in while generating
+    // transient: inspect steps, each flipped to done as its tool call ends; cleared on finish.
+    // `charts` carries lightweight descriptors (chartType for the icon + a
+    // display name) so the editor can render a chart-type icon next to a title
+    // or field list. Kept serializable (no React nodes) for redux-persist.
+    inspectionSteps?: {
+        label: string;
+        doneLabel?: string;   // past-tense label shown once the step completes
+        done: boolean;
+        charts?: { chartType: string; name: string }[];
+        startedAt?: number;   // epoch ms when the tool call started
+        durationMs?: number;  // wall time once the step is done
+    }[];
 }
 
 export interface DataFormulatorState {
@@ -345,6 +358,27 @@ const collectAllCharts = (state: DataFormulatorState): Chart[] => {
     return [...state.charts, ...triggerCharts];
 };
 
+// Category-B encoding-action overrides (e.g. heatmap color scheme) are stored in
+// chart.config keyed by the action key, and composed onto the encoding by the
+// Flint compiler at assemble time (applyEncodingOverrides). When the user
+// re-binds, clears, or swaps a channel that an override declares as a
+// `dependency`, the stored value is stale, so we drop it here. This reset is
+// host-side policy only; Flint never resets — it just composes
+// "override + current encoding". The action's declared dependencies live in the
+// template's EncodingActionDef.
+const resetDependentEncodingOverrides = (chart: Chart, ...changedChannels: Channel[]) => {
+    if (!chart.config) return; // no config object, so no stored overrides to drop
+    const actions = getChartTemplate(chart.chartType)?.encodingActions;
+    if (!actions || actions.length === 0) return; // template declares no encoding actions
+    for (const action of actions) {
+        const deps = action.dependencies;
+        if (!deps) continue; // this action declares no channel dependencies
+        if (changedChannels.some(ch => deps.includes(ch)) && chart.config[action.key] !== undefined) {
+            delete chart.config[action.key]; // mutates the chart's config in place
+        }
+    }
+};
+
 let getUnrefedDerivedTableIds = (state: DataFormulatorState) => {
     // find tables directly referred by charts
     let allCharts = collectAllCharts(state);
@@ -1363,6 +1397,9 @@ export const dataFormulatorSlice = createSlice({
             let chart = collectAllCharts(state).find(c => c.id == chartId);
             if (chart) {
                 chart.encodingMap[channel] = encoding;
+                // The channel's binding changed — drop any Category-B override
+                // that depended on it (see resetDependentEncodingOverrides).
+                resetDependentEncodingOverrides(chart, channel);
                 // Auto-revert to default whenever the user edits the encoding so
                 // the canvas reflects what they're editing. Existing variants
                 // stay in the chip strip (now stale). See
@@ -1416,6 +1453,11 @@ export const dataFormulatorSlice = createSlice({
                     if (encoding.dtype !== value) changed = true;
                     encoding.dtype = value;
                 }
+                // When the user actually edits a channel in the shelf, drop any
+                // Category-B override computed against it (declared via the
+                // action's `dependencies`) so a stale override can't keep
+                // winning over the shelf edit. See resetDependentEncodingOverrides.
+                if (changed) resetDependentEncodingOverrides(chart, channel);
                 // Auto-revert to default when the encoding actually changes
                 // (see above). No-op updates must NOT clear the variant.
                 if (changed && chart.activeVariantId) chart.activeVariantId = undefined;
@@ -1433,6 +1475,8 @@ export const dataFormulatorSlice = createSlice({
 
                 chart.encodingMap[channel1] = { fieldID: enc2.fieldID, aggregate: enc2.aggregate, sortBy: enc2.sortBy, sortOrder: enc2.sortOrder };
                 chart.encodingMap[channel2] = { fieldID: enc1.fieldID, aggregate: enc1.aggregate, sortBy: enc1.sortBy, sortOrder: enc1.sortOrder };
+                // Both channels' bindings changed — drop dependent overrides.
+                resetDependentEncodingOverrides(chart, channel1, channel2);
                 // Auto-revert to default when the encoding changes (see above).
                 if (chart.activeVariantId) chart.activeVariantId = undefined;
             }
@@ -1816,9 +1860,36 @@ export const dataFormulatorSlice = createSlice({
                 report.content = content;
                 if (title) report.title = title;
                 if (status) report.status = status;
+                // Once real report text starts streaming, switch the indicator to
+                // the "writing" phase. When generation ends, clear transient state.
+                if (content) report.generatingPhase = 'writing';
+                if (status === 'completed' || status === 'error') {
+                    report.generatingPhase = undefined;
+                    report.inspectionSteps = undefined;
+                }
                 report.updatedAt = Date.now();
             }
         },
+        updateGeneratedReportProgress: (state, action: PayloadAction<{ id: string; kind: 'start' | 'end'; label?: string; doneLabel?: string; charts?: { chartType: string; name: string }[] }>) => {
+            const { id, kind, label, doneLabel, charts } = action.payload;
+            const report = state.generatedReports.find(r => r.id === id);
+            if (!report) return; // unknown report id: nothing to update
+            report.generatingPhase = 'inspecting'; // set for both 'start' and 'end' events
+            const steps = report.inspectionSteps ?? []; // the first event creates the list
+            if (kind === 'start' && label) { // a 'start' without a label is ignored
+                steps.push({ label, doneLabel, done: false, charts, startedAt: Date.now() });
+            } else if (kind === 'end') {
+                // Flip the first still-pending step to done (FIFO matches the
+                // order the backend emits start/end), so concurrent tool calls
+                // each resolve independently rather than adding a new message.
+                const pending = steps.find(s => !s.done);
+                if (pending) {
+                    pending.done = true;
+                    if (pending.startedAt) pending.durationMs = Date.now() - pending.startedAt; // no duration without a start time
+                }
+            }
+            report.inspectionSteps = steps;
+        },
         clearGeneratedReports: (state) => {
             state.generatedReports = [];
             // Redux Persist will handle persistence automatically
diff --git a/src/app/restyle.ts b/src/app/restyle.ts
index 70c75c62..a760bb79 100644
--- a/src/app/restyle.ts
+++ b/src/app/restyle.ts
@@ -95,7 +95,7 @@ export function buildSpecForRestyle(
         spec = JSON.parse(JSON.stringify(basedOnVariant.vlSpec));
     } else {
         spec = JSON.parse(JSON.stringify(fullSpec));
-        delete spec._computedConfig;
+        delete spec._options;
     }
     delete spec.data;
     return { spec, basedOnVariantId: basedOnVariant?.id, embeddedData };
diff --git a/src/app/tokens.ts b/src/app/tokens.ts
index acd78bd6..4695edb8 100644
--- a/src/app/tokens.ts
+++ b/src/app/tokens.ts
@@ -73,6 +73,24 @@ export const transition = {
     slow: 'all 0.3s ease',
 } as const;
 
+// ── Floating overlay controls ──────────────────────────────────────────
+
+/**
+ * Floating icon-button pill for canvas overlays (chart visualization toolbar,
+ * report action buttons). A `background.paper` fill with a defined border and
+ * a soft shadow so the control stays legible over busy, colorful canvases as
+ * well as plain document backgrounds. Resting state is neutral; spread this
+ * and override `color` / `&:hover` for destructive or active variants.
+ */
+export const floatingPillSx: SxProps = {
+    backgroundColor: 'background.paper',
+    border: `1px solid ${borderColor.divider}`,
+    boxShadow: '0 1px 4px rgba(0,0,0,0.12)',
+    color: 'text.secondary',
+    transition: transition.normal,
+    '&:hover': { backgroundColor: 'action.hover', color: 'primary.main' },
+};
+
 // ── Border radius ──────────────────────────────────────────────────────
 // Values are MUI spacing units (1 unit = 4px via theme.spacing)
 
diff --git a/src/components/ChartTemplates.tsx b/src/components/ChartTemplates.tsx
index ac5c8d93..5385136a 100644
--- a/src/components/ChartTemplates.tsx
+++ b/src/components/ChartTemplates.tsx
@@ -99,25 +99,11 @@ export const CHART_ICONS: Record<string, React.ReactElement> = {
 // Build CHART_TEMPLATES by adding icons to library template defs
 // ---------------------------------------------------------------------------
 
-/** Global properties injected into any template that supports column/row faceting. */
-const FACET_AXIS_PROPERTIES = [
-    {
-        key: 'independentYAxis', label: 'Independent Y-Axis', type: 'binary' as const,
-        visibleWhen: { channels: ['column', 'row'] },
-    },
-];
-
-function addIcons(defs: { chart: string; channels?: string[]; properties?: any[] }[]): ChartTemplate[] {
-    return defs.map(def => {
-        const hasFacetChannels = def.channels?.some(ch => ch === 'column' || ch === 'row');
-        const extraProps = hasFacetChannels ? FACET_AXIS_PROPERTIES : [];
-        const mergedProperties = [...(def.properties || []).filter((p: any) => p.key !== 'independentYAxis'), ...extraProps];
-        return {
-            ...def,
-            properties: mergedProperties,
-            icon: CHART_ICONS[def.chart] || <InsightsIcon />,
-        };
-    }) as ChartTemplate[];
+function addIcons(defs: { chart: string }[]): ChartTemplate[] { // pairs each template def with an icon
+    return defs.map(def => ({
+        ...def,
+        icon: CHART_ICONS[def.chart] || <InsightsIcon />, // generic icon when CHART_ICONS has no entry for this chart
+    })) as ChartTemplate[];
 }
 
 export const CHART_TEMPLATES: { [key: string]: ChartTemplate[] } = Object.fromEntries(
diff --git a/src/i18n/locales/en/chart.json b/src/i18n/locales/en/chart.json
index cd113617..68dd50c2 100644
--- a/src/i18n/locales/en/chart.json
+++ b/src/i18n/locales/en/chart.json
@@ -19,6 +19,7 @@
     "saveCopy": "save a copy",
     "duplicate": "duplicate the chart",
     "delete": "delete",
+    "deleteChart": "delete chart",
     "sampleSize": "Sample size",
     "sampleSizeAria": "Sample size",
     "sampleAgain": "sample again!",
diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index 784e6123..aea3505d 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -194,6 +194,8 @@
   "report": {
     "deleteReport": "Delete report",
     "backToEditor": "Back to editor",
+    "editReport": "Edit report",
+    "doneEditing": "Done editing",
     "createChartifactReport": "Create Chartifact report",
     "shareReportAsImage": "Share report as image",
     "couldNotFindContent": "Could not find report content to capture",
@@ -235,6 +237,12 @@
     "createChartifact": "Create Chartifact",
     "copied": "Copied!",
     "copyContent": "Copy content",
+    "contentCopied": "Report content copied to clipboard.",
+    "inspectingCharts": "inspecting charts...",
+    "inspectedCharts": "inspected charts",
+    "downloadAndShare": "Download & share",
+    "saveAsImage": "Save as image",
+    "downloadPdf": "Download PDF",
     "imageActions": "Image",
     "copyImage": "Copy image to clipboard",
     "downloadPng": "Download PNG",
@@ -425,6 +433,7 @@
     "runningCode": "running code...",
     "creatingChart": "creating chart...",
     "inspectingData": "inspecting source data...",
+    "inspectedData": "inspected source data",
     "rulesLoaded": "Reading rules: {{rules}}",
     "knowledgeLoaded": "Reading knowledge: {{knowledge}}",
     "searching": "searching...",
@@ -781,7 +790,8 @@
     "numberedList": "Numbered List",
     "quote": "Quote",
     "generating": "Generating…",
-    "writingReport": "Writing your report…"
+    "writingReport": "Writing your report…",
+    "workingTitle": "Working on your report"
   },
   "sidebar": {
     "openDataSources": "Data Sources",
diff --git a/src/i18n/locales/zh/chart.json b/src/i18n/locales/zh/chart.json
index fb54428d..d08ce521 100644
--- a/src/i18n/locales/zh/chart.json
+++ b/src/i18n/locales/zh/chart.json
@@ -19,6 +19,7 @@
     "saveCopy": "保存副本",
     "duplicate": "复制图表",
     "delete": "删除",
+    "deleteChart": "删除图表",
     "sampleSize": "样本大小",
     "sampleSizeAria": "样本大小",
     "sampleAgain": "重新采样！",
diff --git a/src/i18n/locales/zh/common.json b/src/i18n/locales/zh/common.json
index ee0ff177..354c3bbc 100644
--- a/src/i18n/locales/zh/common.json
+++ b/src/i18n/locales/zh/common.json
@@ -194,6 +194,8 @@
   "report": {
     "deleteReport": "删除报告",
     "backToEditor": "返回编辑器",
+    "editReport": "编辑报告",
+    "doneEditing": "完成编辑",
     "createChartifactReport": "创建 Chartifact 报告",
     "shareReportAsImage": "以图片分享报告",
     "couldNotFindContent": "无法找到要截取的报告内容",
@@ -235,6 +237,12 @@
     "createChartifact": "创建 Chartifact",
     "copied": "已复制！",
     "copyContent": "复制内容",
+    "contentCopied": "报告内容已复制到剪贴板。",
+    "inspectingCharts": "检查图表中...",
+    "inspectedCharts": "已检查图表",
+    "downloadAndShare": "下载与分享",
+    "saveAsImage": "保存为图片",
+    "downloadPdf": "下载 PDF",
     "imageActions": "图片",
     "copyImage": "复制图片到剪贴板",
     "downloadPng": "下载 PNG",
@@ -425,6 +433,7 @@
     "runningCode": "运行代码中...",
     "creatingChart": "生成图表中...",
     "inspectingData": "检查数据源中...",
+    "inspectedData": "已检查数据源",
     "rulesLoaded": "读取规则：{{rules}}",
     "knowledgeLoaded": "读取知识：{{knowledge}}",
     "searching": "搜索中...",
@@ -781,7 +790,8 @@
     "numberedList": "有序列表",
     "quote": "引用",
     "generating": "生成中…",
-    "writingReport": "正在撰写报告…"
+    "writingReport": "正在撰写报告…",
+    "workingTitle": "正在处理你的报告"
   },
   "sidebar": {
     "openDataSources": "数据源",
diff --git a/src/lib/agents-chart/chartjs/assemble.ts b/src/lib/agents-chart/chartjs/assemble.ts
index c77cab0f..8f06eb00 100644
--- a/src/lib/agents-chart/chartjs/assemble.ts
+++ b/src/lib/agents-chart/chartjs/assemble.ts
@@ -32,6 +32,7 @@ import {
     InstantiateContext,
 } from '../core/types';
 import type { ChartWarning } from '../core/types';
+import { applyEncodingOverrides } from '../core/encoding-overrides';
 import { cjsGetTemplateDef } from './templates';
 import { resolveChannelSemantics, convertTemporalData } from '../core/resolve-semantics';
 import { computeZeroDecision } from '../core/semantic-types';
@@ -60,7 +61,7 @@ import { cjsApplyLayoutToSpec, cjsApplyTooltips } from './instantiate-spec';
  */
 export function assembleChartjs(input: ChartAssemblyInput): any {
     const chartType = input.chart_spec.chartType;
-    const encodings = input.chart_spec.encodings;
+    const rawEncodings = input.chart_spec.encodings;
     const data = input.data.values ?? [];
     const semanticTypes = input.semantic_types ?? {};
     const canvasSize = input.chart_spec.canvasSize ?? { width: 400, height: 320 };
@@ -71,6 +72,12 @@ export function assembleChartjs(input: ChartAssemblyInput): any {
         throw new Error(`Unknown Chart.js chart type: ${chartType}. Use cjsAllTemplateDefs to see available types.`);
     }
 
+    // Compose Category-B encoding-action overrides (stored by the host in
+    // chartProperties, keyed by action key) onto the base encodings before any
+    // pipeline phase runs. Flint owns the transform; the host only stores the
+    // override value. See applyEncodingOverrides / EncodingActionDef.
+    const encodings = applyEncodingOverrides(chartTemplate, rawEncodings, chartProperties);
+
     const warnings: ChartWarning[] = [];
 
     // ═══════════════════════════════════════════════════════════════════════
diff --git a/src/lib/agents-chart/core/compute-layout.ts b/src/lib/agents-chart/core/compute-layout.ts
index 35396374..9121cb41 100644
--- a/src/lib/agents-chart/core/compute-layout.ts
+++ b/src/lib/agents-chart/core/compute-layout.ts
@@ -69,26 +69,53 @@ import {
 const VL_SHORT_DISCRETE_CATEGORY_COUNT = 4;
 const VL_SHORT_DISCRETE_LABEL_MAX_LEN = 8;
 
-/** Few, short category strings → skip angled axis labels in Vega-Lite (config.axisX/Y). */
-function discreteAxisShouldUseHorizontalLabels(
+/** Approximate width of one label character as a fraction of the font size (px ≈ fontSize × ratio). */
+const APPROX_CHAR_WIDTH_RATIO = 0.62;
+
+/** Summary stats over the distinct, non-empty label strings of a discrete axis field. */
+interface DiscreteLabelStats {
+    count: number;
+    maxLen: number;
+    /** True when every label parses as a finite number (e.g. years, bins, IDs). */
+    allNumeric: boolean;
+}
+
+function computeDiscreteLabelStats(
     field: string | undefined,
-    channelType: string | undefined,
     table: any[],
-): boolean {
-    if (!field) return false;
-    if (channelType === 'quantitative') return true;
-
+): DiscreteLabelStats | null {
+    if (!field) return null;
     const uniques = new Set<string>();
     for (const row of table) {
         const v = row[field];
         if (v == null || v === '') continue;
         uniques.add(String(v));
     }
-    if (uniques.size === 0) return false;
+    if (uniques.size === 0) return null;
     const labels = [...uniques];
-    if (labels.length > VL_SHORT_DISCRETE_CATEGORY_COUNT) return false;
-    const maxLen = Math.max(...labels.map(s => s.length));
-    return maxLen <= VL_SHORT_DISCRETE_LABEL_MAX_LEN;
+    return {
+        count: labels.length,
+        maxLen: Math.max(...labels.map(s => s.length)),
+        allNumeric: labels.every(s => s.trim() !== '' && isFinite(Number(s))),
+    };
+}
+
+/**
+ * Few, short category strings → keep axis labels horizontal in Vega-Lite. Used
+ * for the Y axis, where banded labels read horizontally in the left margin
+ * regardless of band height (so channels typed quantitative always stay horizontal).
+ */
+function discreteYAxisShouldUseHorizontalLabels(
+    field: string | undefined,
+    channelType: string | undefined,
+    table: any[],
+): boolean {
+    if (!field) return false;
+    if (channelType === 'quantitative') return true;
+    const stats = computeDiscreteLabelStats(field, table);
+    if (!stats) return false;
+    if (stats.count > VL_SHORT_DISCRETE_CATEGORY_COUNT) return false;
+    return stats.maxLen <= VL_SHORT_DISCRETE_LABEL_MAX_LEN;
 }
 
 // ---------------------------------------------------------------------------
@@ -783,20 +810,46 @@ export function computeLayout(
     if (xHasDiscreteItems) {
         const xf = channelSemantics.x?.field;
         const xt = effectiveTypes.x || channelSemantics.x?.type;
-        if (discreteAxisShouldUseHorizontalLabels(xf, xt, table)) {
-            // Must be explicit: omitting labelAngle leaves VL defaults (e.g. -45° on ordinal).
-            xLabel = {
-                ...xLabel,
-                labelAngle: 0,
-                labelAlign: 'center',
-                labelBaseline: 'top',
-            };
+        const stats = computeDiscreteLabelStats(xf, table);
+        if (stats) {
+            // Numeric-like labels (declared quantitative, or all values parse as
+            // numbers — years, bins, IDs) compete for the band's width when laid
+            // out horizontally. A continuous field split into many narrow bands
+            // yields many/wide numbers that crowd. Decide horizontal vs. angled
+            // by whether the widest label fits within one band.
+            const numericLike = xt === 'quantitative' || stats.allNumeric;
+            const labelPx = stats.maxLen * xLabel.fontSize * APPROX_CHAR_WIDTH_RATIO;
+            const fitsHorizontally = labelPx <= xStepSize;
+            const fewShortStrings = !numericLike
+                && stats.count <= VL_SHORT_DISCRETE_CATEGORY_COUNT
+                && stats.maxLen <= VL_SHORT_DISCRETE_LABEL_MAX_LEN;
+
+            if (fewShortStrings || (numericLike && fitsHorizontally)) {
+                // Must be explicit: omitting labelAngle leaves VL defaults (e.g. -45° on ordinal).
+                xLabel = {
+                    ...xLabel,
+                    labelAngle: 0,
+                    labelAlign: 'center',
+                    labelBaseline: 'top',
+                };
+            } else if (numericLike && !fitsHorizontally && xLabel.labelAngle === undefined) {
+                // Numeric labels that don't fit horizontally and weren't already
+                // rotated by step-based sizing (which only rotates at narrow
+                // steps). Without this, VL keeps them horizontal and the numbers
+                // overlap. Rotate to -45°.
+                xLabel = {
+                    ...xLabel,
+                    labelAngle: -45,
+                    labelAlign: 'right',
+                    labelBaseline: 'top',
+                };
+            }
         }
     }
     if (yHasDiscreteItems) {
         const yf = channelSemantics.y?.field;
         const yt = effectiveTypes.y || channelSemantics.y?.type;
-        if (discreteAxisShouldUseHorizontalLabels(yf, yt, table)) {
+        if (discreteYAxisShouldUseHorizontalLabels(yf, yt, table)) {
             yLabel = {
                 ...yLabel,
                 labelAngle: 0,
diff --git a/src/lib/agents-chart/core/encoding-actions.ts b/src/lib/agents-chart/core/encoding-actions.ts
new file mode 100644
index 00000000..21763252
--- /dev/null
+++ b/src/lib/agents-chart/core/encoding-actions.ts
@@ -0,0 +1,110 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+import type { ChartEncoding, EncodingActionDef } from './types';
+
+/**
+ * Reusable factories for Category-B encoding actions (see EncodingActionDef).
+ *
+ * These are authored once and attached to many templates, so the per-chart
+ * knowledge (which channel is the category axis, which carries the measure)
+ * lives in one place instead of being re-implemented per template.
+ */
+
+/** The semantic sort choices the Sort control exposes. */
+export type SortChoice = 'value-asc' | 'value-desc';
+
+// A measure is a quantitative channel or any aggregated channel.
+const isMeasureEnc = (e?: ChartEncoding): boolean =>
+    !!e?.field && (!!e.aggregate || e.type === 'quantitative');
+
+// A sortable category axis is discrete (nominal/ordinal). Temporal axes are
+// deliberately excluded: reordering a time axis by value scrambles the
+// chronology, so Sort should not apply to them.
+const isDiscreteCategoryEnc = (e?: ChartEncoding): boolean =>
+    !!e?.field && !e.aggregate && e.type !== 'quantitative' && e.type !== 'temporal';
+
+/**
+ * Identify the discrete category axis and the measure axis among a pair of
+ * position channels, so Sort works under either orientation (vertical or
+ * horizontal) and only when a discrete axis actually exists.
+ *
+ * Returns `null` when there is no discrete category + measure pair to sort —
+ * e.g. a temporal-x time series, or two quantitative axes (scatter). Callers
+ * use this both to gate visibility and to no-op safely.
+ */
+function resolveSortChannels(
+    encodings: Record<string, ChartEncoding>,
+    candidates: [string, string],
+): { category: string; measure: string } | null {
+    const category = candidates.find(c => isDiscreteCategoryEnc(encodings[c]));
+    const measure = candidates.find(c => isMeasureEnc(encodings[c]));
+    if (!category || !measure || category === measure) return null;
+    return { category, measure };
+}
+
+/**
+ * Sort the category axis of a bar-like chart by the measure value.
+ *
+ * Encoding model: a value sort writes `sortBy = <measure channel>` (one of
+ * 'x' | 'y', which the assembler understands) on the category channel.
+ * "Default" clears the sort so the field's canonical ordering wins — the
+ * natural order for ordinal/temporal-like categories, or alphabetic otherwise,
+ * as decided by semantic resolution. The action is only applicable — and only
+ * visible — when one position channel is a discrete category and the other is
+ * a measure.
+ *
+ * @param channels Position-channel pair (default ['x', 'y']); the orientation
+ *                 (which one is the category) is resolved per-encoding at runtime.
+ */
+export function makeSortAction(options?: {
+    key?: string;
+    label?: string;
+    channels?: [string, string];
+}): EncodingActionDef {
+    const candidates = options?.channels ?? ['x', 'y'];
+    return {
+        key: options?.key ?? 'sort',
+        label: options?.label ?? 'Sort',
+        dependencies: candidates,
+        isApplicable: (ctx) => resolveSortChannels(ctx.encodings, candidates) !== null,
+        control: {
+            type: 'discrete',
+            options: [
+                { value: undefined, label: 'Default' },
+                { value: 'value-desc', label: 'Value ↓' },
+                { value: 'value-asc', label: 'Value ↑' },
+            ],
+        },
+        get: (encodings) => {
+            const resolved = resolveSortChannels(encodings, candidates);
+            if (!resolved) return undefined;
+            const { category, measure } = resolved;
+            const enc = encodings[category];
+            if (enc.sortBy === measure) {
+                return enc.sortOrder === 'descending' ? 'value-desc' : 'value-asc';
+            }
+            // Any other sort (label order, custom value order, sort-by-color)
+            // isn't representable by this control → show as Default.
+            return undefined;
+        },
+        set: (encodings, value: SortChoice | undefined) => {
+            const resolved = resolveSortChannels(encodings, candidates);
+            if (!resolved) return encodings;
+            const { category, measure } = resolved;
+            const base = encodings[category];
+            let next: ChartEncoding;
+            switch (value) {
+                case 'value-asc':
+                    next = { ...base, sortBy: measure, sortOrder: 'ascending' };
+                    break;
+                case 'value-desc':
+                    next = { ...base, sortBy: measure, sortOrder: 'descending' };
+                    break;
+                default:
+                    next = { ...base, sortBy: undefined, sortOrder: undefined };
+            }
+            return { ...encodings, [category]: next };
+        },
+    };
+}
diff --git a/src/lib/agents-chart/core/encoding-overrides.ts b/src/lib/agents-chart/core/encoding-overrides.ts
new file mode 100644
index 00000000..2506f0cd
--- /dev/null
+++ b/src/lib/agents-chart/core/encoding-overrides.ts
@@ -0,0 +1,44 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+import type { ChartEncoding, ChartTemplateDef } from './types';
+
+/**
+ * Compose a template's encoding-action overrides onto the base encodings.
+ *
+ * Category-B quick options (sort, color scheme, aggregate, orientation, …) are
+ * stored by the host as *configuration overrides* keyed by the action's `key`
+ * inside `chartProperties` — exactly like a chart property. They are NOT written
+ * into the encoding map. This function is where the compiler composes them:
+ * for each `encodingAction` whose override is present, it applies the action's
+ * `set(encodings, value)` to produce the transformed encodings that feed the
+ * rest of assembly.
+ *
+ * Backends call this once, at the very top of `assemble`, so every downstream
+ * phase (semantic resolution → overflow → layout → instantiate) — and the
+ * `InstantiateContext.encodings` handed to templates — sees the transformed
+ * encodings. The base `encodings` argument is never mutated.
+ *
+ * An absent override (`undefined`) means "no override" and is skipped, so the
+ * base encoding value (whatever the encoding shelf set, if anything) stands.
+ * Because the override key matches the action key, charts saved before this
+ * mechanism — which stored e.g. `chartProperties.colorScheme` directly — are
+ * picked up automatically with no separate legacy fallback.
+ */
+export function applyEncodingOverrides(
+    template: ChartTemplateDef,
+    encodings: Record<string, ChartEncoding>,
+    chartProperties?: Record<string, any>,
+): Record<string, ChartEncoding> {
+    const actions = template.encodingActions;
+    if (!actions || actions.length === 0 || !chartProperties) return encodings;
+
+    let result = encodings;
+    for (const action of actions) {
+        const override = chartProperties[action.key];
+        if (override !== undefined) {
+            result = action.set(result, override);
+        }
+    }
+    return result;
+}
diff --git a/src/lib/agents-chart/core/field-semantics.ts b/src/lib/agents-chart/core/field-semantics.ts
index 2fc06233..6ccd9a50 100644
--- a/src/lib/agents-chart/core/field-semantics.ts
+++ b/src/lib/agents-chart/core/field-semantics.ts
@@ -501,12 +501,15 @@ export function resolveZeroClassFromAnnotation(
  * Recommend a scale type based on semantic type and data distribution.
  *
  * Conservative policy — only triggers when ALL of these hold:
- *   1. The semantic type is in the ALLOW-list (Population, GDP, etc.)
- *   2. Data spans ≥ 4 orders of magnitude (10 000×)
- *   3. At least 10 data points
+ *   1. The semantic type is an additive measure with an open domain and is
+ *      not a generic fallback (i.e. Amount, Quantity, Duration — types whose
+ *      magnitude is meaningful and can legitimately span many decades).
+ *   2. Data spans ≥ 6 orders of magnitude (1,000,000×).
+ *   3. At least 10 data points, all non-negative.
  *
- * This avoids surprising users on normal datasets while still helping
- * with genuinely wide-range data like city populations or GDP figures.
+ * This intentionally almost never fires on everyday data; it only helps with
+ * genuinely wide-range additive measures. When it does not fire the axis stays
+ * linear, and the user can still opt into log via the per-axis quick control.
  */
 export function resolveScaleType(
     semanticType: string,
diff --git a/src/lib/agents-chart/core/index.ts b/src/lib/agents-chart/core/index.ts
index 00ddbf33..4cf5d5d6 100644
--- a/src/lib/agents-chart/core/index.ts
+++ b/src/lib/agents-chart/core/index.ts
@@ -28,12 +28,21 @@ export {
     type LayoutResult,
     type InstantiateContext,
     type ChartPropertyDef,
+    type ChartOption,
+    type OptionEvalContext,
+    type EncodingActionDef,
     type OverflowStrategy,
     type OverflowStrategyContext,
     type OverflowResult,
     type ChannelBudgets,
 } from './types';
 
+// Encoding-action override composition
+export { applyEncodingOverrides } from './encoding-overrides';
+
+// Reusable encoding-action factories
+export { makeSortAction, type SortChoice } from './encoding-actions';
+
 // Semantic type system
 export {
     SemanticTypes,
diff --git a/src/lib/agents-chart/core/semantic-types.ts b/src/lib/agents-chart/core/semantic-types.ts
index 6bf58f19..0baee84d 100644
--- a/src/lib/agents-chart/core/semantic-types.ts
+++ b/src/lib/agents-chart/core/semantic-types.ts
@@ -371,6 +371,33 @@ export interface ZeroDecision {
     domainPadFraction: number;
     /** The zero class that drove this decision */
     zeroClass: ZeroClass | 'unknown';
+    /**
+     * Whether this is a *forced* (non-debatable) decision:
+     *   - `true`  → non-debatable: zero is structurally required (e.g. a bar-like
+     *     baseline or data that touches/crosses zero), or the semantic class is unknown.
+     *   - `false` → the engine still has a recommended `zero`, but anchoring at
+     *     zero is at least conceptually a choice.
+     * `forced` records the structural side of the decision; it is NOT the gate
+     * for the UI toggle — see `uncertain` below.
+     */
+    forced: boolean;
+    /**
+     * Whether the zero-vs-fit choice is a *genuine toss-up worth surfacing* to
+     * the user. Hosts read this (via the property `check`) to decide whether to
+     * show the "Zero X/Y" toggle at all.
+     *
+     * We deliberately keep this narrow to avoid UI clutter: it is `true` ONLY
+     * for a zero-meaningful field on a position mark whose data sits far enough
+     * from zero that anchoring at zero would noticeably compress the view (a
+     * real zoom-in-vs-anchor tradeoff). Every other case — arbitrary types
+     * (zero is meaningless, just fit the data), contextual types (the engine's
+     * data-range call is confident enough), meaningful types whose data already
+     * spans most of the way to zero (the choice barely changes anything), and
+     * all forced/unknown cases — is `false`, so no toggle is shown and the
+     * engine's `zero` value simply applies. The engine's `zero` remains the
+     * recommended default when the toggle is shown.
+     */
+    uncertain: boolean;
 }
 
 // zeroMeaningfulTypes, zeroArbitraryTypes, zeroContextualTypes, zeroPadMap:
@@ -400,6 +427,30 @@ export function getZeroClass(semanticType: string): ZeroClass | 'unknown' {
  * @param markType      The mark type ('bar', 'line', 'point', etc.)
  * @param values        Optional numeric data values for data-range analysis
  */
+/**
+ * At or above this ratio of dataMin/dataMax, the data band sits far enough above
+ * zero that anchoring the axis at zero would leave at least half the axis
+ * empty — a big enough gap that "zoom into the data" vs "keep the zero
+ * reference" is a genuine toss-up worth offering as a toggle. Below it, the
+ * data already spans most of the way to zero, so including zero barely changes
+ * the view and we keep it on silently.
+ */
+const ZERO_BASELINE_GAP_THRESHOLD = 0.5;
+
+/**
+ * True when strictly-positive data sits far enough from zero that anchoring at
+ * zero would noticeably compress the view (see ZERO_BASELINE_GAP_THRESHOLD).
+ * Returns false for empty data or any data that touches/crosses zero (there the
+ * baseline is inside the data range, so it is not a debatable gap).
+ */
+function dataFarFromZero(values?: number[]): boolean {
+    if (!values || values.length === 0) return false;
+    const dataMin = Math.min(...values);
+    const dataMax = Math.max(...values);
+    if (dataMin <= 0 || dataMax <= 0) return false;
+    return dataMin / dataMax >= ZERO_BASELINE_GAP_THRESHOLD;
+}
+
 export function computeZeroDecision(
     semanticType: string,
     channel: string,
@@ -411,24 +462,47 @@ export function computeZeroDecision(
     const entry = getRegistryEntry(semanticType);
     const zeroClass = getZeroClass(semanticType);
 
-    // --- Zero-meaningful types: always zero ---
+    // --- Zero-meaningful types: zero is the conventional baseline ---
     if (zeroClass === 'meaningful') {
-        return { zero: true, domainPadFraction: 0, zeroClass };
+        // Length marks (bar/area/rect): the baseline is structurally required —
+        // a bar's length is meaningless without zero. Not debatable.
+        if (isBarLike) {
+            return { zero: true, domainPadFraction: 0, zeroClass, forced: true, uncertain: false };
+        }
+        // Position marks (line/point/strip): zero is the conventional reference,
+        // so the recommended default is ON. We only *offer* the toggle when the
+        // data sits far enough from zero that anchoring at zero would noticeably
+        // compress the view — a genuine zoom-in-vs-keep-the-reference toss-up.
+        // When the data already spans most of the way to zero, the choice barely
+        // changes anything, so we keep zero on silently and hide the toggle.
+        return {
+            zero: true,
+            domainPadFraction: 0,
+            zeroClass,
+            forced: false,
+            uncertain: dataFarFromZero(values),
+        };
     }
 
     // --- Zero-arbitrary types: never zero, apply padding ---
     if (zeroClass === 'arbitrary') {
-        // Exception: bar/area marks with data that touches/crosses zero
+        // Exception: bar/area marks with data that touches/crosses zero —
+        // the baseline is structurally required, so this is forced.
         if (isBarLike && values && values.length > 0) {
             const dataMin = Math.min(...values);
             if (dataMin <= 0) {
-                return { zero: true, domainPadFraction: 0, zeroClass };
+                return { zero: true, domainPadFraction: 0, zeroClass, forced: true, uncertain: false };
             }
         }
+        // Strictly away from zero on an arbitrary scale: zero is meaningless
+        // here, so data-fit is simply the right answer — there is nothing to
+        // debate and no toggle is offered.
         return {
             zero: false,
             domainPadFraction: entry.zeroPad || 0.05,
             zeroClass,
+            forced: false,
+            uncertain: false,
         };
     }
 
@@ -437,33 +511,36 @@ export function computeZeroDecision(
         const dataMin = Math.min(...values);
         const dataMax = Math.max(...values);
 
-        // Data touches/crosses zero → include it
+        // Data touches/crosses zero → include it (forced: the baseline is
+        // inside the data range).
         if (dataMin <= 0) {
-            return { zero: true, domainPadFraction: 0, zeroClass };
+            return { zero: true, domainPadFraction: 0, zeroClass, forced: true, uncertain: false };
         }
 
         // How far is data from zero?
         const proximity = dataMax > 0 ? dataMin / dataMax : 0;
 
-        // Close to zero → include it
+        // Close to zero → include it. The engine's data-range call is confident
+        // enough here, so no toggle is offered.
         if (proximity < 0.3) {
-            return { zero: true, domainPadFraction: 0, zeroClass };
+            return { zero: true, domainPadFraction: 0, zeroClass, forced: false, uncertain: false };
         }
 
-        // Far from zero + bar/area → still include (bar length integrity)
+        // Far from zero + bar/area → still include (bar length integrity, forced).
         if (isBarLike) {
-            return { zero: true, domainPadFraction: 0, zeroClass };
+            return { zero: true, domainPadFraction: 0, zeroClass, forced: true, uncertain: false };
         }
 
-        // Far from zero + non-bar → data-fit with padding
-        return { zero: false, domainPadFraction: 0.05, zeroClass };
+        // Far from zero + non-bar → data-fit with padding (engine's call, no toggle).
+        return { zero: false, domainPadFraction: 0.05, zeroClass, forced: false, uncertain: false };
     }
 
     // --- No semantic type or unrecognized → no opinion, let VL decide ---
+    // Unknown class is never debatable: we have no basis for a toggle.
     if (isBarLike && isPositional) {
-        return { zero: true, domainPadFraction: 0, zeroClass: 'unknown' };
+        return { zero: true, domainPadFraction: 0, zeroClass: 'unknown', forced: true, uncertain: false };
     }
-    return { zero: false, domainPadFraction: 0.05, zeroClass: 'unknown' };
+    return { zero: false, domainPadFraction: 0.05, zeroClass: 'unknown', forced: true, uncertain: false };
 }
 
 /**
diff --git a/src/lib/agents-chart/core/types.ts b/src/lib/agents-chart/core/types.ts
index 7f2da084..7f3d1803 100644
--- a/src/lib/agents-chart/core/types.ts
+++ b/src/lib/agents-chart/core/types.ts
@@ -444,20 +444,189 @@ export interface InstantiateContext {
 // Chart Template
 // ---------------------------------------------------------------------------
 
+/**
+ * The minimal, render-time context an option's applicability check reads.
+ *
+ * Shared by both option families so they use one predicate convention:
+ *   - `ChartPropertyDef.check` (Category A, data-aware properties)
+ *   - `EncodingActionDef.isApplicable` (Category B, encoding actions)
+ *
+ * `encodings` is always present (it's all a host needs to gate an encoding
+ * action). The remaining fields are populated by the compiler during assembly
+ * and let data-aware *properties* inspect the actual values + resolved
+ * semantics; a predicate that only reads `encodings` (e.g. "is color bound?")
+ * works with the bare `{ encodings }` a host can build on its own.
+ */
+export interface OptionEvalContext {
+    /** User-level encodings (channel → field binding). Always present. */
+    encodings: Record<string, ChartEncoding>;
+    /** Per-channel semantic decisions (Phase 0). Present during assembly. */
+    channelSemantics?: Record<string, ChannelSemantics>;
+    /** Full (pre-overflow) data rows, for data-aware preconditions. */
+    data?: any[];
+    /** Current user-set chart property overrides. */
+    chartProperties?: Record<string, any>;
+}
+
 /**
  * Defines a configurable property for a chart template.
  * Describes the value domain; the app decides how to render it.
  */
-export type ChartPropertyDef = {
-    key: string;
-    label: string;
-    /** Optional predicate: show this property only when certain encoding channels are assigned. */
-    visibleWhen?: { channels: string[] };
-} & (
+
+/** The value-domain variants a property can take (the discriminated arm). */
+export type ChartPropertyVariant =
     | { type: 'continuous'; min: number; max: number; step?: number; defaultValue?: number }
     | { type: 'discrete';  options: { value: any; label: string }[]; defaultValue?: any }
-    | { type: 'binary';    defaultValue?: boolean }
-);
+    | { type: 'binary';    defaultValue?: boolean };
+
+/**
+ * The renderable descriptor of a property: its identity, label, and value
+ * domain. This is the part a host needs to draw a control, and it is shared
+ * verbatim by both sides of the Flint↔host boundary:
+ *
+ *   - `ChartPropertyDef`  = `ChartProperty` + the applicability *rule* (`check`)
+ *   - `ChartOption`       = `ChartProperty` + the resolved *answer* (`applicable`/`value`)
+ *
+ * Keeping the descriptor common means the template definition and the resolved
+ * option never drift in shape; they differ only by rule-vs-answer.
+ */
+export type ChartProperty = {
+    key: string;
+    label: string;
+} & ChartPropertyVariant;
+
+export type ChartPropertyDef = ChartProperty & {
+    /**
+     * The single applicability check for this property, co-located with it so a
+     * reader sees *why* an option is offered without digging into the compiler.
+     * Pure — reads only `OptionEvalContext` — and returns:
+     *   - `applicable`: is this property worth offering for the current spec +
+     *     data? It subsumes both structural gates (a channel is bound, e.g.
+     *     `!!ctx.encodings.color?.field`) and data-aware ones (a wide-range axis,
+     *     an additive single-sign measure, …). A property with no `check`
+     *     is always offered.
+     *   - `recommendedValue` (optional): the engine's suggested default, used to
+     *     seed the control when the host hasn't set an explicit value.
+     *
+     * Because it requires no live data to answer a structural check, a static
+     * host (the encoding-shelf popover) can call it with just `{ encodings }`;
+     * a data-aware property then reports `applicable: false` there — surfacing
+     * only in the data-aware quick-config bar — without needing a separate flag.
+     */
+    check?: (ctx: OptionEvalContext) => { applicable: boolean; recommendedValue?: any };
+};
+
+/**
+ * A chart property descriptor annotated with its applicability and resolved
+ * value for a *specific* spec + dataset. Produced by `getChartOptions` (and
+ * carried on the assembled spec under `_options`).
+ *
+ * This is the contract between Flint and any host (Data Formulator, an AI agent,
+ * another renderer):
+ *
+ *   - `applicable` — did this property pass its precondition for this render?
+ *     Each property answers via its own `check`: structural ones (e.g. stack
+ *     mode) are applicable when their channel is bound; data-aware ones (e.g.
+ *     per-axis log scale, faceted independent y) only when the data warrants it
+ *     (wide-range continuous axis, faceted quantitative y, …). A host should
+ *     surface a control only when it is applicable; passing a non-applicable
+ *     property to the compiler is accepted but silently ignored.
+ *   - `value` — the value Flint will actually use: the host's explicit choice
+ *     (from `chart_spec.chartProperties[key]`) when set, otherwise the engine's
+ *     recommended default. Hosts seed their control from this so an "auto"
+ *     recommendation (e.g. log on a 10⁶× axis) is reflected without the host
+ *     having to recompute it.
+ *
+ * A `ChartOption` shares the renderable `ChartProperty` descriptor with the
+ * template def but carries the *answer* (`applicable`/`value`) instead of the
+ * *rule* (`check`). That keeps it a resolved, serializable view a host consumes
+ * across the spec/JSON boundary (Python path included), where the rule function
+ * wouldn't survive anyway.
+ */
+export type ChartOption = ChartProperty & {
+    /** Did this property pass its precondition for the current spec + data? */
+    applicable: boolean;
+    /** Explicit host choice if set, otherwise the engine's recommended default. */
+    value: any;
+};
+
+
+/**
+ * Defines a "quick action" whose effect is an **encoding transform** (Category B):
+ * sort, color scheme, aggregate, type, orientation (x↔y swap), etc.
+ *
+ * These operate at a different pipeline stage than ChartPropertyDef:
+ *
+ *   Category B (this type):  (encoding + override) ──► transformed encoding ──► assemble ──► spec
+ *                                         └──── set() ────┘
+ *   Category A (properties): encoding ──► assemble ──► spec ──► (props tweak spec in instantiate)
+ *
+ * An encoding action transforms the *input* to assembly, so the full pipeline
+ * (semantic resolution → overflow → layout → assembly) re-runs on the result.
+ * That is exactly why structural options must live here: sort changes which
+ * categories survive overflow, aggregate changes the data values, orientation
+ * changes which axis is banded — none of which can be faked by patching the
+ * assembled spec afterwards. ChartPropertyDef, by contrast, only overrides the
+ * already-assembled spec and is limited to visual decoration (cornerRadius,
+ * opacity, curve, donut hole).
+ *
+ * Storage = override, not encoding state. The action's value is stored by the
+ * host as a *configuration override* (exactly like a chart property), keyed by
+ * `key` inside `chart_spec.chartProperties`. The encoding map (the encoding
+ * shelf's state) is left untouched. The compiler — not the host — applies the
+ * override at assemble time:
+ *
+ *   transformedEncodings = set(currentEncodings, chartProperties[key])
+ *
+ * So Flint always sees just "override value + current encoding" and composes
+ * them; it never mutates persistent encoding state. (See applyEncodingOverrides.)
+ *
+ *   get(encodings)        → derive the control's displayed value from the base
+ *                           encodings when no override is set
+ *   set(encodings, value) → compose: return the encodings with the override applied
+ *
+ * `set` is declarative: it returns what the encodings should be after the
+ * override, not a list of imperative operations. Any transform — changing one
+ * property, swapping two channels, clearing a channel — is just "produce a new
+ * map", so there is no operation taxonomy to grow.
+ *
+ * `dependencies` declares which encoding channels the override is computed
+ * against. It is a pure declaration consumed by the *host*: when the user edits
+ * one of these channels in the encoding shelf, the host clears (resets) the
+ * override so a stale value can't linger. Flint never resets — reset is host
+ * logic; Flint only ever composes override + current encoding.
+ *
+ * The control shape mirrors ChartPropertyDef so the host can reuse the same
+ * renderers; only the pipeline stage differs (encoding transform vs spec tweak).
+ */
+export type EncodingActionDef = {
+    key: string;
+    label: string;
+    /**
+     * Channels this override is computed against. When the host detects an edit
+     * to any of these channels in the encoding shelf, it resets this override to
+     * default. Pure declaration — Flint itself never reads this for composition.
+     */
+    dependencies?: string[];
+    /** How to render the control (same value domains as ChartPropertyDef). */
+    control:
+        | { type: 'continuous'; min: number; max: number; step?: number }
+        | { type: 'discrete';  options: { value: any; label: string }[] }
+        | { type: 'binary' };
+    /**
+     * Optional applicability predicate — the single gate for whether this action
+     * is offered. It reads the shared `OptionEvalContext`; in practice an action
+     * only needs `ctx.encodings`, so it subsumes both channel-assignment checks
+     * (is a channel bound? e.g. `!!ctx.encodings.color?.field`) and type checks
+     * (e.g. Sort needs a discrete category axis, so it must not appear on a
+     * purely temporal/quantitative chart). Pure. Defaults to always-applicable.
+     */
+    isApplicable?: (ctx: OptionEvalContext) => boolean;
+    /** Derive the displayed control value from the base encodings map (pure). */
+    get: (encodings: Record<string, ChartEncoding>) => any;
+    /** Compose: return the encodings with this override value applied (pure). */
+    set: (encodings: Record<string, ChartEncoding>, value: any) => Record<string, ChartEncoding>;
+};
 
 /**
  * Chart template definition — pure data, no UI/icon dependencies.
@@ -523,6 +692,13 @@ export interface ChartTemplateDef {
     /** Optional configurable properties for the chart type */
     properties?: ChartPropertyDef[];
 
+    /**
+     * Optional encoding-level quick actions (Category B). Each action's value is
+     * stored as an override in chartProperties and composed onto the encodings
+     * at assemble time; the encodings map is not mutated. See EncodingActionDef.
+     */
+    encodingActions?: EncodingActionDef[];
+
     /**
      * Optional post-processing hook.
      * Called after instantiation and layout application, before the final
diff --git a/src/lib/agents-chart/echarts/assemble.ts b/src/lib/agents-chart/echarts/assemble.ts
index 13a2cde7..e9a70a7c 100644
--- a/src/lib/agents-chart/echarts/assemble.ts
+++ b/src/lib/agents-chart/echarts/assemble.ts
@@ -59,6 +59,7 @@ import {
     InstantiateContext,
 } from '../core/types';
 import type { ChartWarning } from '../core/types';
+import { applyEncodingOverrides } from '../core/encoding-overrides';
 import { ecGetTemplateDef } from './templates';
 import { resolveChannelSemantics, convertTemporalData } from '../core/resolve-semantics';
 import { toTypeString, type SemanticAnnotation } from '../core/field-semantics';
@@ -91,7 +92,7 @@ import { getPaletteForScheme } from './colormap';
  */
 export function assembleECharts(input: ChartAssemblyInput): any {
     const chartType = input.chart_spec.chartType;
-    const encodings = input.chart_spec.encodings;
+    const rawEncodings = input.chart_spec.encodings;
     const data = input.data.values ?? [];
     const semanticTypes = input.semantic_types ?? {};
     const canvasSize = input.chart_spec.canvasSize ?? { width: 400, height: 320 };
@@ -102,6 +103,12 @@ export function assembleECharts(input: ChartAssemblyInput): any {
         throw new Error(`Unknown ECharts chart type: ${chartType}. Use ecAllTemplateDefs to see available types.`);
     }
 
+    // Compose Category-B encoding-action overrides (stored by the host in
+    // chartProperties, keyed by action key) onto the base encodings before any
+    // pipeline phase runs. Flint owns the transform; the host only stores the
+    // override value. See applyEncodingOverrides / EncodingActionDef.
+    const encodings = applyEncodingOverrides(chartTemplate, rawEncodings, chartProperties);
+
     const warnings: ChartWarning[] = [];
 
     // ═══════════════════════════════════════════════════════════════════════
diff --git a/src/lib/agents-chart/echarts/templates/heatmap.ts b/src/lib/agents-chart/echarts/templates/heatmap.ts
index 20ddd86f..5967b501 100644
--- a/src/lib/agents-chart/echarts/templates/heatmap.ts
+++ b/src/lib/agents-chart/echarts/templates/heatmap.ts
@@ -10,7 +10,7 @@
  *       and a visualMap component for the color scale.
  */
 
-import { ChartTemplateDef, ChartPropertyDef } from '../../core/types';
+import { ChartTemplateDef, EncodingActionDef } from '../../core/types';
 import { extractCategories, DEFAULT_COLORS } from './utils';
 import { getPaletteForScheme } from '../colormap';
 
@@ -55,7 +55,7 @@ export const ecHeatmapDef: ChartTemplateDef = {
         // (defaultBandSize=20, minStep=6), matching VL heatmap sizing.
     }),
     instantiate: (spec, ctx) => {
-        const { channelSemantics, table, chartProperties, colorDecisions } = ctx;
+        const { channelSemantics, table, colorDecisions, encodings } = ctx;
         const xCS = channelSemantics.x;
         const yCS = channelSemantics.y;
         const colorCS = channelSemantics.color;
@@ -101,7 +101,14 @@ export const ecHeatmapDef: ChartTemplateDef = {
         if (maxVal === -Infinity) maxVal = 1;
 
         // Color scheme
-        const schemeName = chartProperties?.colorScheme || 'viridis';
+        // Category-B encoding override: the compiler already composed
+        // chartProperties.colorScheme onto encodings.color.scheme up front
+        // (see applyEncodingOverrides), so we just read it here. This also covers
+        // charts saved before the migration, whose value lived in
+        // chartProperties.colorScheme.
+        const encScheme = encodings?.color?.scheme;
+        const userScheme = (encScheme && encScheme !== 'default') ? encScheme : undefined;
+        const schemeName = userScheme || 'viridis';
         const decision = colorDecisions?.color ?? colorDecisions?.group;
         const isDivergingScale =
             decision?.schemeType === 'diverging'
@@ -238,24 +245,32 @@ export const ecHeatmapDef: ChartTemplateDef = {
             }
         }
     },
-    properties: [
+    encodingActions: [
         {
-            key: 'colorScheme', label: 'Scheme', type: 'discrete', options: [
-                { value: undefined, label: 'Default (Viridis)' },
-                { value: 'viridis', label: 'Viridis' },
-                { value: 'inferno', label: 'Inferno' },
-                { value: 'magma', label: 'Magma' },
-                { value: 'plasma', label: 'Plasma' },
-                { value: 'turbo', label: 'Turbo' },
-                { value: 'blues', label: 'Blues' },
-                { value: 'reds', label: 'Reds' },
-                { value: 'greens', label: 'Greens' },
-                { value: 'oranges', label: 'Oranges' },
-                { value: 'purples', label: 'Purples' },
-                { value: 'greys', label: 'Greys' },
-                { value: 'blueorange', label: 'Blue-Orange (diverging)' },
-                { value: 'redblue', label: 'Red-Blue (diverging)' },
-            ],
-        } as ChartPropertyDef,
-    ],
+            key: 'colorScheme',
+            label: 'Scheme',
+            isApplicable: (ctx) => !!ctx.encodings.color?.field,
+            dependencies: ['color'],
+            control: {
+                type: 'discrete', options: [
+                    { value: undefined, label: 'Default (Viridis)' },
+                    { value: 'viridis', label: 'Viridis' },
+                    { value: 'inferno', label: 'Inferno' },
+                    { value: 'magma', label: 'Magma' },
+                    { value: 'plasma', label: 'Plasma' },
+                    { value: 'turbo', label: 'Turbo' },
+                    { value: 'blues', label: 'Blues' },
+                    { value: 'reds', label: 'Reds' },
+                    { value: 'greens', label: 'Greens' },
+                    { value: 'oranges', label: 'Oranges' },
+                    { value: 'purples', label: 'Purples' },
+                    { value: 'greys', label: 'Greys' },
+                    { value: 'blueorange', label: 'Blue-Orange (diverging)' },
+                    { value: 'redblue', label: 'Red-Blue (diverging)' },
+                ],
+            },
+            get: (enc) => enc.color?.scheme,
+            set: (enc, value) => ({ ...enc, color: { ...enc.color, scheme: value } }),
+        },
+    ] as EncodingActionDef[],
 };
diff --git a/src/lib/agents-chart/gofish/assemble.ts b/src/lib/agents-chart/gofish/assemble.ts
index 470fd6e5..8cc24c08 100644
--- a/src/lib/agents-chart/gofish/assemble.ts
+++ b/src/lib/agents-chart/gofish/assemble.ts
@@ -41,6 +41,7 @@ import {
     InstantiateContext,
 } from '../core/types';
 import type { ChartWarning } from '../core/types';
+import { applyEncodingOverrides } from '../core/encoding-overrides';
 import { gfGetTemplateDef } from './templates';
 import { resolveChannelSemantics, convertTemporalData } from '../core/resolve-semantics';
 import { computeZeroDecision } from '../core/semantic-types';
@@ -321,7 +322,7 @@ function buildSpecDescription(gfDesc: any): string {
  */
 export function assembleGoFish(input: ChartAssemblyInput): GoFishSpec {
     const chartType = input.chart_spec.chartType;
-    const encodings = input.chart_spec.encodings;
+    const rawEncodings = input.chart_spec.encodings;
     const data = input.data.values ?? [];
     const semanticTypes = input.semantic_types ?? {};
     const canvasSize = input.chart_spec.canvasSize ?? { width: 400, height: 320 };
@@ -332,6 +333,12 @@ export function assembleGoFish(input: ChartAssemblyInput): GoFishSpec {
         throw new Error(`Unknown GoFish chart type: ${chartType}. Use gfAllTemplateDefs to see available types.`);
     }
 
+    // Compose Category-B encoding-action overrides (stored by the host in
+    // chartProperties, keyed by action key) onto the base encodings before any
+    // pipeline phase runs. Flint owns the transform; the host only stores the
+    // override value. See applyEncodingOverrides / EncodingActionDef.
+    const encodings = applyEncodingOverrides(chartTemplate, rawEncodings, chartProperties);
+
     const warnings: ChartWarning[] = [];
 
     // ═══════════════════════════════════════════════════════════════════════
diff --git a/src/lib/agents-chart/vegalite/assemble.ts b/src/lib/agents-chart/vegalite/assemble.ts
index 5b9af52d..7c0dfcec 100644
--- a/src/lib/agents-chart/vegalite/assemble.ts
+++ b/src/lib/agents-chart/vegalite/assemble.ts
@@ -52,7 +52,8 @@ import {
     LayoutDeclaration,
     InstantiateContext,
 } from '../core/types';
-import type { ChartWarning } from '../core/types';
+import type { ChartWarning, ChartOption, OptionEvalContext } from '../core/types';
+import { applyEncodingOverrides } from '../core/encoding-overrides';
 import { vlGetTemplateDef } from './templates';
 import { inferVisCategory, computeZeroDecision } from '../core/semantic-types';
 import { resolveChannelSemantics, convertTemporalData } from '../core/resolve-semantics';
@@ -102,7 +103,7 @@ const escapeVlFieldName = (name: string): string =>
  */
 export function assembleVegaLite(input: ChartAssemblyInput): any {
     const chartType = input.chart_spec.chartType;
-    const encodings = input.chart_spec.encodings;
+    const rawEncodings = input.chart_spec.encodings;
     const data = input.data.values ?? [];
     const semanticTypes = input.semantic_types ?? {};
     const canvasSize = input.chart_spec.canvasSize ?? { width: 400, height: 320 };
@@ -113,6 +114,45 @@ export function assembleVegaLite(input: ChartAssemblyInput): any {
         throw new Error(`Unknown chart type: ${chartType}`);
     }
 
+    // Compose Category-B encoding-action overrides (stored by the host in
+    // chartProperties, keyed by action key) onto the base encodings before any
+    // pipeline phase runs. Flint owns the transform; the host only stores the
+    // override value. See applyEncodingOverrides / EncodingActionDef.
+    //
+    // Some actions (e.g. Sort) must know each channel's resolved encoding TYPE
+    // to decide which position axis is the discrete category and which is the
+    // measure. The host leaves `type` unset ("auto") for most encodings, so we
+    // run a preliminary semantics pass to fill in the inferred types, compose
+    // the overrides onto the type-enriched encodings, then re-resolve semantics
+    // on the result below (so that, e.g., a value-sort correctly suppresses the
+    // field's canonical ordinal ordering).
+    const convertedData = convertTemporalData(data, semanticTypes);
+    const prelimSemantics = resolveChannelSemantics(
+        rawEncodings, data, semanticTypes, convertedData,
+    );
+    const typedRawEncodings: Record<string, ChartEncoding> = {};
+    for (const [ch, enc] of Object.entries(rawEncodings)) {
+        typedRawEncodings[ch] = enc.type
+            ? enc
+            : { ...enc, type: prelimSemantics[ch]?.type };
+    }
+    // Axis dtype override (`xAxisType` / `yAxisType` properties): the user can
+    // force a position channel's interpretation between a continuous time scale
+    // ('temporal') and discrete bands ('nominal') for date-like fields that
+    // carry a dual interpretation. Applies to either axis — x on a vertical
+    // bar/line, y on a horizontal (transposed) bar/lollipop. Applied at the
+    // encoding level so the whole pipeline (sorting, layout, formatting) honors
+    // it — resolveChannelSemantics treats an explicit encoding.type as
+    // authoritative. Whether each control is *offered* is decided by the
+    // property's own `check` (see AXIS_DTYPE_PROPERTIES).
+    for (const axis of ['x', 'y'] as const) {
+        const choice = chartProperties?.[`${axis}AxisType`];
+        if ((choice === 'temporal' || choice === 'nominal') && typedRawEncodings[axis]?.field) {
+            typedRawEncodings[axis] = { ...typedRawEncodings[axis], type: choice };
+        }
+    }
+    const encodings = applyEncodingOverrides(chartTemplate, typedRawEncodings, chartProperties);
+
     const warnings: ChartWarning[] = [];
 
     // ═══════════════════════════════════════════════════════════════════════
@@ -122,9 +162,6 @@ export function assembleVegaLite(input: ChartAssemblyInput): any {
     const tplMark = chartTemplate.template?.mark;
     const templateMarkType = typeof tplMark === 'string' ? tplMark : tplMark?.type;
 
-    // Convert temporal data once — feeds semantic resolution and all downstream stages
-    const convertedData = convertTemporalData(data, semanticTypes);
-
     const channelSemantics = resolveChannelSemantics(
         encodings, data, semanticTypes, convertedData,
     );
@@ -142,6 +179,66 @@ export function assembleVegaLite(input: ChartAssemblyInput): any {
         }
     }
 
+    // ── Zero-baseline override (position-cognitive axes) ──
+    // computeZeroDecision (above) is the single authority on whether an axis
+    // includes zero. For axes where that call is a genuine toss-up worth
+    // surfacing (see makeZeroBaselineCheck / ZeroDecision.uncertain), the host
+    // may override it via the stored config `includeZero_x`/`includeZero_y` (a boolean on/off
+    // toggle). We honor it by overwriting `cs.zero.zero`, leaving the rest of
+    // the decision intact, so every downstream consumer — the spec applier
+    // (instantiate-spec) AND banking layout (compute-layout reads cs.zero) —
+    // renders the user's choice consistently. Placed before the log-scale
+    // override and the layout phase so banking sees the overridden decision.
+    if (chartTemplate.markCognitiveChannel === 'position') {
+        for (const axis of ['x', 'y'] as const) {
+            const cs = channelSemantics[axis];
+            if (!cs?.field || cs.type !== 'quantitative' || !cs.zero) continue;
+            const choice = chartProperties?.[`includeZero_${axis}`];
+            if (choice === undefined) continue; // keep the engine's decision
+            cs.zero = { ...cs.zero, zero: choice };
+        }
+    }
+
+    // ── Log-scale override (position-cognitive axes) ──
+    // A log/symlog scale only makes sense on a continuous quantitative POSITION
+    // axis (scatter/line/strip) — never on length/area marks, where bars encode
+    // magnitude as length from a zero baseline (log destroys the baseline and
+    // log(0) is undefined). The engine recommends log conservatively in
+    // resolveScaleType (→ cs.scaleType). Here we apply the user's per-axis
+    // override of that recommendation via the stored config `logScale_x`/
+    // `logScale_y` (a boolean on/off toggle). Whether the control is *offered*
+    // and its recommended default are decided by the property's own `check`
+    // (see LOG_SCALE_PROPERTIES) and surfaced through `getChartOptions`.
+    // On non-position marks we additionally strip any recommended log/symlog
+    // scale so length/area encodings always render linearly from their baseline.
+    if (chartTemplate.markCognitiveChannel === 'position') {
+        for (const axis of ['x', 'y'] as const) {
+            const cs = channelSemantics[axis];
+            if (!cs?.field || cs.type !== 'quantitative') continue;
+            // Binned axes use VL's linear bin computation — log conflicts.
+            if (chartTemplate.template?.encoding?.[axis]?.bin) continue;
+
+            const choice = chartProperties?.[`logScale_${axis}`];
+            if (choice === undefined) continue; // keep the engine's recommendation
+
+            // The control is a simple on/off toggle: `true` forces log (symlog
+            // when zeros are present), `false` forces linear.
+            const hasZero = data.some(row => row[cs.field] === 0);
+            cs.scaleType = choice === false
+                ? undefined                       // force linear
+                : (hasZero ? 'symlog' : 'log');   // force log (symlog if zeros)
+        }
+    } else {
+        // Non-position mark (length/area): never apply a log/symlog scale —
+        // these encodings read magnitude from a zero baseline that log destroys.
+        for (const axis of ['x', 'y'] as const) {
+            const cs = channelSemantics[axis];
+            if (cs?.scaleType === 'log' || cs?.scaleType === 'symlog') {
+                cs.scaleType = undefined;
+            }
+        }
+    }
+
     // ═══════════════════════════════════════════════════════════════════════
     // STEP 0a: declareLayoutMode (VL-free template hook)
     // ═══════════════════════════════════════════════════════════════════════
@@ -451,16 +548,64 @@ export function assembleVegaLite(input: ChartAssemblyInput): any {
     }
     result._width = layoutResult.subplotWidth;
     result._height = layoutResult.subplotHeight;
-    // Expose computed config so the UI can seed toggle defaults from heuristic results.
-    // Only include keys when the corresponding property is relevant (e.g. faceted).
-    const computedConfig: Record<string, any> = {};
-    if (hasFacetedQuant) {
-        computedConfig.independentYAxis = computedIndependentYAxis;
-    }
-    result._computedConfig = computedConfig;
+    // Annotated option catalog: every configurable property this template
+    // exposes, tagged with whether it is *applicable* for this spec + data and
+    // the *value* the compiler will use (host choice if set, else the engine's
+    // recommended default). This is the single contract a host (DF, an AI agent,
+    // another renderer) reads to know which controls to surface and how to seed
+    // them — see ChartOption / getChartOptions. Passing a non-applicable
+    // property back to the compiler is accepted but silently ignored.
+    //
+    // Each property decides its own applicability through its pure `check(ctx)`
+    // (the single source of truth, co-located with the property). The one piece
+    // that can't live there is `independentYAxis`'s *recommended default* —
+    // whether to turn it on automatically — which is layout-coupled (it needs the
+    // resolved facet grid and the assembled spec's facet/y structure, differing
+    // for 1-D vs 2-D facets); that value is computed above and threaded in here.
+    const evalCtx: OptionEvalContext = {
+        encodings,
+        channelSemantics,
+        data,
+        chartProperties,
+    };
+    const layoutCoupledRecommendation: Record<string, any> = {
+        independentYAxis: computedIndependentYAxis,
+    };
+
+    result._options = (chartTemplate.properties ?? []).map((def): ChartOption => {
+        const ev = def.check?.(evalCtx);
+        const applicable = ev ? ev.applicable : true;
+        const recommended = layoutCoupledRecommendation[def.key] ?? ev?.recommendedValue;
+        const value = chartProperties?.[def.key] ?? recommended ?? def.defaultValue;
+        // Strip the `check` rule — a ChartOption is the resolved, serializable
+        // answer (`applicable`/`value`), not the predicate that produced it.
+        const { check, ...rest } = def;
+        return { ...rest, applicable, value };
+    });
     return result;
 }
 
+/**
+ * Inspect a chart spec + dataset and report the configurable options Flint
+ * exposes for it, each annotated with whether it is *applicable* and the *value*
+ * the compiler will use (see ChartOption).
+ *
+ * This is the "ask Flint what knobs are available" entry point. A host calls it
+ * with the same input it would pass to `assembleVegaLite`, renders a control for
+ * each applicable option seeded from `value`, and feeds the user's choices back
+ * via `chart_spec.chartProperties`. Applicability is intended to depend on the
+ * spec and data, not on the chosen values, so the set should stay stable.
+ *
+ * It runs the same analysis pipeline as `assembleVegaLite` (the options are a
+ * by-product of assembly), so applicability can never drift from what the
+ * compiler actually does — a property reported applicable is exactly one the
+ * compiler will honor.
+ */
+export function getChartOptions(input: ChartAssemblyInput): ChartOption[] {
+    const spec = assembleVegaLite(input);
+    return spec && Array.isArray(spec._options) ? spec._options : [];
+}
+
 // ===========================================================================
 // buildVLEncodings — Translate abstract semantics → VL encoding objects
 // ===========================================================================
diff --git a/src/lib/agents-chart/vegalite/index.ts b/src/lib/agents-chart/vegalite/index.ts
index e9bfded1..b6c956b2 100644
--- a/src/lib/agents-chart/vegalite/index.ts
+++ b/src/lib/agents-chart/vegalite/index.ts
@@ -11,7 +11,7 @@
  */
 
 // VL assembly function
-export { assembleVegaLite } from './assemble';
+export { assembleVegaLite, getChartOptions } from './assemble';
 
 // VL spec instantiation (Phase 2)
 export { vlApplyLayoutToSpec, vlApplyTooltips } from './instantiate-spec';
diff --git a/src/lib/agents-chart/vegalite/instantiate-spec.ts b/src/lib/agents-chart/vegalite/instantiate-spec.ts
index b95a5c88..7a336142 100644
--- a/src/lib/agents-chart/vegalite/instantiate-spec.ts
+++ b/src/lib/agents-chart/vegalite/instantiate-spec.ts
@@ -23,19 +23,7 @@ import type {
     ChartWarning,
 } from '../core/types';
 import type { FormatSpec } from '../core/field-semantics';
-import {
-    looksLikeDateString,
-    analyzeTemporalField,
-    computeDataVotes,
-    pickBestLevel,
-    levelToFormat,
-    SEMANTIC_LEVEL,
-} from '../core/resolve-semantics';
-import {
-    getVisCategory,
-    inferVisCategory,
-} from '../core/semantic-types';
-import { toTypeString, snapToBoundHeuristic } from '../core/field-semantics';
+import { snapToBoundHeuristic } from '../core/field-semantics';
 
 const DEFAULT_QUANTITATIVE_AXIS_FORMAT = ',.12~g';
 
@@ -148,54 +136,19 @@ export function vlApplyLayoutToSpec(
         }
     };
 
-    const applyOrdinalTemporalFormat = (enc: any, channel: string, cs: ChannelSemantics | undefined) => {
-        if (!enc || !enc.field) return;
-        if (enc.type !== 'ordinal' && enc.type !== 'nominal') return;
-        if (!cs) return;
-
-        const semanticType = toTypeString(context.semanticTypes[enc.field]);
-        const stCategory = semanticType ? getVisCategory(semanticType) : null;
-        if (stCategory !== 'temporal') return;
-
-        const fieldVals = context.table.map((r: any) => r[enc.field]).filter((v: any) => v != null);
-
-        // Single unique value → no temporal formatting (would lose precision, e.g. "2007-12" → "2007")
-        const uniqueVals = new Set(fieldVals.map(String));
-        if (uniqueVals.size <= 1) return;
-
-        const datelikeCnt = fieldVals.filter((v: any) =>
-            typeof v !== 'string' || looksLikeDateString(String(v))
-        ).length;
-        if (datelikeCnt < fieldVals.length * 0.5) return;
-
-        const analysis = analyzeTemporalField(fieldVals);
-        if (!analysis) return;
-
-        const votes = computeDataVotes(analysis.same);
-        const semLevel = SEMANTIC_LEVEL[semanticType];
-        if (semLevel !== undefined) votes[semLevel] += 3;
-        const { level, score } = pickBestLevel(votes);
-        if (score < 5) return;
-
-        const fmt = levelToFormat(level, analysis);
-        if (!fmt) return;
-
-        const expr = `isValid(toDate(datum.label)) ? timeFormat(toDate(datum.label), '${fmt}') : datum.label`;
-        if (channel === 'x' || channel === 'y') {
-            if (enc.axis === null) return; // preserve axis suppression
-            if (!enc.axis) enc.axis = {};
-            enc.axis.labelExpr = expr;
-        } else if (channel === 'color') {
-            if (!enc.legend) enc.legend = {};
-            enc.legend.labelExpr = expr;
-        }
-    };
+    // Discrete (ordinal/nominal) temporal axes and legends render the raw
+    // string value as-is. A discrete axis treats values as opaque categories,
+    // so the label should match what's in the table (e.g. "2010-01"). We avoid
+    // toDate/timeFormat reformatting here: it added no value for already-string
+    // data and risked timezone-shifted labels ("2010-01" → "Dec 2009"). If a
+    // column genuinely needs date parsing/formatting (e.g. numeric timestamps),
+    // the user can switch that axis to temporal (continuous), where Vega-Lite
+    // handles parsing and multi-level labels natively.
 
     // Iterate all encoding locations (top-level, spec, layers)
     const applyTemporalToEncoding = (encoding: Record<string, any>) => {
         for (const [ch, enc] of Object.entries(encoding)) {
             applyTemporalFormat(enc, ch, channelSemantics[ch]);
-            applyOrdinalTemporalFormat(enc, ch, channelSemantics[ch]);
         }
     };
 
@@ -380,19 +333,6 @@ export function vlApplyLayoutToSpec(
                 if (!yEnc.axis) yEnc.axis = {};
                 yEnc.axis.title = null;
             }
-        } else {
-            // Wrap-facet (single facet field, no shared side band) — the y-axis
-            // title repeats once per wrap-row down the left edge. There's no row
-            // header to fold it into, so keep it but stop it smearing: pull it
-            // off the tick labels with titlePadding, shrink the font, and cap its
-            // length to the subplot height so it can't overrun a short subplot.
-            if (!vgObj.config) vgObj.config = {};
-            vgObj.config.axisY = {
-                ...(vgObj.config.axisY || {}),
-                titlePadding: 8,
-                titleFontSize: 10,
-                titleLimit: Math.max(30, layout.subplotHeight),
-            };
         }
     }
 
@@ -591,23 +531,26 @@ function formatSpecToLabelExpr(fmt: FormatSpec): string | null {
 }
 
 /**
- * Compute the maximum stacked (group) total for a quantitative field.
+ * Compute the positive and negative stacked extremes for a quantitative field.
  *
  * For a stacked bar chart with:
  *   x = category (grouping), y = value (stacked), color = series
  *
- * This computes sum(value) for each category group and returns the max.
- * Used to check whether stacked totals exceed an intrinsic domain bound
- * (e.g., percentages summing to >100%).
+ * Vega-Lite stacks positive and negative contributions *separately* (positives
+ * grow up from 0, negatives down from 0), so we track each side independently —
+ * summing signed values together would let a mix of +0.9 and −0.2 cancel and
+ * hide a tall positive stack. Returns the largest positive group sum and the
+ * most-negative group sum, used to check whether either side overflows an
+ * intrinsic domain bound (e.g., correlations summing past 1).
  *
  * Returns undefined if the grouping field can't be determined.
  */
-function computeMaxStackedTotal(
+function computeStackedExtremes(
     table: any[],
     measureField: string,
     measureChannel: string,
     channelSemantics: Record<string, ChannelSemantics>,
-): number | undefined {
+): { maxPos: number; minNeg: number } | undefined {
     if (!table || table.length === 0) return undefined;
 
     // The grouping axis is the *other* positional channel
@@ -624,8 +567,9 @@ function computeMaxStackedTotal(
         if (fcs?.field) facetFields.push(fcs.field);
     }
 
-    // Group rows and sum the measure field per group
-    const totals = new Map<string, number>();
+    // Group rows and sum positive / negative contributions per group separately
+    const posTotals = new Map<string, number>();
+    const negTotals = new Map<string, number>();
     for (const row of table) {
         const val = row[measureField];
         if (typeof val !== 'number' || isNaN(val)) continue;
@@ -636,11 +580,40 @@ function computeMaxStackedTotal(
             keyParts.push(String(row[ff]));
         }
         const key = keyParts.join('|||');
-        totals.set(key, (totals.get(key) ?? 0) + val);
+        if (val >= 0) {
+            posTotals.set(key, (posTotals.get(key) ?? 0) + val);
+        } else {
+            negTotals.set(key, (negTotals.get(key) ?? 0) + val);
+        }
     }
 
-    if (totals.size === 0) return undefined;
-    return Math.max(...totals.values());
+    if (posTotals.size === 0 && negTotals.size === 0) return undefined;
+    const maxPos = posTotals.size > 0 ? Math.max(...posTotals.values()) : 0;
+    const minNeg = negTotals.size > 0 ? Math.min(...negTotals.values()) : 0;
+    return { maxPos, minNeg };
+}
+
+/**
+ * Detect whether a discrete category repeats across rows — i.e., multiple rows
+ * share the same category value, which makes Vega-Lite stack the measure even
+ * with no color encoding. Used to recognise implicit no-color stacking so the
+ * intrinsic-domain check runs against the stacked total, not individual values.
+ */
+function hasRepeatedCategory(
+    table: any[],
+    categoryField: string | undefined,
+    measureField: string,
+): boolean {
+    if (!table || table.length === 0 || !categoryField) return false;
+    const seen = new Set<string>();
+    // `some` stops at the first row whose category was already recorded.
+    return table.some((row) => {
+        const measure = row[measureField];
+        if (typeof measure !== 'number' || isNaN(measure)) return false; // rows without a numeric measure are ignored
+        const category = String(row[categoryField]);
+        const sizeBefore = seen.size;
+        return seen.add(category).size === sizeBefore; // size unchanged → category seen before
+    });
 }
 
 /**
@@ -789,8 +762,12 @@ function vlApplyFieldContext(
             //   Layered / no stack (stack: null/false): each bar is
             //   independent. → Apply domain constraints as normal.
             //
-            // VL auto-stacks bar/area marks when a color encoding is present
-            // (unless stack: null/false).
+            // VL auto-stacks bar/area marks whenever multiple rows share the
+            // same discrete position — most obviously with a color series, but
+            // ALSO with no color at all when a category repeats (several rows
+            // per x). Both cases sum on the measure axis, so the intrinsic
+            // domain must be checked against the stacked total, not individual
+            // values.
             //
             // Without this: Rating gets auto-fitted to data range (e.g., 2-4.5)
             // instead of showing the full 1-5 scale.
@@ -803,7 +780,13 @@ function vlApplyFieldContext(
                 || (Array.isArray(vgObj.layer) && vgObj.layer.some((l: any) => l.encoding?.color?.field))
                 || vgObj.spec?.encoding?.color?.field
             );
-            const isImplicitlyStacked = isBarLike && hasColorEncoding && enc.stack !== null;
+            // The other positional channel; bar-like charts stack the measure
+            // when this axis is discrete and a category repeats across rows.
+            const otherChannel = ch === 'y' ? 'x' : 'y';
+            const otherCS = channelSemantics[otherChannel];
+            const otherIsDiscrete = otherCS?.type === 'nominal' || otherCS?.type === 'ordinal';
+            const isImplicitlyStacked = isBarLike && otherIsDiscrete && enc.stack !== null
+                && (hasColorEncoding || hasRepeatedCategory(context.table, otherCS?.field, enc.field));
             const isStacked = isExplicitlyStacked || isImplicitlyStacked;
             const isNormalizeStacked = enc.stack === 'normalize';
             const isSumStacked = isStacked && !isNormalizeStacked;
@@ -827,28 +810,41 @@ function vlApplyFieldContext(
                 // can't find the intrinsic bounds to snap totals against.
                 const intrinsic = getEffectiveIntrinsicDomain(cs, context.table, enc.field);
                 if (intrinsic) {
-                    const maxTotal = computeMaxStackedTotal(
+                    const extremes = computeStackedExtremes(
                         context.table, enc.field, ch, channelSemantics,
                     );
 
-                    if (maxTotal !== undefined && maxTotal > intrinsic[1]) {
-                        // Stacked totals exceed the intrinsic bound →
-                        // skip domain constraint to avoid clipping.
-                        // Use a small epsilon tolerance for floating-point
-                        // imprecision (e.g., shares summing to 1.0000000001
-                        // instead of exactly 1.0 should still be treated as
-                        // within bounds). Scale epsilon to the domain range.
+                    if (extremes !== undefined) {
+                        // VL stacks positive and negative contributions
+                        // separately, so either side can overflow its bound.
+                        const { maxPos, minNeg } = extremes;
                         const range = intrinsic[1] - intrinsic[0];
+                        // Small epsilon tolerance for floating-point imprecision
+                        // (e.g., shares summing to 1.0000000001 should still be
+                        // treated as within bounds). Scaled to the domain range.
                         const epsilon = range * 1e-6;
-                        if (maxTotal > intrinsic[1] + epsilon) {
+                        const overflowsTop = maxPos > intrinsic[1] + epsilon;
+                        const overflowsBottom = minNeg < intrinsic[0] - epsilon;
+
+                        if (overflowsTop || overflowsBottom) {
+                            // Stacked totals exceed the intrinsic bound on at
+                            // least one side → skip the domain constraint so
+                            // bars aren't clipped (e.g., correlations summing
+                            // past 1, or percentages past 100%).
                             if (cs.domainConstraint) {
                                 skipDomain = true;
                             }
                         } else {
-                            // Within epsilon — treat as equal to the bound.
-                            // Re-run snap so the bound gets applied.
-                            const stackedSnap = snapToBoundHeuristic(intrinsic, [intrinsic[1]]);
+                            // Stacked extremes are within intrinsic bounds.
+                            // Re-run snap on the stacked extremes to pick up
+                            // bounds that individual values missed (e.g.,
+                            // individual shares of 20–40% don't snap to 100%,
+                            // but stacked totals of ~100% should).
+                            const stackedSnap = snapToBoundHeuristic(intrinsic, [maxPos, minNeg]);
                             if (stackedSnap) {
+                                // Merge with existing constraint: keep any bound
+                                // already snapped from individual values, add any
+                                // new bound from stacked totals.
                                 if (cs.domainConstraint) {
                                     effectiveDomainConstraint = {
                                         min: cs.domainConstraint.min ?? stackedSnap.min,
@@ -860,27 +856,6 @@ function vlApplyFieldContext(
                                 }
                             }
                         }
-                    } else if (maxTotal !== undefined) {
-                        // Stacked totals are within intrinsic bounds.
-                        // Re-run snap on stacked totals to pick up bounds
-                        // that individual values missed (e.g., individual
-                        // shares of 20–40% don't snap to 100%, but stacked
-                        // totals of ~100% should).
-                        const stackedSnap = snapToBoundHeuristic(intrinsic, [maxTotal]);
-                        if (stackedSnap) {
-                            // Merge with existing constraint: keep any bound
-                            // already snapped from individual values, add any
-                            // new bound from stacked totals.
-                            if (cs.domainConstraint) {
-                                effectiveDomainConstraint = {
-                                    min: cs.domainConstraint.min ?? stackedSnap.min,
-                                    max: cs.domainConstraint.max ?? stackedSnap.max,
-                                    clamp: cs.domainConstraint.clamp || stackedSnap.clamp,
-                                };
-                            } else {
-                                effectiveDomainConstraint = stackedSnap;
-                            }
-                        }
                     }
                 } else if (cs.domainConstraint) {
                     // No intrinsic domain to compare against → skip to be safe
@@ -890,7 +865,16 @@ function vlApplyFieldContext(
 
             if (effectiveDomainConstraint && enc.type === 'quantitative' && (ch === 'x' || ch === 'y') && !enc.bin && !skipDomain) {
                 if (!enc.scale) enc.scale = {};
-                const { min, max, clamp } = effectiveDomainConstraint;
+                let { min } = effectiveDomainConstraint;
+                const { max, clamp } = effectiveDomainConstraint;
+                // The resolved zero decision (engine default, or the host's
+                // includeZero_x/_y override) is authoritative. When it says "no
+                // zero", a lower bound of exactly 0 in the semantic domain is
+                // merely a non-negativity floor, not a real semantic minimum —
+                // drop it so the axis fits the data instead of being re-pinned
+                // to zero. Length marks (bar/area/rect) always keep zero.
+                const wantsNoZero = cs.zero?.zero === false;
+                if (!isBarLike && wantsNoZero && min === 0) min = undefined;
                 if (min !== undefined && max !== undefined) {
                     enc.scale.domain = [min, max];
                     // For non-bar marks (scatter, line, etc.), the explicit
@@ -901,11 +885,13 @@ function vlApplyFieldContext(
                     // zero with correct proportional lengths — VL extends
                     // the domain to include 0, and the upper bound is still
                     // capped by the domain constraint (e.g., [0,5] not [0,6]).
-                    if (!isBarLike && enc.scale.zero !== undefined) {
+                    // Never clobber a decided zero:false.
+                    if (!isBarLike && enc.scale.zero !== undefined && !wantsNoZero) {
                         delete enc.scale.zero;
                     }
                 } else {
-                    // Partial constraint — snap one end while auto-fitting the other.
+                    // Partial constraint (or the zero-floor dropped above) — snap
+                    // the bounded end while auto-fitting the other.
                     // E.g., Percentage data at 97% → domainMax = 100, domainMin auto-fits.
                     if (min !== undefined) enc.scale.domainMin = min;
                     if (max !== undefined) enc.scale.domainMax = max;
diff --git a/src/lib/agents-chart/vegalite/templates/area.ts b/src/lib/agents-chart/vegalite/templates/area.ts
index 033c3a04..0054ecdb 100644
--- a/src/lib/agents-chart/vegalite/templates/area.ts
+++ b/src/lib/agents-chart/vegalite/templates/area.ts
@@ -53,7 +53,11 @@ export const areaChartDef: ChartTemplateDef = {
     properties: [
         interpolateConfigProperty,
         { key: "opacity", label: "Opacity", type: "continuous", min: 0.1, max: 1, step: 0.05, defaultValue: 0.7 },
-        { key: "stackMode", label: "Stack", type: "discrete", options: [
+        { key: "stackMode", label: "Stack", type: "discrete",
+          // A stack mode only does something when a series dimension (color) is
+          // present to stack; without it there is a single area band.
+          check: (ctx) => ({ applicable: !!ctx.encodings.color?.field }),
+          options: [
             { value: undefined, label: "Stacked (default)" },
             { value: "normalize", label: "Normalize (100%)" },
             { value: "center", label: "Center" },
diff --git a/src/lib/agents-chart/vegalite/templates/bar-table.ts b/src/lib/agents-chart/vegalite/templates/bar-table.ts
index f3c8d509..21b385ce 100644
--- a/src/lib/agents-chart/vegalite/templates/bar-table.ts
+++ b/src/lib/agents-chart/vegalite/templates/bar-table.ts
@@ -796,7 +796,28 @@ export const barTableDef: ChartTemplateDef = {
         { key: 'maxRows', label: 'Max Rows', type: 'continuous', min: 5, max: 100, step: 1, defaultValue: 20 },
         // Off by default — safer for arbitrary measures. The agent (or
         // the user) can flip it on when a "% of total" share is
-        // meaningful (additive, single-sign, non-zero total).
-        { key: 'showPercent', label: 'Show % of Total', type: 'binary', defaultValue: false },
+        // meaningful (additive, single-sign, non-zero total). Its `check`
+        // reports applicability per render from the measure's data.
+        {
+            key: 'showPercent', label: 'Show % of Total', type: 'binary', defaultValue: false,
+            check: (ctx) => {
+                // A "% of total" share only reads sensibly for an additive,
+                // single-sign measure with a non-zero total — a share of a
+                // mixed-sign or intensive (mean-aggregated) measure is misleading.
+                const mcs = ctx.channelSemantics?.x;
+                if (!mcs?.field || mcs.type !== 'quantitative' || mcs.aggregationDefault === 'average') {
+                    return { applicable: false };
+                }
+                let sum = 0, hasNeg = false, hasPos = false, count = 0;
+                for (const row of ctx.data ?? []) {
+                    const v = row[mcs.field];
+                    if (typeof v !== 'number' || !isFinite(v)) continue;
+                    count++;
+                    if (v < 0) hasNeg = true; else if (v > 0) hasPos = true;
+                    sum += v;
+                }
+                return { applicable: count > 0 && !(hasNeg && hasPos) && Math.abs(sum) > 0 };
+            },
+        },
     ] as ChartPropertyDef[],
 };
diff --git a/src/lib/agents-chart/vegalite/templates/bar.ts b/src/lib/agents-chart/vegalite/templates/bar.ts
index 38da3f22..a5c23352 100644
--- a/src/lib/agents-chart/vegalite/templates/bar.ts
+++ b/src/lib/agents-chart/vegalite/templates/bar.ts
@@ -1,13 +1,56 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-import { ChartTemplateDef, ChartPropertyDef } from '../../core/types';
+import { ChartTemplateDef, ChartPropertyDef, EncodingActionDef } from '../../core/types';
+import { makeSortAction } from '../../core/encoding-actions';
 import {
     defaultBuildEncodings, setMarkProp, adjustBarMarks, adjustRectTiling,
     detectBandedAxisFromSemantics, detectBandedAxisForceDiscrete,
     resolveAsDiscrete, ensureDiscreteTypes,
 } from './utils';
 
+const HEATMAP_SCHEME_COLORS: Record<string, [string, string]> = {
+    viridis: ['#440154', '#fde725'],
+    inferno: ['#000004', '#fcffa4'],
+    magma: ['#000004', '#fcfdbf'],
+    plasma: ['#0d0887', '#f0f921'],
+    turbo: ['#30123b', '#7a0403'],
+    blues: ['#f7fbff', '#08519c'],
+    reds: ['#fff5f0', '#a50f15'],
+    greens: ['#f7fcf5', '#00441b'],
+    oranges: ['#fff5eb', '#7f2704'],
+    purples: ['#fcfbfd', '#3f007d'],
+    greys: ['#ffffff', '#252525'],
+};
+
+function hexLuma(hex: string): number {
+    // Weighted brightness (0..1) of a 6-digit hex colour; the leading '#' is optional.
+    const parsed = /^#?([0-9a-f]{6})$/i.exec(hex);
+    if (parsed === null) return 0; // anything that is not a 6-digit hex colour scores 0
+    const digits = parsed[1];
+    const channel = (at: number): number => parseInt(digits.slice(at, at + 2), 16);
+    const [red, green, blue] = [channel(0), channel(2), channel(4)];
+    return (0.2126 * red + 0.7152 * green + 0.0722 * blue) / 255;
+}
+
+function getSafeHeatmapIntrinsicDomain(ctx: any, colorField: string | undefined): [number, number] | undefined {
+    // Without a color field there is no domain to report.
+    if (!colorField) return undefined;
+    const note = ctx.channelSemantics?.color?.semanticAnnotation;
+    // A domain carried by the annotation takes precedence over the fallbacks below.
+    if (note?.intrinsicDomain) return note.intrinsicDomain;
+    switch (note?.semanticType) { // fixed bounds for the semantic types handled here
+        case 'Correlation':
+            return [-1, 1];
+        case 'Latitude':
+            return [-90, 90];
+        case 'Longitude':
+            return [-180, 180];
+        default:
+            return undefined;
+    }
+}
+
 // ─── Bar Chart ──────────────────────────────────────────────────────────────
 
 export const barChartDef: ChartTemplateDef = {
@@ -33,6 +76,7 @@ export const barChartDef: ChartTemplateDef = {
     properties: [
         { key: "cornerRadius", label: "Corners", type: "continuous", min: 0, max: 15, step: 1, defaultValue: 0 },
     ] as ChartPropertyDef[],
+    encodingActions: [makeSortAction()] as EncodingActionDef[],
 };
 
 // ─── Pyramid Chart ──────────────────────────────────────────────────────────
@@ -196,6 +240,7 @@ export const groupedBarChartDef: ChartTemplateDef = {
         defaultBuildEncodings(spec, ctx.resolvedEncodings);
         adjustBarMarks(spec, ctx);
     },
+    encodingActions: [makeSortAction()] as EncodingActionDef[],
 };
 
 // ─── Stacked Bar Chart ──────────────────────────────────────────────────────
@@ -229,13 +274,18 @@ export const stackedBarChartDef: ChartTemplateDef = {
         adjustBarMarks(spec, ctx);
     },
     properties: [
-        { key: "stackMode", label: "Stack", type: "discrete", options: [
+        { key: "stackMode", label: "Stack", type: "discrete",
+          // A stack mode only does something when a series dimension (color) is
+          // present to stack; without it there is a single bar per category.
+          check: (ctx) => ({ applicable: !!ctx.encodings.color?.field }),
+          options: [
             { value: undefined, label: "Stacked (default)" },
             { value: "normalize", label: "Normalize (100%)" },
             { value: "center", label: "Center" },
             { value: "layered", label: "Layered (overlap)" },
         ] },
     ] as ChartPropertyDef[],
+    encodingActions: [makeSortAction()] as EncodingActionDef[],
 };
 
 // ─── Histogram ──────────────────────────────────────────────────────────────
@@ -272,38 +322,166 @@ export const heatmapDef: ChartTemplateDef = {
     template: { mark: "rect", encoding: {} },
     channels: ["x", "y", "color", "column", "row"],
     markCognitiveChannel: 'color',
-    declareLayoutMode: () => ({
-        axisFlags: { x: { banded: true }, y: { banded: true } },
-    }),
+    declareLayoutMode: (_cs, _table, chartProperties) => {
+        const showTextLabels = !!chartProperties?.showTextLabels;
+        return {
+            axisFlags: { x: { banded: true }, y: { banded: true } },
+            // Labels need slightly larger cells so the value text isn't crushed,
+            // but we keep this close to the unlabeled defaults (minStep 6 /
+            // defaultBandSize 20) so a labeled heatmap doesn't balloon. The small
+            // label font (see instantiate) is what lets these stay compact.
+            paramOverrides: showTextLabels
+                ? { minStep: 9, defaultBandSize: 22 }
+                : undefined,
+        };
+    },
     instantiate: (spec, ctx) => {
         defaultBuildEncodings(spec, ctx.resolvedEncodings);
         // Apply color scheme from chart properties
         const config = ctx.chartProperties;
-        if (config?.colorScheme && spec.encoding?.color) {
+        const showTextLabels = !!config?.showTextLabels;
+        const colorField = spec.encoding?.color?.field;
+        const colorVals: number[] = colorField
+            ? ctx.table.map((r: any) => Number(r[colorField])).filter((v: number) => Number.isFinite(v))
+            : [];
+        // Fold rather than spread into Math.min/Math.max: spreading a very large
+        // table can exceed the engine's argument limit and throw a RangeError.
+        const observedMin = colorVals.length > 0 ? colorVals.reduce((a, b) => Math.min(a, b)) : 0;
+        const observedMax = colorVals.length > 0 ? colorVals.reduce((a, b) => Math.max(a, b)) : 1;
+        const existingScheme = spec.encoding?.color?.scale?.scheme;
+        // Color scheme is a Category-B encoding override: the compiler already
+        // composed chartProperties.colorScheme onto encoding.color.scheme before
+        // assembly (see applyEncodingOverrides), so we just read it here. This
+        // also transparently covers charts saved before the migration, whose
+        // value lived in chartProperties.colorScheme.
+        const encScheme = ctx.encodings?.color?.scheme;
+        const userScheme = (encScheme && encScheme !== 'default') ? encScheme : undefined;
+        const schemeName = userScheme || existingScheme;
+        const isDiverging = schemeName === 'blueorange' || schemeName === 'redblue';
+        const intrinsicDomain = getSafeHeatmapIntrinsicDomain(ctx, colorField);
+
+        let effectiveMin = intrinsicDomain?.[0] ?? observedMin;
+        let effectiveMax = intrinsicDomain?.[1] ?? observedMax;
+
+        if (spec.encoding?.color) {
             if (!spec.encoding.color.scale) spec.encoding.color.scale = {};
-            spec.encoding.color.scale.scheme = config.colorScheme;
+            if (userScheme) {
+                spec.encoding.color.scale.scheme = userScheme;
+            }
+            if (isDiverging && effectiveMin < 0 && effectiveMax > 0) {
+                const sym = Math.max(Math.abs(effectiveMin), Math.abs(effectiveMax));
+                effectiveMin = -sym;
+                effectiveMax = sym;
+                spec.encoding.color.scale.domain = [-sym, sym];
+                spec.encoding.color.scale.domainMid = 0;
+            } else if (intrinsicDomain) {
+                spec.encoding.color.scale.domain = [effectiveMin, effectiveMax];
+            }
         }
         adjustBarMarks(spec, ctx);
         adjustRectTiling(spec, ctx);
+
+        if (showTextLabels && spec.encoding?.color?.field) {
+            const baseEncoding = spec.encoding || {};
+            const { x: xEncoding, y: yEncoding } = baseEncoding;
+            const span = effectiveMax - effectiveMin;
+            // Bracket access keeps the test expression valid when the field name is not a bare identifier (e.g. has spaces).
+            const datumRef = `datum[${JSON.stringify(colorField)}]`;
+            const cellMinDim = Math.min(ctx.layout.xStep || 50, ctx.layout.yStep || 50);
+            // Keep the in-cell value text small so cells can stay compact (close
+            // to the unlabeled heatmap). Cap at 9px and step down for tighter
+            // cells rather than growing the font/cells to fit it.
+            const labelFontSize = cellMinDim >= 40 ? 9 : cellMinDim >= 28 ? 8 : 7;
+            const labelFormat = cellMinDim >= 44 ? '.2f' : '.1f';
+
+            const sequentialPalette = HEATMAP_SCHEME_COLORS[schemeName || 'viridis'] || HEATMAP_SCHEME_COLORS.viridis;
+            const highIsLight = hexLuma(sequentialPalette[1]) >= hexLuma(sequentialPalette[0]);
+            const strongThreshold = span > 0
+                ? (isDiverging
+                    ? Math.max(Math.abs(effectiveMin), Math.abs(effectiveMax)) * 0.5
+                    : effectiveMin + span * 0.6)
+                : undefined;
+
+            spec.layer = [
+                {
+                    mark: spec.mark,
+                    encoding: {
+                        ...(xEncoding ? { x: xEncoding } : {}),
+                        ...(yEncoding ? { y: yEncoding } : {}),
+                        ...(baseEncoding.color ? { color: baseEncoding.color } : {}),
+                    },
+                },
+                {
+                    mark: {
+                        type: 'text',
+                        align: 'center',
+                        baseline: 'middle',
+                        fontSize: labelFontSize,
+                    },
+                    encoding: {
+                        ...(xEncoding ? { x: xEncoding } : {}),
+                        ...(yEncoding ? { y: yEncoding } : {}),
+                        text: {
+                            field: colorField,
+                            type: 'quantitative',
+                            format: labelFormat,
+                        },
+                        color: strongThreshold == null
+                            ? { value: 'black' }
+                            : {
+                                condition: {
+                                    test: isDiverging
+                                        ? `${datumRef} > ${strongThreshold} || ${datumRef} < ${-strongThreshold}`
+                                        : `${datumRef} >= ${strongThreshold}`,
+                                    value: isDiverging
+                                        ? 'white'
+                                        : (highIsLight ? 'black' : 'white'),
+                                },
+                                value: isDiverging
+                                    ? 'black'
+                                    : (highIsLight ? 'white' : 'black'),
+                            },
+                    },
+                },
+            ];
+            delete spec.mark;
+        }
     },
     properties: [
+        { key: 'showTextLabels', label: 'Show labels', type: 'binary', defaultValue: false },
+    ] as ChartPropertyDef[],
+    // Color scheme is an encoding-level edit (writes encoding.scheme on the
+    // color channel), so it is exposed as a Category-B encoding action rather
+    // than a chart-native property. The host stores the chosen value as an
+    // override in chartProperties.colorScheme; the compiler composes it onto the
+    // encoding (see applyEncodingOverrides). `dependencies` tells the host to
+    // reset the override when the color channel's binding changes in the shelf.
+    encodingActions: [
         {
-            key: "colorScheme", label: "Scheme", type: "discrete", options: [
-                { value: undefined, label: "Default" },
-                { value: "viridis", label: "Viridis" },
-                { value: "inferno", label: "Inferno" },
-                { value: "magma", label: "Magma" },
-                { value: "plasma", label: "Plasma" },
-                { value: "turbo", label: "Turbo" },
-                { value: "blues", label: "Blues" },
-                { value: "reds", label: "Reds" },
-                { value: "greens", label: "Greens" },
-                { value: "oranges", label: "Oranges" },
-                { value: "purples", label: "Purples" },
-                { value: "greys", label: "Greys" },
-                { value: "blueorange", label: "Blue-Orange (diverging)" },
-                { value: "redblue", label: "Red-Blue (diverging)" },
-            ],
+            key: 'colorScheme',
+            label: 'Scheme',
+            isApplicable: (ctx) => !!ctx.encodings.color?.field,
+            dependencies: ['color'],
+            control: {
+                type: 'discrete', options: [
+                    { value: undefined, label: "Default" },
+                    { value: "viridis", label: "Viridis" },
+                    { value: "inferno", label: "Inferno" },
+                    { value: "magma", label: "Magma" },
+                    { value: "plasma", label: "Plasma" },
+                    { value: "turbo", label: "Turbo" },
+                    { value: "blues", label: "Blues" },
+                    { value: "reds", label: "Reds" },
+                    { value: "greens", label: "Greens" },
+                    { value: "oranges", label: "Oranges" },
+                    { value: "purples", label: "Purples" },
+                    { value: "greys", label: "Greys" },
+                    { value: "blueorange", label: "Blue-Orange (diverging)" },
+                    { value: "redblue", label: "Red-Blue (diverging)" },
+                ],
+            },
+            get: (encodings) => encodings.color?.scheme,
+            set: (encodings, value) => ({ ...encodings, color: { ...encodings.color, scheme: value } }),
         },
-    ] as ChartPropertyDef[],
+    ] as EncodingActionDef[],
 };
diff --git a/src/lib/agents-chart/vegalite/templates/index.ts b/src/lib/agents-chart/vegalite/templates/index.ts
index 28102d33..03da7f4e 100644
--- a/src/lib/agents-chart/vegalite/templates/index.ts
+++ b/src/lib/agents-chart/vegalite/templates/index.ts
@@ -10,6 +10,7 @@
  */
 
 import { ChartTemplateDef } from '../../core/types';
+import type { ChartPropertyDef, OptionEvalContext } from '../../core/types';
 
 // --- Individual chart imports ---
 import { scatterPlotDef, regressionDef, rangedDotPlotDef, boxplotDef } from './scatter';
@@ -30,6 +31,203 @@ import { usMapDef, worldMapDef } from './map';
 import { customPointDef, customLineDef, customBarDef, customRectDef, customAreaDef } from './custom';
 import { kpiCardDef } from './kpi-card';
 
+/**
+ * Cross-cutting properties injected into every template that declares a
+ * `column` or `row` channel. `independentYAxis` lets the user give each facet
+ * its own y-scale. Its `check` reports *applicability* only (a facet field is
+ * bound *and* y is quantitative) and returns no `recommendedValue`; that default
+ * is layout-coupled (it depends on the resolved facet grid and the per-facet
+ * range spread) and is supplied by the compiler at assembly time.
+ */
+const FACET_AXIS_PROPERTIES: ChartPropertyDef[] = [
+    {
+        key: 'independentYAxis', label: 'Independent Y', type: 'binary',
+        check: (ctx) => ({
+            applicable:
+                (!!ctx.encodings.column?.field || !!ctx.encodings.row?.field) && // a column or row field is bound
+                ctx.channelSemantics?.y?.type === 'quantitative', // y type is read from channelSemantics
+        }),
+    },
+];
+
+/**
+ * Cross-cutting per-axis log-scale controls, injected into every
+ * position-cognitive template (scatter/line/strip — never length/area marks,
+ * where a zero baseline matters). The returned `check` decides per render if
+ * the axis is eligible (quantitative, non-negative, wide-range data) or already
+ * carries an explicit choice, and reports the recommended default. Each is a
+ * simple on/off toggle: ON forces a log/symlog scale, OFF forces linear.
+ */
+function makeLogScaleCheck(axis: 'x' | 'y') {
+    return (ctx: OptionEvalContext): { applicable: boolean; recommendedValue?: any } => {
+        const cs = ctx.channelSemantics?.[axis];
+        if (!cs?.field || cs.type !== 'quantitative') return { applicable: false }; // only bound quantitative axes qualify
+        let posMin = Infinity, posMax = -Infinity, posCount = 0, hasNegative = false;
+        for (const row of ctx.data ?? []) {
+            const v = row[cs.field];
+            if (typeof v !== 'number' || !isFinite(v)) continue; // skip non-numbers, NaN and infinities
+            if (v < 0) hasNegative = true;
+            else if (v > 0) { posCount++; if (v < posMin) posMin = v; if (v > posMax) posMax = v; } // exact zeros fall in neither branch
+        }
+        // Offer only on non-negative data with ≥ 5 positive values spanning ≥ 3
+        // orders of magnitude (max/min ≥ 1000); log is undefined for negatives.
+        const offerEligible = !hasNegative && posCount >= 5 && posMax / posMin >= 1000;
+        const choice = ctx.chartProperties?.[`logScale_${axis}`]; // explicit host choice, if any
+        // The engine's recommendation survives in cs.scaleType whenever it
+        // matters: an unset choice never overrides it, and a set choice wins via
+        // chartProperties anyway (so recommendedValue is moot in that case).
+        const recommendsLog = cs.scaleType === 'log' || cs.scaleType === 'symlog';
+        return {
+            applicable: offerEligible || choice === true || choice === false, // an explicit on/off choice keeps the toggle visible
+            recommendedValue: recommendsLog,
+        };
+    };
+}
+
+const LOG_SCALE_PROPERTIES: ChartPropertyDef[] = [ // one binary toggle per position axis, statically defaulting to false
+    {
+        key: 'logScale_x', label: 'Log X', type: 'binary', defaultValue: false,
+        check: makeLogScaleCheck('x'), // reads chartProperties.logScale_x as the explicit choice
+    },
+    {
+        key: 'logScale_y', label: 'Log Y', type: 'binary', defaultValue: false,
+        check: makeLogScaleCheck('y'), // reads chartProperties.logScale_y as the explicit choice
+    },
+];
+
+/**
+ * Cross-cutting per-axis zero-baseline controls, injected into every
+ * position-cognitive template (scatter/line/strip — never length/area marks,
+ * where the baseline is structurally required). Each is an on/off toggle: ON
+ * anchors the axis at zero, OFF lets it fit the data range.
+ *
+ * The control is *passive*: it never re-derives a zero recommendation of its
+ * own. The engine already decided (computeZeroDecision → cs.zero); the `check`
+ * just reads that decision. To keep the UI uncluttered it offers the toggle
+ * ONLY when the engine flags the choice as a genuine toss-up worth surfacing
+ * (`cs.zero.uncertain === true`) — i.e. a zero-meaningful field on a position
+ * mark whose data sits far enough from zero that anchoring at zero would
+ * noticeably compress the view. Every other case (arbitrary types where zero is
+ * meaningless, contextual data-range calls, meaningful data that already spans
+ * to zero, and forced/unknown cases) is hidden, since there is nothing to
+ * debate. The recommended default is the engine's own `cs.zero.zero`. Once the
+ * host has set an explicit value the toggle stays visible so the choice can be
+ * reverted.
+ */
+function makeZeroBaselineCheck(axis: 'x' | 'y') {
+    return (ctx: OptionEvalContext): { applicable: boolean; recommendedValue?: any } => {
+        const cs = ctx.channelSemantics?.[axis];
+        if (!cs?.field || cs.type !== 'quantitative') return { applicable: false }; // only bound quantitative axes qualify
+        const decision = cs.zero;
+        if (!decision) return { applicable: false }; // no zero decision recorded on this axis: nothing to surface
+        const choice = ctx.chartProperties?.[`includeZero_${axis}`]; // explicit host choice, if any
+        return {
+            applicable: decision.uncertain || choice === true || choice === false, // toss-up, or an explicit choice that can be reverted
+            recommendedValue: decision.zero, // passed through unchanged from cs.zero
+        };
+    };
+}
+
+const ZERO_BASELINE_PROPERTIES: ChartPropertyDef[] = [ // no static defaultValue is declared here; each check reports a recommendedValue
+    {
+        key: 'includeZero_x', label: 'Zero X', type: 'binary',
+        check: makeZeroBaselineCheck('x'), // reads chartProperties.includeZero_x as the explicit choice
+    },
+    {
+        key: 'includeZero_y', label: 'Zero Y', type: 'binary',
+        check: makeZeroBaselineCheck('y'), // reads chartProperties.includeZero_y as the explicit choice
+    },
+];
+
+/**
+ * Cross-cutting axis dtype toggle, injected into banded/categorical-axis
+ * templates (bar, line, area, lollipop) whose category axis carries a genuine
+ * *dual* interpretation: a date-like field the resolver classified as
+ * `temporal` but whose distinct values also form a modest, readable set of
+ * discrete labels (e.g. year-month buckets like "2010-01"). The control lets
+ * the user force that axis between a continuous time scale (`temporal`) and
+ * discrete bands (`nominal`). It applies to *either* position axis — x on a
+ * vertical bar/line, y on a horizontal (transposed) bar/lollipop. The chosen
+ * value is applied at the *encoding* level (encoding.<axis>.type) by the
+ * assembler, so the whole pipeline — sorting, layout, formatting — honors the
+ * override (resolveChannelSemantics treats an explicit encoding.type as
+ * authoritative). See assembleVegaLite.
+ *
+ * The set below names the qualifying charts by `def.chart` (a position axis is
+ * a discrete-capable band that also accepts a continuous time scale).
+ */
+const AXIS_DTYPE_CHARTS = new Set([
+    'Bar Chart', 'Line Chart', 'Area Chart', 'Lollipop Chart',
+]);
+
+/** Upper bound (inclusive) on distinct values for offering the toggle: beyond
+ *  it, discrete bands are unreadable and only the continuous time scale fits. */
+const AXIS_DTYPE_MAX_CATEGORIES = 50;
+
+function makeAxisDtypeCheck(axis: 'x' | 'y') { // builds the `check` for the xAxisType / yAxisType property
+    return (ctx: OptionEvalContext): { applicable: boolean; recommendedValue?: any } => {
+        const cs = ctx.channelSemantics?.[axis];
+        if (!cs?.field) return { applicable: false }; // nothing bound on this axis
+        // Once the user picks a value the override flips cs.type, so any explicit
+        // choice keeps the control visible (cs.type is not re-checked here).
+        const choice = ctx.chartProperties?.[`${axis}AxisType`];
+        if (choice != null) return { applicable: true, recommendedValue: 'temporal' };
+        // Otherwise offer only the genuine dual-interpretation case: a
+        // date-like axis the resolver made temporal, with between 2 and
+        // AXIS_DTYPE_MAX_CATEGORIES distinct values so discrete bands stay readable.
+        if (cs.type !== 'temporal') return { applicable: false };
+        const distinct = new Set(
+            (ctx.data ?? []).map(r => r[cs.field]).filter(v => v != null && v !== ''), // null, undefined and '' are not counted
+        );
+        const dual = distinct.size >= 2 && distinct.size <= AXIS_DTYPE_MAX_CATEGORIES;
+        return { applicable: dual, recommendedValue: 'temporal' };
+    };
+}
+
+const AXIS_DTYPE_PROPERTIES: ChartPropertyDef[] = [ // one temporal/nominal selector per position axis
+    {
+        key: 'xAxisType', label: 'X as', type: 'discrete',
+        options: [
+            { value: 'temporal', label: 'Temporal' }, // continuous time scale
+            { value: 'nominal', label: 'Discrete' }, // discrete bands
+        ],
+        check: makeAxisDtypeCheck('x'), // reads chartProperties.xAxisType as the explicit choice
+    },
+    {
+        key: 'yAxisType', label: 'Y as', type: 'discrete',
+        options: [
+            { value: 'temporal', label: 'Temporal' }, // continuous time scale
+            { value: 'nominal', label: 'Discrete' }, // discrete bands
+        ],
+        check: makeAxisDtypeCheck('y'), // reads chartProperties.yAxisType as the explicit choice
+    },
+];
+
+/**
+ * Attach the cross-cutting properties (faceting, log scale, zero baseline, axis
+ * dtype) a template qualifies for, based on its channels, mark-cognitive role
+ * and chart name. Keeps these options co-located with the engine that evaluates
+ * them, so a downstream consumer of Flint sees a self-describing catalog.
+ * Idempotent: a property the template already declares under the same key wins.
+ */
+function withInjectedProperties(def: ChartTemplateDef): ChartTemplateDef {
+    const hasFacetChannels = def.channels?.some(ch => ch === 'column' || ch === 'row');
+    const isPosition = def.markCognitiveChannel === 'position';
+    const wantsAxisDtype = AXIS_DTYPE_CHARTS.has(def.chart);
+    const extra: ChartPropertyDef[] = [
+        ...(hasFacetChannels ? FACET_AXIS_PROPERTIES : []),
+        ...(isPosition ? LOG_SCALE_PROPERTIES : []),
+        ...(isPosition ? ZERO_BASELINE_PROPERTIES : []),
+        ...(wantsAxisDtype ? AXIS_DTYPE_PROPERTIES : []),
+    ];
+    if (extra.length === 0) return def; // nothing to inject: the same object is returned
+    const ownKeys = new Set((def.properties ?? []).map(p => p.key)); // keys the template declares itself
+    return {
+        ...def, // shallow copy; the input def is not mutated
+        properties: [...(def.properties ?? []), ...extra.filter(p => !ownKeys.has(p.key))], // own properties first, then non-colliding injected ones
+    };
+}
+
 /**
  * All chart template definitions, grouped by category.
  * Keys are category names shown in the UI, values are arrays of template definitions.
@@ -38,15 +236,17 @@ import { kpiCardDef } from './kpi-card';
  * their dominant visual primitive (point, bar, line/area, etc.). This keeps
  * placement objective and the picker readable.
  */
-export const vlTemplateDefs: { [key: string]: ChartTemplateDef[] } = {
-    "Points":          [scatterPlotDef, regressionDef, rangedDotPlotDef, stripPlotDef],
-    "Bars":            [barChartDef, groupedBarChartDef, stackedBarChartDef, lollipopChartDef, waterfallChartDef],
-    "Distributions":   [histogramDef, densityPlotDef, boxplotDef, pyramidChartDef, candlestickChartDef],
-    "Lines & Areas":   [lineChartDef, bumpChartDef, areaChartDef, streamgraphDef],
-    "Circular":        [pieChartDef, roseChartDef, radarChartDef],
-    "Tables & Maps":   [heatmapDef, barTableDef, kpiCardDef, usMapDef, worldMapDef],
-    "Custom":          [customPointDef, customLineDef, customBarDef, customRectDef, customAreaDef],
-};
+export const vlTemplateDefs: { [key: string]: ChartTemplateDef[] } = Object.fromEntries( // same categories as before, rebuilt from mapped entries
+    Object.entries({
+        "Points":          [scatterPlotDef, regressionDef, rangedDotPlotDef, stripPlotDef],
+        "Bars":            [barChartDef, groupedBarChartDef, stackedBarChartDef, lollipopChartDef, waterfallChartDef],
+        "Distributions":   [histogramDef, densityPlotDef, boxplotDef, pyramidChartDef, candlestickChartDef],
+        "Lines & Areas":   [lineChartDef, bumpChartDef, areaChartDef, streamgraphDef],
+        "Circular":        [pieChartDef, roseChartDef, radarChartDef],
+        "Tables & Maps":   [heatmapDef, barTableDef, kpiCardDef, usMapDef, worldMapDef],
+        "Custom":          [customPointDef, customLineDef, customBarDef, customRectDef, customAreaDef],
+    }).map(([category, defs]) => [category, defs.map(withInjectedProperties)]), // each template passes through withInjectedProperties
+);
 
 /**
  * Flat list of all Vega-Lite chart template definitions.
diff --git a/src/lib/agents-chart/vegalite/templates/kpi-card.ts b/src/lib/agents-chart/vegalite/templates/kpi-card.ts
index a09c7d63..be2ffb2a 100644
--- a/src/lib/agents-chart/vegalite/templates/kpi-card.ts
+++ b/src/lib/agents-chart/vegalite/templates/kpi-card.ts
@@ -516,7 +516,7 @@ export const kpiCardDef: ChartTemplateDef = {
             max: 1,
             step: 0.05,
             defaultValue: 0.5,
-            visibleWhen: { channels: ['goal'] },
+            check: (ctx) => ({ applicable: !!ctx.encodings.goal?.field }),
         },
     ] as ChartPropertyDef[],
 };
diff --git a/src/lib/agents-chart/vegalite/templates/lollipop.ts b/src/lib/agents-chart/vegalite/templates/lollipop.ts
index 96fe54f0..7eeba7d2 100644
--- a/src/lib/agents-chart/vegalite/templates/lollipop.ts
+++ b/src/lib/agents-chart/vegalite/templates/lollipop.ts
@@ -1,7 +1,8 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT License.
 
-import { ChartTemplateDef, ChartPropertyDef } from '../../core/types';
+import { ChartTemplateDef, ChartPropertyDef, EncodingActionDef } from '../../core/types';
+import { makeSortAction } from '../../core/encoding-actions';
 import { detectBandedAxisFromSemantics, setMarkProp } from './utils';
 
 export const lollipopChartDef: ChartTemplateDef = {
@@ -128,4 +129,5 @@ export const lollipopChartDef: ChartTemplateDef = {
     properties: [
         { key: "dotSize", label: "Dot Size", type: "continuous", min: 20, max: 300, step: 10, defaultValue: 80 },
     ] as ChartPropertyDef[],
+    encodingActions: [makeSortAction()] as EncodingActionDef[],
 };
diff --git a/src/views/ChartQuickConfig.tsx b/src/views/ChartQuickConfig.tsx
index 3a3a0b59..37508811 100644
--- a/src/views/ChartQuickConfig.tsx
+++ b/src/views/ChartQuickConfig.tsx
@@ -9,18 +9,98 @@
 
 import { FC } from 'react';
 import React from 'react';
+import { useTranslation } from 'react-i18next';
 import { useSelector, useDispatch } from 'react-redux';
-
-import { Box, Typography, Select, MenuItem, useTheme } from '@mui/material';
+import { Box, Typography, Select, MenuItem, useTheme, Tooltip, IconButton, Divider } from '@mui/material';
+import DeleteIcon from '@mui/icons-material/Delete';
 
 import { DataFormulatorState, dfActions, dfSelectors } from '../app/dfSlice';
 import { AppDispatch } from '../app/store';
-import { Channel, Chart, VariantConfigControl } from '../components/ComponentType';
+import { Chart, FieldItem, VariantConfigControl } from '../components/ComponentType';
 import { getChartTemplate } from '../components/ChartTemplates';
+import { ChartEncoding, ChartOption, EncodingActionDef } from '../lib/agents-chart';
 import { ConfigSlider } from './EncodingShelfCard';
 
 export interface ChartQuickConfigProps {
     chartId: string;
+    /**
+     * Working-table metadata (`{ [fieldName]: { type, semanticType, ... } }`).
+     * Its `type` resolves a field's concrete encoding type when the encoding's
+     * own `dtype` is unset ("auto") — so type-aware action applicability (e.g.
+     * Sort needing a discrete category axis) matches what the compiler renders.
+     */
+    tableMetadata?: Record<string, { type?: string; semanticType?: string }>;
+    /**
+     * Flint's annotated option catalog for the current render (the spec's
+     * `_options`). Each entry carries `applicable` (whether the option passed its
+     * precondition for this spec + data) and `value` (the value the compiler will
+     * use — host choice if set, else the engine's recommended default). The bar
+     * renders a control for every applicable option, seeded from `value`. When
+     * absent (spec not yet rendered) no chart-property controls are shown.
+     */
+    options?: ChartOption[];
+    /**
+     * When true, the built-in delete-chart button is rendered disabled (e.g.
+     * while a synthesis/transform is in flight for this chart).
+     */
+    deleteDisabled?: boolean;
+}
+
+/**
+ * Map a working-table column `Type` to the 4-way ChartEncoding type the
+ * encoding-action checks reason about (only quantitative/temporal/nominal are
+ * produced here). The table `type` is already concretely inferred (not "auto")
+ * for loaded tables, so this is a reliable fallback for an unset `dtype`.
+ */
+function tableTypeToEncodingType(t?: string): ChartEncoding['type'] | undefined {
+    switch (t) {
+        case 'number':
+        case 'integer':
+        case 'duration': // durations are grouped with the numeric types
+            return 'quantitative';
+        case 'date':
+        case 'datetime':
+        case 'time':
+            return 'temporal';
+        case 'string':
+        case 'boolean': // booleans are treated as categories
+            return 'nominal';
+        default: // unrecognised or missing table type: no fallback available
+            return undefined;
+    }
+}
+
+/**
+ * Build a ChartEncoding-shaped view of a chart's encoding map so encoding
+ * actions can derive their state and produce a new map against the same model
+ * the encoding shelf uses (single source of truth). Field identity is exposed
+ * as the field *name* (per ChartEncoding's contract); the reducer translates
+ * names back to fieldIDs when applying the result.
+ *
+ * When an encoding's own `dtype` is null/undefined ("auto"), the type falls back
+ * to the working-table metadata so downstream type-aware checks (e.g. Sort) see
+ * the same concrete type the compiler will use.
+ */
+function buildEncodings(
+    chart: Chart,
+    conceptShelfItems: FieldItem[],
+    tableMetadata?: Record<string, { type?: string; semanticType?: string }>,
+): Record<string, ChartEncoding> {
+    const out: Record<string, ChartEncoding> = {};
+    for (const [channel, item] of Object.entries(chart.encodingMap)) {
+        const field = item.fieldID ? conceptShelfItems.find(f => f.id === item.fieldID) : undefined; // unbound channel or unknown id gives undefined
+        const resolvedType = item.dtype
+            ?? (field?.name ? tableTypeToEncodingType(tableMetadata?.[field.name]?.type) : undefined); // metadata fallback needs a resolved field name
+        out[channel] = { // every channel in the map gets an entry, bound or not
+            field: field?.name,
+            type: resolvedType,
+            aggregate: item.aggregate,
+            sortOrder: item.sortOrder,
+            sortBy: item.sortBy,
+            scheme: item.scheme,
+        };
+    }
+    return out;
 }
 
 /**
@@ -37,12 +117,18 @@ type QuickControl = {
     step?: number;
     options?: { value: any; label: string }[];
     defaultValue?: any;
+    /** Resolved value from Flint (host choice if set, else recommended default). */
+    value?: any;
+    /** Present when this control is a Category-B encoding action (its value is stored in chart.config and composed onto the encodings by the compiler). */
+    encodingAction?: EncodingActionDef;
 };
 
-export const ChartQuickConfig: FC<ChartQuickConfigProps> = function ({ chartId }) {
+export const ChartQuickConfig: FC<ChartQuickConfigProps> = function ({ chartId, tableMetadata, options, deleteDisabled }) {
+    const { t } = useTranslation('chart');
     const theme = useTheme();
     const dispatch = useDispatch<AppDispatch>();
     const allCharts = useSelector(dfSelectors.getAllCharts);
+    const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems);
     const chart = allCharts.find((c: Chart) => c.id == chartId) as Chart | undefined;
 
     if (!chart) return null;
@@ -61,8 +147,7 @@ export const ChartQuickConfig: FC<ChartQuickConfigProps> = function ({ chartId }
 
     if (activeVariant) {
         const configUI = activeVariant.configUI;
-        if (!configUI || configUI.length === 0) return null;
-        controls = configUI.map((c: VariantConfigControl) => ({
+        controls = (configUI ?? []).map((c: VariantConfigControl) => ({
             key: c.key,
             label: c.label,
             type: c.type,
@@ -81,22 +166,68 @@ export const ChartQuickConfig: FC<ChartQuickConfigProps> = function ({ chartId }
         }));
     } else {
         const template = getChartTemplate(chart.chartType);
-        const configProps = template?.properties;
-        if (!configProps || configProps.length === 0) return null;
-
-        // Filter to visible properties (respecting visibleWhen channel predicates).
-        const visibleProps = configProps.filter((propDef) => {
-            if (propDef.visibleWhen?.channels) {
-                return propDef.visibleWhen.channels.some(
-                    ch => chart.encodingMap[ch as Channel]?.fieldID != null
-                );
-            }
-            return true;
-        });
-        if (visibleProps.length === 0) return null;
-        controls = visibleProps as QuickControl[];
-        getValue = (control) => chart.config?.[control.key] ?? control.defaultValue;
-        commit = (control, value) => dispatch(dfActions.updateChartConfig({ chartId, key: control.key, value }));
+        const encodingActions = template?.encodingActions ?? [];
+
+        // Encoding view used both for type-aware applicability checks and for
+        // deriving an action's displayed value from the base encoding.
+        const encodingsView = buildEncodings(chart, conceptShelfItems, tableMetadata);
+
+        // Category-A controls: chart-native config (chart.config). Flint already
+        // evaluated each property's applicability (structural channel-assignment
+        // and data-aware preconditions like log-scale eligibility) and resolved
+        // its value, so the host just renders the applicable ones seeded from
+        // `value`. See ChartOption / getChartOptions.
+        const propControls: QuickControl[] = (options ?? [])
+            .filter(opt => opt.applicable)
+            .map(opt => ({
+                key: opt.key,
+                label: opt.label,
+                type: opt.type,
+                min: opt.type === 'continuous' ? opt.min : undefined,
+                max: opt.type === 'continuous' ? opt.max : undefined,
+                step: opt.type === 'continuous' ? opt.step : undefined,
+                options: opt.type === 'discrete' ? opt.options : undefined,
+                defaultValue: opt.defaultValue,
+                value: opt.value,
+            }));
+
+        // Category-B controls: encoding actions. The chosen value is stored as
+        // a config override (chart.config[key]) and composed onto the encoding
+        // by the compiler at assemble time — not written into the encoding map.
+        // A single `isApplicable` predicate gates visibility: it inspects the
+        // base encodings, so it covers both channel assignment (is a channel
+        // bound?) and type fit (e.g. Sort needs a discrete category axis).
+        const actionControls: QuickControl[] = encodingActions
+            .filter(action => !action.isApplicable || action.isApplicable({ encodings: encodingsView }))
+            .map(action => ({
+                key: action.key,
+                label: action.label,
+                type: action.control.type,
+                min: action.control.type === 'continuous' ? action.control.min : undefined,
+                max: action.control.type === 'continuous' ? action.control.max : undefined,
+                step: action.control.type === 'continuous' ? action.control.step : undefined,
+                options: action.control.type === 'discrete' ? action.control.options : undefined,
+                encodingAction: action,
+            }));
+
+        controls = [...propControls, ...actionControls];
+        // Encoding-action value is a configuration *override* stored in
+        // chart.config (keyed by the action key), exactly like a chart property.
+        // It is NOT written into the encoding map — Flint composes the override
+        // onto the encodings at assemble time (applyEncodingOverrides). When no
+        // override is set, fall back to the value derived from the base encoding.
+        // For chart properties, `control.value` is Flint's resolved value (host
+        // choice if set, else the engine's recommended default) so an "auto"
+        // recommendation is reflected without re-deriving it here.
+        getValue = (control) => control.encodingAction
+            ? (chart.config?.[control.key] ?? control.encodingAction.get(encodingsView))
+            : chart.config?.[control.key] ?? control.value ?? control.defaultValue;
+        commit = (control, value) => {
+            // Both categories commit the same way: a value in chart.config.
+            // Category A tweaks the assembled spec; Category B is composed onto
+            // the encodings by the compiler. The host stays out of the transform.
+            dispatch(dfActions.updateChartConfig({ chartId, key: control.key, value }));
+        };
     }
 
     return (
@@ -192,6 +323,23 @@ export const ChartQuickConfig: FC<ChartQuickConfigProps> = function ({ chartId }
                     </Box>
                 );
             })}
+            {/* Built-in chart-level action: delete this chart. Lives in the
+                property-config bar so the chart's controls and its delete
+                action sit together. It is rendered unconditionally here, so with
+                no applicable controls it stands alone in the bar. NOTE(review):
+                no divider is rendered before it, although `Divider` is imported. */}
+            <Tooltip title={t('deleteChart')}>
+                <span>
+                    <IconButton
+                        size="small"
+                        disabled={deleteDisabled}
+                        onClick={() => dispatch(dfActions.deleteChartById(chartId))}
+                        sx={{ color: 'text.disabled','&:hover': { color: 'error.main', backgroundColor: 'rgba(211, 47, 47, 0.08)' } }}
+                    >
+                        <DeleteIcon sx={{ fontSize: 16 }} />
+                    </IconButton>
+                </span>
+            </Tooltip>
         </Box>
         </Box>
     );
diff --git a/src/views/ChatDialog.tsx b/src/views/ChatDialog.tsx
index c74d3187..3f843398 100644
--- a/src/views/ChatDialog.tsx
+++ b/src/views/ChatDialog.tsx
@@ -12,12 +12,14 @@ import {
 		Dialog,
         DialogTitle,
         DialogContent,
-        DialogActions,
         Button,
+        IconButton,
         styled,
         CardContent,
 } from '@mui/material';
 
+import CloseIcon from '@mui/icons-material/Close';
+import QuestionAnswerIcon from '@mui/icons-material/QuestionAnswer';
 import React from 'react';
 import { alpha, useTheme } from '@mui/material/styles';
 import { CodeBox } from './VisualizationView';
@@ -423,18 +425,25 @@ export const ChatDialog: FC<ChatDialogProps> = function ChatDialog({code, dialog
 
     return (
         <Dialog
-            sx={{ '& .MuiDialog-paper': { maxWidth: '95%', maxHeight: '90%', minWidth: 300 } }}
-            maxWidth={false}
+            sx={{ '& .MuiDialog-paper': { maxHeight: '90%' } }}
+            maxWidth="md"
+            fullWidth
             open={open}
+            onClose={() => { handleCloseDialog() }}
             key="chat-dialog-dialog"
         >
-            <DialogTitle><Typography>{t('chatDialog.agentLog')}</Typography></DialogTitle>
+            <DialogTitle sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', py: 1.25 }}>
+                <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
+                    <QuestionAnswerIcon sx={{ fontSize: 18, color: 'text.secondary' }} />
+                    <Typography sx={{ fontSize: 14, fontWeight: 600 }}>{t('chatDialog.agentLog')}</Typography>
+                </Box>
+                <IconButton size="small" aria-label={t('app.close')} onClick={() => { handleCloseDialog() }}>
+                    <CloseIcon sx={{ fontSize: 18 }} />
+                </IconButton>
+            </DialogTitle>
             <DialogContent ref={dialogContentRef} sx={{overflowY: "auto", overflowX: "hidden"}} dividers>
                 {body}
             </DialogContent>
-            <DialogActions>
-                <Button onClick={()=>{ handleCloseDialog() }}>{t('app.close')}</Button>
-            </DialogActions>
         </Dialog>
     );
 }
\ No newline at end of file
diff --git a/src/views/EncodingBox.tsx b/src/views/EncodingBox.tsx
index 26a929be..9052f31c 100644
--- a/src/views/EncodingBox.tsx
+++ b/src/views/EncodingBox.tsx
@@ -48,7 +48,7 @@ import _ from 'lodash';
 
 import '../scss/EncodingShelf.scss';
 import AnimateHeight from 'react-animate-height';
-import { getIconFromDtype, getIconFromType, groupConceptItems } from './ViewUtils';
+import { getIconFromDtype, getIconFromType } from './ViewUtils';
 import { getUrls, fetchWithIdentity } from '../app/utils';
 import { apiRequest } from '../app/apiClient';
 import { Type } from '../data/types';
@@ -555,26 +555,13 @@ export const EncodingBox: FC<EncodingBoxProps> = function EncodingBox({ channel,
     }
 
 
-    let conceptGroups = groupConceptItems(conceptShelfItems, tables);
-
-    // Field names selectable in this encoding shelf: only fields that exist in
-    // the current table. Anything else cannot be assigned here.
-    let availableFieldNames = conceptGroups
-        .filter(g => activeTable ? activeTable.names.includes(g.field.name) : true)
-        .map(g => g.field.name)
+    // Field names selectable in this encoding shelf, listed in the same order as
+    // the columns of the current table. Only fields that exist in the table can
+    // be assigned here, so the table's column list is the source of truth — no
+    // need to derive or group them from the concept shelf.
+    let availableFieldNames = (activeTable ? activeTable.names : conceptShelfItems.map(f => f.name))
         .filter(name => name != "");
 
-    let groupNames = [...new Set(conceptGroups.map(g => g.group))];
-    conceptGroups.sort((a, b) => {
-        if (groupNames.indexOf(a.group) < groupNames.indexOf(b.group)) {
-            return -1;
-        } else if (groupNames.indexOf(a.group) > groupNames.indexOf(b.group)) {
-            return 1;
-        } else {
-            return activeTable && activeTable.names.includes(a.field.name) && !activeTable.names.includes(b.field.name) ? -1 : 1;
-        }
-    })
-
     // Smart Popper component that switches between bottom-end and top-end
     const CustomPopper = (props: any) => {
         return (
@@ -647,129 +634,30 @@ export const EncodingBox: FC<EncodingBoxProps> = function EncodingBox({ channel,
             // Value selected with enter, right from the input
             return option;
         }}
-        groupBy={(option) => {
-            let groupItem = conceptGroups.find(item => item.field.name == option);
-            if (groupItem && groupItem.field.name != "") {
-                return `${groupItem.group}`;
-            } else {
-                return t('encoding.createNewFieldGroup')
-            }         
-        }}
-        renderGroup={(params) => (
-            <Box key={params.key}>
-              <Box className="GroupHeader">{params.group}</Box>
-              <Box className="GroupItems" sx={{ 
-                display: 'grid', 
-                gridTemplateColumns: 'repeat(2, 1fr)', 
-                padding: '4px'
-              }}>
-                {params.children}
-              </Box>
-            </Box>
-        )}
         renderOption={(props, option) => {
-            let renderOption = (conceptShelfItems.map(f => f.name).includes(option)) ? option : `${option}`;
-            let otherStyle = option == `` ? {color: "darkgray", fontStyle: "italic"} : {}
-
-            // Find the field item for this option
-            const fieldItem = conceptShelfItems.find(f => f.name === option);
-            
-            if (fieldItem) {
-                // Create a mini concept card
-                let backgroundColor = theme.palette.primary.main;
-                if (fieldItem.source == "original") {
-                    backgroundColor = theme.palette.primary.light;
-                } else if (fieldItem.source == "custom") {
-                    backgroundColor = theme.palette.custom.main;
-                }
-
-                // Add overlay logic similar to ConceptCard - make fields not in focused table more transparent
-                let draggleCardHeaderBgOverlay = 'rgba(255, 255, 255, 0.9)';
-                
-                // Add subtle tint for non-focused fields
-                if (activeTable && !activeTable.names.includes(fieldItem.name)) {
-                    draggleCardHeaderBgOverlay = 'rgba(255, 255, 255, 1)';
-                }
-
-                // Extract only the compatible props for Card
-                const { key, ...cardProps } = props;
-
-                return (
-                    <Card 
-                        key={key}
-                        onClick={() => handleSelectOption(option)}
-                        sx={{ 
-                            minWidth: 80, 
-                            backgroundColor, 
-                            position: "relative",
-                            border: "none",
-                            cursor: "pointer",
-                            margin: '2px 4px',
-                            "&:hover": {
-                                boxShadow: "0 2px 4px 0 rgb(0 0 0 / 20%)"
-                            }
-                        }}
-                        variant="outlined"
-                        className={`data-field-list-item draggable-card`}
-                    >
-                        <Box sx={{ 
-                                cursor: "pointer", 
-                                background: draggleCardHeaderBgOverlay,
-                                display: 'flex',
-                                alignItems: 'center',
-                                minHeight: '20px',
-                                ml: 0.5
-                            }}
-                            className={`draggable-card-inner ${fieldItem.source}`}
-                        >
-                            <Typography sx={{
-                                margin: '0px 4px',
-                                fontSize: 10, 
-                                width: "100%",
-                                display: 'flex',
-                                alignItems: 'center',
-                                gap: '4px',
-                            }} component={'span'}>
-                                {getIconFromType(activeTable?.metadata[fieldItem.name]?.type || Type.Auto)}
-                                <span style={{
-                                    whiteSpace: "nowrap",
-                                    overflow: "hidden", 
-                                    textOverflow: "ellipsis",
-                                    flexShrink: 1
-                                }}>
-                                    {fieldItem.name}
-                                </span>
-                            </Typography>
-                        </Box>
-                    </Card>
-                );
-            } else {
-                // For non-existing options (like new field creation)
-                return (
-                    <Typography 
-                        {...props} 
-                        onClick={() => handleSelectOption(option)}
-                        sx={{
-                            fontSize: "10px", 
-                            padding: '4px 6px',
-                            margin: '2px 4px',
-                            cursor: 'pointer',
-                            border: '1px dashed #ccc',
-                            borderRadius: '4px',
-                            backgroundColor: 'rgba(0,0,0,0.02)',
-                            height: '24px',
-                            display: 'flex',
-                            alignItems: 'center',
-                            "&:hover": {
-                                backgroundColor: 'rgba(0,0,0,0.05)'
-                            },
-                            ...otherStyle
-                        }}
-                    >
-                        {renderOption || t('encoding.newFieldNamePlaceholder')}
-                    </Typography>
-                );
-            }
+            // Pull `key` out so it is passed explicitly instead of through the spread.
+            const { key, ...liProps } = props;
+            const dtype = activeTable?.metadata[option]?.type || Type.Auto;
+            return (
+                // Rendered as an <li>: Autocomplete's option props target an li in its listbox.
+                <Box
+                    key={key}
+                    component="li"
+                    {...liProps}
+                    // After the spread so this handler takes precedence over any onClick in `liProps`.
+                    onClick={() => handleSelectOption(option)}
+                    sx={{
+                        display: 'flex', alignItems: 'center', gap: '6px',
+                        fontSize: 11, cursor: 'pointer', padding: '4px 8px !important',
+                        '&:hover': { backgroundColor: 'rgba(0,0,0,0.05)' },
+                    }}
+                >
+                    {getIconFromType(dtype)}
+                    <span style={{ whiteSpace: 'nowrap', overflow: 'hidden', textOverflow: 'ellipsis' }}>
+                        {option}
+                    </span>
+                </Box>
+            );
         }}
         renderInput={(params) => (
             <TextField {...params} variant="standard" autoComplete='off' placeholder={t('encoding.fieldPlaceholder')}
diff --git a/src/views/EncodingShelfCard.tsx b/src/views/EncodingShelfCard.tsx
index 7e042cf5..909f4fb6 100644
--- a/src/views/EncodingShelfCard.tsx
+++ b/src/views/EncodingShelfCard.tsx
@@ -1426,6 +1426,12 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
                         onOpen={() => setChartTypeMenuOpen(true)}
                         onClose={() => setChartTypeMenuOpen(false)}
                         MenuProps={{
+                            // Use a plain "menu" rather than the default
+                            // "selectedMenu": the latter shifts the popup up so
+                            // the selected item overlaps the trigger (and
+                            // auto-scrolls to it), which makes the dropdown feel
+                            // like it jumps. "menu" simply opens straight below.
+                            variant: 'menu',
                             anchorOrigin: {
                                 vertical: 'bottom',
                                 horizontal: 'left',
@@ -1436,6 +1442,8 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
                             },
                             PaperProps: {
                                 sx: {
+                                    mt: 1,
+                                    maxHeight: '60vh',
                                     '& .MuiList-root': {
                                         display: 'grid',
                                         gridTemplateColumns: '1fr 1fr',
@@ -1520,15 +1528,23 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
                     const template = getChartTemplate(chart.chartType);
                     const configProps = template?.properties;
                     if (!configProps || configProps.length === 0) return null;
+                    // Minimal encodings view for a property's own `check`. This
+                    // static popover has no live data/semantics, so a data-aware
+                    // property's check returns `applicable: false` here and the
+                    // property self-hides — surfacing only in the quick-config bar.
+                    const shelfEncodings: Record<string, { field: any }> = {};
+                    for (const ch of Object.keys(chart.encodingMap)) {
+                        const fieldID = chart.encodingMap[ch as Channel]?.fieldID;
+                        if (fieldID != null) shelfEncodings[ch] = { field: fieldID };
+                    }
                     return (
                         <Box sx={{ display: 'flex', flexDirection: 'column', gap: '1px', mb: '6px' }}>
                             {configProps.map((propDef) => {
-                                // App-level visibility: hide properties whose visibleWhen channels aren't assigned
-                                if (propDef.visibleWhen?.channels) {
-                                    const hasAny = propDef.visibleWhen.channels.some(
-                                        ch => chart.encodingMap[ch as Channel]?.fieldID != null
-                                    );
-                                    if (!hasAny) return null;
+                                // A property gates itself via its own applicability
+                                // check. Without one it is always shown.
+                                if (propDef.check &&
+                                    !propDef.check({ encodings: shelfEncodings as any }).applicable) {
+                                    return null;
                                 }
                                 if (propDef.type === 'continuous') {
                                     const currentValue = chart.config?.[propDef.key] ?? propDef.defaultValue ?? propDef.min ?? 0;
diff --git a/src/views/ExplComponents.tsx b/src/views/ExplComponents.tsx
index 7856545a..699dd98c 100644
--- a/src/views/ExplComponents.tsx
+++ b/src/views/ExplComponents.tsx
@@ -24,7 +24,7 @@ import InfoIcon from '@mui/icons-material/Info';
 // Helper function to render text with LaTeX math expressions
 const renderWithMath = (text: string) => {
 
-    const parts: Array<{ type: 'text' | 'inline' | 'block', content: string }> = [];
+    const parts: Array<{ type: 'text' | 'inline' | 'block' | 'code', content: string }> = [];
     let currentIndex = 0;
     let currentText = '';
     
@@ -91,6 +91,29 @@ const renderWithMath = (text: string) => {
                 currentIndex++;
             }
         }
+        // Check for inline code `...`
+        else if (text[currentIndex] === '`') {
+            // Find the closing backtick
+            let codeEnd = currentIndex + 1;
+            while (codeEnd < text.length && text[codeEnd] !== '`') {
+                codeEnd++;
+            }
+
+            if (codeEnd < text.length) {
+                // Found complete inline code span
+                if (currentText) {
+                    parts.push({ type: 'text', content: currentText });
+                    currentText = '';
+                }
+                const codeContent = text.slice(currentIndex + 1, codeEnd);
+                parts.push({ type: 'code', content: codeContent });
+                currentIndex = codeEnd + 1;
+            } else {
+                // No closing backtick found, treat as text
+                currentText += text[currentIndex];
+                currentIndex++;
+            }
+        }
         // Regular character
         else {
             currentText += text[currentIndex];
@@ -116,42 +139,62 @@ const renderWithMath = (text: string) => {
             } catch (error) {
                 return <span key={index}>{`\\[${part.content}\\]`}</span>;
             }
+        } else if (part.type === 'code') {
+            return (
+                <Box
+                    component="code"
+                    key={index}
+                    sx={{
+                        fontFamily: 'monospace',
+                        fontSize: '0.92em',
+                        px: 0.5,
+                        py: 0.1,
+                        borderRadius: '4px',
+                        backgroundColor: (theme) => alpha(theme.palette.text.primary, 0.06),
+                        color: 'text.primary',
+                        // Allow long code spans (e.g. summed field lists) to wrap
+                        // instead of overflowing the card horizontally.
+                        whiteSpace: 'pre-wrap',
+                        overflowWrap: 'anywhere',
+                        wordBreak: 'break-word',
+                    }}
+                >
+                    {part.content}
+                </Box>
+            );
         } else {
             return <span key={index}>{part.content}</span>;
         }
     });
 };
 
-// Styled components for the concept explanation cards
+// Styled components for the concept explanation entries.
+// Rendered as lightweight metadata rows (label + formula) rather than boxed
+// cards, so they read as inline annotations on the derived table.
 const ConceptExplanationCard = styled(Box, {
     shouldForwardProp: (prop) => prop !== 'secondary',
-})<{ secondary: boolean }>(({ theme, secondary }) => ({
-    padding: '8px 10px',
+})<{ secondary: boolean }>(() => ({
     minWidth: 0,
-    borderLeft: `3px solid ${secondary ? theme.palette.secondary.main : theme.palette.primary.light}`,
-    borderRadius: '2px',
-    backgroundColor: alpha(theme.palette.background.paper, 0.5),
-    transition: transition.normal,
-    '&:hover': {
-        backgroundColor: alpha(theme.palette.primary.main, 0.04),
-    },
+    padding: '2px 0',
 }));
 
 const ConceptName = styled(Typography, {
     shouldForwardProp: (prop) => prop !== 'secondary',
 })<{ secondary: boolean }>(({ theme, secondary }) => ({
-    fontSize: '12px',
+    fontSize: '11px',
     fontWeight: 600,
-    color: secondary ? theme.palette.secondary.main : theme.palette.primary.main,
-    marginBottom: '3px',
+    color: secondary ? theme.palette.secondary.main : theme.palette.text.secondary,
+    marginBottom: '1px',
     display: 'flex',
     alignItems: 'center',
     gap: '4px',
+    fontFamily: 'monospace',
+    letterSpacing: '0.01em',
 }));
 
 const ConceptExplanation = styled(Typography)(({ theme }) => ({
     fontSize: '11px',
-    lineHeight: 1.4,
+    lineHeight: 1.5,
     minWidth: 0,
     color: theme.palette.text.primary,
     '& .katex': {
@@ -163,13 +206,20 @@ const ConceptExplanation = styled(Typography)(({ theme }) => ({
     // the bottom padding and only show the scrollbar if it's actually needed
     // (and hide its track to keep the card clean).
     '& .katex-display': {
-        margin: '4px 0',
+        margin: '10px 0',
         paddingBottom: 0,
         overflowX: 'auto',
         overflowY: 'hidden',
         scrollbarWidth: 'none',
         '&::-webkit-scrollbar': { display: 'none' },
     },
+    // Block-displayed formulas (fractions, sums, roots) need more height and
+    // a slightly larger glyph size than inline math so stacked structure is
+    // legible — inline `\(...\)` stays compact at 11px above.
+    '& .katex-display > .katex': {
+        fontSize: '15px',
+        lineHeight: 1.5,
+    },
 }));
 
 export interface ConceptExplanationItem {
@@ -199,22 +249,24 @@ export const ConceptExplCards: FC<ConceptExplCardsProps> = ({
 
 
     return (
-        <Box sx={{ position: 'relative', display: 'flex', justifyContent: 'center', width: '100%' }}>
-            {/* Formulas grid — reflows to one column when there isn't room for two
-                side-by-side, so long formulas (\sum, fractions) don't overflow. */}
+        <Box sx={{ position: 'relative', display: 'flex', flexDirection: 'column', width: '100%' }}>
+            {/* Formulas as a metadata list — one per row, separated by hairline
+                dividers so they read as annotations rather than boxed cards. */}
             <Box sx={{
-                    display: 'grid',
-                    gridTemplateColumns: 'repeat(auto-fit, minmax(360px, 1fr))',
-                    gap: 1,
+                    display: 'flex',
+                    flexDirection: 'column',
                     width: '100%',
                     minWidth: 0,
+                    '& > *:not(:last-child)': {
+                        borderBottom: `1px solid ${alpha('#000', 0.06)}`,
+                    },
                 }}>
                     {displayConcepts.map((concept, index) => {
                         let secondary = concept.field == "Statistical Analysis";
                         return (
                         <ConceptExplanationCard key={`${concept.field}-${index}`} secondary={secondary}>
                             <ConceptName secondary={secondary}>
-                                {concept.field}
+                                {concept.field.replace(/\\_/g, '_')}
                             </ConceptName>
                             <ConceptExplanation>
                                 {renderWithMath(concept.explanation)}
diff --git a/src/views/ReportView.tsx b/src/views/ReportView.tsx
index 77eb55b2..339c4373 100644
--- a/src/views/ReportView.tsx
+++ b/src/views/ReportView.tsx
@@ -6,12 +6,19 @@ import {
     Box,
     Typography,
     IconButton,
-    Link,
     Tooltip,
+    Menu,
+    MenuItem,
     useTheme,
 } from '@mui/material';
-import ArrowBackIcon from '@mui/icons-material/ArrowBack';
+import { alpha } from '@mui/material/styles';
 import DeleteIcon from '@mui/icons-material/Delete';
+import EditIcon from '@mui/icons-material/EditOutlined';
+import CheckIcon from '@mui/icons-material/Check';
+import DownloadIcon from '@mui/icons-material/Download';
+import ContentCopyIcon from '@mui/icons-material/ContentCopy';
+import ImageIcon from '@mui/icons-material/Image';
+import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdf';
 import html2canvas from 'html2canvas';
 import { useDispatch, useSelector } from 'react-redux';
 import { DataFormulatorState, dfActions, dfSelectors, GeneratedReport } from '../app/dfSlice';
@@ -20,6 +27,7 @@ import { DictTable } from '../components/ComponentType';
 import { AppDispatch } from '../app/store';
 import { TiptapReportEditor } from './TiptapReportEditor';
 import { getCachedChart } from '../app/chartCache';
+import { floatingPillSx } from '../app/tokens';
 import { useTranslation } from 'react-i18next';
 
 export const ReportView: FC = () => {
@@ -48,8 +56,10 @@ export const ReportView: FC = () => {
     const isGenerating = currentReport?.status === 'generating';
 
     const [cachedReportImages, setCachedReportImages] = useState<Record<string, { url: string; width: number; height: number }>>({});
-    const [copyButtonSuccess, setCopyButtonSuccess] = useState(false);
-    const [imageCopyButtonSuccess, setImageCopyButtonSuccess] = useState(false);
+    // Read-first: report opens as a clean text page; users opt into editing explicitly.
+    const [isEditMode, setIsEditMode] = useState(false);
+    // Download/share menu anchored to the floating download button.
+    const [downloadMenuAnchor, setDownloadMenuAnchor] = useState<null | HTMLElement>(null);
 
     const updateCachedReportImages = (chartId: string, blobUrl: string, width: number, height: number) => {
         setCachedReportImages(prev => ({
@@ -238,41 +248,13 @@ export const ReportView: FC = () => {
                     'text/plain': new Blob([clone.innerText], { type: 'text/plain' }),
                 }),
             ]);
-            setCopyButtonSuccess(true);
-            setTimeout(() => setCopyButtonSuccess(false), 2000);
+            showMessage(t('report.contentCopied'));
         } catch (e) {
             console.warn('Failed to copy report content:', e);
             showMessage(t('report.failedToCopyClipboard'), 'error');
         }
     };
 
-    const copyReportAsImage = async () => {
-        if (!canWriteToClipboard()) {
-            showMessage(getClipboardUnavailableMessage(), 'error');
-            return;
-        }
-
-        try {
-            const canvas = await renderReportToCanvas();
-            if (!canvas) return;
-            const blob = await canvasToBlob(canvas);
-            if (!blob) {
-                showMessage(t('report.failedToGenerateImage'), 'error');
-                return;
-            }
-
-            await navigator.clipboard.write([
-                new ClipboardItem({ 'image/png': blob })
-            ]);
-            showMessage(t('report.imageCopied'));
-            setImageCopyButtonSuccess(true);
-            setTimeout(() => setImageCopyButtonSuccess(false), 2000);
-        } catch (error) {
-            console.error('Error generating report image:', error);
-            showMessage(t('report.failedToGenerateReportImage'), 'error');
-        }
-    };
-
     const downloadReportAsPng = async () => {
         try {
             const canvas = await renderReportToCanvas();
@@ -501,6 +483,17 @@ ${styles}
         }
     }, [currentReportId]);
 
+    // Always return to read mode when switching reports or while a report is generating.
+    useEffect(() => {
+        setIsEditMode(false);
+    }, [currentReportId]);
+
+    useEffect(() => {
+        if (isGenerating) {
+            setIsEditMode(false);
+        }
+    }, [isGenerating]);
+
     // Derive focused report ID from Redux state
     const focusedReportId = focusedId?.type === 'report' ? focusedId.reportId : undefined;
 
@@ -618,6 +611,25 @@ ${styles}
     let displayedReport = generatedReport;
     displayedReport = processReport(displayedReport);
 
+    // Raw markdown (fence stripped) for the lightweight typewriter view while streaming.
+    const rawReportMarkdown = (() => {
+        const m = generatedReport.match(/```markdown\n([\s\S]*?)(?:\n```)?$/);
+        return m ? m[1] : generatedReport;
+    })();
+
+    const downloadMenuItemSx = {
+        minHeight: 30,
+        px: 1.25,
+        py: 0.5,
+        fontSize: 12,
+        color: 'text.secondary',
+        '& .MuiSvgIcon-root': {
+            fontSize: 15,
+            mr: 0.75,
+            color: 'text.disabled',
+        },
+    };
+
     return (
         <Box sx={{ height: '100%', width: '100%', display: 'flex', flexDirection: 'column', overflow: 'hidden' }}>
             <Box sx={{ height: '100%', position: 'relative', overflow: 'hidden' }}>
@@ -631,29 +643,93 @@ ${styles}
                         flexDirection: 'column',
                         gap: 1,
                     }}>
-                        <Tooltip title={t('report.backToEditor')} placement="right">
-                            <IconButton
-                                size="small"
-                                onClick={() => dispatch(dfActions.setViewMode('editor'))}
-                                sx={{
-                                    backgroundColor: 'background.paper',
-                                    boxShadow: '0 1px 4px rgba(0,0,0,0.12)',
-                                    '&:hover': { backgroundColor: 'action.hover' },
+                        {!isGenerating && currentReportId && (
+                            <Tooltip title={isEditMode ? t('report.doneEditing') : t('report.editReport')} placement="right">
+                                <IconButton
+                                    size="small"
+                                    onClick={() => setIsEditMode((v) => !v)}
+                                    sx={isEditMode ? {
+                                        ...floatingPillSx,
+                                        backgroundColor: 'primary.main',
+                                        color: 'primary.contrastText',
+                                        '&:hover': { backgroundColor: 'primary.dark', color: 'primary.contrastText' },
+                                    } : floatingPillSx}
+                                >
+                                    {isEditMode ? <CheckIcon sx={{ fontSize: 18 }} /> : <EditIcon sx={{ fontSize: 18 }} />}
+                                </IconButton>
+                            </Tooltip>
+                        )}
+                        {!isGenerating && currentReportId && (
+                            <Tooltip title={t('report.downloadAndShare')} placement="right">
+                                <IconButton
+                                    size="small"
+                                    onClick={(e) => setDownloadMenuAnchor(e.currentTarget)}
+                                    sx={downloadMenuAnchor ? {
+                                        ...floatingPillSx,
+                                        color: 'primary.main',
+                                    } : floatingPillSx}
+                                >
+                                    <DownloadIcon sx={{ fontSize: 18 }} />
+                                </IconButton>
+                            </Tooltip>
+                        )}
+                        <Menu
+                            anchorEl={downloadMenuAnchor}
+                            open={Boolean(downloadMenuAnchor)}
+                            onClose={() => setDownloadMenuAnchor(null)}
+                            anchorOrigin={{ vertical: 'bottom', horizontal: 'right' }}
+                            transformOrigin={{ vertical: 'top', horizontal: 'left' }}
+                            slotProps={{
+                                paper: {
+                                    sx: {
+                                        ml: 0.5,
+                                        borderRadius: '6px',
+                                        boxShadow: '0 1px 4px rgba(0,0,0,0.12)',
+                                        border: `1px solid ${alpha(theme.palette.divider, 0.5)}`,
+                                    }
+                                }
+                            }}
+                        >
+                            <MenuItem
+                                onClick={() => {
+                                    setDownloadMenuAnchor(null);
+                                    void copyReportContent();
+                                }}
+                                sx={downloadMenuItemSx}
+                            >
+                                <ContentCopyIcon />
+                                {t('report.copyContent')}
+                            </MenuItem>
+                            <MenuItem
+                                onClick={() => {
+                                    setDownloadMenuAnchor(null);
+                                    void downloadReportAsPng();
+                                }}
+                                sx={downloadMenuItemSx}
+                            >
+                                <ImageIcon />
+                                {t('report.saveAsImage')}
+                            </MenuItem>
+                            <MenuItem
+                                onClick={() => {
+                                    setDownloadMenuAnchor(null);
+                                    void exportReportAsPdf();
                                 }}
+                                sx={downloadMenuItemSx}
                             >
-                                <ArrowBackIcon sx={{ fontSize: 18 }} />
-                            </IconButton>
-                        </Tooltip>
+                                <PictureAsPdfIcon />
+                                {t('report.downloadPdf')}
+                            </MenuItem>
+                        </Menu>
                         {currentReportId && (
                             <Tooltip title={t('report.deleteReport')} placement="right">
                                 <IconButton
                                     size="small"
                                     onClick={(e) => deleteReport(currentReportId, e)}
                                     sx={{
-                                        backgroundColor: 'background.paper',
-                                        boxShadow: '0 1px 4px rgba(0,0,0,0.12)',
+                                        ...floatingPillSx,
                                         color: 'error.main',
-                                        '&:hover': { backgroundColor: 'error.50' },
+                                        '&:hover': { backgroundColor: 'error.50', color: 'error.main' },
                                     }}
                                 >
                                     <DeleteIcon sx={{ fontSize: 18 }} />
@@ -679,14 +755,13 @@ ${styles}
                         >
                             <TiptapReportEditor
                                 content={displayedReport}
-                                editable={!isGenerating}
+                                streamingText={rawReportMarkdown}
+                                resolveChartImage={(chartId) => cachedReportImages[chartId]}
+                                editable={isEditMode && !isGenerating}
+                                isGenerating={isGenerating}
+                                generatingPhase={currentReport?.generatingPhase}
+                                inspectionSteps={currentReport?.inspectionSteps}
                                 reportId={currentReportId}
-                                onCopyContent={currentReportId ? copyReportContent : undefined}
-                                onCopyImage={currentReportId ? copyReportAsImage : undefined}
-                                onDownloadPng={currentReportId ? downloadReportAsPng : undefined}
-                                onExportPdf={currentReportId ? exportReportAsPdf : undefined}
-                                copyContentSuccess={copyButtonSuccess}
-                                copyImageSuccess={imageCopyButtonSuccess}
                                 onUpdate={(html) => {
                                     if (currentReportId) {
                                         setGeneratedReport(html);
@@ -694,30 +769,6 @@ ${styles}
                                     }
                                 }}
                             />
-                            
-                            {/* Attribution */}
-                            <Typography sx={{ 
-                                px: 3, pb: 2,
-                                textAlign: 'center',
-                                fontSize: '0.7rem',
-                                color: 'text.disabled',
-                            }}>
-                                {t('report.createdWithAI')}{' '}
-                                <Link 
-                                    href="https://github.com/microsoft/data-formulator" 
-                                    target="_blank" 
-                                    rel="noopener noreferrer"
-                                    sx={{ 
-                                        color: 'text.disabled',
-                                        textDecoration: 'none',
-                                        '&:hover': {
-                                            textDecoration: 'underline'
-                                        }
-                                    }}
-                                >
-                                    https://github.com/microsoft/data-formulator
-                                </Link>
-                            </Typography>
                         </Box>
                     </Box>
                 </Box>
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index debf6548..a1c0cf70 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -1015,7 +1015,30 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     .filter((s: string) => s.length > 0)
                     .slice(0, 2);
                 const target = (result.target === 'report_gen' ? 'report_gen' : 'data_loading') as 'data_loading' | 'report_gen';
-                if (currentDraftId) {
+
+                if (target === 'report_gen') {
+                    // Auto-delegate to the report agent — no user approval gate.
+                    // When the user asks for a report, jumping straight into
+                    // report generation is the expected behavior, so we pick the
+                    // agent's first seed prompt (falling back to its message) and
+                    // hand off directly. The report_gen handoff useEffect picks
+                    // this up and starts reportFromChat. The placeholder draft
+                    // has no role in the report view, so we drop it like a normal
+                    // completion would.
+                    if (currentDraftId) {
+                        thinkingSteps = [];
+                        pendingThought = '';
+                        dispatch(dfActions.updateDraftRunningPlan({ draftId: currentDraftId, plan: '' }));
+                        dispatch(dfActions.removeDraftNode(currentDraftId));
+                        currentDraftId = null;
+                    }
+                    const seedPrompt = options[0] || message;
+                    if (seedPrompt) {
+                        dispatch(dfActions.requestAgentHandoff({ target: 'report_gen', prompt: seedPrompt }));
+                    }
+                } else if (currentDraftId) {
+                    // data_loading: keep the one-click approval panel — that's a
+                    // different agent / context the user should confirm.
                     const priorSteps = thinkingSteps.filter(s => s.trim()).join('\n');
                     thinkingSteps = [];
                     pendingThought = '';
@@ -1289,6 +1312,53 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 if (event.type === 'text_delta') {
                     accumulatedMarkdown += (event as any).content;
                     scheduleFlush();
+                } else if (event.type === 'tool_start') {
+                    // Mirror the data agent: surface what the agent is inspecting.
+                    const ev = event as any;
+                    let label = t('dataThread.thinking');
+                    let doneLabel: string | undefined;
+                    let chartDescs: { chartType: string; name: string }[] | undefined;
+                    if (ev.tool === 'inspect_chart') {
+                        // Resolve chart ids to descriptors: chart type (for the
+                        // icon) plus a display name — the insight title when we
+                        // have one, otherwise the encoded fields ("a × b × c").
+                        const ids: string[] = ev.chart_ids || [];
+                        chartDescs = ids
+                            .map(id => {
+                                const c = charts.find(cc => cc.id === id);
+                                if (!c) return undefined;
+                                let name = c.insight?.title;
+                                if (!name) {
+                                    const fields = Object.values(c.encodingMap)
+                                        .map(enc => enc.fieldID)
+                                        .filter((fid): fid is string => !!fid)
+                                        .map(fid => conceptShelfItems.find(f => f.id === fid)?.name)
+                                        .filter((n): n is string => !!n);
+                                    name = fields.length ? fields.join(' × ') : c.chartType;
+                                }
+                                return { chartType: c.chartType, name };
+                            })
+                            .filter((d): d is { chartType: string; name: string } => !!d);
+                        label = t('report.inspectingCharts');
+                        doneLabel = t('report.inspectedCharts');
+                    } else if (ev.tool === 'inspect_source_data') {
+                        const names = ev.table_names?.join(', ') || '';
+                        label = t('dataThread.inspectingData') + (names ? ` ${names}` : '');
+                        doneLabel = t('dataThread.inspectedData') + (names ? ` ${names}` : '');
+                    }
+                    dispatch(dfActions.updateGeneratedReportProgress({
+                        id: reportId,
+                        kind: 'start',
+                        label,
+                        doneLabel,
+                        charts: chartDescs,
+                    }));
+                } else if (event.type === 'tool_result') {
+                    // Flip the matching pending inspect step to done.
+                    dispatch(dfActions.updateGeneratedReportProgress({
+                        id: reportId,
+                        kind: 'end',
+                    }));
                 } else if (event.type === 'error') {
                     const errMsg = event.error ? getErrorMessage(event.error) : t('messages.error');
                     accumulatedMarkdown += `\n\n**Error:** ${errMsg}`;
diff --git a/src/views/TiptapReportEditor.tsx b/src/views/TiptapReportEditor.tsx
index b9083511..fcc8fbaf 100644
--- a/src/views/TiptapReportEditor.tsx
+++ b/src/views/TiptapReportEditor.tsx
@@ -11,34 +11,325 @@ import { TableRow } from '@tiptap/extension-table-row';
 import { TableHeader } from '@tiptap/extension-table-header';
 import { TableCell } from '@tiptap/extension-table-cell';
 import { Markdown } from 'tiptap-markdown';
-import { Box, Button, IconButton, Menu, MenuItem, Tooltip, Divider, useTheme, Typography } from '@mui/material';
+import { Box, IconButton, Tooltip, Divider, Typography, CircularProgress, useTheme } from '@mui/material';
 import { alpha } from '@mui/material/styles';
-import { WritingPencil, ShimmerText, WritingIndicator } from '../components/FunComponents';
+import { WritingIndicator } from '../components/FunComponents';
+import { getChartTemplate } from '../components/ChartTemplates';
 import FormatBoldIcon from '@mui/icons-material/FormatBold';
 import FormatItalicIcon from '@mui/icons-material/FormatItalic';
 import FormatListBulletedIcon from '@mui/icons-material/FormatListBulleted';
 import FormatListNumberedIcon from '@mui/icons-material/FormatListNumbered';
 import FormatQuoteIcon from '@mui/icons-material/FormatQuote';
 import TitleIcon from '@mui/icons-material/Title';
-import ContentCopyIcon from '@mui/icons-material/ContentCopy';
-import ImageIcon from '@mui/icons-material/Image';
-import PictureAsPdfIcon from '@mui/icons-material/PictureAsPdf';
-import DownloadIcon from '@mui/icons-material/Download';
 import CheckCircleIcon from '@mui/icons-material/CheckCircle';
 
+/** Compact "1.2s" / "850ms" label; the unit is picked after rounding so 999.6 reads "1.0s", never "1000ms". */
+function formatStepDuration(ms: number): string {
+    const wholeMs = Math.round(ms);
+    return wholeMs < 1000 ? `${wholeMs}ms` : `${(ms / 1000).toFixed(1)}s`;
+}
+
 export interface TiptapReportEditorProps {
     content: string;           // HTML content (from processReport)
-    editable?: boolean;
+    streamingText?: string;    // raw markdown revealed by the typewriter view while generatingPhase === 'writing'
+    resolveChartImage?: (chartId: string) => { url: string; width: number; height: number } | undefined; // image lookup for chart embeds in the streamed text
+    editable?: boolean;        // edit mode on/off (formatting toolbar visible, content editable)
+    isGenerating?: boolean;    // report is still streaming; progress / typewriter views replace the editor
+    generatingPhase?: 'inspecting' | 'writing'; // while generating: 'writing' streams text, anything else shows inspection status
+    // accumulated inspect steps so the user sees what's happening; `charts`
+    // carries chart-type + display name so we can show a type icon next to it
+    inspectionSteps?: InspectStep[];
     reportId?: string;         // triggers re-focus when switching reports
     onUpdate?: (html: string) => void;
-    onCopyContent?: () => void | Promise<void>;
-    onCopyImage?: () => void | Promise<void>;
-    onDownloadPng?: () => void | Promise<void>;
-    onExportPdf?: () => void | Promise<void>;
-    copyContentSuccess?: boolean;
-    copyImageSuccess?: boolean;
 }
 
+// ── Generating-status UI ───────────────────────────────────────────────────
+// While a report streams, the canvas shows (in order): a "thinking…" spinner
+// before anything arrives → a list of inspection steps (each flips to a ✓ with
+// a duration) → a trailing "thinking…" once all steps resolve → and finally a
+// pencil "writing…" overlay glued to the bottom of the growing text.
+
+export interface InspectStep {
+    label: string;        // shown while the step runs (and afterwards when no doneLabel is given)
+    doneLabel?: string;   // past-tense label shown once the step completes
+    done: boolean;        // false → spinner, true → ✓ in the step's status icon
+    charts?: { chartType: string; name: string }[]; // each entry renders on its own line: chart-type icon + name
+    startedAt?: number;   // epoch ms when the tool call started
+    durationMs?: number;  // wall time once the step is done
+}
+
+/** Status glyph for a progress row, held in a non-shrinking flex slot nudged
+ *  2px down. Renders a success-colored check circle when `done` is truthy and
+ *  a small muted spinner otherwise (including when `done` is omitted). */
+const StatusIcon: FC<{ done?: boolean }> = ({ done }) => {
+    const glyph = done
+        ? <CheckCircleIcon sx={{ fontSize: 13, color: 'success.main' }} />
+        : <CircularProgress size={11} thickness={5} sx={{ color: 'text.secondary' }} />;
+    return (
+        <Box sx={{ flexShrink: 0, mt: '2px', display: 'flex', alignItems: 'center' }}>{glyph}</Box>
+    );
+};
+
+/** One progress row: a running StatusIcon beside `label`, whose opacity pulses 0.6 → 1 → 0.6 every 1.6s. */
+const ThinkingRow: FC<{ label: string }> = ({ label }) => {
+    const pulsingLabelSx = {
+        fontSize: 12,
+        lineHeight: 1.4,
+        color: 'text.secondary',
+        animation: 'thinking-pulse 1.6s ease-in-out infinite',
+        '@keyframes thinking-pulse': { '0%, 100%': { opacity: 0.6 }, '50%': { opacity: 1 } },
+    };
+    return (
+        <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.75, py: 0.25 }}>
+            <StatusIcon />
+            <Typography component="span" sx={pulsingLabelSx}>{label}</Typography>
+        </Box>
+    );
+};
+
+/** A single inspection step: status icon, then the label with its duration,
+ *  then one line per inspected chart (chart-type icon + display name). */
+const InspectionStepRow: FC<{ step: InspectStep }> = ({ step }) => {
+    const { done, label, doneLabel, durationMs, charts } = step;
+    // Prefer the past-tense label once finished; fall back to the running label when none was given.
+    const caption = done && doneLabel ? doneLabel : label;
+    return (
+        <Box sx={{ display: 'flex', alignItems: 'flex-start', gap: 0.75, py: 0.25 }}>
+            <StatusIcon done={done} />
+            <Box sx={{ display: 'flex', flexDirection: 'column', rowGap: 0.25, minWidth: 0 }}>
+                {/* First line: caption, plus the elapsed time for finished steps that recorded one. */}
+                <Box sx={{ display: 'flex', alignItems: 'baseline', gap: 0.75, flexWrap: 'wrap' }}>
+                    <Typography component="span" sx={{
+                        fontSize: 12, lineHeight: 1.4, color: 'text.primary',
+                        whiteSpace: 'normal', wordBreak: 'break-word',
+                    }}>
+                        {caption}
+                    </Typography>
+                    {done && durationMs != null && (
+                        <Typography component="span" sx={{
+                            fontSize: 11, lineHeight: 1.4, color: 'text.disabled',
+                            fontVariantNumeric: 'tabular-nums',
+                        }}>{formatStepDuration(durationMs)}</Typography>
+                    )}
+                </Box>
+                {/* Then one row per chart: its chart-type icon followed by the display name. */}
+                {charts?.map((chart, idx) => (
+                    <Box key={idx} sx={{ display: 'inline-flex', alignItems: 'center', gap: 0.25, minWidth: 0 }}>
+                        <Box sx={{
+                            width: 14, height: 14, flexShrink: 0, opacity: 0.85,
+                            display: 'flex', alignItems: 'center', justifyContent: 'center',
+                            '& svg': { fontSize: 14 },
+                        }}>{getChartTemplate(chart.chartType)?.icon}</Box>
+                        <Typography component="span" sx={{
+                            fontSize: 12, lineHeight: 1.4, color: 'text.secondary',
+                            whiteSpace: 'normal', wordBreak: 'break-word',
+                        }}>{chart.name}</Typography>
+                    </Box>
+                ))}
+            </Box>
+        </Box>
+    );
+};
+
+/**
+ * Progress panel shown before the report text starts streaming: an uppercase
+ * heading, one InspectionStepRow per accumulated step, and a pulsing
+ * "thinking…" row whenever no step is still running (which includes the case
+ * where no steps exist yet).
+ */
+const InspectingStatus: FC<{ steps?: InspectStep[] }> = ({ steps }) => {
+    const { t } = useTranslation();
+    const rows = steps ?? [];
+    // `every` is vacuously true on an empty list, so this single check also
+    // covers the "nothing has started yet" case.
+    const nothingRunning = rows.every(row => row.done);
+    return (
+        <Box sx={{ display: 'flex', flexDirection: 'column', gap: 0.5, px: '24px', pt: '40px', pb: '16px' }}>
+            <Typography sx={{
+                fontSize: 11, fontWeight: 600, letterSpacing: '0.05em',
+                textTransform: 'uppercase', color: 'text.secondary', mb: 0.5,
+            }}>
+                {t('editor.workingTitle')}
+            </Typography>
+            {rows.map((row, idx) => <InspectionStepRow key={idx} step={row} />)}
+            {nothingRunning && <ThinkingRow label={t('dataThread.thinking')} />}
+        </Box>
+    );
+};
+
+/** Remove `**bold**`, `*italic*` and backtick-code markers from one line,
+ *  keeping the text they wrap (used by the lightweight streaming view). */
+function stripInlineMarkers(line: string): string {
+    const noBold = line.replace(/\*\*(.+?)\*\*/g, '$1');
+    const noItalic = noBold.replace(/(^|[^*])\*([^*]+)\*/g, '$1$2');
+    return noItalic.replace(/`([^`]+)`/g, '$1');
+}
+
+/** Parse a line holding only a chart embed — `![caption](chart://id)` or the
+ *  legacy `[IMAGE(id)]` — into its chart id (+ caption); null for any other line. */
+function matchChartImageLine(line: string): { chartId: string; caption?: string } | null {
+    const markdownForm = /^!\[([^\]]*)\]\(chart:\/\/([^)]+)\)\s*$/.exec(line);
+    if (markdownForm) return { caption: markdownForm[1] || undefined, chartId: markdownForm[2] };
+    const legacyForm = /^\[IMAGE\(([^)]+)\)\]\s*$/.exec(line);
+    return legacyForm ? { chartId: legacyForm[1] } : null;
+}
+
+type ResolveChartImage = (chartId: string) => { url: string; width: number; height: number } | undefined; // chart id → image url + size; on undefined StreamingMarkdownLite renders a placeholder
+
+/**
+ * Lightweight, line-based render of the streamed markdown: chart-image lines, `#`–`###` headings and `-`/`*`
+ * bullets get their own look, every other line is plain text; `caret` is appended to the last line only.
+ */
+const StreamingMarkdownLite: FC<{ text: string; caret?: React.ReactNode; resolveChartImage?: ResolveChartImage }> = ({ text, caret, resolveChartImage }) => {
+    const lines = text.split('\n');
+    const lastIdx = lines.length - 1;
+    return (
+        <>
+            {lines.map((line, i) => {
+                const tail = i === lastIdx ? caret : null; // the caret trails only the final line
+                const img = matchChartImageLine(line);
+                if (img) { // chart embed: real image when resolvable, otherwise a text placeholder
+                    const cached = resolveChartImage?.(img.chartId);
+                    if (cached) {
+                        return (
+                            <Box key={i} component="div" sx={{ textAlign: 'center', my: '0.5em' }}>
+                                <Box component="img" src={cached.url} alt={img.caption ?? ''}
+                                    width={cached.width} height={cached.height}
+                                    sx={{ maxWidth: '100%', height: 'auto', borderRadius: '4px' }} />
+                                {tail}
+                            </Box>
+                        );
+                    }
+                    return (
+                        <Box key={i} component="div" sx={{ textAlign: 'center', color: 'text.disabled', py: '16px' }}>
+                            📊 {img.caption || img.chartId}{tail}
+                        </Box>
+                    );
+                }
+                const h = line.match(/^(#{1,3})\s+(.*)$/); // heading levels 1–3 only
+                if (h) {
+                    const level = h[1].length; // number of leading '#' characters
+                    return (
+                        <Box key={i} component="div" sx={{
+                            fontWeight: level === 1 ? 700 : 600,
+                            fontSize: level === 1 ? '1.75rem' : level === 2 ? '1.4rem' : '1.15rem',
+                            lineHeight: 1.3,
+                            mt: i === 0 ? 0 : '1em', mb: '0.4em',
+                        }}>
+                            {stripInlineMarkers(h[2])}{tail}
+                        </Box>
+                    );
+                }
+                const li = line.match(/^[-*]\s+(.*)$/); // unindented `- ` / `* ` bullet
+                if (li) {
+                    return (
+                        <Box key={i} component="div" sx={{ display: 'flex', gap: 1, mb: '0.25em' }}>
+                            <Box component="span" sx={{ color: 'text.disabled' }}>•</Box>
+                            <Box component="span">{stripInlineMarkers(li[1])}{tail}</Box>
+                        </Box>
+                    );
+                }
+                return (
+                    <Box key={i} component="div" sx={{ minHeight: line === '' ? '0.5em' : undefined, mb: line === '' ? 0 : '0.2em' }}>
+                        {stripInlineMarkers(line)}{tail}
+                    </Box>
+                );
+            })}
+        </>
+    );
+};
+
+/**
+ * Typewriter buffer: smoothly reveals `text` regardless of how bursty the
+ * network deltas are. A rAF loop advances the shown prefix toward the latest text at a smoothed,
+ * clamped rate; if the text shrinks (report cleared/restarted) the shown text and all pacing state reset.
+ */
+const StreamingText: FC<{ text: string; resolveChartImage?: ResolveChartImage }> = ({ text, resolveChartImage }) => {
+    const { t } = useTranslation();
+    const textRef = useRef(text);
+    textRef.current = text;
+    const shownLenRef = useRef(0);
+    const [shown, setShown] = useStateReact('');
+
+    useEffect(() => {
+        let raf = 0;
+        let lastTime = performance.now();
+        let lastTargetLen = 0;
+        let lastChunkTime = lastTime;
+        let fraction = 0;            // sub-character reveal accumulator
+
+        // Reveal rate in chars/ms, smoothed across chunks. Each arriving chunk yields
+        // an estimate (chunk size / time since the previous chunk), so a chunk is spread
+        // over roughly the gap until the next one is expected — natural typing rather
+        // than dumping. The effective rate is clamped to [MIN_RATE, MAX_RATE] so it never stalls.
+        const MIN_RATE = 0.012;     // ~12 chars/sec — slowest "typing" we allow
+        const MAX_RATE = 0.20;      // ~200 chars/sec — cap so big bursts don't blur
+        let rate = 0.03;            // initial guess until the first interval is known
+
+        const tick = () => {
+            const now = performance.now();
+            const dt = Math.min(now - lastTime, 100); // clamp tab-switch gaps
+            lastTime = now;
+
+            const target = textRef.current;
+            let len = shownLenRef.current;
+            if (len > target.length) {
+                // Report cleared/restarted: drop the stale text and the chunk bookkeeping too.
+                len = 0; fraction = 0; lastTargetLen = 0; shownLenRef.current = 0; setShown('');
+            }
+
+            // On each new chunk, re-estimate the rate from its size and the time since the previous chunk.
+            const arrived = target.length - lastTargetLen;
+            if (arrived > 0) {
+                const interval = Math.max(now - lastChunkTime, 1);
+                lastChunkTime = now;
+                lastTargetLen = target.length;
+                const chunkRate = arrived / interval;
+                rate = rate * 0.7 + chunkRate * 0.3; // EMA smoothing across chunks
+            }
+
+            const backlog = target.length - len;
+            if (backlog > 0) {
+                // Pace at the smoothed rate (never below MIN_RATE), lifted slightly when the
+                // backlog is large so we don't drift permanently behind a fast stream.
+                const catchUp = backlog > 240 ? 1.6 : backlog > 80 ? 1.25 : 1;
+                const effRate = Math.min(MAX_RATE, Math.max(MIN_RATE, rate) * catchUp);
+                fraction += effRate * dt;
+                const whole = Math.floor(fraction);
+                if (whole >= 1) {
+                    fraction -= whole;
+                    len = Math.min(target.length, len + whole);
+                    shownLenRef.current = len;
+                    setShown(target.slice(0, len));
+                }
+            }
+            raf = requestAnimationFrame(tick);
+        };
+        raf = requestAnimationFrame(tick);
+        return () => cancelAnimationFrame(raf);
+    }, []);
+
+    return (
+        <Box sx={{
+            px: '24px', pt: '40px', pb: '64px',
+            fontFamily: '-apple-system, BlinkMacSystemFont, "Segoe UI", Helvetica, Arial, sans-serif',
+            fontSize: '0.95rem', lineHeight: 1.7, color: 'rgb(55, 53, 47)',
+        }}>
+            <StreamingMarkdownLite text={shown} resolveChartImage={resolveChartImage} caret={
+                <Box component="span" sx={{
+                    display: 'inline-block', width: '2px', height: '1.1em',
+                    ml: '1px', verticalAlign: 'text-bottom', backgroundColor: 'text.primary',
+                    animation: 'stream-caret 1s step-end infinite',
+                    '@keyframes stream-caret': { '50%': { opacity: 0 } },
+                }} />
+            } />
+            <Box sx={{ mt: shown.length === 0 ? 1 : 2 }}>
+                <WritingIndicator label={t('editor.writingReport')} fontSize="0.85rem" />
+            </Box>
+        </Box>
+    );
+};
+
 /** Resizable image node view — drag bottom-right corner to resize */
 const ResizableImageView: FC<NodeViewProps> = ({ node, updateAttributes, selected }) => {
     const { src, alt, width, height } = node.attrs;
@@ -175,20 +466,18 @@ const ToolbarButton: FC<{
 
 export const TiptapReportEditor: FC<TiptapReportEditorProps> = ({
     content,
+    streamingText,
+    resolveChartImage,
     editable = true,
+    isGenerating = false,
+    generatingPhase,
+    inspectionSteps,
     reportId,
     onUpdate,
-    onCopyContent,
-    onCopyImage,
-    onDownloadPng,
-    onExportPdf,
-    copyContentSuccess = false,
-    copyImageSuccess = false,
 }) => {
     const theme = useTheme();
     const { t } = useTranslation();
     const isFocused = useRef(false);
-    const [imageMenuAnchor, setImageMenuAnchor] = useStateReact<null | HTMLElement>(null);
 
     const editor = useEditor({
         extensions: [
@@ -247,6 +536,9 @@ export const TiptapReportEditor: FC<TiptapReportEditorProps> = ({
     // Always sync if the content contains new images (img tags) that aren't in the editor yet
     useEffect(() => {
         if (!editor) return;
+        // While the writing phase streams, the lightweight typewriter view owns the
+        // display — defer the (expensive) markdown parse until the stream completes.
+        if (generatingPhase === 'writing') return;
         if (!isFocused.current) {
             editor.commands.setContent(content, { emitUpdate: false });
         } else {
@@ -258,7 +550,7 @@ export const TiptapReportEditor: FC<TiptapReportEditorProps> = ({
                 editor.commands.setContent(content, { emitUpdate: false });
             }
         }
-    }, [editor, content]);
+    }, [editor, content, generatingPhase]);
 
     const copyAsRichText = useCallback(async () => {
         if (!editor) return;
@@ -278,70 +570,32 @@ export const TiptapReportEditor: FC<TiptapReportEditorProps> = ({
     if (!editor) return null;
 
     const iconSx = { fontSize: 16 };
-    const exportIconSx = { fontSize: 15 };
-    const exportButtonSx = {
-        minWidth: 0,
-        height: 26,
-        px: 0.75,
-        py: 0,
-        borderRadius: '4px',
-        textTransform: 'none',
-        fontSize: 12,
-        fontWeight: 400,
-        lineHeight: 1,
-        color: 'text.secondary',
-        borderColor: 'transparent',
-        backgroundColor: 'transparent',
-        '& .MuiButton-startIcon': {
-            mr: 0.5,
-            ml: 0,
-            color: 'inherit',
-        },
-        '&:hover': {
-            color: 'primary.main',
-            borderColor: alpha(theme.palette.primary.main, 0.08),
-            backgroundColor: alpha(theme.palette.primary.main, 0.08),
-        },
-    };
-    const exportMenuItemSx = {
-        minHeight: 30,
-        px: 1.25,
-        py: 0.5,
-        fontSize: 12,
-        color: 'text.secondary',
-        '& .MuiSvgIcon-root': {
-            fontSize: 15,
-            mr: 0.75,
-            color: 'text.disabled',
-        },
-    };
-    const hasExportActions = !!(onCopyContent || onCopyImage || onDownloadPng || onExportPdf);
-    const imageMenuOpen = Boolean(imageMenuAnchor);
 
     return (
-        <Box sx={{ display: 'flex', flexDirection: 'column', height: '100%', position: 'relative' }}>
-            {/* Toolbar — always visible, disabled during generation */}
+        <Box sx={{ display: 'flex', flexDirection: 'column', minHeight: '100%', position: 'relative' }}>
+            {/* Toolbar — only in edit mode (formatting); hidden when reading or generating */}
+            {editable && (
             <Box sx={{
                 display: 'flex',
                 alignItems: 'center',
                 gap: '2px',
                 px: 1,
                 py: 2,
+                minHeight: 26,
                 borderBottom: `1px solid ${alpha(theme.palette.divider, 0.3)}`,
                 flexShrink: 0,
                 position: 'sticky',
                 top: 0,
                 zIndex: 5,
                 backgroundColor: 'background.paper',
-                opacity: editable ? 1 : 0.5,
             }}
                 data-report-toolbar
             >
+                {editable && (
                 <Box sx={{
                     display: 'flex',
                     alignItems: 'center',
                     gap: '2px',
-                    pointerEvents: editable ? 'auto' : 'none',
                 }}>
                     <ToolbarButton
                         onClick={() => editor.chain().focus().toggleBold().run()}
@@ -395,126 +649,13 @@ export const TiptapReportEditor: FC<TiptapReportEditorProps> = ({
                         <FormatQuoteIcon sx={iconSx} />
                     </ToolbarButton>
                 </Box>
-                {hasExportActions && editable && (
-                    <Box sx={{
-                        ml: 'auto',
-                        display: 'flex',
-                        alignItems: 'center',
-                        gap: 0.75,
-                        pointerEvents: editable ? 'auto' : 'none',
-                    }}>
-                        {onCopyContent && (
-                            <Button
-                                size="small"
-                                variant="text"
-                                startIcon={copyContentSuccess ? <CheckCircleIcon sx={exportIconSx} /> : <ContentCopyIcon sx={exportIconSx} />}
-                                onClick={onCopyContent}
-                                color={copyContentSuccess ? 'success' : 'primary'}
-                                sx={{
-                                    ...exportButtonSx,
-                                    ...(copyContentSuccess ? {
-                                        color: 'success.main',
-                                        backgroundColor: alpha(theme.palette.success.main, 0.08),
-                                    } : {}),
-                                }}
-                            >
-                                {copyContentSuccess ? t('report.copied') : t('report.copyContent')}
-                            </Button>
-                        )}
-                        {(onCopyImage || onDownloadPng) && (
-                            <>
-                                <Button
-                                    size="small"
-                                    variant="text"
-                                    startIcon={copyImageSuccess ? <CheckCircleIcon sx={exportIconSx} /> : <ImageIcon sx={exportIconSx} />}
-                                    onClick={(event) => setImageMenuAnchor(event.currentTarget)}
-                                    color={copyImageSuccess ? 'success' : 'primary'}
-                                    sx={{
-                                        ...exportButtonSx,
-                                        ...(copyImageSuccess ? {
-                                            color: 'success.main',
-                                            backgroundColor: alpha(theme.palette.success.main, 0.08),
-                                        } : {}),
-                                    }}
-                                >
-                                    {copyImageSuccess ? t('report.copied') : t('report.imageActions')}
-                                </Button>
-                                <Menu
-                                    anchorEl={imageMenuAnchor}
-                                    open={imageMenuOpen}
-                                    onClose={() => setImageMenuAnchor(null)}
-                                    anchorOrigin={{ vertical: 'bottom', horizontal: 'right' }}
-                                    transformOrigin={{ vertical: 'top', horizontal: 'right' }}
-                                    slotProps={{
-                                        paper: {
-                                            sx: {
-                                                mt: 0.5,
-                                                borderRadius: '6px',
-                                                boxShadow: '0 1px 4px rgba(0,0,0,0.12)',
-                                                border: `1px solid ${alpha(theme.palette.divider, 0.5)}`,
-                                            }
-                                        }
-                                    }}
-                                >
-                                    {onCopyImage && (
-                                        <MenuItem
-                                            onClick={() => {
-                                                setImageMenuAnchor(null);
-                                                void onCopyImage();
-                                            }}
-                                            sx={exportMenuItemSx}
-                                        >
-                                            <ContentCopyIcon />
-                                            {t('report.copyImage')}
-                                        </MenuItem>
-                                    )}
-                                    {onDownloadPng && (
-                                        <MenuItem
-                                            onClick={() => {
-                                                setImageMenuAnchor(null);
-                                                void onDownloadPng();
-                                            }}
-                                            sx={exportMenuItemSx}
-                                        >
-                                            <DownloadIcon />
-                                            {t('report.downloadPng')}
-                                        </MenuItem>
-                                    )}
-                                </Menu>
-                            </>
-                        )}
-                        {onExportPdf && (
-                            <Button
-                                size="small"
-                                variant="text"
-                                startIcon={<PictureAsPdfIcon sx={exportIconSx} />}
-                                onClick={onExportPdf}
-                                sx={exportButtonSx}
-                            >
-                                {t('report.exportPdf')}
-                            </Button>
-                        )}
-                    </Box>
                 )}
-                    {!editable && (
-                        <Box sx={{ ml: 'auto', display: 'flex', alignItems: 'center', gap: 0.75, pointerEvents: 'none' }}>
-                            <Box sx={{
-                                width: 6, height: 6, borderRadius: '50%',
-                                backgroundColor: 'primary.main',
-                                animation: 'pulse-dot 1.2s ease-in-out infinite',
-                                '@keyframes pulse-dot': {
-                                    '0%, 100%': { opacity: 0.3 },
-                                    '50%': { opacity: 1 },
-                                },
-                            }} />
-                            <ShimmerText>{t('editor.generating')}</ShimmerText>
-                        </Box>
-                    )}
             </Box>
+            )}
             {/* Editor */}
             <Box sx={{
                 flex: 1,
-                overflowY: 'auto',
+                overflow: 'visible',
                 position: 'relative',
                 '& .tiptap': {
                     outline: 'none',
@@ -632,24 +773,15 @@ export const TiptapReportEditor: FC<TiptapReportEditorProps> = ({
                     },
                 },
             }}>
-                <EditorContent editor={editor} />
-                {/* Shimmer overlay while generating */}
-                {!editable && (
-                    <Box sx={{
-                        position: 'absolute',
-                        bottom: 0,
-                        left: 0,
-                        right: 0,
-                        height: '40%',
-                        pointerEvents: 'none',
-                        background: `linear-gradient(to bottom, transparent 0%, ${alpha(theme.palette.background.paper, 0.6)} 40%, ${theme.palette.background.paper} 100%)`,
-                        display: 'flex',
-                        alignItems: 'flex-end',
-                        justifyContent: 'center',
-                        pb: 6,
-                    }}>
-                        <WritingIndicator label={t('editor.writingReport')} fontSize="0.85rem" />
-                    </Box>
+                {/* While inspecting, the report is still empty — show progress.
+                    While writing, a typewriter view reveals the streamed text
+                    smoothly; TipTap takes over (one parse) once it completes. */}
+                {isGenerating && generatingPhase !== 'writing' ? (
+                    <InspectingStatus steps={inspectionSteps} />
+                ) : isGenerating && generatingPhase === 'writing' ? (
+                    <StreamingText text={streamingText ?? ''} resolveChartImage={resolveChartImage} />
+                ) : (
+                    <EditorContent editor={editor} />
                 )}
             </Box>
         </Box>
diff --git a/src/views/ViewUtils.tsx b/src/views/ViewUtils.tsx
index e54b9cad..c72a4a2d 100644
--- a/src/views/ViewUtils.tsx
+++ b/src/views/ViewUtils.tsx
@@ -143,7 +143,14 @@ const formatTemporalValue = (value: any, dataType: Type): string => {
 };
 
 const formatDuration = (value: any): string => {
-    if (typeof value === 'number') {
+    if (typeof value === 'number' && Number.isFinite(value)) {
+        // The h/m/s format assumes the value is in milliseconds. When the value
+        // isn't a whole number of seconds (e.g. seconds-based columns like
+        // 0.083), flooring would collapse everything to "0s" and destroy the
+        // data, so fall back to the plain number. Zero takes that path as well.
+        if (value === 0 || !Number.isInteger(value / 1_000)) {
+            return value.toLocaleString('en-US', { maximumFractionDigits: 4 });
+        }
         const h = Math.floor(value / 3_600_000);
         const m = Math.floor((value % 3_600_000) / 60_000);
         const s = Math.floor((value % 60_000) / 1_000);
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 06d2732c..9673a9ca 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -21,21 +21,22 @@ import {
     CardContent,
     Slider,
     Dialog,
+    DialogTitle,
     DialogContent,
     TextField,
-    CircularProgress,
     Popover,
+    Popper,
+    Paper,
+    ClickAwayListener,
     Snackbar,
     Alert,
     Fade,
     Grow,
-    alpha,
 } from '@mui/material';
 
 import _ from 'lodash';
 
-import { borderColor, transition } from '../app/tokens';
-import { WritingIndicator } from '../components/FunComponents';
+import { floatingPillSx } from '../app/tokens';
 
 import ButtonGroup from '@mui/material/ButtonGroup';
 
@@ -43,7 +44,7 @@ import ButtonGroup from '@mui/material/ButtonGroup';
 import '../scss/VisualizationView.scss';
 import '../scss/DataView.scss';
 import { useDispatch, useSelector } from 'react-redux';
-import { DataFormulatorState, dfActions, fetchChartInsight } from '../app/dfSlice';
+import { DataFormulatorState, dfActions } from '../app/dfSlice';
 import { assembleVegaChart, extractFieldsFromEncodingMap, getUrls, prepVisTable, fetchWithIdentity } from '../app/utils';
 import { displayRowsCache } from '../app/displayRowsCache';
 import { buildEmbeddedDataForChart, applyVariantConfigUI } from '../app/restyle';
@@ -51,17 +52,16 @@ import { apiRequest } from '../app/apiClient';
 import embed from 'vega-embed';
 import { Chart, EncodingItem, EncodingMap, FieldItem, computeInsightKey } from '../components/ComponentType';
 
-import DeleteIcon from '@mui/icons-material/Delete';
 import TerminalIcon from '@mui/icons-material/Terminal';
 import QuestionAnswerIcon from '@mui/icons-material/QuestionAnswer';
 import TuneIcon from '@mui/icons-material/Tune';
 import ContentCopyIcon from '@mui/icons-material/ContentCopy';
 import ZoomInIcon from '@mui/icons-material/ZoomIn';
 import ZoomOutIcon from '@mui/icons-material/ZoomOut';
-import FunctionsIcon from '@mui/icons-material/Functions';
 import CasinoIcon from '@mui/icons-material/Casino';
 import SaveAltIcon from '@mui/icons-material/SaveAlt';
 import OpenInNewIcon from '@mui/icons-material/OpenInNew';
+import CloseIcon from '@mui/icons-material/Close';
 import { AgentToyIcon, AnimatedAgentToyIcon } from './AgentToyIcon';
 
 import { CHART_TEMPLATES, getChartTemplate } from '../components/ChartTemplates';
@@ -76,13 +76,11 @@ import 'prismjs/themes/prism.css'; //Example style, you can use another
 import { useTranslation } from 'react-i18next';
 
 import { ChatDialog } from './ChatDialog';
-import { PlanStepsView } from './InteractionEntryCard';
 import { EncodingShelfCard } from './EncodingShelfCard';
 import { ChartQuickConfig } from './ChartQuickConfig';
 import { ChartVariantStrip } from './ChartVariantStrip';
 import { CustomReactTable } from './ReactTable';
 import { InsightIcon } from '../icons';
-import TableChartOutlinedIcon from '@mui/icons-material/TableChartOutlined';
 import { FreeDataViewFC } from './DataView';
 import { formatCellValue } from './ViewUtils';
 
@@ -368,17 +366,6 @@ const VegaChartRenderer: FC<{
             return;
         }
 
-        // Seed chart config with heuristic-computed defaults for properties
-        // the user hasn't explicitly set (e.g. independentYAxis toggle).
-        // Variants don't carry computed config — the agent's spec is final.
-        if (!activeVariant && spec._computedConfig) {
-            for (const [key, value] of Object.entries(spec._computedConfig)) {
-                if (chart.config?.[key] === undefined) {
-                    dispatch(dfActions.updateChartConfig({ chartId: chart.id, key, value }));
-                }
-            }
-        }
-
         spec['background'] = 'white';
 
         // Inject the insight title into the Vega-Lite spec instead of rendering
@@ -498,8 +485,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
     let focusedChartId = focusedId?.type === 'chart' ? focusedId.chartId : undefined;
     let chartSynthesisInProgress = useSelector((state: DataFormulatorState) => state.chartSynthesisInProgress) || [];
 
-    let handleDeleteChart = () => { focusedChartId && dispatch(dfActions.deleteChartById(focusedChartId)) }
-
     // Track the assembled Vega-Lite spec from the renderer so we can open it in the Vega Editor
     const [renderedSpec, setRenderedSpec] = useState<any | null>(null);
     const handleSpecReady = useCallback((spec: any | null) => { setRenderedSpec(spec); }, []);
@@ -536,7 +521,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
 
     const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems);
 
-    const [bottomTab, setBottomTab] = useState<string>('data');
+    const [codeDialogOpen, setCodeDialogOpen] = useState<boolean>(false);
     const [localScaleFactor, setLocalScaleFactor] = useState<number>(1);
     const [chatDialogOpen, setChatDialogOpen] = useState<boolean>(false);
     // Floating encoding-shelf popover. The button lives in the stable outer
@@ -547,7 +532,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
 
     // Reset local UI state when focused chart changes
     useEffect(() => {
-        setBottomTab('data');
+        setCodeDialogOpen(false);
         // Restore the persisted zoom for the newly focused chart (stored on
         // the Chart object so it survives switching charts and session
         // save/load). Falls back to 1 for charts that have never been zoomed.
@@ -736,9 +721,9 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
 
     let triggerTable = tables.find(t => t.derive?.trigger?.chart?.id == focusedChart?.id);
 
-    // Chart insight
-    const chartInsightInProgress = useSelector((state: DataFormulatorState) => state.chartInsightInProgress) || [];
-    const insightLoading = chartInsightInProgress.includes(focusedChart.id);
+    // Chart insight: the generation UI was removed, but a chart that already
+    // carries an insight still surfaces its title on the rendered chart, so we
+    // keep the freshness check used by `insightTitle` below.
     const currentInsightKey = computeInsightKey(focusedChart);
     const insightFresh = focusedChart.insight?.key === currentInsightKey;
     
@@ -756,18 +741,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
         },
     };
 
-    let deleteButton = (
-        <Tooltip title={t('chart.delete')} key="delete-btn-tooltip">
-            <span>
-                <IconButton size="small" disabled={trigger != undefined}
-                    sx={{ ...actionBtnSx, color: 'error.main', '&:hover': { backgroundColor: 'rgba(211, 47, 47, 0.08)', color: 'error.main' } }}
-                    onClick={() => { handleDeleteChart() }}>
-                    <DeleteIcon sx={{ fontSize: 18 }} />
-                </IconButton>
-            </span>
-        </Tooltip>
-    );
-
     let transformCode = "";
     if (table.derive?.code) {
         transformCode = `${table.derive.code}`
@@ -790,85 +763,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
         </Tooltip>
     );
 
-    // Toggle buttons for bottom-panel content (icon + text label)
-    const toggleBtnSx = (active: boolean) => ({
-        textTransform: 'none' as const,
-        fontSize: '0.7rem',
-        padding: '2px 8px',
-        borderRadius: '6px',
-        color: active ? 'primary.main' : 'text.secondary',
-        backgroundColor: active ? 'rgba(25, 118, 210, 0.08)' : 'transparent',
-        transition: 'all 0.15s ease',
-        minWidth: 'auto',
-        '& .MuiButton-startIcon': { mr: 0.5 },
-        '&:hover': {
-            backgroundColor: 'rgba(25, 118, 210, 0.08)',
-            color: 'primary.main',
-        },
-    });
-
-    let dataButton = (
-        <Button key="data-btn" size="small"
-            sx={toggleBtnSx(bottomTab === 'data')}
-            startIcon={<TableChartOutlinedIcon sx={{ fontSize: 14 }} />}
-            onClick={() => setBottomTab('data')}>
-            {t('chart.data')}
-        </Button>
-    );
-
-    let derivedTableItems = hasDerived ? [
-        <Button key="code-btn" size="small"
-            sx={toggleBtnSx(bottomTab === 'code')}
-            startIcon={<TerminalIcon sx={{ fontSize: 14 }} />}
-            onClick={() => setBottomTab('code')}>
-            {t('chart.code')}
-        </Button>,
-        ...(hasConcepts ? [
-            <Button key="concepts-btn" size="small"
-                sx={toggleBtnSx(bottomTab === 'concepts')}
-                startIcon={<FunctionsIcon sx={{ fontSize: 14 }} />}
-                onClick={() => setBottomTab('concepts')}>
-                {t('chart.concepts')}
-            </Button>
-        ] : []),
-    ] : [];
-
-    let logButton = hasDerived ? (
-        <Tooltip key="log-btn-tooltip" title={t('chart.log')}>
-            <span>
-                <IconButton key="log-btn" size="small" sx={actionBtnSx}
-                    onClick={() => setChatDialogOpen(true)}>
-                    <QuestionAnswerIcon sx={{ fontSize: 18 }} />
-                </IconButton>
-            </span>
-        </Tooltip>
-    ) : null;
-
-    let insightButton = (!chartUnavailable && focusedChart.chartType !== "Table") ? (
-        <Button key="insight-btn" size="small"
-            sx={toggleBtnSx(bottomTab === 'insight')}
-            startIcon={insightLoading ? <CircularProgress size={12} /> : <InsightIcon sx={{ fontSize: 14 }} />}
-            onClick={() => {
-                if (bottomTab !== 'insight' && !insightFresh && !insightLoading) {
-                    dispatch(fetchChartInsight({ chartId: focusedChart.id, tableId: table.id }) as any);
-                }
-                setBottomTab('insight');
-            }}>
-            {t('chart.insight')}
-        </Button>
-    ) : null;
-
-    let chartActionButtons = [
-        dataButton,
-        insightButton,
-        ...derivedTableItems,
-        <Divider key="action-divider" orientation="vertical" flexItem sx={{ mx: 0.5, my: 0.5 }} />,
-        logButton,
-        // vegaEditorButton,
-        deleteButton,
-    ]
-
-
     let chartMessage = "";
     if (focusedChart.chartType == "Table") {
         chartMessage = t('chart.msgTable');
@@ -956,13 +850,17 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                                     fast in-place tweaks without opening the full encoding
                                     popover. Kept INSIDE the chart-box so it reads as part of
                                     the same chart component rather than drifting down toward
-                                    the data panel below. Placed ABOVE the action items so the
-                                    options sit directly under the chart, before the AI hint.
-                                    Hidden while synthesis is running — the chart is being
-                                    regenerated, so config controls would be premature. */}
-                                {!chartUnavailable && !chartSynthesisInProgress.includes(focusedChart.id) && focusedChart.chartType !== "Table" && focusedChart.chartType !== "Auto" && (
-                                    <ChartQuickConfig chartId={focusedChart.id} />
-                                )}
+                                    the data panel below. The bar also hosts the built-in
+                                    delete-chart action, so it always renders even when there
+                                    are no property controls (e.g. Table/Auto charts or while
+                                    synthesis is running — in which case property controls are
+                                    suppressed but delete stays reachable). */}
+                                <ChartQuickConfig
+                                    chartId={focusedChart.id}
+                                    tableMetadata={table.metadata}
+                                    options={(!chartUnavailable && !chartSynthesisInProgress.includes(focusedChart.id) && focusedChart.chartType !== "Table" && focusedChart.chartType !== "Auto") ? renderedSpec?._options : undefined}
+                                    deleteDisabled={trigger != undefined}
+                                />
                                 {chartActionItems}
                             </Box>
                         </Fade>;
@@ -975,23 +873,11 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
             <Box sx={{ my: 'auto' }}>
                 {focusedElement}
             </Box>
-            <Box key='chart-action-buttons' sx={{ 
-                display: 'flex', flexShrink: 0, flexDirection: "row", alignItems: 'center',
-                mx: "auto", py: 0.5, gap: 0.25,
-            }}>
-                {chartActionButtons}
-            </Box>
         </Box>,
         <React.Fragment key="bottom-panels">
             {(() => {
-                const panelBoxSx = {
-                    margin: '8px auto 24px auto', padding: '8px', borderRadius: '8px',
-                    border: `1px solid ${borderColor.divider}`,
-                    transition: 'box-shadow 0.2s ease',
-                    '&:hover': { boxShadow: '0 0 8px rgba(25, 118, 210, 0.25)' },
-                };
                 return <Box sx={{ px: 2 }}>
-                    {bottomTab === 'data' && (() => {
+                    {(() => {
                         const ROW_HEIGHT = 25;
                         const HEADER_HEIGHT = 32;
                         const FOOTER_HEIGHT = 32;
@@ -1028,99 +914,6 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                             </Box>
                         );
                     })()}
-                    {bottomTab === 'code' && hasDerived && (
-                        <Box sx={{ ...panelBoxSx, minWidth: 440, maxWidth: 800 }}>
-                            <Box sx={{ maxHeight: 400, overflow: 'auto' }}>
-                                {(() => {
-                                    const derive = triggerTable?.derive || table.derive;
-                                    const interaction = derive?.trigger?.interaction;
-                                    const lastEntry = interaction?.[interaction.length - 1];
-                                    const plan = lastEntry?.plan || '';
-                                    const planSteps = plan ? (plan.includes('\x1E') ? plan.split('\x1E') : plan.split('\n')).filter((s: string) => s.trim()) : [];
-                                    if (planSteps.length > 0) {
-                                        return (
-                                            <Box sx={{ px: 1.5, pt: 1, pb: 0.5, borderBottom: '1px solid', borderColor: 'divider' }}>
-                                                <Typography sx={{ fontSize: 11, fontWeight: 600, color: 'text.secondary', mb: 0.5 }}>
-                                                    {t('chart.agentLog')}
-                                                </Typography>
-                                                <PlanStepsView steps={planSteps} />
-                                            </Box>
-                                        );
-                                    }
-                                    return null;
-                                })()}
-                                <CodeBox code={transformCode.trimStart()} language={table.virtual ? "sql" : "python"} />
-                            </Box>
-                        </Box>
-                    )}
-                    {bottomTab === 'concepts' && hasConcepts && (
-                        <Box sx={{ ...panelBoxSx, minWidth: 440, maxWidth: 800 }}>
-                            <ConceptExplCards
-                                concepts={extractConceptExplanations(table)}
-                                title={t('chart.derivedConcepts')}
-                                maxCards={8}
-                            />
-                        </Box>
-                    )}
-                    {bottomTab === 'insight' && (
-                        <Box sx={{ ...panelBoxSx, minWidth: 440, maxWidth: 800 }}>
-                            {insightLoading ? (
-                                <Box sx={{ p: 2 }}>
-                                    <WritingIndicator label={t('chart.analyzingChart')} />
-                                </Box>
-                            ) : insightFresh && focusedChart.insight ? (
-                                <Box sx={{ p: 1.5 }}>
-                                    <Box sx={{ 
-                                        display: 'grid', 
-                                        gridTemplateColumns: 'repeat(2, 1fr)',
-                                        gap: 1,
-                                    }}>
-                                        {(focusedChart.insight.takeaways || []).map((takeaway, i) => (
-                                            <Box key={i} sx={{
-                                                padding: '8px 12px',
-                                                borderLeft: '3px solid',
-                                                borderLeftColor: 'primary.light',
-                                                borderRadius: '2px',
-                                                backgroundColor: (theme) => alpha(theme.palette.background.paper, 0.5),
-                                                transition: transition.normal,
-                                                '&:hover': {
-                                                    backgroundColor: (theme) => alpha(theme.palette.primary.main, 0.04),
-                                                },
-                                            }}>
-                                                <Typography sx={{ fontSize: '12px', lineHeight: 1.5, color: 'text.primary' }}>
-                                                    {takeaway}
-                                                </Typography>
-                                            </Box>
-                                        ))}
-                                    </Box>
-                                    <Button
-                                        size="small"
-                                        sx={{ mt: 1.5, textTransform: 'none', fontSize: '0.7rem' }}
-                                        onClick={() => {
-                                            dispatch(fetchChartInsight({ chartId: focusedChart.id, tableId: table.id }) as any);
-                                        }}
-                                    >
-                                        {t('chart.regenerate')}
-                                    </Button>
-                                </Box>
-                            ) : (
-                                <Box sx={{ p: 1.5 }}>
-                                    <Typography fontSize="small" color="text.secondary">
-                                        {t('chart.noInsightAvailable')}
-                                    </Typography>
-                                    <Button
-                                        size="small"
-                                        sx={{ mt: 0.5, textTransform: 'none', fontSize: '0.7rem' }}
-                                        onClick={() => {
-                                            dispatch(fetchChartInsight({ chartId: focusedChart.id, tableId: table.id }) as any);
-                                        }}
-                                    >
-                                        {t('chart.generateInsight')}
-                                    </Button>
-                                </Box>
-                            )}
-                        </Box>
-                    )}
                 </Box>;
             })()}
         </React.Fragment>,
@@ -1129,37 +922,90 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
             handleCloseDialog={() => setChatDialogOpen(false)}
             code={transformCode}
             dialog={triggerTable?.derive?.dialog || table.derive?.dialog as any[]} /> : null,
+        // Code inspector: derivation code + formula/concept metadata, opened from
+        // the floating top-right cluster. A clickaway/close dialog (not a bottom
+        // tab) so the bottom panel stays a pure data table.
+        hasDerived ? (
+            <Dialog key="code-dialog-overlay" open={codeDialogOpen} onClose={() => setCodeDialogOpen(false)}
+                sx={{ '& .MuiDialog-paper': { maxHeight: '90%' } }}
+                maxWidth="md" fullWidth>
+                <DialogTitle sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', py: 1.25 }}>
+                    <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
+                        <TerminalIcon sx={{ fontSize: 18, color: 'text.secondary' }} />
+                        <Typography sx={{ fontSize: 14, fontWeight: 600 }}>{t('chart.code')}</Typography>
+                    </Box>
+                    <IconButton size="small" aria-label={t('app.close')} onClick={() => setCodeDialogOpen(false)}>
+                        <CloseIcon sx={{ fontSize: 18 }} />
+                    </IconButton>
+                </DialogTitle>
+                <DialogContent sx={{ overflowY: 'auto', overflowX: 'hidden' }} dividers>
+                    {hasConcepts && (
+                        <Box sx={{ pb: 1.5, mb: 1.5, borderBottom: '1px solid', borderColor: 'divider' }}>
+                            <Typography sx={{ fontSize: 10, fontWeight: 700, letterSpacing: '0.08em', textTransform: 'uppercase', color: 'text.disabled', mb: 0.75 }}>
+                                {t('chart.derivedConcepts')}
+                            </Typography>
+                            <ConceptExplCards
+                                concepts={extractConceptExplanations(table)}
+                                maxCards={8}
+                            />
+                        </Box>
+                    )}
+                    <CodeBox code={transformCode.trimStart()} language={table.virtual ? "sql" : "python"} />
+                </DialogContent>
+            </Dialog>
+        ) : null,
     ]
     
     let content = [
         <Box key='focused-box' className="vega-focused vis-scroll" sx={{ display: "flex", overflowY: 'auto', overflowX: 'hidden', flexDirection: 'column', position: 'relative', flex: 1 }}>
             {focusedComponent}
         </Box>,
-        /* Encoding shelf popover, anchored to the floating "edit chart" button. */
-        <Popover
+        /* Encoding shelf popover, anchored to the floating "edit chart" button.
+           Rendered as a non-modal Popper (not a Modal-based Popover) so it does
+           NOT mount a full-viewport backdrop/focus-trap. That backdrop used to
+           swallow pointer events outside the panel, which broke dragging fields
+           from the data table into the encoding channels while the shelf is
+           open. A ClickAwayListener keeps the "click outside closes it"
+           behavior. It listens on `onMouseUp` (mirroring EncodingBox): MUI
+           menus/selects portal to document.body but remain REACT descendants of
+           this listener, so their events bubble through the React tree on
+           mouseUp (before the menu closes on click) and are correctly treated as
+           "inside" — picking a chart type therefore does not collapse the shelf.
+           A native HTML5 drag from the table fires no mouseUp, so dragging a
+           field in does not close the shelf either. */
+        <Popper
             key='encoding-popover'
             open={encodingOpen && Boolean(editButtonRef.current)}
             anchorEl={editButtonRef.current}
-            onClose={() => setEncodingOpen(false)}
-            anchorOrigin={{ vertical: 'bottom', horizontal: 'right' }}
-            transformOrigin={{ vertical: 'top', horizontal: 'right' }}
-            slotProps={{ paper: { sx: { width: 320, maxHeight: '78vh', overflowY: 'auto', mt: 0.5, borderRadius: '10px', overflowX: 'visible' } } }}
+            placement='bottom-end'
+            style={{ zIndex: 1300 }}
         >
-            <EncodingShelfCard chartId={focusedChart.id} />
-            {/* Small, low-emphasis footer for advanced users to inspect the
-                assembled Vega-Lite spec in the external Vega editor. */}
-            <Box sx={{ display: 'flex', justifyContent: 'flex-end', px: 1.5, pt: 0.5, pb: 1 }}>
-                <Button
-                    size="small"
-                    startIcon={<OpenInNewIcon sx={{ fontSize: 13 }} />}
-                    disabled={!renderedSpec || focusedChart.chartType === "Table" || focusedChart.chartType === "Auto"}
-                    onClick={handleOpenInVegaEditor}
-                    sx={{ textTransform: 'none', fontSize: '0.65rem', color: 'text.disabled', minWidth: 'auto', py: 0, '&:hover': { color: 'text.secondary', backgroundColor: 'transparent' } }}
+            <ClickAwayListener
+                mouseEvent="onMouseUp"
+                touchEvent="onTouchStart"
+                onClickAway={() => setEncodingOpen(false)}
+            >
+                <Paper
+                    elevation={8}
+                    sx={{ width: 280, maxHeight: '78vh', overflowY: 'auto', mt: 0.5, py: 0.5, borderRadius: '10px', overflowX: 'visible' }}
                 >
-                    {t('chart.openInVegaEditor')}
-                </Button>
-            </Box>
-        </Popover>
+                    <EncodingShelfCard chartId={focusedChart.id} />
+                    {/* Footer: low-emphasis link to inspect the assembled
+                        Vega-Lite spec in the external Vega editor. */}
+                    <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'flex-end', px: 1.5, pb: 1 }}>
+                        <Button
+                            size="small"
+                            startIcon={<OpenInNewIcon sx={{ fontSize: 13 }} />}
+                            disabled={!renderedSpec || focusedChart.chartType === "Table" || focusedChart.chartType === "Auto"}
+                            onClick={handleOpenInVegaEditor}
+                            sx={{ textTransform: 'none', fontSize: '0.65rem', color: 'text.disabled', minWidth: 'auto', py: 0, '&:hover': { color: 'text.secondary', backgroundColor: 'transparent' } }}
+                        >
+                            {t('chart.openInVegaEditor')}
+                        </Button>
+                    </Box>
+                </Paper>
+            </ClickAwayListener>
+        </Popper>
     ]
 
     let [scaleMin, scaleMax] = [0.2, 2.4]
@@ -1230,28 +1076,55 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
             {focusedChart && focusedChart.chartType !== 'Table' && focusedChart.chartType !== 'Auto' && (
                 <ChartVariantStrip chartId={focusedChart.id} />
             )}
-            {/* Edit-chart (encoding shelf) button — right-aligned in the same
-                floating toolbar so all top controls sit on one pinned row.
-                Opens the encoding shelf popover; stays available even when the
-                chart can't render yet, so users can fix the encoding. */}
-            {focusedChart && focusedChart.chartType !== 'Table' && focusedChart.chartType !== 'Auto' && (
-                <Tooltip title={t('chart.editChart')} placement="left">
-                    <IconButton
-                        ref={editButtonRef}
-                        size="small"
-                        onClick={() => setEncodingOpen(o => !o)}
-                        sx={{
-                            ml: 'auto', mr: '8px',
-                            backgroundColor: encodingOpen ? 'primary.main' : 'rgba(255,255,255,0.92)',
-                            color: encodingOpen ? 'primary.contrastText' : 'text.secondary',
-                            border: '1px solid', borderColor: 'divider',
-                            boxShadow: '0 1px 4px rgba(0,0,0,0.12)',
-                            '&:hover': { backgroundColor: encodingOpen ? 'primary.dark' : 'rgba(255,255,255,1)' },
-                        }}>
-                        <TuneIcon sx={{ fontSize: 18 }} />
-                    </IconButton>
-                </Tooltip>
-            )}
+            {/* Right-aligned floating cluster near the top-right: "inspect /
+                edit this chart" controls grouped together (agent log + code +
+                encoding shelf). Chart deletion lives in the chart property-config
+                bar below the chart. */}
+            <Box sx={{ ml: 'auto', mr: '8px', display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                {hasDerived && (
+                    <Tooltip title={t('chart.log')} placement="bottom">
+                        <IconButton
+                            size="small"
+                            onClick={() => setChatDialogOpen(true)}
+                            sx={floatingPillSx}>
+                            <QuestionAnswerIcon sx={{ fontSize: 18 }} />
+                        </IconButton>
+                    </Tooltip>
+                )}
+                {/* Code inspector button — opens the derivation code + formula
+                    metadata in a dialog. Only shown for derived tables. */}
+                {hasDerived && (
+                    <Tooltip title={t('chart.code')} placement="bottom">
+                        <IconButton
+                            size="small"
+                            onClick={() => setCodeDialogOpen(true)}
+                            sx={floatingPillSx}>
+                            <TerminalIcon sx={{ fontSize: 18 }} />
+                        </IconButton>
+                    </Tooltip>
+                )}
+                {/* Edit-chart (encoding shelf) button — opens the encoding shelf
+                    popover; stays available even when the chart can't render yet,
+                    so users can fix the encoding. */}
+                {focusedChart && focusedChart.chartType !== 'Table' && focusedChart.chartType !== 'Auto' && (
+                    <Tooltip title={t('chart.editChart')} placement="left">
+                        <IconButton
+                            ref={editButtonRef}
+                            size="small"
+                            onClick={() => setEncodingOpen(o => !o)}
+                            sx={{
+                                ...floatingPillSx,
+                                ...(encodingOpen ? {
+                                    backgroundColor: 'primary.main',
+                                    color: 'primary.contrastText',
+                                    '&:hover': { backgroundColor: 'primary.dark', color: 'primary.contrastText' },
+                                } : {}),
+                            }}>
+                            <TuneIcon sx={{ fontSize: 18 }} />
+                        </IconButton>
+                    </Tooltip>
+                )}
+            </Box>
         </Box>
         {content}
     </Box>
diff --git a/tests/frontend/unit/lib/agents-chart/flint_py_extract.test.ts b/tests/frontend/unit/lib/agents-chart/flint_py_extract.test.ts
new file mode 100644
index 00000000..54fcc888
--- /dev/null
+++ b/tests/frontend/unit/lib/agents-chart/flint_py_extract.test.ts
@@ -0,0 +1,240 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+//
+// Comprehensive fixture extractor for the Flint-Py compatibility suite.
+//
+// Walks GALLERY_TREE, collects every page rendered with a VegaLite-relevant
+// backend (single library='vegalite' OR render='triple', which always includes
+// VL), and runs the JS `assembleVegaLite` on every TestCase produced by every
+// referenced generator. For each case we write:
+//   flint-py/tests/fixtures/<slug>/input.json      (unless testCaseToInput itself threw)
+//   flint-py/tests/fixtures/<slug>/expected.json   (only on JS success)
+//   flint-py/tests/fixtures/<slug>/meta.json       (always)
+//
+// A top-level `manifest.json` records every case with its status, chart type,
+// gallery section/category/page provenance, and any JS error message.
+
+import { describe, it } from 'vitest';
+import * as fs from 'node:fs';
+import * as path from 'node:path';
+
+import { GALLERY_TREE, TEST_GENERATORS } from '../../../../../src/lib/agents-chart/test-data';
+import { assembleVegaLite } from '../../../../../src/lib/agents-chart';
+import type { TestCase } from '../../../../../src/lib/agents-chart/test-data/types';
+import type { ChartAssemblyInput, ChartEncoding } from '../../../../../src/lib/agents-chart/core/types';
+
+const CANVAS_SIZE = { width: 400, height: 300 } as const;
+const DEFAULT_OPTIONS = { addTooltips: true } as const;
+
+const FIXTURES_ROOT = path.resolve(__dirname, '../../../../../flint-py/tests/fixtures');
+
+interface ManifestEntry {
+    slug: string;
+    title: string;
+    chartType: string;
+    section: string;
+    category: string;
+    page: string;
+    generator: string;
+    library: 'vegalite' | 'triple';
+    status: 'js_success' | 'js_error';
+    jsError?: string;
+    fixtureDir?: string;
+}
+
+/** Lowercase `s`, turn each run of non-[a-z0-9] into '_', strip edge '_', then keep at most 80 chars (a cut may end on '_'). */
+function slugify(s: string): string {
+    const lowered = s.toLowerCase();
+    const underscored = lowered.replace(/[^a-z0-9]+/g, '_');
+    const trimmed = underscored.replace(/^_+|_+$/g, '');
+    return trimmed.slice(0, 80);
+}
+
+/** Build the ChartAssemblyInput for a TestCase, i.e. what ChartGallery passes to assembleVegaLite. */
+function testCaseToInput(tc: TestCase): ChartAssemblyInput {
+    // Only channels bound to a field are kept; optional properties are copied when truthy.
+    const encodings: Record<string, ChartEncoding> = {};
+    for (const [channel, ei] of Object.entries(tc.encodingMap)) {
+        if (!ei || !ei.fieldID) continue;
+        const entry: ChartEncoding = { field: ei.fieldID };
+        if (ei.dtype)     entry.type      = ei.dtype as any;
+        if (ei.aggregate) entry.aggregate = ei.aggregate as any;
+        if (ei.sortOrder) entry.sortOrder = ei.sortOrder as any;
+        if (ei.sortBy)    entry.sortBy    = ei.sortBy;
+        if (ei.scheme)    entry.scheme    = ei.scheme;
+        encodings[channel] = entry;
+    }
+
+    // Per-field metadata seeds the map; semanticAnnotations entries then overwrite by name.
+    const semanticTypes: Record<string, any> = {};
+    for (const [name, meta] of Object.entries(tc.metadata)) {
+        if (meta.semanticType) semanticTypes[name] = meta.semanticType;
+    }
+    for (const [name, ann] of Object.entries(tc.semanticAnnotations ?? {})) {
+        semanticTypes[name] = ann;
+    }
+
+    // Test-case assemble options are layered over DEFAULT_OPTIONS; canvas size is fixed.
+    return {
+        data: { values: tc.data },
+        semantic_types: semanticTypes,
+        chart_spec: {
+            chartType: tc.chartType,
+            encodings,
+            canvasSize: CANVAS_SIZE,
+            ...(tc.chartProperties ? { chartProperties: tc.chartProperties } : {}),
+        },
+        options: { ...DEFAULT_OPTIONS, ...(tc.assembleOptions ?? {}) },
+    };
+}
+
+/** Gallery provenance of one generator key, as gathered by collectVlGeneratorRefs:
+ *  the ids of the section, category and page that referenced it, and whether that
+ *  page renders 'vegalite' alone or 'triple'. Only the first referencing page is kept.
+ */
+interface PageRef {
+    section: string;
+    category: string;
+    page: string;
+    library: 'vegalite' | 'triple';
+}
+
+/** Map each generator key used by a VegaLite-producing gallery page to the first page that references it. */
+function collectVlGeneratorRefs(): Map<string, PageRef> {
+    const refs = new Map<string, PageRef>();
+    for (const section of GALLERY_TREE) {
+        for (const category of section.categories) {
+            for (const page of category.pages) {
+                const isTriple = page.render === 'triple';
+                const isSingleVl = page.render === 'single' && page.library === 'vegalite';
+                if (!isTriple && !isSingleVl) continue;
+                for (const gen of page.generatorKeys) {
+                    if (refs.has(gen)) continue; // first referencing page wins
+                    refs.set(gen, {
+                        section: section.id,
+                        category: category.id,
+                        page: page.id,
+                        library: isTriple ? 'triple' : 'vegalite',
+                    });
+                }
+            }
+        }
+    }
+    return refs;
+}
+
+// Registers one vitest case per gallery TestCase. Each case writes its fixture files
+// under FIXTURES_ROOT and appends a ManifestEntry; the last case writes manifest.json.
+describe('flint-py fixture extraction (full gallery)', () => {
+    fs.mkdirSync(FIXTURES_ROOT, { recursive: true });
+    const manifest: ManifestEntry[] = [];
+    const refs = collectVlGeneratorRefs();
+
+    for (const [genKey, ref] of refs) {
+        describe(genKey, () => {
+            // Gallery provenance shared by every manifest entry of this generator.
+            const provenance = {
+                section: ref.section,
+                category: ref.category,
+                page: ref.page,
+                generator: genKey,
+                library: ref.library,
+            };
+            // Registers a placeholder case that only records why this generator produced
+            // no fixtures; such entries have no fixture directory, hence no `fixtureDir`.
+            const recordGeneratorFailure = (testName: string, slug: string, title: string, jsError: string): void => {
+                it(testName, () => {
+                    manifest.push({ slug, title, chartType: '', ...provenance, status: 'js_error', jsError });
+                });
+            };
+
+            const generator = TEST_GENERATORS[genKey];
+            if (!generator) {
+                recordGeneratorFailure(
+                    'skip — generator not registered',
+                    `_missing__${slugify(genKey)}`,
+                    '(generator not registered)',
+                    'generator key not found in TEST_GENERATORS',
+                );
+                return;
+            }
+
+            // Generators run at collection time so one case can be registered per TestCase.
+            let cases: TestCase[];
+            try {
+                cases = generator();
+            } catch (e: any) {
+                recordGeneratorFailure(
+                    'skip — generator threw',
+                    `_gen_threw__${slugify(genKey)}`,
+                    '(generator threw)',
+                    `generator() threw: ${e?.message || String(e)}`,
+                );
+                return;
+            }
+
+            cases.forEach((tc, idx) => {
+                const title = tc.title || `case ${idx}`;
+                const slug = `${slugify(genKey)}__${String(idx).padStart(2, '0')}__${slugify(tc.title || `case${idx}`)}`;
+                it(title, () => {
+                    const dir = path.join(FIXTURES_ROOT, slug);
+                    fs.mkdirSync(dir, { recursive: true });
+                    const writeJson = (file: string, value: unknown): void =>
+                        fs.writeFileSync(path.join(dir, file), JSON.stringify(value, null, 2));
+                    // Drop payloads left by an earlier run: a case that used to succeed and now
+                    // fails must not keep a stale expected.json beside a 'js_error' meta.json.
+                    for (const stale of ['input.json', 'expected.json']) fs.rmSync(path.join(dir, stale), { force: true });
+
+                    const entry: ManifestEntry = {
+                        slug,
+                        title,
+                        chartType: tc.chartType,
+                        ...provenance,
+                        status: 'js_success',
+                        fixtureDir: slug,
+                    };
+                    // Flags the entry as failed, records it in the manifest and writes meta.json.
+                    const fail = (jsError: string): void => {
+                        entry.status = 'js_error';
+                        entry.jsError = jsError;
+                        manifest.push(entry);
+                        writeJson('meta.json', entry);
+                    };
+
+                    let input: ChartAssemblyInput;
+                    try {
+                        input = testCaseToInput(tc);
+                    } catch (e: any) {
+                        fail(`testCaseToInput threw: ${e?.message || String(e)}`);
+                        return;
+                    }
+                    // input.json is written after assembleVegaLite has run, on success and failure alike.
+                    const writeInput = (): void =>
+                        writeJson('input.json', { title: tc.title, description: tc.description, chartType: tc.chartType, input });
+
+                    let spec: unknown;
+                    try {
+                        spec = assembleVegaLite(input);
+                    } catch (e: any) {
+                        fail(e?.message || String(e));
+                        writeInput();
+                        return;
+                    }
+
+                    writeInput();
+                    writeJson('expected.json', spec);
+                    writeJson('meta.json', entry);
+                    manifest.push(entry);
+                });
+            });
+        });
+    }
+
+    it('writes the fixture manifest', () => {
+        manifest.sort((a, b) => a.slug.localeCompare(b.slug));
+        fs.writeFileSync(
+            path.join(FIXTURES_ROOT, 'manifest.json'),
+            JSON.stringify(manifest, null, 2),
+        );
+    });
+});
diff --git a/tests/frontend/unit/lib/agents-chart/sortAction.test.ts b/tests/frontend/unit/lib/agents-chart/sortAction.test.ts
new file mode 100644
index 00000000..b9152bbd
--- /dev/null
+++ b/tests/frontend/unit/lib/agents-chart/sortAction.test.ts
@@ -0,0 +1,210 @@
+import { describe, expect, it } from 'vitest';
+import { makeSortAction } from '../../../../../src/lib/agents-chart';
+import { assembleVegaLite } from '../../../../../src/lib/agents-chart';
+
+const baseCanvas = { width: 400, height: 300 };
+
+describe('makeSortAction (Sort encoding action)', () => {
+  const action = makeSortAction();
+
+  describe('get — derive control value from base encodings', () => {
+    it('returns undefined (Default) when no sort is set', () => {
+      const enc = { x: { field: 'cat', type: 'nominal' as const }, y: { field: 'val', aggregate: 'sum' as const } };
+      expect(action.get(enc)).toBeUndefined();
+    });
+
+    it('reads value sort from sortBy referencing the measure channel', () => {
+      const enc = {
+        x: { field: 'cat', type: 'nominal' as const, sortBy: 'y', sortOrder: 'descending' as const },
+        y: { field: 'val', aggregate: 'sum' as const },
+      };
+      expect(action.get(enc)).toBe('value-desc');
+    });
+
+    it('treats a bare label sort (sortOrder, no sortBy) as Default', () => {
+      const enc = {
+        x: { field: 'cat', type: 'nominal' as const, sortOrder: 'ascending' as const },
+        y: { field: 'val', aggregate: 'sum' as const },
+      };
+      expect(action.get(enc)).toBeUndefined();
+    });
+
+    it('treats unrepresentable sorts (custom order / by-color) as Default', () => {
+      const enc = {
+        x: { field: 'cat', type: 'nominal' as const, sortBy: '["B","A"]' },
+        y: { field: 'val', aggregate: 'sum' as const },
+      };
+      expect(action.get(enc)).toBeUndefined();
+    });
+
+    it('detects a horizontal orientation (measure on x, category on y)', () => {
+      const enc = {
+        x: { field: 'val', aggregate: 'sum' as const },
+        y: { field: 'cat', type: 'nominal' as const, sortBy: 'x', sortOrder: 'ascending' as const },
+      };
+      expect(action.get(enc)).toBe('value-asc');
+    });
+
+    it('returns undefined when the category axis is temporal (not sortable)', () => {
+      const enc = {
+        x: { field: 'month', type: 'temporal' as const },
+        y: { field: 'val', type: 'quantitative' as const, aggregate: 'sum' as const },
+      };
+      expect(action.get(enc)).toBeUndefined();
+    });
+
+    it('returns undefined when both axes are quantitative (scatter)', () => {
+      const enc = {
+        x: { field: 'a', type: 'quantitative' as const },
+        y: { field: 'b', type: 'quantitative' as const },
+      };
+      expect(action.get(enc)).toBeUndefined();
+    });
+  });
+
+  describe('isApplicable — type-aware visibility gate', () => {
+    it('is applicable when a discrete category + measure pair exists', () => {
+      const enc = { x: { field: 'cat', type: 'nominal' as const }, y: { field: 'val', aggregate: 'sum' as const } };
+      expect(action.isApplicable?.({ encodings: enc })).toBe(true);
+    });
+
+    it('is not applicable for a temporal-x time series', () => {
+      const enc = {
+        x: { field: 'month', type: 'temporal' as const },
+        y: { field: 'val', type: 'quantitative' as const, aggregate: 'sum' as const },
+      };
+      expect(action.isApplicable?.({ encodings: enc })).toBe(false);
+    });
+
+    it('is not applicable when no measure axis exists', () => {
+      const enc = { x: { field: 'cat', type: 'nominal' as const }, y: { field: 'cat2', type: 'nominal' as const } };
+      expect(action.isApplicable?.({ encodings: enc })).toBe(false);
+    });
+  });
+
+  describe('set — compose the override onto the category channel', () => {
+    const enc = { x: { field: 'cat', type: 'nominal' as const }, y: { field: 'val', aggregate: 'sum' as const } };
+
+    it('value-desc writes sortBy=measure + descending on the category channel', () => {
+      const next = action.set(enc, 'value-desc');
+      expect(next.x.sortBy).toBe('y');
+      expect(next.x.sortOrder).toBe('descending');
+    });
+
+    it('Default (undefined) clears both sort fields', () => {
+      const sorted = action.set(enc, 'value-desc');
+      const cleared = action.set(sorted, undefined);
+      expect(cleared.x.sortBy).toBeUndefined();
+      expect(cleared.x.sortOrder).toBeUndefined();
+    });
+
+    it('does not mutate the input encodings', () => {
+      action.set(enc, 'value-desc');
+      expect(enc.x).not.toHaveProperty('sortBy');
+    });
+
+    it('targets the category channel under horizontal orientation', () => {
+      const horizontal = { x: { field: 'val', aggregate: 'sum' as const }, y: { field: 'cat', type: 'nominal' as const } };
+      const next = action.set(horizontal, 'value-asc');
+      expect(next.y.sortBy).toBe('x');
+      expect(next.y.sortOrder).toBe('ascending');
+      expect(next.x.sortBy).toBeUndefined();
+    });
+
+    it('is a no-op when there is no discrete category axis (temporal x)', () => {
+      const temporal = {
+        x: { field: 'month', type: 'temporal' as const },
+        y: { field: 'val', type: 'quantitative' as const, aggregate: 'sum' as const },
+      };
+      const next = action.set(temporal, 'value-desc');
+      expect(next).toBe(temporal);
+    });
+  });
+
+  describe('end-to-end: override composed by the compiler', () => {
+    const data = {
+      values: [
+        { category: 'A', value: 20 },
+        { category: 'B', value: 50 },
+        { category: 'C', value: 10 },
+      ],
+    };
+
+    it('value-desc override sorts the bar x-axis by the measure', () => {
+      const spec = assembleVegaLite({
+        data,
+        semantic_types: { category: 'Category', value: 'Quantity' },
+        chart_spec: {
+          chartType: 'Bar Chart',
+          encodings: { x: { field: 'category' }, y: { field: 'value', aggregate: 'sum' } },
+          chartProperties: { sort: 'value-desc' },
+          canvasSize: baseCanvas,
+        },
+      });
+      expect(spec.encoding.x.sort).toBe('-y');
+    });
+
+    it('no override leaves the template default ordering', () => {
+      const spec = assembleVegaLite({
+        data,
+        semantic_types: { category: 'Category', value: 'Quantity' },
+        chart_spec: {
+          chartType: 'Bar Chart',
+          encodings: { x: { field: 'category' }, y: { field: 'value', aggregate: 'sum' } },
+          canvasSize: baseCanvas,
+        },
+      });
+      expect(spec.encoding.x.sort).not.toBe('-y');
+    });
+
+    it('applies value-desc when the measure type is auto (resolved by the compiler)', () => {
+      // The y measure has no explicit `type` and no aggregate — its
+      // quantitative-ness is only known after semantic resolution. The
+      // override must still compose (regression: previously no-op'd).
+      const spec = assembleVegaLite({
+        data: {
+          values: [
+            { category: 'A', value: 20 },
+            { category: 'B', value: 50 },
+            { category: 'C', value: 10 },
+          ],
+        },
+        semantic_types: { category: 'Category', value: 'Quantity' },
+        chart_spec: {
+          chartType: 'Bar Chart',
+          encodings: { x: { field: 'category' }, y: { field: 'value' } },
+          chartProperties: { sort: 'value-desc' },
+          canvasSize: baseCanvas,
+        },
+      });
+      expect(spec.encoding.x.sort).toBe('-y');
+    });
+
+    it('value-desc overrides a field’s intrinsic ordinal ordering', () => {
+      // Ordinal category with canonical levels would normally sort by those
+      // levels; an explicit value sort must win over the intrinsic order.
+      const spec = assembleVegaLite({
+        data: {
+          values: [
+            { budget: 'Under $10M', pct: 65 },
+            { budget: '$10M-$30M', pct: 62 },
+            { budget: '$30M-$70M', pct: 64 },
+            { budget: '$70M-$150M', pct: 76 },
+            { budget: '$150M+', pct: 97 },
+          ],
+        },
+        semantic_types: {
+          budget: { semanticType: 'Category', sortOrder: ['Under $10M', '$10M-$30M', '$30M-$70M', '$70M-$150M', '$150M+'] },
+          pct: 'Percentage',
+        },
+        chart_spec: {
+          chartType: 'Bar Chart',
+          encodings: { x: { field: 'budget', type: 'ordinal' }, y: { field: 'pct' } },
+          chartProperties: { sort: 'value-desc' },
+          canvasSize: baseCanvas,
+        },
+      });
+      expect(spec.encoding.x.sort).toBe('-y');
+    });
+  });
+});
diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/bandedLabelAngle.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/bandedLabelAngle.test.ts
new file mode 100644
index 00000000..7dfeb4c5
--- /dev/null
+++ b/tests/frontend/unit/lib/agents-chart/vegalite/bandedLabelAngle.test.ts
@@ -0,0 +1,65 @@
+import { describe, expect, it } from 'vitest';
+import { assembleVegaLite } from '../../../../../../src/lib/agents-chart';
+
+const canvas = { width: 400, height: 300 };
+
+/**
+ * Numeric labels on a banded (discrete) x-axis must not be forced horizontal
+ * when they would crowd — many/wide numbers should rotate. Few, short numbers
+ * stay horizontal. A continuous (non-banded) quantitative axis is left to
+ * Vega-Lite's own overlap handling.
+ */
+describe('banded x-axis numeric label angle', () => {
+  it('rotates many wide numeric labels on a banded ordinal x-axis', () => {
+    const values = Array.from({ length: 30 }, (_, i) => ({
+      bucket: 1000000 + i * 125000,
+      count: 10 + (i % 7),
+    }));
+    const spec: any = assembleVegaLite({
+      data: { values },
+      semantic_types: { bucket: 'Quantity', count: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'bucket', type: 'ordinal' }, y: { field: 'count' } },
+        canvasSize: canvas,
+      },
+    });
+    expect(spec.config.axisX.labelAngle).toBe(-45);
+  });
+
+  it('keeps a few short numeric labels horizontal', () => {
+    const values = [
+      { bucket: 1, count: 10 },
+      { bucket: 2, count: 20 },
+      { bucket: 3, count: 15 },
+    ];
+    const spec: any = assembleVegaLite({
+      data: { values },
+      semantic_types: { bucket: 'Quantity', count: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'bucket', type: 'ordinal' }, y: { field: 'count' } },
+        canvasSize: canvas,
+      },
+    });
+    expect(spec.config.axisX.labelAngle).toBe(0);
+  });
+
+  it('leaves a continuous (non-banded) quantitative x-axis to VL overlap handling', () => {
+    const values = Array.from({ length: 25 }, (_, i) => ({
+      bucket: 1000000 + i * 125000,
+      count: 10 + (i % 7),
+    }));
+    const spec: any = assembleVegaLite({
+      data: { values },
+      semantic_types: { bucket: 'Quantity', count: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'bucket' }, y: { field: 'count' } },
+        canvasSize: canvas,
+      },
+    });
+    // Continuous axis: no forced labelAngle override from banded-label logic.
+    expect(spec.config.axisX?.labelAngle).toBeUndefined();
+  });
+});
diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/chartOptionApplicability.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/chartOptionApplicability.test.ts
new file mode 100644
index 00000000..d1818441
--- /dev/null
+++ b/tests/frontend/unit/lib/agents-chart/vegalite/chartOptionApplicability.test.ts
@@ -0,0 +1,189 @@
+import { describe, expect, it } from 'vitest';
+import { assembleVegaLite } from '../../../../../../src/lib/agents-chart';
+
+const canvas = { width: 600, height: 400 };
+
+/** Collects the `key` of each `spec._options` entry Flint marks `applicable` (empty when `_options` is absent). */
+const applicableKeys = (spec: any): string[] =>
+  (spec._options ?? []).flatMap((opt: any) => (opt.applicable ? [opt.key] : []));
+/** True when some `spec._options` entry has this `key`, whether or not it is applicable. */
+const hasOption = (spec: any, key: string): boolean =>
+  (spec._options ?? []).findIndex((opt: any) => opt.key === key) !== -1;
+
+describe('stackMode applicability (gated on a series/color channel)', () => {
+  const rows = [
+    { region: 'N', cat: 'a', val: 3 }, { region: 'N', cat: 'b', val: 5 },
+    { region: 'S', cat: 'a', val: 2 }, { region: 'S', cat: 'b', val: 4 },
+  ];
+
+  it('is applicable when color (the series dimension) is bound', () => {
+    const spec = assembleVegaLite({
+      data: { values: rows },
+      semantic_types: { region: 'Category', cat: 'Category', val: 'Quantity' },
+      chart_spec: {
+        chartType: 'Stacked Bar Chart',
+        encodings: { x: { field: 'region' }, y: { field: 'val' }, color: { field: 'cat' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(applicableKeys(spec)).toContain('stackMode');
+  });
+
+  it('is NOT applicable without a color channel (nothing to stack)', () => {
+    const spec = assembleVegaLite({
+      data: { values: rows },
+      semantic_types: { region: 'Category', val: 'Quantity' },
+      chart_spec: {
+        chartType: 'Stacked Bar Chart',
+        encodings: { x: { field: 'region' }, y: { field: 'val' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(hasOption(spec, 'stackMode')).toBe(true);
+    expect(applicableKeys(spec)).not.toContain('stackMode');
+  });
+});
+
+describe('independentYAxis applicability (faceted + quantitative y)', () => {
+  const facetRows = [
+    { g: 'A', x: 'p', y: 1 }, { g: 'A', x: 'q', y: 2 },
+    { g: 'B', x: 'p', y: 100 }, { g: 'B', x: 'q', y: 300 },
+  ];
+
+  it('is applicable when faceted with a quantitative y of diverging ranges', () => {
+    const spec = assembleVegaLite({
+      data: { values: facetRows },
+      semantic_types: { g: 'Category', x: 'Category', y: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'x' }, y: { field: 'y' }, column: { field: 'g' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(applicableKeys(spec)).toContain('independentYAxis');
+  });
+
+  it('is NOT applicable when not faceted', () => {
+    const spec = assembleVegaLite({
+      data: { values: facetRows },
+      semantic_types: { x: 'Category', y: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'x' }, y: { field: 'y' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(applicableKeys(spec)).not.toContain('independentYAxis');
+  });
+});
+
+describe('showPercent applicability (additive, single-sign, non-zero total)', () => {
+  function barTable(values: any[], semantic_types: any) {
+    return assembleVegaLite({
+      data: { values },
+      semantic_types,
+      chart_spec: {
+        chartType: 'Bar Table',
+        encodings: { y: { field: 'cat' }, x: { field: 'val' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+  }
+
+  it('is applicable for an additive single-sign measure with a non-zero total', () => {
+    const spec = barTable(
+      [{ cat: 'a', val: 10 }, { cat: 'b', val: 20 }, { cat: 'c', val: 30 }],
+      { cat: 'Category', val: 'Quantity' },
+    );
+    expect(applicableKeys(spec)).toContain('showPercent');
+  });
+
+  it('is NOT applicable for a mixed-sign measure (share would be misleading)', () => {
+    const spec = barTable(
+      [{ cat: 'a', val: 10 }, { cat: 'b', val: -20 }, { cat: 'c', val: 5 }],
+      { cat: 'Category', val: 'Number' },
+    );
+    expect(hasOption(spec, 'showPercent')).toBe(true);
+    expect(applicableKeys(spec)).not.toContain('showPercent');
+  });
+});
+
+describe('xAxisType applicability (date-like x with dual interpretation)', () => {
+  // Year-month strings: the resolver classifies these as temporal, but the
+  // modest distinct set is equally readable as discrete category labels.
+  const monthRows = [
+    { month: '2010-01', cost: 17.8 }, { month: '2011-04', cost: 20.1 },
+    { month: '2012-06', cost: 19.0 }, { month: '2013-09', cost: 19.9 },
+    { month: '2014-11', cost: 21.0 },
+  ];
+
+  function barWithX(values: any[], semantic_types: any, chartProperties?: any) {
+    return assembleVegaLite({
+      data: { values },
+      semantic_types,
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'month' }, y: { field: 'cost' } },
+        canvasSize: canvas,
+        ...(chartProperties ? { chartProperties } : {}),
+      },
+    }) as any;
+  }
+
+  it('is applicable for a date-like temporal x with a modest distinct count', () => {
+    const spec = barWithX(monthRows, { month: 'YearMonth', cost: 'Quantity' });
+    expect(applicableKeys(spec)).toContain('xAxisType');
+  });
+
+  it('forces a discrete (nominal) x when the user picks "nominal"', () => {
+    const spec = barWithX(
+      monthRows, { month: 'YearMonth', cost: 'Quantity' }, { xAxisType: 'nominal' },
+    );
+    // Override flows through to the encoding type the whole pipeline sees.
+    expect(spec.encoding?.x?.type).toBe('nominal');
+    // The control stays visible after an explicit choice.
+    expect(applicableKeys(spec)).toContain('xAxisType');
+  });
+
+  it('is NOT applicable for a plain categorical x (no temporal interpretation)', () => {
+    const spec = assembleVegaLite({
+      data: { values: [{ region: 'N', cost: 3 }, { region: 'S', cost: 5 }] },
+      semantic_types: { region: 'Category', cost: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'region' }, y: { field: 'cost' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(hasOption(spec, 'xAxisType')).toBe(true);
+    expect(applicableKeys(spec)).not.toContain('xAxisType');
+  });
+
+  it('offers yAxisType for a date-like temporal y (transposed/horizontal bar)', () => {
+    const spec = assembleVegaLite({
+      data: { values: monthRows },
+      semantic_types: { month: 'YearMonth', cost: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { y: { field: 'month' }, x: { field: 'cost' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(applicableKeys(spec)).toContain('yAxisType');
+  });
+
+  it('forces a discrete (nominal) y when the user picks "nominal"', () => {
+    const spec = assembleVegaLite({
+      data: { values: monthRows },
+      semantic_types: { month: 'YearMonth', cost: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { y: { field: 'month' }, x: { field: 'cost' } },
+        canvasSize: canvas,
+        chartProperties: { yAxisType: 'nominal' },
+      },
+    }) as any;
+    expect(spec.encoding?.y?.type).toBe('nominal');
+    expect(applicableKeys(spec)).toContain('yAxisType');
+  });
+});
diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/closedDomainStacking.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/closedDomainStacking.test.ts
new file mode 100644
index 00000000..623e8aa4
--- /dev/null
+++ b/tests/frontend/unit/lib/agents-chart/vegalite/closedDomainStacking.test.ts
@@ -0,0 +1,92 @@
+import { describe, expect, it } from 'vitest';
+import { assembleVegaLite } from '../../../../../../src/lib/agents-chart';
+
+const canvas = { width: 600, height: 400 };
+
+/**
+ * Regression: a closed-domain measure (Correlation, intrinsic [-1, 1]) on a bar
+ * chart that stacks — either via a color series or via repeated categories with
+ * no color — must NOT keep the intrinsic clamp domain, or the stacked bars
+ * overflow/clip past the fixed axis bound.
+ */
+describe('closed-domain stacked bar overflow', () => {
+  it('drops the intrinsic [-1,1] clamp when a color series stacks past the bound', () => {
+    const products = ['A', 'B', 'C', 'D'];
+    const series = ['s1', 's2', 's3', 's4'];
+    const values: any[] = [];
+    for (const p of products) {
+      for (const s of series) values.push({ product: p, series: s, corr: 0.9 }); // 4 series × 0.9 = 3.6 per product once stacked
+    }
+    const spec = assembleVegaLite({
+      data: { values },
+      semantic_types: { product: 'Category', series: 'Category', corr: 'Correlation' },
+      chart_spec: {
+        chartType: 'Stacked Bar Chart',
+        encodings: { x: { field: 'product' }, y: { field: 'corr' }, color: { field: 'series' } },
+        canvasSize: canvas,
+      },
+    });
+    expect(spec.encoding.y.scale?.domain).toBeUndefined();
+    expect(spec.encoding.y.scale?.clamp).toBeUndefined();
+  });
+
+  it('drops the intrinsic clamp when repeated categories stack with NO color', () => {
+    const products = ['A', 'B', 'C', 'D', 'E'];
+    const values: any[] = [];
+    for (const p of products) {
+      for (let i = 0; i < 4; i++) {
+        values.push({ product: p, corr: p === 'C' ? -0.21 : 0.9 }); // 4 rows per product: 'C' sums to -0.84, the others to 3.6
+      }
+    }
+    const spec = assembleVegaLite({
+      data: { values },
+      semantic_types: { product: 'Category', corr: 'Correlation' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'product' }, y: { field: 'corr' } },
+        canvasSize: canvas,
+      },
+    });
+    expect(spec.encoding.y.scale?.domain).toBeUndefined();
+    expect(spec.encoding.y.scale?.clamp).toBeUndefined();
+  });
+
+  it('detects overflow on the negative side even when signed totals would cancel', () => {
+    // Per category: three +0.5 and four -0.5 → signed sum = -0.5 (within [-1,1]),
+    // but the negative stack reaches -2.0, overflowing the lower bound.
+    const products = ['A', 'B'];
+    const values: any[] = [];
+    for (const p of products) {
+      for (let i = 0; i < 3; i++) values.push({ product: p, corr: 0.5 }); // positive stack: +1.5
+      for (let i = 0; i < 4; i++) values.push({ product: p, corr: -0.5 }); // negative stack: -2.0
+    }
+    const spec = assembleVegaLite({
+      data: { values },
+      semantic_types: { product: 'Category', corr: 'Correlation' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'product' }, y: { field: 'corr' } },
+        canvasSize: canvas,
+      },
+    });
+    expect(spec.encoding.y.scale?.domain).toBeUndefined(); // NOTE(review): unlike the two cases above, scale.clamp is not asserted here — confirm that is intentional
+  });
+
+  it('keeps the intrinsic [-1,1] domain for a non-stacking chart (one row per category)', () => {
+    const values = [
+      { product: 'A', corr: 0.9 },
+      { product: 'B', corr: -0.21 },
+      { product: 'C', corr: 0.4 },
+    ];
+    const spec = assembleVegaLite({
+      data: { values },
+      semantic_types: { product: 'Category', corr: 'Correlation' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'product' }, y: { field: 'corr' } },
+        canvasSize: canvas,
+      },
+    });
+    expect(spec.encoding.y.scale?.domain).toEqual([-1, 1]); // every row is already inside [-1, 1] and nothing stacks
+  });
+});
diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/logScale.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/logScale.test.ts
new file mode 100644
index 00000000..ab8d011e
--- /dev/null
+++ b/tests/frontend/unit/lib/agents-chart/vegalite/logScale.test.ts
@@ -0,0 +1,157 @@
+import { describe, expect, it } from 'vitest';
+import { assembleVegaLite, getChartOptions } from '../../../../../../src/lib/agents-chart';
+
+const canvas = { width: 500, height: 400 };
+
+/** Keys of the `spec._options` entries whose `applicable` flag is truthy; empty when `_options` is absent. */
+const applicableKeys = (spec: any): string[] =>
+  (spec._options ?? []).filter((o: any) => o.applicable).map((o: any) => o.key);
+/** First `spec._options` entry whose `key` equals `key`; `undefined` when none matches or `_options` is absent. */
+const optionFor = (spec: any, key: string): any =>
+  (spec._options ?? []).find((o: any) => o.key === key);
+
+// Wide-range positive values (≥ 6 orders of magnitude) so the engine's
+// conservative log recommendation fires, and the offer-eligibility (≥ 3
+// decades) is comfortably met.
+const wideX = Array.from({ length: 12 }, (_, i) => ({ // 12 rows, i = 0 … 11
+  x: Math.pow(10, i * 0.7), // 1 … ~10^7.7
+  y: i + 1, // 1 … 12
+}));
+
+function scatter(encodings: any, chartProperties?: any) { // assembles a 'Scatter Plot' over wideX with the given encodings
+  return assembleVegaLite({
+    data: { values: wideX },
+    semantic_types: { x: 'Quantity', y: 'Number' },
+    chart_spec: {
+      chartType: 'Scatter Plot',
+      encodings,
+      canvasSize: canvas,
+      chartProperties, // forwarded untouched; undefined when the caller omits it
+    },
+  }) as any; // cast so tests can read spec.encoding / spec._options without typing the result
+}
+
+describe('per-axis log scale: offer eligibility + user override', () => {
+  it('offers logScale_x on a wide-range continuous quantitative position axis', () => {
+    const spec = scatter({ x: { field: 'x' }, y: { field: 'y' } });
+    expect(applicableKeys(spec)).toContain('logScale_x');
+  });
+
+  it('does NOT offer log on a narrow-range axis', () => {
+    const narrow = Array.from({ length: 12 }, (_, i) => ({ x: 10 + i, y: i })); // x spans only 10 … 21
+    const spec = assembleVegaLite({
+      data: { values: narrow },
+      semantic_types: { x: 'Number', y: 'Number' },
+      chart_spec: {
+        chartType: 'Scatter Plot',
+        encodings: { x: { field: 'x' }, y: { field: 'y' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(applicableKeys(spec)).not.toContain('logScale_x');
+  });
+
+  it("unset follows the engine recommendation (log for wide-range additive measure)", () => {
+    const spec = scatter({ x: { field: 'x' }, y: { field: 'y' } });
+    expect(spec.encoding.x.scale?.type).toBe('log');
+    // and the option's resolved value reflects that recommendation
+    expect(optionFor(spec, 'logScale_x')?.value).toBe(true);
+  });
+
+  it("false overrides the recommendation and forces a linear axis", () => {
+    const spec = scatter({ x: { field: 'x' }, y: { field: 'y' } }, { logScale_x: false });
+    expect(spec.encoding.x.scale?.type).not.toBe('log'); // also passes when no scale type is emitted at all
+    // still offered, so the user can revert
+    expect(applicableKeys(spec)).toContain('logScale_x');
+  });
+
+  it("true forces a log axis even when the engine would not recommend it", () => {
+    // Generic 'Number' over a moderate (non-recommended) range: default stays linear.
+    const vals = Array.from({ length: 12 }, (_, i) => ({ x: (i + 1) * 50, y: i })); // x = 50, 100, …, 600
+    const auto = assembleVegaLite({
+      data: { values: vals },
+      semantic_types: { x: 'Number', y: 'Number' },
+      chart_spec: {
+        chartType: 'Scatter Plot',
+        encodings: { x: { field: 'x' }, y: { field: 'y' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(auto.encoding.x.scale?.type).not.toBe('log');
+
+    const forced = assembleVegaLite({ // same input as `auto`, plus the explicit logScale_x override
+      data: { values: vals },
+      semantic_types: { x: 'Number', y: 'Number' },
+      chart_spec: {
+        chartType: 'Scatter Plot',
+        encodings: { x: { field: 'x' }, y: { field: 'y' } },
+        canvasSize: canvas,
+        chartProperties: { logScale_x: true },
+      },
+    }) as any;
+    expect(forced.encoding.x.scale?.type).toBe('log');
+  });
+
+  it("uses symlog for a true toggle when the data contains zeros", () => {
+    const withZeros = [{ x: 0, y: 0 }, ...Array.from({ length: 11 }, (_, i) => ({ x: Math.pow(10, i * 0.6), y: i + 1 }))]; // one x = 0 row, then x = 10^0 … 10^6
+    const spec = assembleVegaLite({
+      data: { values: withZeros },
+      semantic_types: { x: 'Number', y: 'Number' },
+      chart_spec: {
+        chartType: 'Scatter Plot',
+        encodings: { x: { field: 'x' }, y: { field: 'y' } },
+        canvasSize: canvas,
+        chartProperties: { logScale_x: true },
+      },
+    }) as any;
+    expect(spec.encoding.x.scale?.type).toBe('symlog');
+  });
+
+  it('never offers log on a length-cognitive bar chart, even with wide-range data', () => {
+    const spec = assembleVegaLite({
+      data: { values: wideX.map((d, i) => ({ cat: `c${i}`, val: d.x })) }, // categories c0 … c11 carrying wideX's x magnitudes
+      semantic_types: { cat: 'Category', val: 'Quantity' },
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'cat' }, y: { field: 'val' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    // Length marks never even carry the log-scale option in their catalog.
+    expect((spec._options ?? []).find((o: any) => o.key.startsWith('logScale'))).toBeUndefined(); // checks every option, applicable or not
+  });
+
+  it('offers log only on the quantitative value axis of a line chart (not the temporal axis)', () => {
+    const series = Array.from({ length: 12 }, (_, i) => ({
+      t: `2020-${String((i % 12) + 1).padStart(2, '0')}-01`, // 2020-01-01 … 2020-12-01 (i < 12, so the % 12 is a no-op)
+      v: Math.pow(10, i * 0.7), // same magnitudes as wideX.x
+    }));
+    const spec = assembleVegaLite({
+      data: { values: series },
+      semantic_types: { t: 'Date', v: 'Quantity' },
+      chart_spec: {
+        chartType: 'Line Chart',
+        encodings: { x: { field: 't' }, y: { field: 'v' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(applicableKeys(spec)).toContain('logScale_y');
+    expect(applicableKeys(spec)).not.toContain('logScale_x');
+  });
+
+  it('getChartOptions reports the same applicable options as the rendered spec', () => {
+    const input = { // same input that scatter() builds when called without chartProperties
+      data: { values: wideX },
+      semantic_types: { x: 'Quantity', y: 'Number' },
+      chart_spec: {
+        chartType: 'Scatter Plot',
+        encodings: { x: { field: 'x' }, y: { field: 'y' } },
+        canvasSize: canvas,
+      },
+    };
+    const spec = assembleVegaLite(input) as any;
+    const options = getChartOptions(input);
+    expect(options).toEqual(spec._options); // deep equality with the descriptors attached to the rendered spec
+    expect(options.filter(o => o.applicable).map(o => o.key)).toContain('logScale_x');
+  });
+});
diff --git a/tests/frontend/unit/lib/agents-chart/vegalite/zeroBaseline.test.ts b/tests/frontend/unit/lib/agents-chart/vegalite/zeroBaseline.test.ts
new file mode 100644
index 00000000..aaf32803
--- /dev/null
+++ b/tests/frontend/unit/lib/agents-chart/vegalite/zeroBaseline.test.ts
@@ -0,0 +1,161 @@
+import { describe, expect, it } from 'vitest';
+import { assembleVegaLite } from '../../../../../../src/lib/agents-chart';
+
+const canvas = { width: 500, height: 400 };
+
+/** Keys of the `spec._options` entries whose `applicable` flag is truthy; empty when `_options` is absent. */
+const applicableKeys = (spec: any): string[] =>
+  (spec._options ?? []).filter((o: any) => o.applicable).map((o: any) => o.key);
+/** First `spec._options` entry whose `key` equals `key`; `undefined` when none matches or `_options` is absent. */
+const optionFor = (spec: any, key: string): any =>
+  (spec._options ?? []).find((o: any) => o.key === key);
+
+/** Resolve the y scale across the possible spec nestings (top / layer / facet); the first defined candidate wins, `undefined` if none. */
+function yScale(spec: any): any {
+  return (
+    spec?.encoding?.y?.scale ?? // 1. top-level encoding
+    spec?.spec?.encoding?.y?.scale ?? // 2. encoding under a nested `spec` (presumably the facet wrapper — confirm)
+    (Array.isArray(spec?.layer)
+      ? spec.layer.find((l: any) => l.encoding?.y?.scale)?.encoding?.y?.scale // 3. first top-level layer with a truthy y scale
+      : undefined) ??
+    (Array.isArray(spec?.spec?.layer)
+      ? spec.spec.layer.find((l: any) => l.encoding?.y?.scale)?.encoding?.y?.scale // 4. same search under the nested `spec`
+      : undefined)
+  );
+}
+
+/** Does the resolved y scale anchor the axis at zero? Checks `zero`, then an array `domain`, then `domainMin`. */
+function yIncludesZero(spec: any): boolean {
+  const scale = yScale(spec);
+  if (!scale) return false; // no y scale found anywhere in the spec
+  if (scale.zero === true) return true;
+  if (Array.isArray(scale.domain)) return scale.domain[0] === 0; // NOTE(review): an array domain short-circuits here — a domain straddling zero (e.g. [-1, 1]) reports false and domainMin is never consulted
+  if (scale.domainMin === 0) return true;
+  return false;
+}
+
+/** A scatter plot (position-cognitive) with a typed quantitative y axis. */
+function scatterY(yType: string, yValues: number[], chartProperties?: any) {
+  const values = yValues.map((v, i) => ({ x: i + 1, y: v })); // x is the 1-based row index
+  return assembleVegaLite({
+    data: { values },
+    // x = Number (zero-meaningful → forced → never offers includeZero_x),
+    // so only the y axis is under test.
+    semantic_types: { x: 'Number', y: yType },
+    chart_spec: {
+      chartType: 'Scatter Plot',
+      encodings: { x: { field: 'x' }, y: { field: 'y' } },
+      canvasSize: canvas,
+      chartProperties, // forwarded untouched; undefined when the caller omits it
+    },
+  }) as any;
+}
+
+describe('zero-baseline toggle: offered only when the choice is a genuine toss-up', () => {
+  it('does NOT offer Zero Y for an arbitrary type away from zero (zero is meaningless → just fit data)', () => {
+    // Temperature = arbitrary; zero is not a meaningful reference, so the
+    // engine fits the data and there is nothing to debate.
+    const spec = scatterY('Temperature', [60, 70, 80, 90, 100]);
+    expect(applicableKeys(spec)).not.toContain('includeZero_y');
+    expect(yIncludesZero(spec)).toBe(false);
+  });
+
+  it('does NOT offer Zero Y for a contextual type close to zero (engine confidently includes zero)', () => {
+    // Percentage = contextual; data 5–25 hugs zero (proximity 0.2) → engine
+    // includes zero and is confident enough that no toggle is needed.
+    const spec = scatterY('Percentage', [5, 10, 15, 20, 25]);
+    expect(applicableKeys(spec)).not.toContain('includeZero_y');
+    expect(yIncludesZero(spec)).toBe(true);
+  });
+
+  it('does NOT offer Zero Y for a meaningful type whose data already spans toward zero', () => {
+    // Price = meaningful; data 10–40 (proximity 0.25) already spans most of the
+    // way to zero, so including zero barely changes the view → keep zero on
+    // silently, no toggle.
+    const spec = scatterY('Price', [10, 20, 30, 40]);
+    expect(applicableKeys(spec)).not.toContain('includeZero_y');
+    expect(yIncludesZero(spec)).toBe(true);
+  });
+
+  it('offers Zero Y for a meaningful type far from zero on a position mark (default ON)', () => {
+    // Price = meaningful; data 1000–1200 (proximity 0.83) sits far from zero, so
+    // anchoring at zero would crush the data into a thin band — a real
+    // zoom-vs-anchor toss-up. Toggle is offered, recommended ON.
+    const spec = scatterY('Price', [1000, 1050, 1100, 1150, 1200]);
+    expect(applicableKeys(spec)).toContain('includeZero_y');
+    expect(optionFor(spec, 'includeZero_y')?.value).toBe(true); // the option's resolved value, with no chartProperties supplied
+    expect(yIncludesZero(spec)).toBe(true);
+  });
+
+  it('does NOT offer Zero Y for an unknown/unrecognized type (no opinion to debate)', () => {
+    const spec = scatterY('Mystery', [60, 70, 80, 90]); // 'Mystery' stands in for a semantic type name the engine is not expected to know
+    expect(applicableKeys(spec)).not.toContain('includeZero_y');
+  });
+
+  it('does NOT offer Zero Y on a bar chart (length mark — baseline is structural)', () => {
+    const spec = assembleVegaLite({
+      data: {
+        values: [
+          { cat: 'a', y: 60 }, { cat: 'b', y: 70 },
+          { cat: 'c', y: 80 }, { cat: 'd', y: 90 },
+        ],
+      },
+      semantic_types: { cat: 'Category', y: 'Temperature' }, // the "arbitrary" type from the scatter case, now on a bar
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'cat' }, y: { field: 'y' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(applicableKeys(spec)).not.toContain('includeZero_y');
+  });
+
+  it('does NOT offer Zero Y for a meaningful type on a bar chart (mandatory baseline)', () => {
+    const spec = assembleVegaLite({
+      data: {
+        values: [
+          { cat: 'a', y: 10 }, { cat: 'b', y: 20 },
+          { cat: 'c', y: 30 }, { cat: 'd', y: 40 },
+        ],
+      },
+      semantic_types: { cat: 'Category', y: 'Price' }, // the "meaningful" type from the scatter cases, now on a bar
+      chart_spec: {
+        chartType: 'Bar Chart',
+        encodings: { x: { field: 'cat' }, y: { field: 'y' } },
+        canvasSize: canvas,
+      },
+    }) as any;
+    expect(applicableKeys(spec)).not.toContain('includeZero_y');
+  });
+});
+
+describe('zero-baseline toggle: the choice drives the rendered axis', () => {
+  it('unset follows the engine decision (arbitrary away from zero → fits data)', () => {
+    const spec = scatterY('Temperature', [60, 70, 80, 90, 100]); // no chartProperties: baseline for the ON case below
+    expect(yIncludesZero(spec)).toBe(false);
+  });
+
+  it('ON forces the axis to include zero', () => {
+    const spec = scatterY('Temperature', [60, 70, 80, 90, 100], { includeZero_y: true });
+    expect(yIncludesZero(spec)).toBe(true);
+    // stays offered so the user can revert
+    expect(applicableKeys(spec)).toContain('includeZero_y');
+  });
+
+  it('OFF fits the data even over a zero-anchored semantic domain', () => {
+    // Percentage close to zero would default to a zero baseline (and a
+    // [0,100]-style intrinsic floor). Turning the toggle OFF must win: the
+    // axis fits the data and is NOT re-pinned to zero.
+    const spec = scatterY('Percentage', [5, 10, 15, 20, 25], { includeZero_y: false });
+    expect(yIncludesZero(spec)).toBe(false);
+    expect(applicableKeys(spec)).toContain('includeZero_y'); // offered once a value is set explicitly, as in the ON case above
+  });
+
+  it('OFF fits the data for a meaningful type on a line/point chart', () => {
+    // Price 0.8–2.0 (the screenshot case): default ON shows zero, but turning
+    // the toggle OFF fits the data instead of crushing it against the baseline.
+    const spec = scatterY('Price', [0.8, 1.0, 1.4, 1.8, 2.0], { includeZero_y: false }); // NOTE(review): scatterY always builds a 'Scatter Plot', so the "line" half of the title is not exercised here
+    expect(yIncludesZero(spec)).toBe(false);
+    expect(applicableKeys(spec)).toContain('includeZero_y');
+  });
+});
diff --git a/tests/frontend/unit/views/formatCellValue.test.ts b/tests/frontend/unit/views/formatCellValue.test.ts
index 9f1269b3..4901ccfb 100644
--- a/tests/frontend/unit/views/formatCellValue.test.ts
+++ b/tests/frontend/unit/views/formatCellValue.test.ts
@@ -87,7 +87,15 @@ describe('formatCellValue', () => {
     expect(formatCellValue(3600000, Type.Duration)).toBe('1h');
     expect(formatCellValue(90000, Type.Duration)).toBe('1m 30s');
     expect(formatCellValue(5000, Type.Duration)).toBe('5s');
-    expect(formatCellValue(0, Type.Duration)).toBe('0s');
+    expect(formatCellValue(0, Type.Duration)).toBe('0');
+  });
+
+  it('should not over-format sub-second Duration values', () => {
+    // Seconds-based columns (e.g. timestamp_sec: 0, 0.083, 0.167) must not
+    // collapse to "0s" — show the plain number instead.
+    expect(formatCellValue(0.083, Type.Duration)).toBe('0.083');
+    expect(formatCellValue(0.167, Type.Duration)).toBe('0.167');
+    expect(formatCellValue(1.5, Type.Duration)).toBe('1.5');
   });
 
   it('should pass through non-numeric Duration as string', () => {
diff --git a/yarn.lock b/yarn.lock
index cbfaebd1..c2ee214e 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -290,7 +290,7 @@
 
 "@epic-web/invariant@^1.0.0":
   version "1.0.0"
-  resolved "https://registry.npmjs.org/@epic-web/invariant/-/invariant-1.0.0.tgz#1073e5dee6dd540410784990eb73e4acd25c9813"
+  resolved "https://registry.npmjs.org/@epic-web/invariant/-/invariant-1.0.0.tgz"
   integrity sha512-lrTPqgvfFQtR/eY/qkIzp98OGdNJu0m5ji3q/nJI8v3SXkRKEnWiOxMmbvcSoAIzv/cGiuvRy57k4suKQSAdwA==
 
 "@esbuild/aix-ppc64@0.27.7":
@@ -298,16 +298,16 @@
   resolved "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz"
   integrity sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==
 
-"@esbuild/android-arm64@0.27.7":
-  version "0.27.7"
-  resolved "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz"
-  integrity sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==
-
 "@esbuild/android-arm@0.27.7":
   version "0.27.7"
   resolved "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz"
   integrity sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==
 
+"@esbuild/android-arm64@0.27.7":
+  version "0.27.7"
+  resolved "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz"
+  integrity sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==
+
 "@esbuild/android-x64@0.27.7":
   version "0.27.7"
   resolved "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz"
@@ -333,16 +333,16 @@
   resolved "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz"
   integrity sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==
 
-"@esbuild/linux-arm64@0.27.7":
-  version "0.27.7"
-  resolved "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz"
-  integrity sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==
-
 "@esbuild/linux-arm@0.27.7":
   version "0.27.7"
   resolved "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz"
   integrity sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==
 
+"@esbuild/linux-arm64@0.27.7":
+  version "0.27.7"
+  resolved "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz"
+  integrity sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==
+
 "@esbuild/linux-ia32@0.27.7":
   version "0.27.7"
   resolved "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz"
@@ -473,7 +473,7 @@
     minimatch "^3.1.5"
     strip-json-comments "^3.1.1"
 
-"@eslint/js@9.39.4", "@eslint/js@^9.15.0":
+"@eslint/js@^9.15.0", "@eslint/js@9.39.4":
   version "9.39.4"
   resolved "https://registry.npmjs.org/@eslint/js/-/js-9.39.4.tgz"
   integrity sha512-nE7DEIchvtiFTwBw4Lfbu59PG+kCofhjsKaCWzxTpt4lfRjRMqG6uMBzKXuEcyXhOHoUp9riAm7/aWYGhXZ9cw==
@@ -691,18 +691,6 @@
   dependencies:
     "@babel/runtime" "^7.29.2"
 
-"@mui/utils@9.0.0":
-  version "9.0.0"
-  resolved "https://registry.npmjs.org/@mui/utils/-/utils-9.0.0.tgz"
-  integrity sha512-bQcqyg/gjULUqTuyUjSAFr6LQGLvtkNtDbJerAtoUn9kGZ0hg5QJiN1PLHMLbeFpe3te1831uq7GFl2ITokGdg==
-  dependencies:
-    "@babel/runtime" "^7.29.2"
-    "@mui/types" "^9.0.0"
-    "@types/prop-types" "^15.7.15"
-    clsx "^2.1.1"
-    prop-types "^15.8.1"
-    react-is "^19.2.4"
-
 "@mui/utils@^7.3.9":
   version "7.3.9"
   resolved "https://registry.npmjs.org/@mui/utils/-/utils-7.3.9.tgz"
@@ -715,6 +703,18 @@
     prop-types "^15.8.1"
     react-is "^19.2.3"
 
+"@mui/utils@9.0.0":
+  version "9.0.0"
+  resolved "https://registry.npmjs.org/@mui/utils/-/utils-9.0.0.tgz"
+  integrity sha512-bQcqyg/gjULUqTuyUjSAFr6LQGLvtkNtDbJerAtoUn9kGZ0hg5QJiN1PLHMLbeFpe3te1831uq7GFl2ITokGdg==
+  dependencies:
+    "@babel/runtime" "^7.29.2"
+    "@mui/types" "^9.0.0"
+    "@types/prop-types" "^15.7.15"
+    clsx "^2.1.1"
+    prop-types "^15.8.1"
+    react-is "^19.2.4"
+
 "@mui/x-internals@^9.1.0":
   version "9.1.0"
   resolved "https://registry.npmjs.org/@mui/x-internals/-/x-internals-9.1.0.tgz"
@@ -739,71 +739,16 @@
     prop-types "^15.8.1"
     react-transition-group "^4.4.5"
 
-"@parcel/watcher-android-arm64@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-android-arm64/-/watcher-android-arm64-2.5.6.tgz#5f32e0dba356f4ac9a11068d2a5c134ca3ba6564"
-  integrity sha512-YQxSS34tPF/6ZG7r/Ih9xy+kP/WwediEUsqmtf0cuCV5TPPKw/PQHRhueUo6JdeFJaqV3pyjm0GdYjZotbRt/A==
+"@oxc-project/types@=0.122.0":
+  version "0.122.0"
+  resolved "https://registry.npmjs.org/@oxc-project/types/-/types-0.122.0.tgz"
+  integrity sha512-oLAl5kBpV4w69UtFZ9xqcmTi+GENWOcPF7FCrczTiBbmC0ibXxCwyvZGbO39rCVEuLGAZM84DH0pUIyyv/YJzA==
 
 "@parcel/watcher-darwin-arm64@2.5.6":
   version "2.5.6"
   resolved "https://registry.npmjs.org/@parcel/watcher-darwin-arm64/-/watcher-darwin-arm64-2.5.6.tgz"
   integrity sha512-Z2ZdrnwyXvvvdtRHLmM4knydIdU9adO3D4n/0cVipF3rRiwP+3/sfzpAwA/qKFL6i1ModaabkU7IbpeMBgiVEA==
 
-"@parcel/watcher-darwin-x64@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-darwin-x64/-/watcher-darwin-x64-2.5.6.tgz#bf05d76a78bc15974f15ec3671848698b0838063"
-  integrity sha512-HgvOf3W9dhithcwOWX9uDZyn1lW9R+7tPZ4sug+NGrGIo4Rk1hAXLEbcH1TQSqxts0NYXXlOWqVpvS1SFS4fRg==
-
-"@parcel/watcher-freebsd-x64@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-freebsd-x64/-/watcher-freebsd-x64-2.5.6.tgz#8bc26e9848e7303ac82922a5ae1b1ef1bdb48a53"
-  integrity sha512-vJVi8yd/qzJxEKHkeemh7w3YAn6RJCtYlE4HPMoVnCpIXEzSrxErBW5SJBgKLbXU3WdIpkjBTeUNtyBVn8TRng==
-
-"@parcel/watcher-linux-arm-glibc@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-linux-arm-glibc/-/watcher-linux-arm-glibc-2.5.6.tgz#1328fee1deb0c2d7865079ef53a2ba4cc2f8b40a"
-  integrity sha512-9JiYfB6h6BgV50CCfasfLf/uvOcJskMSwcdH1PHH9rvS1IrNy8zad6IUVPVUfmXr+u+Km9IxcfMLzgdOudz9EQ==
-
-"@parcel/watcher-linux-arm-musl@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-linux-arm-musl/-/watcher-linux-arm-musl-2.5.6.tgz#bad0f45cb3e2157746db8b9d22db6a125711f152"
-  integrity sha512-Ve3gUCG57nuUUSyjBq/MAM0CzArtuIOxsBdQ+ftz6ho8n7s1i9E1Nmk/xmP323r2YL0SONs1EuwqBp2u1k5fxg==
-
-"@parcel/watcher-linux-arm64-glibc@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-linux-arm64-glibc/-/watcher-linux-arm64-glibc-2.5.6.tgz#b75913fbd501d9523c5f35d420957bf7d0204809"
-  integrity sha512-f2g/DT3NhGPdBmMWYoxixqYr3v/UXcmLOYy16Bx0TM20Tchduwr4EaCbmxh1321TABqPGDpS8D/ggOTaljijOA==
-
-"@parcel/watcher-linux-arm64-musl@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-linux-arm64-musl/-/watcher-linux-arm64-musl-2.5.6.tgz#da5621a6a576070c8c0de60dea8b46dc9c3827d4"
-  integrity sha512-qb6naMDGlbCwdhLj6hgoVKJl2odL34z2sqkC7Z6kzir8b5W65WYDpLB6R06KabvZdgoHI/zxke4b3zR0wAbDTA==
-
-"@parcel/watcher-linux-x64-glibc@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-linux-x64-glibc/-/watcher-linux-x64-glibc-2.5.6.tgz#ce437accdc4b30f93a090b4a221fd95cd9b89639"
-  integrity sha512-kbT5wvNQlx7NaGjzPFu8nVIW1rWqV780O7ZtkjuWaPUgpv2NMFpjYERVi0UYj1msZNyCzGlaCWEtzc+exjMGbQ==
-
-"@parcel/watcher-linux-x64-musl@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-linux-x64-musl/-/watcher-linux-x64-musl-2.5.6.tgz#02400c54b4a67efcc7e2327b249711920ac969e2"
-  integrity sha512-1JRFeC+h7RdXwldHzTsmdtYR/Ku8SylLgTU/reMuqdVD7CtLwf0VR1FqeprZ0eHQkO0vqsbvFLXUmYm/uNKJBg==
-
-"@parcel/watcher-win32-arm64@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-win32-arm64/-/watcher-win32-arm64-2.5.6.tgz#caae3d3c7583ca0a7171e6bd142c34d20ea1691e"
-  integrity sha512-3ukyebjc6eGlw9yRt678DxVF7rjXatWiHvTXqphZLvo7aC5NdEgFufVwjFfY51ijYEWpXbqF5jtrK275z52D4Q==
-
-"@parcel/watcher-win32-ia32@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-win32-ia32/-/watcher-win32-ia32-2.5.6.tgz#9ac922550896dfe47bfc5ae3be4f1bcaf8155d6d"
-  integrity sha512-k35yLp1ZMwwee3Ez/pxBi5cf4AoBKYXj00CZ80jUz5h8prpiaQsiRPKQMxoLstNuqe2vR4RNPEAEcjEFzhEz/g==
-
-"@parcel/watcher-win32-x64@2.5.6":
-  version "2.5.6"
-  resolved "https://registry.npmjs.org/@parcel/watcher-win32-x64/-/watcher-win32-x64-2.5.6.tgz#73fdafba2e21c448f0e456bbe13178d8fe11739d"
-  integrity sha512-hbQlYcCq5dlAX9Qx+kFb0FHue6vbjlf0FrNzSKdYK2APUf7tGfGxQCk2ihEREmbR6ZMc0MVAD5RIX/41gpUzTw==
-
 "@parcel/watcher@^2.4.1":
   version "2.5.6"
   resolved "https://registry.npmjs.org/@parcel/watcher/-/watcher-2.5.6.tgz"
@@ -883,141 +828,26 @@
   resolved "https://registry.npmjs.org/@remix-run/router/-/router-1.23.2.tgz"
   integrity sha512-Ic6m2U/rMjTkhERIa/0ZtXJP17QUi2CbWE7cqx4J58M8aA3QTfW+2UlQ4psvTX9IO1RfNVhK3pcpdjej7L+t2w==
 
+"@rolldown/binding-darwin-arm64@1.0.0-rc.11":
+  version "1.0.0-rc.11"
+  resolved "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.11.tgz"
+  integrity sha512-7WQgR8SfOPwmDZGFkThUvsmd/nwAWv91oCO4I5LS7RKrssPZmOt7jONN0cW17ydGC1n/+puol1IpoieKqQidmg==
+
 "@rolldown/pluginutils@1.0.0-beta.27":
   version "1.0.0-beta.27"
   resolved "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz"
   integrity sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==
 
-"@rollup/rollup-android-arm-eabi@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.0.tgz#7e158ddfc16f78da99c0d5ccbae6cae403ef3284"
-  integrity sha512-WOhNW9K8bR3kf4zLxbfg6Pxu2ybOUbB2AjMDHSQx86LIF4rH4Ft7vmMwNt0loO0eonglSNy4cpD3MKXXKQu0/A==
-
-"@rollup/rollup-android-arm64@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.0.tgz#49f4ae0e22b6f9ffbcd3818b9a0758fa2d10b1cd"
-  integrity sha512-u6JHLll5QKRvjciE78bQXDmqRqNs5M/3GVqZeMwvmjaNODJih/WIrJlFVEihvV0MiYFmd+ZyPr9wxOVbPAG2Iw==
+"@rolldown/pluginutils@1.0.0-rc.11":
+  version "1.0.0-rc.11"
+  resolved "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.11.tgz"
+  integrity sha512-xQO9vbwBecJRv9EUcQ/y0dzSTJgA7Q6UVN7xp6B81+tBGSLVAK03yJ9NkJaUA7JFD91kbjxRSC/mDnmvXzbHoQ==
 
 "@rollup/rollup-darwin-arm64@4.60.0":
   version "4.60.0"
   resolved "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.0.tgz"
   integrity sha512-qEF7CsKKzSRc20Ciu2Zw1wRrBz4g56F7r/vRwY430UPp/nt1x21Q/fpJ9N5l47WWvJlkNCPJz3QRVw008fi7yA==
 
-"@rollup/rollup-darwin-x64@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.0.tgz#1bf7a92b27ebdd5e0d1d48503c7811160773be1a"
-  integrity sha512-WADYozJ4QCnXCH4wPB+3FuGmDPoFseVCUrANmA5LWwGmC6FL14BWC7pcq+FstOZv3baGX65tZ378uT6WG8ynTw==
-
-"@rollup/rollup-freebsd-arm64@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.0.tgz#5ccf537b99c5175008444702193ad0b1c36f7f16"
-  integrity sha512-6b8wGHJlDrGeSE3aH5mGNHBjA0TTkxdoNHik5EkvPHCt351XnigA4pS7Wsj/Eo9Y8RBU6f35cjN9SYmCFBtzxw==
-
-"@rollup/rollup-freebsd-x64@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.0.tgz#1196ecd7bf4e128624ef83cd1f9d785114474a77"
-  integrity sha512-h25Ga0t4jaylMB8M/JKAyrvvfxGRjnPQIR8lnCayyzEjEOx2EJIlIiMbhpWxDRKGKF8jbNH01NnN663dH638mA==
-
-"@rollup/rollup-linux-arm-gnueabihf@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.0.tgz#cc147633a4af229fee83a737bf2334fbac3dc28e"
-  integrity sha512-RzeBwv0B3qtVBWtcuABtSuCzToo2IEAIQrcyB/b2zMvBWVbjo8bZDjACUpnaafaxhTw2W+imQbP2BD1usasK4g==
-
-"@rollup/rollup-linux-arm-musleabihf@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.0.tgz#3559f9f060153ea54594a42c3b87a297bedcc26e"
-  integrity sha512-Sf7zusNI2CIU1HLzuu9Tc5YGAHEZs5Lu7N1ssJG4Tkw6e0MEsN7NdjUDDfGNHy2IU+ENyWT+L2obgWiguWibWQ==
-
-"@rollup/rollup-linux-arm64-gnu@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.0.tgz#e91f887b154123485cfc4b59befe2080fcd8f2df"
-  integrity sha512-DX2x7CMcrJzsE91q7/O02IJQ5/aLkVtYFryqCjduJhUfGKG6yJV8hxaw8pZa93lLEpPTP/ohdN4wFz7yp/ry9A==
-
-"@rollup/rollup-linux-arm64-musl@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.0.tgz#660752f040df9ba44a24765df698928917c0bf21"
-  integrity sha512-09EL+yFVbJZlhcQfShpswwRZ0Rg+z/CsSELFCnPt3iK+iqwGsI4zht3secj5vLEs957QvFFXnzAT0FFPIxSrkQ==
-
-"@rollup/rollup-linux-loong64-gnu@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.0.tgz#cb0e939a5fa479ccef264f3f45b31971695f869c"
-  integrity sha512-i9IcCMPr3EXm8EQg5jnja0Zyc1iFxJjZWlb4wr7U2Wx/GrddOuEafxRdMPRYVaXjgbhvqalp6np07hN1w9kAKw==
-
-"@rollup/rollup-linux-loong64-musl@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.0.tgz#42f86fbc82cd1a81be2d346476dd3231cf5ee442"
-  integrity sha512-DGzdJK9kyJ+B78MCkWeGnpXJ91tK/iKA6HwHxF4TAlPIY7GXEvMe8hBFRgdrR9Ly4qebR/7gfUs9y2IoaVEyog==
-
-"@rollup/rollup-linux-ppc64-gnu@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.0.tgz#39776a647a789dc95ea049277c5ef8f098df77f9"
-  integrity sha512-RwpnLsqC8qbS8z1H1AxBA1H6qknR4YpPR9w2XX0vo2Sz10miu57PkNcnHVaZkbqyw/kUWfKMI73jhmfi9BRMUQ==
-
-"@rollup/rollup-linux-ppc64-musl@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.0.tgz#466f20029a8e8b3bb2954c7ddebc9586420cac2c"
-  integrity sha512-Z8pPf54Ly3aqtdWC3G4rFigZgNvd+qJlOE52fmko3KST9SoGfAdSRCwyoyG05q1HrrAblLbk1/PSIV+80/pxLg==
-
-"@rollup/rollup-linux-riscv64-gnu@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.0.tgz#cff9877c78f12e7aa6246f6902ad913e99edb2b7"
-  integrity sha512-3a3qQustp3COCGvnP4SvrMHnPQ9d1vzCakQVRTliaz8cIp/wULGjiGpbcqrkv0WrHTEp8bQD/B3HBjzujVWLOA==
-
-"@rollup/rollup-linux-riscv64-musl@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.0.tgz#9a762fb99b5a82a921017f56491b7e892b9fb17d"
-  integrity sha512-pjZDsVH/1VsghMJ2/kAaxt6dL0psT6ZexQVrijczOf+PeP2BUqTHYejk3l6TlPRydggINOeNRhvpLa0AYpCWSQ==
-
-"@rollup/rollup-linux-s390x-gnu@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.0.tgz#9d25ad8ac7dab681935baf78ac5ea92d14629cdf"
-  integrity sha512-3ObQs0BhvPgiUVZrN7gqCSvmFuMWvWvsjG5ayJ3Lraqv+2KhOsp+pUbigqbeWqueGIsnn+09HBw27rJ+gYK4VQ==
-
-"@rollup/rollup-linux-x64-gnu@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.0.tgz#5e5139e11819fa38a052368da79422cb4afcf466"
-  integrity sha512-EtylprDtQPdS5rXvAayrNDYoJhIz1/vzN2fEubo3yLE7tfAw+948dO0g4M0vkTVFhKojnF+n6C8bDNe+gDRdTg==
-
-"@rollup/rollup-linux-x64-gnu@^4.24.4":
-  version "4.60.4"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.4.tgz#23c9bf79771d804fb87415eb0767569f273261e5"
-  integrity sha512-Boiz5+MsaROEWDf+GGEwF8VMHGhlUoQMtIPjOgA5fv4osupqTVnJteQNKJwUcnUog2G55jYXH7KZFFiJe0TEzQ==
-
-"@rollup/rollup-linux-x64-musl@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.0.tgz#b6211d46e11b1f945f5504cc794fce839331ed08"
-  integrity sha512-k09oiRCi/bHU9UVFqD17r3eJR9bn03TyKraCrlz5ULFJGdJGi7VOmm9jl44vOJvRJ6P7WuBi/s2A97LxxHGIdw==
-
-"@rollup/rollup-openbsd-x64@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.0.tgz#e6e09eebaa7012bb9c7331b437a9e992bd94ca35"
-  integrity sha512-1o/0/pIhozoSaDJoDcec+IVLbnRtQmHwPV730+AOD29lHEEo4F5BEUB24H0OBdhbBBDwIOSuf7vgg0Ywxdfiiw==
-
-"@rollup/rollup-openharmony-arm64@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.0.tgz#f7d99ae857032498e57a5e7259fb7100fd24a87e"
-  integrity sha512-pESDkos/PDzYwtyzB5p/UoNU/8fJo68vcXM9ZW2V0kjYayj1KaaUfi1NmTUTUpMn4UhU4gTuK8gIaFO4UGuMbA==
-
-"@rollup/rollup-win32-arm64-msvc@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.0.tgz#41e392f5d9f3bf1253fdaf2f6d6f6b1bfc452856"
-  integrity sha512-hj1wFStD7B1YBeYmvY+lWXZ7ey73YGPcViMShYikqKT1GtstIKQAtfUI6yrzPjAy/O7pO0VLXGmUVWXQMaYgTQ==
-
-"@rollup/rollup-win32-ia32-msvc@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.0.tgz#f41b0490be0e5d3cf459b4dc076a192b532adea9"
-  integrity sha512-SyaIPFoxmUPlNDq5EHkTbiKzmSEmq/gOYFI/3HHJ8iS/v1mbugVa7dXUzcJGQfoytp9DJFLhHH4U3/eTy2Bq4w==
-
-"@rollup/rollup-win32-x64-gnu@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.0.tgz#0fcf9f1fcb750f0317b13aac3b3231687e6397a5"
-  integrity sha512-RdcryEfzZr+lAr5kRm2ucN9aVlCCa2QNq4hXelZxb8GG0NJSazq44Z3PCCc8wISRuCVnGs0lQJVX5Vp6fKA+IA==
-
-"@rollup/rollup-win32-x64-msvc@4.60.0":
-  version "4.60.0"
-  resolved "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.0.tgz#3afdb30405f6d4248df5e72e1ca86c5eab55fab8"
-  integrity sha512-PrsWNQ8BuE00O3Xsx3ALh2Df8fAj9+cvvX9AIA6o4KpATR98c9mud4XtDWVvsEuyia5U4tVSTKygawyJkjm60w==
-
 "@standard-schema/spec@^1.1.0":
   version "1.1.0"
   resolved "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.1.0.tgz"
@@ -1028,61 +858,6 @@
   resolved "https://registry.npmjs.org/@swc/core-darwin-arm64/-/core-darwin-arm64-1.15.21.tgz"
   integrity sha512-SA8SFg9dp0qKRH8goWsax6bptFE2EdmPf2YRAQW9WoHGf3XKM1bX0nd5UdwxmC5hXsBUZAYf7xSciCler6/oyA==
 
-"@swc/core-darwin-x64@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-darwin-x64/-/core-darwin-x64-1.15.21.tgz#05ff28c00a7045d9760c847e19604fff02b6e3ea"
-  integrity sha512-//fOVntgowz9+V90lVsNCtyyrtbHp3jWH6Rch7MXHXbcvbLmbCTmssl5DeedUWLLGiAAW1wksBdqdGYOTjaNLw==
-
-"@swc/core-linux-arm-gnueabihf@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.15.21.tgz#d52a0fac1933fe4e4180a196417053571d6c255f"
-  integrity sha512-meNI4Sh6h9h8DvIfEc0l5URabYMSuNvyisLmG6vnoYAS43s8ON3NJR8sDHvdP7NJTrLe0q/x2XCn6yL/BeHcZg==
-
-"@swc/core-linux-arm64-gnu@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.15.21.tgz#32cd1b9d0d4be4d53ccfbc122ac61289f37735b9"
-  integrity sha512-QrXlNQnHeXqU2EzLlnsPoWEh8/GtNJLvfMiPsDhk+ht6Xv8+vhvZ5YZ/BokNWSIZiWPKLAqR0M7T92YF5tmD3g==
-
-"@swc/core-linux-arm64-musl@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.15.21.tgz#0993e8b2ffac4f1141fa7b158e8dd982c2476c1a"
-  integrity sha512-8/yGCMO333ultDaMQivE5CjO6oXDPeeg1IV4sphojPkb0Pv0i6zvcRIkgp60xDB+UxLr6VgHgt+BBgqS959E9g==
-
-"@swc/core-linux-ppc64-gnu@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-linux-ppc64-gnu/-/core-linux-ppc64-gnu-1.15.21.tgz#5f6765d9a36235d95fd5c69f6d848973e85d8180"
-  integrity sha512-ucW0HzPx0s1dgRvcvuLSPSA/2Kk/VYTv9st8qe1Kc22Gu0Q0rH9+6TcBTmMuNIp0Xs4BPr1uBttmbO1wEGI49Q==
-
-"@swc/core-linux-s390x-gnu@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-linux-s390x-gnu/-/core-linux-s390x-gnu-1.15.21.tgz#f96779dc2ba8d47298bca3ceaa961e0f460aa0bd"
-  integrity sha512-ulTnOGc5I7YRObE/9NreAhQg94QkiR5qNhhcUZ1iFAYjzg/JGAi1ch+s/Ixe61pMIr8bfVrF0NOaB0f8wjaAfA==
-
-"@swc/core-linux-x64-gnu@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.15.21.tgz#0ffe779d5fd060bfb7992176f51d317c81c6aaaf"
-  integrity sha512-D0RokxtM+cPvSqJIKR6uja4hbD+scI9ezo95mBhfSyLUs9wnPPl26sLp1ZPR/EXRdYm3F3S6RUtVi+8QXhT24Q==
-
-"@swc/core-linux-x64-musl@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.15.21.tgz#2ea9fab26555d27c715aed6a08604a8296e4af50"
-  integrity sha512-nER8u7VeRfmU6fMDzl1NQAbbB/G7O2avmvCOwIul1uGkZ2/acbPH+DCL9h5+0yd/coNcxMBTL6NGepIew+7C2w==
-
-"@swc/core-win32-arm64-msvc@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.15.21.tgz#b401f34f38d744ca2b800bf2574ef5f7b20ca52f"
-  integrity sha512-+/AgNBnjYugUA8C0Do4YzymgvnGbztv7j8HKSQLvR/DQgZPoXQ2B3PqB2mTtGh/X5DhlJWiqnunN35JUgWcAeQ==
-
-"@swc/core-win32-ia32-msvc@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.15.21.tgz#c761e981725d137abd7abcecff88d1dc2d76baad"
-  integrity sha512-IkSZj8PX/N4HcaFhMQtzmkV8YSnuNoJ0E6OvMwFiOfejPhiKXvl7CdDsn1f4/emYEIDO3fpgZW9DTaCRMDxaDA==
-
-"@swc/core-win32-x64-msvc@1.15.21":
-  version "1.15.21"
-  resolved "https://registry.npmjs.org/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.15.21.tgz#4878cd851b4f98033e19fca78953201aef736edd"
-  integrity sha512-zUyWso7OOENB6e1N1hNuNn8vbvLsTdKQ5WKLgt/JcBNfJhKy/6jmBmqI3GXk/MyvQKd5SLvP7A0F36p7TeDqvw==
-
 "@swc/core@^1.12.11":
   version "1.15.21"
   resolved "https://registry.npmjs.org/@swc/core/-/core-1.15.21.tgz"
@@ -1268,22 +1043,22 @@
   resolved "https://registry.npmjs.org/@tiptap/extension-strike/-/extension-strike-3.22.2.tgz"
   integrity sha512-YFC3elKU1L8PiGbcB6tqd/7vWPF5IbydJz0POJpHzSjstX+VfT8VsvS7ubxVuSIWQ11kGkH3mzX6LX8JHsHZxg==
 
-"@tiptap/extension-table-cell@^3.23.6":
+"@tiptap/extension-table-cell@^3.22.2":
   version "3.23.6"
   resolved "https://registry.npmjs.org/@tiptap/extension-table-cell/-/extension-table-cell-3.23.6.tgz"
   integrity sha512-hS9TmmvRlT9/ikT+0ukACS+hmJuii4zQaH47cg3oJkz/Fv7O7tL7GZniKtK6l2OUZGPhY+4SV2RkDB6bD7DXfw==
 
-"@tiptap/extension-table-header@^3.23.6":
+"@tiptap/extension-table-header@^3.22.2":
   version "3.23.6"
   resolved "https://registry.npmjs.org/@tiptap/extension-table-header/-/extension-table-header-3.23.6.tgz"
   integrity sha512-D6o0a1cJXUU0xWakainBFGPnGHinQkPcdu1YqGd/PoFANY38lnuZt/NW2O/OLfLXu5LXDRfpqF1+dsKww27dUA==
 
-"@tiptap/extension-table-row@^3.23.6":
+"@tiptap/extension-table-row@^3.22.2":
   version "3.23.6"
   resolved "https://registry.npmjs.org/@tiptap/extension-table-row/-/extension-table-row-3.23.6.tgz"
   integrity sha512-OauWVzkyRQg0rKOqM/a3PuKPc1S7YXMb1LRN7Nh8Ytvglvd7GFRTbl1lVqdZRaz4Jzopag4PQnriIZfMPUpxWw==
 
-"@tiptap/extension-table@^3.23.6":
+"@tiptap/extension-table@^3.22.2":
   version "3.23.6"
   resolved "https://registry.npmjs.org/@tiptap/extension-table/-/extension-table-3.23.6.tgz"
   integrity sha512-XbhZXjhsS6AP7ThoZxjAnNs+NiR81YRori25l6E+ORqB7quiPkIXOAi5h4AIpkn/CYIqze6ere11lWsYpDjtaQ==
@@ -1618,7 +1393,7 @@
   dependencies:
     "@types/estree" "*"
 
-"@types/estree@*", "@types/estree@1.0.8", "@types/estree@^1.0.0", "@types/estree@^1.0.6", "@types/estree@^1.0.8":
+"@types/estree@*", "@types/estree@^1.0.0", "@types/estree@^1.0.6", "@types/estree@^1.0.8", "@types/estree@1.0.8":
   version "1.0.8"
   resolved "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz"
   integrity sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==
@@ -1789,7 +1564,7 @@
   resolved "https://registry.npmjs.org/@types/validator/-/validator-13.15.10.tgz"
   integrity sha512-T8L6i7wCuyoK8A/ZeLYt1+q0ty3Zb9+qbSSvrIVitzT3YjZqkTZ40IbRsPanlB4h1QB3JVL1SYCdR6ngtFYcuA==
 
-"@typescript-eslint/eslint-plugin@8.57.2", "@typescript-eslint/eslint-plugin@^8.16.0":
+"@typescript-eslint/eslint-plugin@^8.16.0", "@typescript-eslint/eslint-plugin@8.57.2":
   version "8.57.2"
   resolved "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.57.2.tgz"
   integrity sha512-NZZgp0Fm2IkD+La5PR81sd+g+8oS6JwJje+aRWsDocxHkjyRw0J5L5ZTlN3LI1LlOcGL7ph3eaIUmTXMIjLk0w==
@@ -1803,7 +1578,7 @@
     natural-compare "^1.4.0"
     ts-api-utils "^2.4.0"
 
-"@typescript-eslint/parser@8.57.2", "@typescript-eslint/parser@^8.16.0":
+"@typescript-eslint/parser@^8.16.0", "@typescript-eslint/parser@8.57.2":
   version "8.57.2"
   resolved "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.57.2.tgz"
   integrity sha512-30ScMRHIAD33JJQkgfGW1t8CURZtjc2JpTrq5n2HFhOefbAhb7ucc7xJwdWcrEtqUIYJ73Nybpsggii6GtAHjA==
@@ -1831,7 +1606,7 @@
     "@typescript-eslint/types" "8.57.2"
     "@typescript-eslint/visitor-keys" "8.57.2"
 
-"@typescript-eslint/tsconfig-utils@8.57.2", "@typescript-eslint/tsconfig-utils@^8.57.2":
+"@typescript-eslint/tsconfig-utils@^8.57.2", "@typescript-eslint/tsconfig-utils@8.57.2":
   version "8.57.2"
   resolved "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.57.2.tgz"
   integrity sha512-3Lm5DSM+DCowsUOJC+YqHHnKEfFh5CoGkj5Z31NQSNF4l5wdOwqGn99wmwN/LImhfY3KJnmordBq/4+VDe2eKw==
@@ -1847,7 +1622,7 @@
     debug "^4.4.3"
     ts-api-utils "^2.4.0"
 
-"@typescript-eslint/types@8.57.2", "@typescript-eslint/types@^8.57.2":
+"@typescript-eslint/types@^8.57.2", "@typescript-eslint/types@8.57.2":
   version "8.57.2"
   resolved "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.57.2.tgz"
   integrity sha512-/iZM6FnM4tnx9csuTxspMW4BOSegshwX5oBDznJ7S4WggL7Vczz5d2W11ecc4vRrQMQHXRSxzrCsyG5EsPPTbA==
@@ -2067,6 +1842,11 @@ argparse@^2.0.1:
   resolved "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz"
   integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==
 
+aria-query@^5.0.0, aria-query@^5.3.2:
+  version "5.3.2"
+  resolved "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz"
+  integrity sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw==
+
 aria-query@5.3.0:
   version "5.3.0"
   resolved "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz"
@@ -2074,11 +1854,6 @@ aria-query@5.3.0:
   dependencies:
     dequal "^2.0.3"
 
-aria-query@^5.0.0, aria-query@^5.3.2:
-  version "5.3.2"
-  resolved "https://registry.npmjs.org/aria-query/-/aria-query-5.3.2.tgz"
-  integrity sha512-COROpnaoap1E2F000S62r6A60uHZnmlvomhfyT2DlTcrY1OrBKn2UhH7qn5wTC9zMvD0AY7csdPSNwKP+7WiQw==
-
 array-buffer-byte-length@^1.0.1, array-buffer-byte-length@^1.0.2:
   version "1.0.2"
   resolved "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.2.tgz"
@@ -2456,6 +2231,11 @@ comma-separated-tokens@^2.0.0:
   resolved "https://registry.npmjs.org/comma-separated-tokens/-/comma-separated-tokens-2.0.3.tgz"
   integrity sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==
 
+commander@^8.3.0:
+  version "8.3.0"
+  resolved "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz"
+  integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==
+
 commander@2:
   version "2.20.3"
   resolved "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz"
@@ -2466,11 +2246,6 @@ commander@7:
   resolved "https://registry.npmjs.org/commander/-/commander-7.2.0.tgz"
   integrity sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==
 
-commander@^8.3.0:
-  version "8.3.0"
-  resolved "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz"
-  integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==
-
 compress-commons@^4.1.2:
   version "4.1.2"
   resolved "https://registry.npmjs.org/compress-commons/-/compress-commons-4.1.2.tgz"
@@ -2532,7 +2307,7 @@ crelt@^1.0.0:
 
 cross-env@^10.1.0:
   version "10.1.0"
-  resolved "https://registry.npmjs.org/cross-env/-/cross-env-10.1.0.tgz#cfd2a6200df9ed75bfb9cb3d7ce609c13ea21783"
+  resolved "https://registry.npmjs.org/cross-env/-/cross-env-10.1.0.tgz"
   integrity sha512-GsYosgnACZTADcmEyJctkJIoqAhHjttw7RsFrVoJNXbsWWqaq6Ym+7kZjq6mS45O0jij6vtiReppKQEtqWy6Dw==
   dependencies:
     "@epic-web/invariant" "^1.0.0"
@@ -2577,7 +2352,7 @@ culori@^4.0.2:
   resolved "https://registry.npmjs.org/culori/-/culori-4.0.2.tgz"
   integrity sha512-1+BhOB8ahCn4O0cep0Sh2l9KCOfOdY+BXJnKMHFFzDEouSr/el18QwXEMRlOj9UY5nCeA8UN3a/82rUWRBeyBw==
 
-"d3-array@1 - 3", "d3-array@2 - 3", "d3-array@2.10.0 - 3", "d3-array@2.5.0 - 3", d3-array@3, d3-array@3.2.4, d3-array@^3.2.0, d3-array@^3.2.4:
+d3-array@^3.2.0, d3-array@^3.2.4, "d3-array@1 - 3", "d3-array@2 - 3", "d3-array@2.10.0 - 3", "d3-array@2.5.0 - 3", d3-array@3, d3-array@3.2.4:
   version "3.2.4"
   resolved "https://registry.npmjs.org/d3-array/-/d3-array-3.2.4.tgz"
   integrity sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==
@@ -2607,7 +2382,7 @@ d3-chord@3:
   dependencies:
     d3-path "1 - 3"
 
-"d3-color@1 - 3", d3-color@3, d3-color@^3.1.0:
+d3-color@^3.1.0, "d3-color@1 - 3", d3-color@3:
   version "3.1.0"
   resolved "https://registry.npmjs.org/d3-color/-/d3-color-3.1.0.tgz"
   integrity sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==
@@ -2619,7 +2394,7 @@ d3-contour@4:
   dependencies:
     d3-array "^3.2.0"
 
-d3-delaunay@6, d3-delaunay@^6.0.4:
+d3-delaunay@^6.0.4, d3-delaunay@6:
   version "6.0.4"
   resolved "https://registry.npmjs.org/d3-delaunay/-/d3-delaunay-6.0.4.tgz"
   integrity sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==
@@ -2639,7 +2414,7 @@ d3-delaunay@6, d3-delaunay@^6.0.4:
     d3-dispatch "1 - 3"
     d3-selection "3"
 
-"d3-dsv@1 - 3", d3-dsv@3, d3-dsv@^3.0.1:
+d3-dsv@^3.0.1, "d3-dsv@1 - 3", d3-dsv@3:
   version "3.0.1"
   resolved "https://registry.npmjs.org/d3-dsv/-/d3-dsv-3.0.1.tgz"
   integrity sha512-UG6OvdI5afDIFP9w4G0mNq50dSOsXHJaRE8arAS5o9ApWnIElp8GZw1Dun8vP8OyHOZ/QJUKUJwxiiCCnUwm+Q==
@@ -2660,7 +2435,7 @@ d3-fetch@3:
   dependencies:
     d3-dsv "1 - 3"
 
-d3-force@3, d3-force@^3.0.0:
+d3-force@^3.0.0, d3-force@3:
   version "3.0.0"
   resolved "https://registry.npmjs.org/d3-force/-/d3-force-3.0.0.tgz"
   integrity sha512-zxV/SsA+U4yte8051P4ECydjD/S+qeYtnaIyAs9tgHCqfguma/aAQDjo85A9Z6EKhBirHRJHXIgJUlffT4wdLg==
@@ -2669,7 +2444,7 @@ d3-force@3, d3-force@^3.0.0:
     d3-quadtree "1 - 3"
     d3-timer "1 - 3"
 
-"d3-format@1 - 3", d3-format@3, d3-format@^3.1.0:
+d3-format@^3.1.0, "d3-format@1 - 3", d3-format@3:
   version "3.1.2"
   resolved "https://registry.npmjs.org/d3-format/-/d3-format-3.1.2.tgz"
   integrity sha512-AJDdYOdnyRDV5b6ArilzCPPwc1ejkHcoyFarqlPqT7zRYjhavcT3uSrqcMvsgh2CgoPbK3RCwyHaVyxYcP2Arg==
@@ -2683,26 +2458,26 @@ d3-geo-projection@^4.0.0:
     d3-array "1 - 3"
     d3-geo "1.12.0 - 3"
 
-"d3-geo@1.12.0 - 3", d3-geo@3, d3-geo@^3.1.1:
+d3-geo@^3.1.1, "d3-geo@1.12.0 - 3", d3-geo@3:
   version "3.1.1"
   resolved "https://registry.npmjs.org/d3-geo/-/d3-geo-3.1.1.tgz"
   integrity sha512-637ln3gXKXOwhalDzinUgY83KzNWZRKbYubaG+fGVuc/dxO64RRljtCTnf5ecMyE1RIdtqpkVcq0IbtU2S8j2Q==
   dependencies:
     d3-array "2.5.0 - 3"
 
-d3-hierarchy@3, d3-hierarchy@^3.1.2:
+d3-hierarchy@^3.1.2, d3-hierarchy@3:
   version "3.1.2"
   resolved "https://registry.npmjs.org/d3-hierarchy/-/d3-hierarchy-3.1.2.tgz"
   integrity sha512-FX/9frcub54beBdugHjDCdikxThEqjnR93Qt7PvQTOHxyiNCAlvMrHhclk3cD5VeAaq9fxmfRp+CnWw9rEMBuA==
 
-"d3-interpolate@1 - 3", "d3-interpolate@1.2.0 - 3", d3-interpolate@3, d3-interpolate@^3.0.1:
+d3-interpolate@^3.0.1, "d3-interpolate@1 - 3", "d3-interpolate@1.2.0 - 3", d3-interpolate@3:
   version "3.0.1"
   resolved "https://registry.npmjs.org/d3-interpolate/-/d3-interpolate-3.0.1.tgz"
   integrity sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==
   dependencies:
     d3-color "1 - 3"
 
-"d3-path@1 - 3", d3-path@3, d3-path@^3.1.0:
+d3-path@^3.1.0, "d3-path@1 - 3", d3-path@3:
   version "3.1.0"
   resolved "https://registry.npmjs.org/d3-path/-/d3-path-3.1.0.tgz"
   integrity sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==
@@ -2722,7 +2497,7 @@ d3-random@3:
   resolved "https://registry.npmjs.org/d3-random/-/d3-random-3.0.1.tgz"
   integrity sha512-FXMe9GfxTxqd5D6jFsQ+DJ8BJS4E/fT5mqqdjovykEB2oFbTMDVdg1MGFxfQW+FBOGoB++k8swBrgwSHT1cUXQ==
 
-d3-scale-chromatic@3, d3-scale-chromatic@^3.1.0:
+d3-scale-chromatic@^3.1.0, d3-scale-chromatic@3:
   version "3.1.0"
   resolved "https://registry.npmjs.org/d3-scale-chromatic/-/d3-scale-chromatic-3.1.0.tgz"
   integrity sha512-A3s5PWiZ9YCXFye1o246KoscMWqf8BsD9eRiJ3He7C9OBaxKhAd5TFCdEx/7VbKtxxTsu//1mMJFrEt572cEyQ==
@@ -2730,7 +2505,7 @@ d3-scale-chromatic@3, d3-scale-chromatic@^3.1.0:
     d3-color "1 - 3"
     d3-interpolate "1 - 3"
 
-d3-scale@4, d3-scale@^4.0.2:
+d3-scale@^4.0.2, d3-scale@4:
   version "4.0.2"
   resolved "https://registry.npmjs.org/d3-scale/-/d3-scale-4.0.2.tgz"
   integrity sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==
@@ -2746,28 +2521,28 @@ d3-scale@4, d3-scale@^4.0.2:
   resolved "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz"
   integrity sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==
 
-d3-shape@3, d3-shape@^3.2.0:
+d3-shape@^3.2.0, d3-shape@3:
   version "3.2.0"
   resolved "https://registry.npmjs.org/d3-shape/-/d3-shape-3.2.0.tgz"
   integrity sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==
   dependencies:
     d3-path "^3.1.0"
 
-"d3-time-format@2 - 4", d3-time-format@4, d3-time-format@^4.1.0:
+d3-time-format@^4.1.0, "d3-time-format@2 - 4", d3-time-format@4:
   version "4.1.0"
   resolved "https://registry.npmjs.org/d3-time-format/-/d3-time-format-4.1.0.tgz"
   integrity sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==
   dependencies:
     d3-time "1 - 3"
 
-"d3-time@1 - 3", "d3-time@2.1.1 - 3", d3-time@3, d3-time@^3.1.0:
+d3-time@^3.1.0, "d3-time@1 - 3", "d3-time@2.1.1 - 3", d3-time@3:
   version "3.1.0"
   resolved "https://registry.npmjs.org/d3-time/-/d3-time-3.1.0.tgz"
   integrity sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==
   dependencies:
     d3-array "2 - 3"
 
-"d3-timer@1 - 3", d3-timer@3, d3-timer@^3.0.1:
+d3-timer@^3.0.1, "d3-timer@1 - 3", d3-timer@3:
   version "3.0.1"
   resolved "https://registry.npmjs.org/d3-timer/-/d3-timer-3.0.1.tgz"
   integrity sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==
@@ -2953,15 +2728,6 @@ devlop@^1.0.0, devlop@^1.1.0:
   dependencies:
     dequal "^2.0.0"
 
-dnd-core@14.0.1:
-  version "14.0.1"
-  resolved "https://registry.npmjs.org/dnd-core/-/dnd-core-14.0.1.tgz"
-  integrity sha512-+PVS2VPTgKFPYWo3vAFEA8WPbTf7/xo43TifH9G8S1KqnrQu0o77A3unrF5yOugy4mIz7K5wAVFHUcha7wsz6A==
-  dependencies:
-    "@react-dnd/asap" "^4.0.0"
-    "@react-dnd/invariant" "^2.0.0"
-    redux "^4.1.1"
-
 dnd-core@^16.0.1:
   version "16.0.1"
   resolved "https://registry.npmjs.org/dnd-core/-/dnd-core-16.0.1.tgz"
@@ -2971,6 +2737,15 @@ dnd-core@^16.0.1:
     "@react-dnd/invariant" "^4.0.1"
     redux "^4.2.0"
 
+dnd-core@14.0.1:
+  version "14.0.1"
+  resolved "https://registry.npmjs.org/dnd-core/-/dnd-core-14.0.1.tgz"
+  integrity sha512-+PVS2VPTgKFPYWo3vAFEA8WPbTf7/xo43TifH9G8S1KqnrQu0o77A3unrF5yOugy4mIz7K5wAVFHUcha7wsz6A==
+  dependencies:
+    "@react-dnd/asap" "^4.0.0"
+    "@react-dnd/invariant" "^2.0.0"
+    redux "^4.1.1"
+
 doctrine@^2.1.0:
   version "2.1.0"
   resolved "https://registry.npmjs.org/doctrine/-/doctrine-2.1.0.tgz"
@@ -2996,14 +2771,7 @@ dom-helpers@^5.0.1:
     "@babel/runtime" "^7.8.7"
     csstype "^3.0.2"
 
-dompurify@*, dompurify@^3.4.2:
-  version "3.4.7"
-  resolved "https://registry.npmjs.org/dompurify/-/dompurify-3.4.7.tgz#e2702ea4fd5d83467f1baef62309466ce7d44a82"
-  integrity sha512-2jBxDJY4RR06tQNy4w5FlFH7kfxsQZlufd0sbv+chfHCxeJwrFw2baUDsSwvBISD4K4RDbd0PTfy3uNXsR6siA==
-  optionalDependencies:
-    "@types/trusted-types" "^2.0.7"
-
-dompurify@^3.4.0:
+dompurify@*, dompurify@^3.4.0:
   version "3.4.5"
   resolved "https://registry.npmjs.org/dompurify/-/dompurify-3.4.5.tgz"
   integrity sha512-OrwIBKsdNSVEeubdJ1HBv/wNENRM9ytAVCv7YXt//A3vPdVMNuACRqK9mXCGCBW2ln7BT/A4X0jXHo2Gu89miA==
@@ -3851,7 +3619,7 @@ inflight@^1.0.4:
     once "^1.3.0"
     wrappy "1"
 
-inherits@2, inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.0, inherits@~2.0.3:
+inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.0, inherits@~2.0.3, inherits@2:
   version "2.0.4"
   resolved "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz"
   integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
@@ -4263,6 +4031,13 @@ levn@^0.4.1:
     prelude-ls "^1.2.1"
     type-check "~0.4.0"
 
+lie@~3.3.0:
+  version "3.3.0"
+  resolved "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz"
+  integrity sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==
+  dependencies:
+    immediate "~3.0.5"
+
 lie@3.1.1:
   version "3.1.1"
   resolved "https://registry.npmjs.org/lie/-/lie-3.1.1.tgz"
@@ -4270,12 +4045,29 @@ lie@3.1.1:
   dependencies:
     immediate "~3.0.5"
 
-lie@~3.3.0:
-  version "3.3.0"
-  resolved "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz"
-  integrity sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==
+lightningcss-darwin-arm64@1.32.0:
+  version "1.32.0"
+  resolved "https://registry.npmjs.org/lightningcss-darwin-arm64/-/lightningcss-darwin-arm64-1.32.0.tgz"
+  integrity sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==
+
+lightningcss@^1.32.0:
+  version "1.32.0"
+  resolved "https://registry.npmjs.org/lightningcss/-/lightningcss-1.32.0.tgz"
+  integrity sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==
   dependencies:
-    immediate "~3.0.5"
+    detect-libc "^2.0.3"
+  optionalDependencies:
+    lightningcss-android-arm64 "1.32.0"
+    lightningcss-darwin-arm64 "1.32.0"
+    lightningcss-darwin-x64 "1.32.0"
+    lightningcss-freebsd-x64 "1.32.0"
+    lightningcss-linux-arm-gnueabihf "1.32.0"
+    lightningcss-linux-arm64-gnu "1.32.0"
+    lightningcss-linux-arm64-musl "1.32.0"
+    lightningcss-linux-x64-gnu "1.32.0"
+    lightningcss-linux-x64-musl "1.32.0"
+    lightningcss-win32-arm64-msvc "1.32.0"
+    lightningcss-win32-x64-msvc "1.32.0"
 
 lines-and-columns@^1.1.6:
   version "1.2.4"
@@ -5051,7 +4843,7 @@ pathe@^2.0.3:
   resolved "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz"
   integrity sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==
 
-picocolors@1.1.1, picocolors@^1.1.1:
+picocolors@^1.1.1, picocolors@1.1.1:
   version "1.1.1"
   resolved "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz"
   integrity sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==
@@ -5066,7 +4858,7 @@ possible-typed-array-names@^1.0.0:
   resolved "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.1.0.tgz"
   integrity sha512-/+5VFTchJDoVj3bhoqi6UeymcD00DAwb1nJwamzPvHEszJ4FpF6SNNbUbOS8yI56qHzdV8eK0qEfOSiodkTdxg==
 
-postcss@^8.5.6:
+postcss@^8.5.6, postcss@^8.5.8:
   version "8.5.8"
   resolved "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz"
   integrity sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==
@@ -5335,7 +5127,7 @@ react-animate-on-change@^2.2.0:
 
 react-arborist@3.7.0:
   version "3.7.0"
-  resolved "https://registry.npmjs.org/react-arborist/-/react-arborist-3.7.0.tgz#b39156f1fe4bb31477118c57905827a3ba4d0ec5"
+  resolved "https://registry.npmjs.org/react-arborist/-/react-arborist-3.7.0.tgz"
   integrity sha512-gh2SoO0eXQVSP6zxXMGqFeXF+l2uabDGBVn0+RKqy/s7mrG5xGnfM5mhyB67cMVobC3vWYLqe6HGh7ZEZadW/w==
   dependencies:
     react-dnd "^14.0.3"
@@ -5523,7 +5315,33 @@ react@^18.2.0:
   dependencies:
     loose-envify "^1.1.0"
 
-readable-stream@^2.0.0, readable-stream@^2.0.2, readable-stream@^2.0.5, readable-stream@~2.3.6:
+readable-stream@^2.0.0:
+  version "2.3.8"
+  resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz"
+  integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==
+  dependencies:
+    core-util-is "~1.0.0"
+    inherits "~2.0.3"
+    isarray "~1.0.0"
+    process-nextick-args "~2.0.0"
+    safe-buffer "~5.1.1"
+    string_decoder "~1.1.1"
+    util-deprecate "~1.0.1"
+
+readable-stream@^2.0.2:
+  version "2.3.8"
+  resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz"
+  integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==
+  dependencies:
+    core-util-is "~1.0.0"
+    inherits "~2.0.3"
+    isarray "~1.0.0"
+    process-nextick-args "~2.0.0"
+    safe-buffer "~5.1.1"
+    string_decoder "~1.1.1"
+    util-deprecate "~1.0.1"
+
+readable-stream@^2.0.5:
   version "2.3.8"
   resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz"
   integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==
@@ -5545,6 +5363,19 @@ readable-stream@^3.1.1, readable-stream@^3.4.0, readable-stream@^3.6.0:
     string_decoder "^1.1.1"
     util-deprecate "^1.0.1"
 
+readable-stream@~2.3.6:
+  version "2.3.8"
+  resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz"
+  integrity sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==
+  dependencies:
+    core-util-is "~1.0.0"
+    inherits "~2.0.3"
+    isarray "~1.0.0"
+    process-nextick-args "~2.0.0"
+    safe-buffer "~5.1.1"
+    string_decoder "~1.1.1"
+    util-deprecate "~1.0.1"
+
 readdir-glob@^1.1.2:
   version "1.1.3"
   resolved "https://registry.npmjs.org/readdir-glob/-/readdir-glob-1.1.3.tgz"
@@ -5575,7 +5406,14 @@ redux-thunk@^2.4.2:
   resolved "https://registry.npmjs.org/redux-thunk/-/redux-thunk-2.4.2.tgz"
   integrity sha512-+P3TjtnP0k/FEjcBL5FZpoovtvrTNT/UXd4/sluaSyrURlSlhLSzEdfsTBW7WsKB6yPvgd7q/iZPICFjW4o57Q==
 
-redux@^4.1.1, redux@^4.2.0, redux@^4.2.1:
+redux@^4.1.1:
+  version "4.2.1"
+  resolved "https://registry.npmjs.org/redux/-/redux-4.2.1.tgz"
+  integrity sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w==
+  dependencies:
+    "@babel/runtime" "^7.9.2"
+
+redux@^4.2.0, redux@^4.2.1:
   version "4.2.1"
   resolved "https://registry.npmjs.org/redux/-/redux-4.2.1.tgz"
   integrity sha512-LAUYz4lc+Do8/g7aeRa8JkyDErK6ekstQaqWQrNRW//MY1TvCEpMtpTWvlQ+FPbWCx+Xixu/6SHt5N0HR+SB4w==
@@ -5687,6 +5525,30 @@ robust-predicates@^3.0.2:
   resolved "https://registry.npmjs.org/robust-predicates/-/robust-predicates-3.0.3.tgz"
   integrity sha512-NS3levdsRIUOmiJ8FZWCP7LG3QpJyrs/TE0Zpf1yvZu8cAJJ6QMW92H1c7kWpdIHo8RvmLxN/o2JXTKHp74lUA==
 
+rolldown@1.0.0-rc.11:
+  version "1.0.0-rc.11"
+  resolved "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.11.tgz"
+  integrity sha512-NRjoKMusSjfRbSYiH3VSumlkgFe7kYAa3pzVOsVYVFY3zb5d7nS+a3KGQ7hJKXuYWbzJKPVQ9Wxq2UvyK+ENpw==
+  dependencies:
+    "@oxc-project/types" "=0.122.0"
+    "@rolldown/pluginutils" "1.0.0-rc.11"
+  optionalDependencies:
+    "@rolldown/binding-android-arm64" "1.0.0-rc.11"
+    "@rolldown/binding-darwin-arm64" "1.0.0-rc.11"
+    "@rolldown/binding-darwin-x64" "1.0.0-rc.11"
+    "@rolldown/binding-freebsd-x64" "1.0.0-rc.11"
+    "@rolldown/binding-linux-arm-gnueabihf" "1.0.0-rc.11"
+    "@rolldown/binding-linux-arm64-gnu" "1.0.0-rc.11"
+    "@rolldown/binding-linux-arm64-musl" "1.0.0-rc.11"
+    "@rolldown/binding-linux-ppc64-gnu" "1.0.0-rc.11"
+    "@rolldown/binding-linux-s390x-gnu" "1.0.0-rc.11"
+    "@rolldown/binding-linux-x64-gnu" "1.0.0-rc.11"
+    "@rolldown/binding-linux-x64-musl" "1.0.0-rc.11"
+    "@rolldown/binding-openharmony-arm64" "1.0.0-rc.11"
+    "@rolldown/binding-wasm32-wasi" "1.0.0-rc.11"
+    "@rolldown/binding-win32-arm64-msvc" "1.0.0-rc.11"
+    "@rolldown/binding-win32-x64-msvc" "1.0.0-rc.11"
+
 rollup@^4.43.0:
   version "4.60.0"
   resolved "https://registry.npmjs.org/rollup/-/rollup-4.60.0.tgz"
@@ -5747,7 +5609,7 @@ safe-array-concat@^1.1.3:
     has-symbols "^1.1.0"
     isarray "^2.0.5"
 
-safe-buffer@^5.0.1, safe-buffer@~5.2.0:
+safe-buffer@^5.0.1:
   version "5.2.1"
   resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz"
   integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
@@ -5757,6 +5619,11 @@ safe-buffer@~5.1.0, safe-buffer@~5.1.1:
   resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz"
   integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==
 
+safe-buffer@~5.2.0:
+  version "5.2.1"
+  resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz"
+  integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
+
 safe-push-apply@^1.0.0:
   version "1.0.0"
   resolved "https://registry.npmjs.org/safe-push-apply/-/safe-push-apply-1.0.0.tgz"
@@ -5947,7 +5814,7 @@ solid-js@^1.9.5:
     seroval "~1.5.0"
     seroval-plugins "~1.5.0"
 
-"source-map-js@>=0.6.2 <2.0.0", source-map-js@^1.2.1:
+source-map-js@^1.2.1, "source-map-js@>=0.6.2 <2.0.0":
   version "1.2.1"
   resolved "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz"
   integrity sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==
@@ -5985,6 +5852,20 @@ stop-iteration-iterator@^1.1.0:
     es-errors "^1.3.0"
     internal-slot "^1.1.0"
 
+string_decoder@^1.1.1:
+  version "1.3.0"
+  resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz"
+  integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
+  dependencies:
+    safe-buffer "~5.2.0"
+
+string_decoder@~1.1.1:
+  version "1.1.1"
+  resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz"
+  integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==
+  dependencies:
+    safe-buffer "~5.1.0"
+
 string-width@^7.0.0, string-width@^7.2.0:
   version "7.2.0"
   resolved "https://registry.npmjs.org/string-width/-/string-width-7.2.0.tgz"
@@ -6062,20 +5943,6 @@ string.prototype.trimstart@^1.0.8:
     define-properties "^1.2.1"
     es-object-atoms "^1.0.0"
 
-string_decoder@^1.1.1:
-  version "1.3.0"
-  resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz"
-  integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==
-  dependencies:
-    safe-buffer "~5.2.0"
-
-string_decoder@~1.1.1:
-  version "1.1.1"
-  resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz"
-  integrity sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==
-  dependencies:
-    safe-buffer "~5.1.0"
-
 stringify-entities@^4.0.0:
   version "4.0.4"
   resolved "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz"
@@ -6218,9 +6085,9 @@ tldts@^7.0.5:
     tldts-core "^7.0.27"
 
 tmp@^0.2.0:
-  version "0.2.7"
-  resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.2.7.tgz#26f4db11d1601ce8012dcb8a798ece1c06a99059"
-  integrity sha512-e0votIpp4Uo2AJYSzVHV6xCcawuiez3DzqDAbrTc3YxBkplN6e+dM13ZeIcZnDg/QpSuU2zfZ3rzwY8ukEnaXw==
+  version "0.2.5"
+  resolved "https://registry.npmjs.org/tmp/-/tmp-0.2.5.tgz"
+  integrity sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow==
 
 topojson-client@^3.1.0:
   version "3.1.0"
@@ -6263,16 +6130,16 @@ ts-api-utils@^2.4.0:
   resolved "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.5.0.tgz"
   integrity sha512-OJ/ibxhPlqrMM0UiNHJ/0CKQkoKF243/AEmplt3qpRgkW8VG7IfOS41h7V8TjITqdByHzrjcS/2si+y4lIh8NA==
 
-tslib@2.3.0:
-  version "2.3.0"
-  resolved "https://registry.npmjs.org/tslib/-/tslib-2.3.0.tgz"
-  integrity sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==
-
 tslib@^2.8.1, tslib@~2.8.1:
   version "2.8.1"
   resolved "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz"
   integrity sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==
 
+tslib@2.3.0:
+  version "2.3.0"
+  resolved "https://registry.npmjs.org/tslib/-/tslib-2.3.0.tgz"
+  integrity sha512-N82ooyxVNm6h1riLCoyS9e3fuJ3AMG2zIZs2Gd1ATcSFjSA23Q0fzjjZeh0jbJvWVDZ0cJT8yaNNaaXHzueNjg==
+
 tunnel-agent@^0.6.0:
   version "0.6.0"
   resolved "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz"
@@ -6503,18 +6370,6 @@ vega-dataflow@^6.1.0, vega-dataflow@~6.1.0:
     vega-loader "^5.1.0"
     vega-util "^2.1.0"
 
-vega-embed@6.5.1:
-  version "6.5.1"
-  resolved "https://registry.npmjs.org/vega-embed/-/vega-embed-6.5.1.tgz"
-  integrity sha512-yz/L1bN3+fLOpgXVb/8sCRv4GlZpD2/ngeKJAFRiHTIRm5zK6W0KuqZZvyGaO7E4s7RuYjW1TWhRIOqh5rS5hA==
-  dependencies:
-    fast-json-patch "^3.0.0-1"
-    json-stringify-pretty-compact "^2.0.0"
-    semver "^7.1.3"
-    vega-schema-url-parser "^1.1.0"
-    vega-themes "^2.8.2"
-    vega-tooltip "^0.22.0"
-
 vega-embed@^6.21.0:
   version "6.29.0"
   resolved "https://registry.npmjs.org/vega-embed/-/vega-embed-6.29.0.tgz"
@@ -6529,6 +6384,18 @@ vega-embed@^6.21.0:
     vega-themes "^2.15.0"
     vega-tooltip "^0.35.2"
 
+vega-embed@6.5.1:
+  version "6.5.1"
+  resolved "https://registry.npmjs.org/vega-embed/-/vega-embed-6.5.1.tgz"
+  integrity sha512-yz/L1bN3+fLOpgXVb/8sCRv4GlZpD2/ngeKJAFRiHTIRm5zK6W0KuqZZvyGaO7E4s7RuYjW1TWhRIOqh5rS5hA==
+  dependencies:
+    fast-json-patch "^3.0.0-1"
+    json-stringify-pretty-compact "^2.0.0"
+    semver "^7.1.3"
+    vega-schema-url-parser "^1.1.0"
+    vega-themes "^2.8.2"
+    vega-tooltip "^0.22.0"
+
 vega-encode@~5.1.0:
   version "5.1.0"
   resolved "https://registry.npmjs.org/vega-encode/-/vega-encode-5.1.0.tgz"
@@ -6791,7 +6658,17 @@ vega-typings@~2.1.0:
     vega-expression "^6.1.0"
     vega-util "^2.1.0"
 
-vega-util@^1.13.1, vega-util@^1.17.2, vega-util@^1.17.4:
+vega-util@^1.13.1:
+  version "1.17.4"
+  resolved "https://registry.npmjs.org/vega-util/-/vega-util-1.17.4.tgz"
+  integrity sha512-+y3ZW7dEqM8Ck+KRsd+jkMfxfE7MrQxUyIpNjkfhIpGEreym+aTn7XUw1DKXqclr8mqTQvbilPo16B3lnBr0wA==
+
+vega-util@^1.17.2:
+  version "1.17.4"
+  resolved "https://registry.npmjs.org/vega-util/-/vega-util-1.17.4.tgz"
+  integrity sha512-+y3ZW7dEqM8Ck+KRsd+jkMfxfE7MrQxUyIpNjkfhIpGEreym+aTn7XUw1DKXqclr8mqTQvbilPo16B3lnBr0wA==
+
+vega-util@^1.17.4:
   version "1.17.4"
   resolved "https://registry.npmjs.org/vega-util/-/vega-util-1.17.4.tgz"
   integrity sha512-+y3ZW7dEqM8Ck+KRsd+jkMfxfE7MrQxUyIpNjkfhIpGEreym+aTn7XUw1DKXqclr8mqTQvbilPo16B3lnBr0wA==
@@ -6893,7 +6770,20 @@ vfile@^6.0.0:
     "@types/unist" "^3.0.0"
     vfile-message "^4.0.0"
 
-"vite@^6.0.0 || ^7.0.0 || ^8.0.0", vite@^7.3.3:
+"vite@^6.0.0 || ^7.0.0 || ^8.0.0":
+  version "8.0.2"
+  resolved "https://registry.npmjs.org/vite/-/vite-8.0.2.tgz"
+  integrity sha512-1gFhNi+bHhRE/qKZOJXACm6tX4bA3Isy9KuKF15AgSRuRazNBOJfdDemPBU16/mpMxApDPrWvZ08DcLPEoRnuA==
+  dependencies:
+    lightningcss "^1.32.0"
+    picomatch "^4.0.3"
+    postcss "^8.5.8"
+    rolldown "1.0.0-rc.11"
+    tinyglobby "^0.2.15"
+  optionalDependencies:
+    fsevents "~2.3.3"
+
+vite@^7.3.3:
   version "7.3.3"
   resolved "https://registry.npmjs.org/vite/-/vite-7.3.3.tgz"
   integrity sha512-/4XH147Ui7OGTjg3HbdWe5arnZQSbfuRzdr9Ec7TQi5I7R+ir0Rlc9GIvD4v0XZurELqA035KVXJXpR61xhiTA==

From 5b45326631f7e4a45a0d167a2d3e8d280d827b2a Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Tue, 9 Jun 2026 13:09:35 -0700
Subject: [PATCH 17/29] minor cleanup

---
 src/views/InteractionEntryCard.tsx | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/views/InteractionEntryCard.tsx b/src/views/InteractionEntryCard.tsx
index 79686ca2..6e40346e 100644
--- a/src/views/InteractionEntryCard.tsx
+++ b/src/views/InteractionEntryCard.tsx
@@ -299,7 +299,10 @@ export const InteractionEntryCard: React.FC<InteractionEntryCardProps> = memo(({
                 break;
             }
             case 'summary':
-                color = theme.palette.text.primary;
+                // Chrome-less prose trailing the turn — recede into ambient
+                // text (matching `instruction`); the gutter icon carries the
+                // "finding" cue, not a heavier text color.
+                color = theme.palette.text.secondary;
                 break;
             case 'error':
                 color = theme.palette.error.main;
@@ -385,9 +388,12 @@ export const InteractionEntryCard: React.FC<InteractionEntryCardProps> = memo(({
         const bubbleHover = bubbleAccent
             ? alpha(bubbleAccent, 0.09)
             : alpha(theme.palette.text.primary, 0.05);
-        // Conversational bubbles get card chrome, except resolved pauses
-        // which render as a chrome-less compact trace.
-        const bubbleSx = (isConversational && !isResolvedPause) ? {
+        // Conversational bubbles get card chrome, except resolved pauses and
+        // summaries — both render chrome-less. A summary is the agent's
+        // closing remark on a turn; reading it as plain prose (no box, no
+        // fill) keeps the timeline foregrounding charts/data rather than
+        // persisting the remark as a card.
+        const bubbleSx = (isConversational && !isResolvedPause && !isSummary) ? {
             py: 0.5, px: 1,
             borderRadius: radius.sm,
             backgroundColor: bubbleBg,
@@ -397,6 +403,10 @@ export const InteractionEntryCard: React.FC<InteractionEntryCardProps> = memo(({
             // the gutter icon and adjacent bubbles. No bg, no border.
             py: '2px', px: '4px',
             opacity: 0.7,
+        } : isSummary ? {
+            // Summary as flowing prose: no card chrome, just inline padding
+            // so it aligns with the gutter icon and adjacent bubbles.
+            py: '2px', px: '4px',
         } : {};
 
         return (
@@ -414,7 +424,12 @@ export const InteractionEntryCard: React.FC<InteractionEntryCardProps> = memo(({
                         mx: '-2px',
                         '&:hover': { backgroundColor: 'rgba(0,0,0,0.03)' },
                     } : {}),
-                    ...(isCollapsible && isConversational ? {
+                    ...(isCollapsible && isSummary ? {
+                        // Gentle hover that doesn't reintroduce a card fill.
+                        borderRadius: '4px',
+                        '&:hover': { backgroundColor: 'rgba(0,0,0,0.03)' },
+                    } : {}),
+                    ...(isCollapsible && isConversational && !isSummary ? {
                         '&:hover': { backgroundColor: bubbleHover },
                     } : {}),
                 }}

From b879c61645780f4fc1c4470f441991c1d7aa3ffe Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Wed, 10 Jun 2026 17:01:32 -0700
Subject: [PATCH 18/29] temp unified agent approach

---
 py-src/data_formulator/analyst/__init__.py    |   49 +
 py-src/data_formulator/analyst/agent.py       | 2093 +++++++++++++++++
 .../analyst/skills/__init__.py                |  382 +++
 py-src/data_formulator/analyst/skills/base.py |  185 ++
 .../analyst/skills/core/SKILL.md              |  151 ++
 .../analyst/skills/core/__init__.py           |    8 +
 .../analyst/skills/core/skill.py              |  400 ++++
 .../analyst/skills/core/tools.json            |  163 ++
 .../analyst/skills/report/SKILL.md            |  118 +
 .../analyst/skills/report/__init__.py         |    8 +
 .../analyst/skills/report/skill.py            |  221 ++
 .../analyst/skills/report/tools.json          |   37 +
 py-src/data_formulator/analyst/tools.py       |  151 ++
 py-src/data_formulator/routes/agents.py       |  114 +
 src/app/App.tsx                               |   27 +
 src/app/dfSlice.tsx                           |   19 +-
 src/app/utils.tsx                             |    1 +
 src/i18n/locales/en/common.json               |    3 +
 src/i18n/locales/zh/common.json               |    3 +
 .../agents-chart/vegalite/templates/bar.ts    |   12 +
 src/views/AgentPausePanel.tsx                 |  492 +++-
 src/views/DataThread.tsx                      |  184 +-
 src/views/ReportView.tsx                      |    2 +-
 src/views/SimpleChartRecBox.tsx               |  257 +-
 .../unit/views/ClarificationPanel.test.tsx    |  116 +-
 25 files changed, 4976 insertions(+), 220 deletions(-)
 create mode 100644 py-src/data_formulator/analyst/__init__.py
 create mode 100644 py-src/data_formulator/analyst/agent.py
 create mode 100644 py-src/data_formulator/analyst/skills/__init__.py
 create mode 100644 py-src/data_formulator/analyst/skills/base.py
 create mode 100644 py-src/data_formulator/analyst/skills/core/SKILL.md
 create mode 100644 py-src/data_formulator/analyst/skills/core/__init__.py
 create mode 100644 py-src/data_formulator/analyst/skills/core/skill.py
 create mode 100644 py-src/data_formulator/analyst/skills/core/tools.json
 create mode 100644 py-src/data_formulator/analyst/skills/report/SKILL.md
 create mode 100644 py-src/data_formulator/analyst/skills/report/__init__.py
 create mode 100644 py-src/data_formulator/analyst/skills/report/skill.py
 create mode 100644 py-src/data_formulator/analyst/skills/report/tools.json
 create mode 100644 py-src/data_formulator/analyst/tools.py

diff --git a/py-src/data_formulator/analyst/__init__.py b/py-src/data_formulator/analyst/__init__.py
new file mode 100644
index 00000000..0e56267b
--- /dev/null
+++ b/py-src/data_formulator/analyst/__init__.py
@@ -0,0 +1,49 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Analyst agent — a single user-facing data agent hosting multiple skills.
+
+This package unifies the former ``DataAgent`` (structured-action visualization
+loop) and ``ReportGenAgent`` (streaming report writer) into one agent shell
+that loads *skills* on demand. See
+``design-docs/35-unified-agent-skills-architecture.md`` for the full design.
+
+Core ideas:
+  - **Inspection tools** gather information and are parallel-safe; their results
+    come back to the agent and are never shown to the user. The shell ships a
+    small core set (``inspect_source_data``, ``execute_python_script``, ``load_skill``); a
+    loaded skill may contribute additional tools (e.g. ``inspect_chart``).
+  - **Actions** are committing surfaces — at most one per turn. Each returns an
+    observation the shell feeds back as the action's tool-call result, so the
+    agent reads it and decides its own next move. ``visualize`` / ``delegate``
+    are core (always available); skill actions (``write_report``,
+    ``restyle_chart``, …) are *gated* until their ``SKILL.md`` is loaded. The
+    run ends when the model commits no action (its final plain text is the
+    completion).
+  - A **skill is a passive plugin**, not a mini-agent: it bundles its
+    ``SKILL.md`` with optional ``tools`` + ``actions`` and the handlers
+    (``handle_tool`` / ``handle_action``) that perform any compute / rendering.
+    Its Python is always imported; ``load_skill`` only exposes it to the model.
+"""
+
+from data_formulator.analyst.skills import (
+    Event,
+    Skill,
+    SkillContext,
+    SkillMeta,
+    SkillRegistry,
+    ToolResult,
+    build_registry,
+)
+from data_formulator.analyst.agent import AnalystAgent
+
+__all__ = [
+    "AnalystAgent",
+    "Event",
+    "Skill",
+    "SkillContext",
+    "SkillMeta",
+    "SkillRegistry",
+    "ToolResult",
+    "build_registry",
+]
diff --git a/py-src/data_formulator/analyst/agent.py b/py-src/data_formulator/analyst/agent.py
new file mode 100644
index 00000000..e83c4432
--- /dev/null
+++ b/py-src/data_formulator/analyst/agent.py
@@ -0,0 +1,2093 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""AnalystAgent — the unified data analyst agent shell.
+
+This is the single user-facing data agent that replaces the separate
+``DataAgent`` (structured-action visualization loop) and ``ReportGenAgent``
+(streaming report writer). It hosts a set of **core actions** plus a registry
+of **skills** that unlock additional **gated actions** on demand. See
+``design-docs/35-unified-agent-skills-architecture.md`` and the action turn
+model in ``design-docs/36-artifact-turn-model.md``.
+
+Architecture (a vanilla tool-calling loop, plus the skills layer):
+  - **Inspection tools** (``execute_python_script``, ``inspect_source_data``, ``load_skill``,
+    plus skill-private tools) are called via the tool-calling API to gather
+    information. Parallel-safe, internal, no side effects.
+  - **Committing actions** (``visualize``, ``delegate``) render a user-visible
+    surface. Each returns an *observation* string that the shell feeds back as
+    the action's tool-call result — the same lane an inspection tool result
+    rides — so the agent reads it and decides its own next move. Always available.
+  - **Gated actions** (e.g. ``write_report``) are unlocked only after their
+    skill is loaded via ``load_skill``; their tool is not offered until then.
+
+The run ends when the model commits **no action** in a turn: its final plain-text
+answer *is* the completion (the frontend renders it as the run's summary). There
+is no control verdict and no separate "stop" action — the agent simply stops
+acting. The shell stays skill-agnostic: it partitions a response into inspection
+tools vs committing actions, enforces the one-action-per-turn cardinality guard,
+routes the chosen action to the owning skill's ``handle_action(...)``, feeds the
+returned observation back, and forwards the channel-tagged events.
+"""
+
+import json
+import logging
+import re
+import time
+import uuid
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any, Generator
+
+from data_formulator.agent_config import reasoning_effort_for
+from data_formulator.agents.agent_utils import (
+    accumulate_reasoning_content,
+    attach_reasoning_content,
+    ensure_output_variable_in_code,
+)
+from data_formulator.agents.context import (
+    build_focused_thread_context,
+    build_lightweight_table_context,
+    build_peripheral_thread_context,
+    handle_inspect_source_data,
+)
+from data_formulator.agents.client_utils import Client
+from data_formulator.agents.chart_creation_guide import CHART_CREATION_GUIDE
+from data_formulator.datalake.parquet_utils import df_to_safe_records
+
+from data_formulator.analyst.skills import (
+    Event,
+    SkillContext,
+    SkillRegistry,
+    ToolResult,
+    build_registry,
+)
+from data_formulator.analyst.tools import build_tools
+
+logger = logging.getLogger(__name__)
+
+_AGENT_ID = "analyst"
+
+# The always-on baseline skill, auto-loaded at the start of every run. It owns
+# the built-in tools (execute_python_script / inspect_source_data) and the always-available
+# actions (visualize / delegate) plus the base prompt body (its SKILL.md). The
+# shell hardcodes nothing about those actions — legality is derived from
+# whichever skills are loaded.
+_CORE_SKILL = "core"
+
+# Banner stamped at the START of a loaded skill's body message. It is the single
+# contract between the emitter (_load_skill_into_context) and the resume parser
+# (_rehydrate_loaded_skills): they share this template + regex so they cannot
+# drift, and the regex is anchored to the message start so only banners *we*
+# emitted match — never the same text pasted by a user or echoed by the model.
+_SKILL_LOADED_BANNER = "[SKILL LOADED: {name}]"
+_SKILL_LOADED_RE = re.compile(r"^\[SKILL LOADED: ([^\]]+)\]")
+
+# ── Action-argument coercion ──────────────────────────────────────────────
+# Weaker models sometimes JSON-encode a nested action argument as a string
+# (e.g. ``"chart": "{...}"``). Parse those back to objects before dispatch so
+# the skill handler sees structured data. Required-field validation lives in the
+# registry (``action_required_fields``) and the skill handler — not here.
+
+
+def _rescue_unpack_json_strings(data: dict) -> None:
+    """In-place: parse values that are JSON-encoded strings back to objects."""
+    for key in (
+        "chart", "input_tables", "questions", "options", "followups",
+        "field_metadata", "field_display_names",
+    ):
+        val = data.get(key)
+        if isinstance(val, str) and val.strip()[:1] in ("{", "["):
+            try:
+                data[key] = json.loads(val)
+            except (json.JSONDecodeError, ValueError):
+                pass
+
+
+# ── Live tool-argument streaming (design-docs/36 §5) ───────────────────────
+# A streaming action (only ``write_report`` today) writes its payload as a
+# tool-call argument. Providers stream that argument as a growing JSON fragment
+# (``delta.tool_calls[].function.arguments`` — Anthropic's ``input_json_delta``).
+# This extractor pulls the *decoded* value of one top-level string key out of
+# that fragment as it grows, surfacing only the newly-completed suffix each feed
+# so the agent can forward it as channel ``text_delta``s. It is forgiving of a
+# partial trailing escape (``\\`` or an incomplete ``\\uXXXX``): it holds those
+# bytes back until the next chunk completes them, never emitting half an escape.
+
+
+class _StreamingArgExtractor:
+    """Incrementally extract the decoded string value of a top-level JSON key
+    from a growing tool-call ``arguments`` fragment.
+
+    ``feed`` is given the full accumulated arguments so far and returns only the
+    newly-decoded suffix of the target field's value (``""`` while nothing new
+    can be safely decoded yet).
+    """
+
+    def __init__(self, field: str):
+        # Matches ``"field"`` then ``:`` then the opening quote of the value.
+        self._open_re = re.compile(r'"' + re.escape(field) + r'"\s*:\s*"')
+        self._emitted = 0
+
+    def feed(self, args_so_far: str) -> str:
+        decoded = self._decode(args_so_far)
+        if decoded is None or len(decoded) <= self._emitted:
+            return ""
+        new = decoded[self._emitted:]
+        self._emitted = len(decoded)
+        return new
+
+    def _decode(self, args: str) -> str | None:
+        """Return the decoded value-so-far of the field, or ``None`` if the
+        value has not started or a trailing escape is incomplete."""
+        m = self._open_re.search(args)
+        if not m:
+            return None
+        rest = args[m.end():]
+        out: list[str] = []
+        i, n = 0, len(rest)
+        while i < n:
+            ch = rest[i]
+            if ch == "\\":
+                if i + 1 >= n:
+                    break  # dangling escape — wait for the next chunk
+                out.append(rest[i:i + 2])
+                i += 2
+                continue
+            if ch == '"':
+                break  # closing quote — value complete
+            out.append(ch)
+            i += 1
+        try:
+            # Re-wrap as a JSON string literal so escapes decode correctly.
+            return json.loads('"' + "".join(out) + '"')
+        except (json.JSONDecodeError, ValueError):
+            return None  # e.g. partial ``\\uXXXX`` — wait for more
+
+
+
+# The agent's system frame — shell-owned, invariant across skills: identity, the
+# tools-vs-actions contract, the skills mechanism, and the action budget /
+# stop criteria. This is the agent's own contract, so it lives here as code (not
+# as a skill body). ``_build_system_prompt`` fills the ``{...}`` slots via plain
+# string substitution (NOT str.format — braces elsewhere stay literal). The
+# always-loaded ``core`` skill's SKILL.md (the concrete tools + action schemas)
+# is appended after this frame, unformatted, exactly like any other skill body.
+SYSTEM_PROMPT = """\
+You are an autonomous data analyst agent.
+
+Your goal is to help the user by exploring their data, producing visualizations,
+and — when asked — packaging the findings (e.g. into a written report). You
+operate in a loop: gather what you need with inspection tools, take an **action**
+when you want to act on the data, read its result, and repeat — then stop by
+giving your final answer in plain text.
+
+## Tools vs. actions
+
+Everything you do is a function/tool call, but calls come in two kinds and
+keeping them straight is essential:
+
+- **Inspection tools** (internal — for gathering information). Functions like
+  `execute_python_script`, `inspect_source_data`, `inspect_chart`, and `load_skill` that
+  inspect data or load instructions *before* you act. Their results return to
+  you and are **not** shown to the user. They commit nothing and are
+  **independent** — none depends on another's result — so call as many as you
+  need, across as many rounds as you need, until you have enough to act.
+- **Actions** (committing — shown to the user). A discrete operation like
+  `visualize`, `ask_user`, `delegate`, and (once the report skill is loaded)
+  `write_report`. Each renders a user-visible surface, and its result is
+  returned to you just like a tool result so you can react to it.
+
+**Actions are sequential — take exactly one, then wait for its result.** This is
+the key difference from inspection tools: those are independent, but each
+action's result shapes your next decision — the chart you'd draw next depends on
+what this one reveals — so choosing two at once would make the second a blind
+guess, decided before you've seen the first's outcome. Do all your inspection
+first, then commit the single action that fits.
+
+Treat each action like one turn in a back-and-forth: **you act → its result
+answers → you act again.** Even when you're planning a sequence of charts,
+surface them one at a time so each reacts to the last. (If you do emit several
+actions at once, only the first runs and the rest are discarded — batching only
+loses work.)
+
+**To finish, reply with plain text and no action.** Plain text is your
+**closing answer** — the run is over and you expect nothing further (the user's
+next message starts a fresh turn). Use it whenever you've done what was asked,
+including answering a question you fully resolved.
+
+**Whenever you expect the user to reply — a question, a clarification, an
+explanation you want them to react to, or a set of choices — use the `ask_user`
+action instead.** It renders a question widget and pauses the run for their
+reply, so the conversation resumes in the same turn. `ask_user` accepts
+free-text questions (no clickable options required), so reach for it for *any*
+followup-seeking turn, not only structured choices. Plain text never asks for
+input; `ask_user` always does. There is no separate "stop" or "summary" action:
+you stop by simply not acting.
+
+The concrete actions available to you — and how to use each well — are
+described in the capability sections below.
+
+## Understanding your context
+
+{context_guide}
+
+## Skills (load on demand)
+
+Your baseline capabilities come from the **core** skill, which is **always loaded
+automatically** (you'll see it below as `[SKILL: core]`). Beyond that baseline,
+extra capabilities are packaged as **extension skills** — each one unlocks an
+additional action (and sometimes extra tools), but only after you load it:
+1. Call the `load_skill("<name>")` tool — this reads the skill's instructions into
+   your context and unlocks its action(s) and any tools it provides.
+2. Follow those instructions and call the action it unlocks (its tool only
+   appears once the skill is loaded).
+
+Calling an extension skill's action **before** loading the skill will not
+execute — you'll be asked to load it first. Extension skills available this run
+(load the one whose `when to use` fits):
+
+{skills_block}
+
+## Working within your budget
+
+- You have a budget of **{max_iterations} actions** for this run — a **hard
+  ceiling, not a target**. Use as few as the goal requires.
+- **Stop as soon as the user's goal is met.** End the run by giving your final
+  answer in plain text rather than taking more actions just because you can.
+- Take a follow-up action only when it addresses a gap the previous step
+  actually raised — not merely another interesting angle.
+- If the request is genuinely ambiguous, clarify with the `ask_user` action
+  rather than guessing.
+
+{agent_exploration_rules}"""
+
+
+# ---------------------------------------------------------------------------
+# Agent
+# ---------------------------------------------------------------------------
+
+
+class AnalystAgent:
+    """Unified data analyst agent — core actions + on-demand skills."""
+
+    def __init__(
+        self,
+        client: Client,
+        workspace,
+        skill_registry: SkillRegistry | None = None,
+        agent_exploration_rules: str = "",
+        agent_coding_rules: str = "",
+        language_instruction: str = "",
+        max_iterations: int = 5,
+        max_repair_attempts: int = 2,
+        identity_id: str | None = None,
+    ):
+        self.client = client
+        self.workspace = workspace
+        self.registry = skill_registry or build_registry()
+        self.agent_exploration_rules = agent_exploration_rules
+        self.agent_coding_rules = agent_coding_rules
+        self.language_instruction = language_instruction
+        self.max_iterations = max_iterations
+        self.max_repair_attempts = max_repair_attempts
+
+        from data_formulator.agents.reasoning_log import (
+            ReasoningLogger, _NullReasoningLogger,
+        )
+        self._session_id = uuid.uuid4().hex[:12]
+        if identity_id:
+            try:
+                self._reasoning_log = ReasoningLogger(
+                    identity_id, "AnalystAgent", self._session_id,
+                )
+            except Exception:
+                logger.warning("Failed to initialise ReasoningLogger", exc_info=True)
+                self._reasoning_log = _NullReasoningLogger()
+        else:
+            self._reasoning_log = _NullReasoningLogger()
+
+        self._knowledge_store = None
+        self._injected_knowledge: list[dict[str, Any]] = []
+        self._injected_rules: list[str] = []
+        _user_home = getattr(workspace, "user_home", None)
+        if _user_home:
+            try:
+                from data_formulator.knowledge.store import KnowledgeStore
+                self._knowledge_store = KnowledgeStore(_user_home)
+            except Exception:
+                logger.warning("Failed to initialise KnowledgeStore", exc_info=True)
+
+        # Per-run skill state (reset at the start of each run()). Skill code
+        # modules themselves live in ``self.registry.skills`` and are always
+        # available; ``_loaded_skills`` only tracks which skills the model has
+        # been *exposed* to (tools + actions + guidance) this run.
+        self._loaded_skills: set[str] = set()
+        # Free-form payload for skill dispatch (charts, etc.), set per run.
+        self._run_payload: dict[str, Any] = {}
+        # Live-streaming bookkeeping (design-docs/36 §5). ``_streamed_channels``
+        # maps a committing action's tool_call_id -> the channel its argument was
+        # already forwarded on during the streaming LLM call; ``_suppress_stream_channel``
+        # is set just before dispatching such an action so the router drops the
+        # skill's duplicate (buffered) emission of the same content.
+        self._streamed_channels: dict[str, str] = {}
+        self._suppress_stream_channel: str | None = None
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    def _explore_ns_dir(self) -> Path:
+        """Directory for cross-turn namespace serialisation."""
+        return self.workspace.confined_scratch.root / "_explore_ns"
+
+    def _legal_actions(self) -> frozenset[str]:
+        """Return the committing actions that are currently legal to emit.
+
+        The result is the union of ``action_names`` over every skill in
+        ``_loaded_skills`` (``core`` is put there at the start of each run);
+        a loaded name the registry has no metadata for contributes nothing.
+        """
+        legal: set[str] = set()
+        for name in self._loaded_skills:
+            meta = self.registry.metas.get(name)
+            if meta:
+                legal.update(meta.action_names)
+        return frozenset(legal)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def run(
+        self,
+        input_tables: list[dict[str, Any]],
+        user_question: str,
+        focused_thread: list[dict[str, Any]] | None = None,
+        other_threads: list[dict[str, Any]] | None = None,
+        trajectory: list[dict] | None = None,
+        completed_step_count: int = 0,
+        primary_tables: list[str] | None = None,
+        attached_images: list[str] | None = None,
+        charts: list[dict[str, Any]] | None = None,
+    ) -> Generator[dict[str, Any], None, None]:
+        """Run the unified analyst loop.
+
+        Yields event dicts with ``type`` in:
+            ``"context_info"``  – rules / knowledge injected (fresh runs only)
+            ``"action"`` / ``"result"`` – committed action (for UI) / visualization result (data + chart)
+            ``"tool_start"`` / ``"tool_result"`` – inspection tool activity
+            ``"skill_loaded"``  – a skill's gate opened
+            ``"delegate"``      – hand-off to a peer agent
+            ``"completion"``    – the run's final answer (ends the run)
+            ``"error"``         – error information
+
+        The run ends when a turn commits no action (the plain-text answer becomes the
+        ``completion`` event), on an LLM error, a no-observation action, or a spent budget.
+        """
+        rlog = self._reasoning_log
+        session_start_time = time.time()
+        total_llm_calls = 0
+        completed_steps: list[dict[str, Any]] = []
+        iteration = completed_step_count
+        final_status = "max_iterations"
+
+        # Reset per-run skill + payload state. ``core`` is auto-loaded: its
+        # baseline tools + actions are always available and its SKILL.md body is
+        # appended to the system frame (see _build_system_prompt). Gated skills
+        # are added to this set as the model loads them. The payload carries
+        # everything a dispatched skill handler needs to build its own context
+        # (e.g. the report skill rebuilds [AVAILABLE CHARTS] + thread
+        # context).
+        self._loaded_skills = {_CORE_SKILL}
+        self._run_payload = {
+            "input_tables": input_tables,
+            "charts": charts or [],
+            "focused_thread": focused_thread,
+            "other_threads": other_threads,
+            "primary_tables": primary_tables,
+        }
+
+        try:
+            rlog.log(
+                "session_start",
+                agent="AnalystAgent",
+                session_id=self._session_id,
+                user_question=user_question,
+                input_tables=[t.get("name", "") for t in input_tables],
+                model=self.client.model,
+                rules_injected=[
+                    r for r in [self.agent_exploration_rules, self.agent_coding_rules] if r
+                ],
+                knowledge_injected=[],
+            )
+
+            if trajectory is None:
+                ns_dir = self._explore_ns_dir()
+                if ns_dir.exists():
+                    import shutil
+                    shutil.rmtree(ns_dir, ignore_errors=True)
+
+                trajectory = self._build_initial_messages(
+                    input_tables, user_question, focused_thread, other_threads,
+                    primary_tables=primary_tables,
+                    attached_images=attached_images,
+                    charts=charts,
+                )
+                rlog.log(
+                    "context_built",
+                    system_prompt_tokens=len(trajectory[0].get("content", "")) // 4 if trajectory else 0,
+                    user_msg_tokens=len(str(trajectory[1].get("content", ""))) // 4 if len(trajectory) > 1 else 0,
+                    total_tables=len(input_tables),
+                    primary_tables=primary_tables or [],
+                    knowledge_rules_injected=self._injected_rules,
+                    knowledge_injected=self._injected_knowledge,
+                )
+
+                if self._injected_rules or self._injected_knowledge:
+                    yield {
+                        "type": "context_info",
+                        "rules_injected": self._injected_rules,
+                        "knowledge_injected": [
+                            {"category": k["category"], "title": k["title"]}
+                            for k in self._injected_knowledge
+                        ],
+                    }
+            else:
+                # Resume: the trajectory is the single source of truth. A loaded
+                # skill is just its ``[SKILL LOADED: <name>]`` body sitting in
+                # history (kept for free via prefix caching), so re-open the gate
+                # for every skill whose body is still present. This keeps
+                # ``_loaded_skills`` in sync with what the model actually sees,
+                # avoiding a "body present but gate closed" contradiction.
+                self._rehydrate_loaded_skills(trajectory)
+
+            action_budget = self.max_iterations  # hard ceiling on committing actions
+            actions_committed = completed_step_count  # resume-aware count
+            hard_ceiling = iteration + max(self.max_iterations * 3, 12)  # caps loop rounds, so gated / unknown-action retries cannot spin forever
+
+            while iteration < hard_ceiling:
+                iteration += 1
+
+                # --- THINK: call LLM with tools, get the next action ------
+                t_start = time.time()
+                action = None
+                action_reason = "ok"
+                action_error = ""
+                final_text = ""
+                action_tool_call_id = None
+                for event in self._get_next_action(trajectory, input_tables, outer_iteration=iteration):
+                    if event.get("type") == "agent_action":
+                        action = event.get("action_data")
+                        action_reason = event.get("reason", "ok")
+                        action_error = event.get("error_message", "")
+                        final_text = event.get("final_text", "")
+                        action_tool_call_id = event.get("tool_call_id")
+                        total_llm_calls += event.get("llm_calls", 0)
+                    else:
+                        yield event
+                logger.info("[AnalystAgent] iteration %d total=%.2fs reason=%s",
+                            iteration, time.time() - t_start, action_reason)
+
+                if action is None:
+                    # ── No committing action → the run is over ────────────────
+                    # The normal close: the model answered in plain text and
+                    # committed nothing. That final text IS the completion (the
+                    # frontend renders it as the run's summary). An LLM API error
+                    # is fatal; the tool-round backstop also lands here.
+                    if action_reason == "llm_error":
+                        final_status = "llm_error"
+                        yield self._error_event(
+                            iteration,
+                            action_error or "LLM API error",
+                            message_code="agent.llmApiError",
+                        )
+                        self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
+                        return
+
+                    final_status = (
+                        "tool_rounds_exhausted"
+                        if action_reason == "tool_rounds_exhausted"
+                        else "success"
+                    )
+                    yield {
+                        "type": "completion",
+                        "iteration": iteration,
+                        "status": final_status,
+                        "content": {
+                            "summary": final_text,
+                            "total_steps": len(completed_steps),
+                        },
+                    }
+                    self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
+                    return
+
+                action_type = action.get("action")
+                logger.info(f"[AnalystAgent] Iteration {iteration}: action={action_type}")
+
+                # --- GATE: every action is owned by a skill; its owner must be
+                #     loaded. ``core`` is always loaded, so its actions pass
+                #     straight through.
+                owner = self.registry.action_owner(action_type)
+                if owner is None:
+                    legal = ", ".join(sorted(self._legal_actions()))
+                    self._set_action_observation(
+                        trajectory, action_tool_call_id,
+                        f"[ERROR] Unknown action '{action_type}'. Choose one of: "
+                        f"{legal}, or load a skill that unlocks the action you need.",
+                    )
+                    yield self._error_event(
+                        iteration, f"Unknown action: {action_type}",
+                        message_code="agent.unknownAction",
+                    )
+                    continue
+                if owner not in self._loaded_skills:
+                    # Gate closed — tell the model to load the skill, no execution.
+                    self._set_action_observation(
+                        trajectory, action_tool_call_id,
+                        f"[GATED] The '{action_type}' action requires the "
+                        f"'{owner}' skill. Call load_skill(\"{owner}\") first, "
+                        "follow its instructions, then emit the action again.",
+                    )
+                    rlog.log("action_gated", action=action_type, skill=owner,
+                             iteration=iteration)
+                    continue
+
+                # --- DISPATCH: the owning skill renders the action and RETURNS
+                #     an observation string; the shell feeds it back as the
+                #     action's tool-call result (the same lane an inspection tool
+                #     result rides), then loops so the agent reads it and decides
+                #     its own next move. There is no control verdict.
+                # If this action's argument was streamed live during the LLM call
+                # (e.g. write_report), tell the router to drop the skill's
+                # duplicate buffered emission of the same content.
+                self._suppress_stream_channel = self._streamed_channels.get(
+                    action_tool_call_id
+                )
+                try:
+                    observation = yield from self._dispatch_skill_action(
+                        owner, action_type, action, trajectory, iteration, completed_steps,
+                    )
+                finally:
+                    self._suppress_stream_channel = None
+                self._set_action_observation(
+                    trajectory, action_tool_call_id, observation,
+                )
+
+                if observation is None:
+                    # ── Terminal action → the run pauses ──────────────────────
+                    # A handler that returns no observation (``interact``) has
+                    # nothing for the agent to react to: it already yielded its
+                    # own terminal surface (a question widget) and the run waits
+                    # for the user. Stop here; their next message starts a fresh
+                    # turn. No completion event — the interact event is the close.
+                    self._log_session_end(
+                        rlog, "success", iteration, total_llm_calls, session_start_time,
+                    )
+                    return
+
+                actions_committed += 1
+                remaining = action_budget - actions_committed
+                if remaining <= 0:
+                    # Hard action ceiling reached — stop and let the user steer.
+                    final_status = "max_iterations"
+                    yield {
+                        "type": "completion",
+                        "iteration": iteration,
+                        "status": "max_iterations",
+                        "content": {
+                            "summary": "Reached the maximum number of actions for this run.",
+                            "summary_code": "agent.maxIterationsSummary",
+                            "total_steps": len(completed_steps),
+                        },
+                    }
+                    self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
+                    return
+                if remaining == 1:
+                    trajectory.append({
+                        "role": "user",
+                        "content": (
+                            "[SYSTEM] You have 1 action left in your budget. Make it "
+                            "count, or wrap up by giving your final answer in plain "
+                            "text (which ends the run)."
+                        ),
+                    })
+                continue
+
+            # Runaway backstop — too many non-committing rounds without finishing.
+            final_status = "max_iterations"
+            self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
+            yield {
+                "type": "completion",
+                "iteration": iteration,
+                "status": "max_iterations",
+                "content": {
+                    "summary": "Reached the maximum number of exploration steps.",
+                    "summary_code": "agent.maxIterationsSummary",
+                    "total_steps": len(completed_steps),
+                },
+            }
+        finally:
+            rlog.close()
+
+    # ------------------------------------------------------------------
+    # Skill loading + dispatch
+    # ------------------------------------------------------------------
+
+    def _rehydrate_loaded_skills(self, trajectory: list[dict]) -> None:
+        """Re-open skill gates for bodies still present in a resumed trajectory.
+
+        A skill is "loaded" iff its ``[SKILL LOADED: <name>]`` body is in
+        context. On resume ``_loaded_skills`` has just been reset to ``{core}``,
+        so scan the (persisted) trajectory for those banners and re-add every
+        known skill whose body survived. Unknown names are ignored — only the
+        registry decides what is real.
+
+        Only ``user`` turns are scanned and the match is anchored to the start
+        of the message (see ``_SKILL_LOADED_RE``): our emitter stamps the banner
+        at position 0 of a ``user`` message, so a banner echoed in an assistant
+        or tool message, or sitting mid-message, will not spuriously open a gate.
+        """
+        for message in trajectory:
+            content = message.get("content")
+            if message.get("role") != "user" or not isinstance(content, str):
+                continue
+            m = _SKILL_LOADED_RE.match(content)
+            if m:
+                name = m.group(1).strip()
+                if self.registry.has(name):
+                    self._loaded_skills.add(name)
+
+    def _load_skill_into_context(
+        self, name: str, trajectory: list[dict],
+    ) -> tuple[bool, str]:
+        """Load a skill's ``SKILL.md`` body into the trajectory.
+
+        Returns ``(ok, message)``. On success the body is appended to
+        ``trajectory`` as a user message and ``name`` joins ``_loaded_skills``,
+        making its gated actions legal. Re-loading a loaded skill appends nothing.
+
+        Convenience wrapper around :meth:`_build_skill_body_message` that appends
+        the body immediately. Prefer the builder directly when loading inside a
+        tool-call round, where the body must be appended *after* the tool-result
+        messages (an assistant ``tool_calls`` turn must be immediately followed
+        by its tool responses — see the readonly loop in ``_tool_loop``).
+        """
+        ok, message, body_msg = self._build_skill_body_message(name)
+        if ok and body_msg is not None:
+            trajectory.append(body_msg)
+        return ok, message
+
+    def _build_skill_body_message(
+        self, name: str,
+    ) -> tuple[bool, str, dict | None]:
+        """Resolve a skill's body into a ``user`` message *without* appending it.
+
+        Returns ``(ok, message, body_msg)``. On success ``name`` is recorded in
+        ``_loaded_skills`` (its gated actions become legal immediately) and
+        ``body_msg`` is the user turn the caller must append at a provider-valid
+        point. An already-loaded skill returns ``(True, ..., None)``; an unknown
+        skill or a failed body load returns ``(False, ..., None)``.
+        """
+        if not self.registry.has(name):
+            return False, f"Unknown skill: {name!r}", None
+        if name in self._loaded_skills:
+            return True, f"Skill '{name}' already loaded.", None
+        try:
+            body = self.registry.load_body(name)
+        except Exception as e:
+            logger.warning("[AnalystAgent] Failed to load skill body %s", name, exc_info=True)
+            return False, f"Failed to load skill {name!r}: {e}", None
+
+        meta = self.registry.metas[name]
+        unlocks = ", ".join(meta.action_names) if meta.action_names else "(none)"
+        tool_names = [
+            spec.get("function", {}).get("name")
+            for spec in self.registry.tools_for([name])
+        ]
+        tool_names = [t for t in tool_names if t]  # drop specs that carry no function name
+        tools_line = (
+            f" New tools available: {', '.join(tool_names)}.\n" if tool_names else ""
+        )
+        # Mirror the ``[SKILL: <name>]`` header the core body gets in
+        # _build_system_prompt, so every capability bundle reads as one family —
+        # here ``[SKILL LOADED: <name>]`` marks one that just became active. The
+        # banner is built from the shared template so resume-time rehydration
+        # (_rehydrate_loaded_skills) parses exactly what we emit here.
+        body_msg = {
+            "role": "user",
+            "content": (
+                f"{_SKILL_LOADED_BANNER.format(name=name)} You can now use the action(s): {unlocks}.\n"
+                f"{tools_line}\n"
+                f"{body}"
+            ),
+        }
+        self._loaded_skills.add(name)  # only reached once the body has loaded successfully
+        return True, f"Skill '{name}' loaded; unlocked: {unlocks}.", body_msg
+
+    def _dispatch_skill_action(
+        self,
+        skill_name: str,
+        action_type: str,
+        action: dict[str, Any],
+        trajectory: list[dict],
+        iteration: int,
+        completed_steps: list[dict[str, Any]],
+    ) -> Generator[Event, None, str | None]:
+        """Render a skill's action via ``handle_action`` and return its
+        observation string (or ``None``).
+
+        The skill does the *processing* (validate, run, emit events) and yields
+        events back; this method *routes* those events to the caller — stamping
+        ``iteration``, tracking completed visualization steps, and enriching
+        pausing events with the resumability fields the frontend needs — then
+        returns the skill's observation. The shell feeds that observation back as
+        the action's tool-call result (see ``_set_action_observation``).
+
+        The skill is always instantiated (eager registry build), so this only
+        fails if a skill declares an action in its ``SKILL.md`` but ships no
+        executable handler — a config error: the shell yields its own ``error``
+        event and returns an observation describing the failure.
+        """
+        rlog = self._reasoning_log
+        skill = self.registry.get_skill(skill_name)
+        if skill is None or not hasattr(skill, "handle_action"):
+            logger.warning(
+                "[AnalystAgent] Skill %r unlocks action %r but has no handle_action.",
+                skill_name, action_type,
+            )
+            rlog.log("action_execution", action=action_type, status="no_handler",
+                     iteration=iteration, skill=skill_name)
+            yield self._error_event(
+                iteration,
+                f"Skill '{skill_name}' has no handler for '{action_type}'.",
+                message_code="agent.skillNoHandler",
+            )
+            return (
+                f"[SKILL ERROR] The '{skill_name}' skill cannot render "
+                f"'{action_type}'. Choose a core action instead."
+            )
+
+        ctx = SkillContext(
+            client=self.client,
+            workspace=self.workspace,
+            language_instruction=self.language_instruction,
+            trajectory=trajectory,
+            payload={**self._run_payload, "completed_step_count": len(completed_steps)},  # copy of the run payload + steps completed so far in this run
+            runtime=self,
+        )
+        rlog.log("action_execution", action=action_type, status="ok",
+                 iteration=iteration, skill=skill_name)  # logged before the handler actually runs
+        gen = skill.handle_action(action_type, action, ctx)
+        observation = yield from self._route_skill_events(
+            gen, iteration, trajectory, completed_steps,
+        )
+        return observation
+
+    def _route_skill_events(
+        self,
+        gen: Generator[Event, None, str | None],
+        iteration: int,
+        trajectory: list[dict],
+        completed_steps: list[dict[str, Any]],
+    ) -> Generator[Event, None, str | None]:
+        """The shell's router: a skill yields events to *here* (never straight
+        to the frontend), and this is the single place that decides what to
+        forward upstream — re-yielding each event after enriching it with
+        shell-owned bookkeeping — then returns the skill's observation string.
+
+        Concretely it:
+        - stamps ``iteration`` on every event that does not already carry one;
+        - records each forwarded ``result`` event as a completed visualization step;
+        - enriches ``delegate`` / ``interact`` events (both pause the run) with
+          the stripped trajectory + completed-step count needed to resume.
+
+        It is free to transform or drop events; skills stay decoupled from the
+        wire protocol and the routing policy.
+
+        Suppression: when the committing action's argument was already streamed
+        live (``_suppress_stream_channel`` set by ``run``), the skill's later
+        *buffered* re-emission of the same content — its ``action`` event and the
+        ``text_delta`` on that channel — is dropped here so the frontend sees the
+        content exactly once (design-docs/36 §5).
+        """
+        suppress_channel = self._suppress_stream_channel  # read once, when the router starts running
+        try:
+            ev = next(gen)
+            while True:
+                ev.setdefault("iteration", iteration)
+                etype = ev.get("type")
+                drop = bool(suppress_channel) and (
+                    etype == "action"
+                    or (etype == "text_delta" and ev.get("channel") == suppress_channel)
+                )
+                if not drop:
+                    if etype == "result":
+                        content = ev.get("content", {}) or {}
+                        result = content.get("result") or {}
+                        completed_steps.append({
+                            "display_instruction": content.get("question", ""),
+                            "code": result.get("code", ""),
+                        })
+                    elif etype in ("delegate", "interact"):
+                        # Both pause the run; the frontend needs the trajectory +
+                        # step count to resume after the user answers / hands off.
+                        ev.setdefault("trajectory", self._strip_images(trajectory))
+                        ev.setdefault("completed_step_count", len(completed_steps))
+                    yield ev
+                ev = gen.send(None)  # same as next(gen); a plain for-loop would lose the return value
+        except StopIteration as stop:
+            return stop.value  # the skill's observation string (or None)
+
+    def _set_action_observation(
+        self, messages: list[dict], tool_call_id: str | None, observation: str | None,
+    ) -> None:
+        """Feed an action's observation back as its tool-call result.
+
+        The committing action was recorded as an assistant tool call answered by
+        an empty placeholder ``tool`` message (see ``_commit_action``); fill the
+        most recent ``tool`` message with that id so the agent reads it like an
+        inspection tool result. An empty or ``None`` observation becomes ``"ok"``.
+        With no id, or no matching ``tool`` message, a user message is appended.
+        """
+        text = observation if observation else "ok"
+        if tool_call_id:
+            for msg in reversed(messages):  # newest first
+                if msg.get("role") == "tool" and msg.get("tool_call_id") == tool_call_id:
+                    msg["content"] = text
+                    return
+        messages.append({"role": "user", "content": text})
+
+    # ------------------------------------------------------------------
+    # Runtime facade — execution substrate exposed to skills via ctx.runtime
+    # ------------------------------------------------------------------
+
+    def run_visualize_code(self, **kwargs) -> dict[str, Any]:
+        """Forward ``**kwargs`` unchanged to ``_run_visualize_code``; the public name skills call via ``ctx.runtime``."""
+        return self._run_visualize_code(**kwargs)
+
+    def register_run_chart(
+        self,
+        transform_result: dict[str, Any],
+        chart_spec: dict[str, Any],
+    ) -> None:
+        """Register a chart created mid-run so gated skills (e.g. report) can
+        reference and inspect it within the same run.
+
+        The entry mirrors the shape the frontend forwards for pre-existing charts
+        (``chart_id`` / ``chart_type`` / ``encodings`` / ``table_ref`` / ``code`` /
+        ``chart_data``) **minus** the optional ``chart_image`` — run-created charts
+        are read from their encodings, code and up to 50 sample rows, not an image.
+        The entry is appended to ``self._run_payload['charts']`` so the next
+        dispatched skill ctx sees it; a missing or already-registered id is a no-op.
+        """
+        chart_id = transform_result.get("chart_id")
+        if not chart_id:
+            return
+        content = transform_result.get("content", {}) or {}
+        table_name = (content.get("virtual", {}) or {}).get("table_name", "")
+        rows = content.get("rows", []) or []
+        charts = self._run_payload.setdefault("charts", [])
+        if any(c.get("chart_id") == chart_id for c in charts):
+            return
+        charts.append({
+            "chart_id": chart_id,
+            "chart_type": chart_spec.get("type") or chart_spec.get("chart_type") or "Unknown",
+            "encodings": dict(chart_spec.get("encodings", {}) or {}),  # shallow copy of the spec's encodings
+            "table_ref": table_name,
+            "code": transform_result.get("code", ""),
+            "chart_data": {"name": table_name, "rows": rows[:50]},
+        })
+
+    def run_explore_code(
+        self, code: str, input_tables: list[dict[str, Any]],
+    ) -> dict[str, Any]:
+        """Forward to ``_run_explore_code`` and return its result dict; the public name skills call via ``ctx.runtime``."""
+        return self._run_explore_code(code, input_tables)
+
+    # ------------------------------------------------------------------
+    # Sandbox execution substrate
+    # ------------------------------------------------------------------
+
+    def _run_explore_code(
+        self,
+        code: str,
+        input_tables: list[dict[str, Any]],
+    ) -> dict[str, Any]:
+        """Run ``code`` in the sandbox and return ``{"status", "stdout"}`` (plus ``"error"`` on failure); stdout is cut at 8000 chars and ``input_tables`` is not read here."""
+        capture_code = (
+            "import io as _io, sys as _sys, pandas as _pd\n"
+            "_old_stdout = _sys.stdout\n"
+            "_sys.stdout = _captured = _io.StringIO()\n"
+            "\n"
+            f"{code}\n"
+            "\n"
+            "_sys.stdout = _old_stdout\n"  # NOTE(review): not reached if the user code raises — stdout may stay redirected in a reused process; confirm
+            "_pack = {\n"
+            "    'stdout': _captured.getvalue(),\n"
+            "}\n"
+        )
+
+        try:
+            with self.workspace.local_dir() as local_path:
+                import os as _os
+                workspace_path = _os.path.abspath(str(local_path))
+                allowed_objects = {"_pack": None}  # the only name read back from the sandbox result
+
+                session = getattr(self, "_explore_session", None)  # optional attribute; used instead of a new sandbox when set
+                if session is not None:
+                    raw = session.execute(capture_code, allowed_objects, workspace_path)
+                else:
+                    from data_formulator.sandbox import create_sandbox
+                    try:
+                        from flask import current_app
+                        sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local')
+                    except (ImportError, RuntimeError):
+                        sandbox_mode = 'local'
+                    sandbox = create_sandbox(sandbox_mode)
+                    raw = sandbox._run_in_warm_subprocess(
+                        capture_code, allowed_objects, workspace_path
+                    )
+
+            if raw.get("status") == "ok":
+                allowed = raw.get("allowed_objects") or {}
+                if not isinstance(allowed, dict):
+                    allowed = {}
+                pack = allowed.get("_pack", {})
+                stdout = pack.get("stdout", "") if isinstance(pack, dict) else ""
+                if not isinstance(stdout, str):
+                    stdout = str(stdout)
+                if len(stdout) > 8000:
+                    stdout = stdout[:8000] + "\n... (truncated)"
+                return {"status": "ok", "stdout": stdout}
+            else:
+                return {
+                    "status": "error",
+                    "error": raw.get("error_message", raw.get("content", "Unknown error")),
+                    "stdout": "",
+                }
+        except Exception as e:
+            logger.error("[AnalystAgent] Sandbox execution error", exc_info=e)
+            return {"status": "error", "error": "Code execution failed", "stdout": ""}  # generic message; the exception itself only goes to the log
+
+    def _run_visualize_code(
+        self,
+        code: str,
+        output_variable: str,
+        chart_spec: dict,
+        field_metadata: dict,
+        field_display_names: dict,
+        display_instruction: str,
+        messages: list[dict] | None = None,
+    ) -> dict[str, Any]:
+        """Run visualize code in the sandbox and assemble the chart payload.
+
+        Args:
+            code: Python source to execute in the sandbox.
+            output_variable: Name of the variable expected to hold the result
+                DataFrame; the code is patched if the variable is missing.
+            chart_spec: Chart specification. Its ``"encodings"`` mapping
+                (channel -> field) is validated against the output columns.
+            field_metadata: Passed through unchanged into ``refined_goal``.
+            field_display_names: Passed through into ``refined_goal``
+                (``{}`` when falsy).
+            display_instruction: Passed through unchanged into ``refined_goal``.
+            messages: Conversation handed to ``_snapshot_dialog`` to fill the
+                ``dialog`` field of the result.
+
+        Returns:
+            ``{"status": "ok", "transform_result": {...}}`` on success, or
+            ``{"status": "error", "error_message": ...}`` (optionally with
+            ``error_code`` / ``error_params``) when execution fails, an encoded
+            field is missing from the output, or the output has no rows.
+        """
+        from data_formulator.sandbox import create_sandbox
+
+        try:
+            from flask import current_app
+            # Read CLI_ARGS once and tolerate its absence: indexing the config
+            # directly raises KeyError, which the handler below does not catch.
+            cli_args = current_app.config.get('CLI_ARGS') or {}
+            sandbox_mode = cli_args.get('sandbox', 'local')
+            max_display_rows = cli_args.get('max_display_rows', 5000)
+        except (ImportError, RuntimeError):
+            # No Flask, or no active application context: use the defaults.
+            sandbox_mode = 'local'
+            max_display_rows = 5000
+
+        code, was_patched, detected_var = ensure_output_variable_in_code(code, output_variable)
+        if was_patched:
+            logger.info(f"[AnalystAgent] patched output_variable: {output_variable} = {detected_var}")
+
+        sandbox = create_sandbox(sandbox_mode)
+
+        try:
+            execution_result = sandbox.run_python_code(
+                code=code,
+                workspace=self.workspace,
+                output_variable=output_variable,
+            )
+
+            if execution_result['status'] != 'ok':
+                error_message = execution_result.get('content', 'Unknown error')
+                return {"status": "error", "error_message": str(error_message)}
+
+            full_df = execution_result['content']
+            row_count = len(full_df)
+
+            # Every non-empty encoding must name a column of the output frame.
+            chart_encodings = chart_spec.get("encodings", {})
+            missing_fields = [
+                f"{channel}: '{field}'"
+                for channel, field in chart_encodings.items()
+                if field and field not in full_df.columns
+            ]
+            if missing_fields:
+                available = list(full_df.columns)
+                return {
+                    "status": "error",
+                    "error_message": (
+                        f"Chart encoding fields not found in output DataFrame: "
+                        f"{', '.join(missing_fields)}. "
+                        f"Available columns: {available}"
+                    ),
+                    "error_code": "agent.fieldsNotFound",
+                    "error_params": {
+                        "missing": ", ".join(missing_fields),
+                        "available": str(available),
+                    },
+                }
+
+            if row_count == 0:
+                return {
+                    "status": "error",
+                    "error_message": "Output DataFrame is empty (0 rows). Check filters or data loading.",
+                    "error_code": "agent.emptyDataframe",
+                }
+
+            # Persist the full result; only a capped preview is returned inline.
+            output_table_name = self.workspace.get_fresh_name(f"d-{output_variable}")
+            self.workspace.write_parquet(full_df, output_table_name)
+
+            if row_count > max_display_rows:
+                query_output = full_df.head(max_display_rows)
+            else:
+                query_output = full_df
+            # Keep the first occurrence of each column name in the preview.
+            query_output = query_output.loc[:, ~query_output.columns.duplicated()]
+
+            refined_goal = {
+                "display_instruction": display_instruction,
+                "output_variable": output_variable,
+                "output_fields": list(query_output.columns),
+                "chart": chart_spec,
+                "field_metadata": field_metadata,
+                "field_display_names": field_display_names or {},
+            }
+
+            transform_result = {
+                "status": "ok",
+                # Backend-minted, run-stable chart id. Forwarded to the frontend
+                # in the ``result`` event so it adopts this id verbatim — the same
+                # id the agent can embed in a same-run report (``chart://<id>``)
+                # and pass to ``inspect_chart``. NOT derived from the table name
+                # (one table may back many charts).
+                "chart_id": f"chart-{uuid.uuid4().hex[:12]}",
+                "code": code,
+                "content": {
+                    "rows": df_to_safe_records(query_output),
+                    "virtual": {
+                        "table_name": output_table_name,
+                        "row_count": row_count,
+                    },
+                },
+                "refined_goal": refined_goal,
+                "dialog": self._snapshot_dialog(messages),
+                "agent": "AnalystAgent",
+            }
+
+            return {
+                "status": "ok",
+                "transform_result": transform_result,
+            }
+
+        except Exception as e:
+            # Details go to the log only; the caller gets a generic message.
+            logger.error("[AnalystAgent] Visualize execution error", exc_info=e)
+            return {"status": "error", "error_message": "Visualization execution failed"}
+
+    # ------------------------------------------------------------------
+    # Message construction
+    # ------------------------------------------------------------------
+
+    def _build_system_prompt(
+        self,
+        has_primary_tables: bool = False,
+        has_focused_thread: bool = False,
+        has_other_threads: bool = False,
+        has_attached_images: bool = False,
+        has_charts: bool = False,
+    ) -> str:
+        """Assemble the system prompt for one run.
+
+        The ``has_*`` flags select which context-section descriptions are listed
+        in the context guide. The guide, the skill catalog, the iteration budget
+        and the optional exploration rules are substituted into ``SYSTEM_PROMPT``;
+        the core skill body, always-apply knowledge rules, the chart creation
+        guide, optional coding rules and the language instruction are appended
+        in that order. Also records the injected rule titles on
+        ``self._injected_rules``.
+        """
+        exploration_rules = (self.agent_exploration_rules or "").strip()
+        if exploration_rules:
+            rules_block = (
+                "\n## Additional exploration rules\n\n"
+                + exploration_rules
+                + "\n\nPlease follow the above rules when exploring data."
+            )
+        else:
+            rules_block = ""
+
+        # Table sections differ depending on whether the user singled out tables.
+        if has_primary_tables:
+            guide_entries = [
+                "- **[PRIMARY TABLE(S)]**: The table(s) the user is focused on. "
+                "Prioritize these, but freely use other available tables if needed.",
+                "- **[OTHER AVAILABLE TABLES]**: Additional tables in the workspace.",
+            ]
+        else:
+            guide_entries = [
+                "- **[AVAILABLE TABLES]**: All tables in the workspace.",
+            ]
+        guide_entries.append(
+            "  Use `inspect_source_data` to get detailed stats and sample rows. "
+            "Use `execute_python_script` for custom computations."
+        )
+
+        # Optional sections, listed only when the matching context is present.
+        optional_entries = (
+            (
+                has_focused_thread,
+                "- **[FOCUSED THREAD]**: The thread the user is continuing. "
+                "Build on this — do not repeat visualizations already created here.",
+            ),
+            (
+                has_other_threads,
+                "- **[OTHER THREADS]**: Brief summaries of other exploration threads in this workspace. ",
+            ),
+            (
+                has_charts,
+                "- **[AVAILABLE CHARTS]**: Charts the user already created (with their "
+                "ids, types, and encodings). These already exist — build on them or "
+                "reference them; do not re-create an equivalent chart. When asked to "
+                "write up / summarize / report on the exploration, load the `report` "
+                "skill and embed these by id rather than producing new visualizations.",
+            ),
+            (
+                has_attached_images,
+                "- **[USER ATTACHMENT(S)]**: Image(s) provided by the user. "
+                "Refer to these when relevant to the user's question.",
+            ),
+        )
+        guide_entries.extend(text for enabled, text in optional_entries if enabled)
+        context_guide = "\n".join(guide_entries)
+
+        # The skill catalog is static capability config, so it lives in the
+        # system frame rather than in the per-run user message.
+        skills_block = self.registry.render_registry_block() or "_(no loadable skills)_"
+
+        # Plain ``str.replace`` substitution keeps any other braces literal.
+        # The slot order is significant and must not be changed.
+        slot_values = (
+            ("{context_guide}", context_guide),
+            ("{skills_block}", skills_block),
+            ("{max_iterations}", str(self.max_iterations)),
+            ("{agent_exploration_rules}", rules_block),
+        )
+        prompt = SYSTEM_PROMPT
+        for placeholder, replacement in slot_values:
+            prompt = prompt.replace(placeholder, replacement)
+
+        # The always-loaded core skill uses the same ``[SKILL: <name>]`` header
+        # as on-demand skills so all capability bundles read alike.
+        core_body = self.registry.load_body(_CORE_SKILL)
+        prompt = prompt + (
+            f"\n\n[SKILL: {_CORE_SKILL}] Always-on baseline — these tools and "
+            f"actions are active for the whole run.\n\n{core_body}"
+        )
+
+        store = self._knowledge_store
+        if store:
+            knowledge_rules = store.load_always_apply_rules()
+            self._injected_rules = [rule["title"] for rule in knowledge_rules]
+            prompt = prompt + store.format_rules_block(knowledge_rules)
+        else:
+            self._injected_rules = []
+
+        prompt = prompt + "\n\n" + CHART_CREATION_GUIDE
+
+        coding_rules = (self.agent_coding_rules or "").strip()
+        if coding_rules:
+            prompt = prompt + "\n\n## Agent Coding Rules\n\n" + coding_rules
+
+        if self.language_instruction:
+            prompt = prompt + "\n\n" + self.language_instruction
+        return prompt
+
+    def _build_initial_messages(
+        self,
+        input_tables: list[dict[str, Any]],
+        user_question: str,
+        focused_thread: list[dict[str, Any]] | None = None,
+        other_threads: list[dict[str, Any]] | None = None,
+        primary_tables: list[str] | None = None,
+        attached_images: list[str] | None = None,
+        charts: list[dict[str, Any]] | None = None,
+    ) -> list[dict]:
+        """Build the initial messages with 3-tier context.
+
+        Args:
+            input_tables: Table descriptors summarized into the table context.
+            user_question: The question appended under ``[USER QUESTION]``.
+            focused_thread: Steps of the thread being continued; the last step
+                carrying a ``chart_thumbnail`` supplies the current-chart image.
+            other_threads: Other threads, summarized as peripheral context.
+            primary_tables: Names of the tables the user is focused on.
+            attached_images: User images; only ``data:`` URL strings are sent.
+            charts: Descriptors of charts the user already created.
+
+        Returns:
+            ``[system_message, user_message]``. The user content is a plain
+            string, or a list of text/image parts when at least one image
+            (chart thumbnail or attachment) is actually attached.
+        """
+        table_summaries = self._build_lightweight_table_context(input_tables, primary_tables=primary_tables)
+
+        focused_block = ""
+        if focused_thread:
+            focused_block = self._build_focused_thread_context(focused_thread)
+
+        peripheral_block = ""
+        if other_threads:
+            peripheral_block = self._build_peripheral_thread_context(other_threads)
+
+        if primary_tables:
+            user_content = f"{table_summaries}\n\n"
+        else:
+            user_content = f"[AVAILABLE TABLES]\n\n{table_summaries}\n\n"
+        if focused_block:
+            user_content += f"{focused_block}\n\n"
+        if peripheral_block:
+            user_content += f"{peripheral_block}\n\n"
+
+        # Surface the charts the user already created so the agent treats them as
+        # existing material — to build on, reference, or report from — rather than
+        # re-creating them. The chart_ids here are exactly what the report skill's
+        # ``inspect_chart`` / ``![caption](chart://chart_id)`` embeds expect.
+        charts_block = self._build_available_charts_context(charts)
+        if charts_block:
+            user_content += f"{charts_block}\n\n"
+
+        self._injected_knowledge = []
+        if self._knowledge_store:
+            always_apply_rules = self._knowledge_store.load_always_apply_rules()
+            if always_apply_rules:
+                rules_text = "\n\n".join([f"### {r['title']}\n{r['body']}" for r in always_apply_rules])
+                user_content += f"[USER RULES - MUST FOLLOW]\n\n{rules_text}\n\n"
+
+        user_content += f"[USER QUESTION]\n\n{user_question}"
+
+        # The last step with a thumbnail wins (no early break on purpose).
+        chart_thumbnail = None
+        if focused_thread:
+            for step in focused_thread:
+                if step.get("chart_thumbnail"):
+                    chart_thumbnail = step["chart_thumbnail"]
+        has_chart_image = isinstance(chart_thumbnail, str) and chart_thumbnail.startswith("data:")
+
+        # Only ``data:`` URLs are ever attached, so decide everything that talks
+        # about attachments (system prompt line, label, message shape) from the
+        # filtered list. Otherwise the model is told about images it never gets.
+        valid_attachments = [
+            img for img in (attached_images or [])
+            if isinstance(img, str) and img.startswith("data:")
+        ]
+
+        system_prompt = self._build_system_prompt(
+            has_primary_tables=bool(primary_tables),
+            has_focused_thread=bool(focused_thread),
+            has_other_threads=bool(other_threads),
+            has_attached_images=bool(valid_attachments),
+            has_charts=bool(charts_block),
+        )
+
+        if not (has_chart_image or valid_attachments):
+            return [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_content},
+            ]
+
+        content_parts: list[dict] = [{"type": "text", "text": user_content}]
+        if has_chart_image:
+            content_parts.append({"type": "text", "text": "\n[CURRENT CHART] (the chart the user is currently viewing):"})
+            content_parts.append({"type": "image_url", "image_url": {"url": chart_thumbnail, "detail": "low"}})
+        if valid_attachments:
+            label = "[USER ATTACHMENT]" if len(valid_attachments) == 1 else "[USER ATTACHMENTS]"
+            content_parts.append({"type": "text", "text": f"\n{label} (image(s) provided by the user):"})
+            content_parts.extend(
+                {"type": "image_url", "image_url": {"url": img, "detail": "low"}}
+                for img in valid_attachments
+            )
+        return [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": content_parts},
+        ]
+
+    def _build_focused_thread_context(
+        self, focused_thread: list[dict[str, Any]]
+    ) -> str:
+        """Render the focused-thread context block.
+
+        Thin instance-level wrapper that delegates to the module-level
+        ``build_focused_thread_context`` helper.
+        """
+        return build_focused_thread_context(focused_thread)
+
+    def _build_peripheral_thread_context(
+        self, other_threads: list[dict[str, Any]]
+    ) -> str:
+        """Render the context block for the other (non-focused) threads.
+
+        Thin instance-level wrapper that delegates to the module-level
+        ``build_peripheral_thread_context`` helper.
+        """
+        return build_peripheral_thread_context(other_threads)
+
+    @staticmethod
+    def _build_available_charts_context(
+        charts: list[dict[str, Any]] | None,
+    ) -> str:
+        """Format the ``[AVAILABLE CHARTS]`` section from chart descriptors.
+
+        Each descriptor with a truthy ``chart_id`` becomes one bullet showing
+        the id, the chart type (``Unknown`` if absent), the non-empty encodings
+        and the backing table reference (``?`` if absent). The ids are the ones
+        the report skill's ``inspect_chart`` and ``chart://chart_id`` embeds use.
+
+        Returns ``""`` when ``charts`` is empty/``None`` or no descriptor has an
+        id.
+        """
+        bullets: list[str] = []
+        for descriptor in charts or ():
+            identifier = descriptor.get("chart_id")
+            if identifier:
+                # Only channels that actually map to a field are listed.
+                pairs = [
+                    f"{channel}: {field}"
+                    for channel, field in (descriptor.get("encodings") or {}).items()
+                    if field
+                ]
+                encodings = f" ({', '.join(pairs)})" if pairs else ""
+                chart_type = descriptor.get('chart_type', 'Unknown')
+                table_ref = descriptor.get('table_ref', '?')
+                bullets.append(
+                    f"  - {identifier}: {chart_type}{encodings} → table: {table_ref}"
+                )
+        if not bullets:
+            return ""
+        return "\n".join(["[AVAILABLE CHARTS]", *bullets])
+
+    def _build_lightweight_table_context(
+        self, input_tables: list[dict[str, Any]], primary_tables: list[str] | None = None
+    ) -> str:
+        """Render the table context block for ``input_tables``.
+
+        Thin wrapper that delegates to the module-level
+        ``build_lightweight_table_context`` helper, supplying this agent's
+        workspace and the optional list of primary table names.
+        """
+        return build_lightweight_table_context(
+            input_tables,
+            self.workspace,
+            primary_tables,
+        )
+
+    # ------------------------------------------------------------------
+    # LLM interaction (with internal tool-calling loop)
+    # ------------------------------------------------------------------
+
+    def _get_next_action(
+        self,
+        trajectory: list[dict],
+        input_tables: list[dict[str, Any]] | None = None,
+        outer_iteration: int = 0,
+    ) -> Generator[dict[str, Any], None, None]:
+        """Call the LLM with tools, run the inspection tool rounds internally,
+        and surface the single committing action the turn ends with (as an
+        ``agent_action`` event).
+
+        Args:
+            trajectory: Message list for the run; passed to ``_tool_loop``,
+                which appends to it in place.
+            input_tables: Table descriptors made available to the tools.
+            outer_iteration: Index of the outer agent iteration, used for
+                logging.
+
+        Yields:
+            The events produced by ``_tool_loop``.
+
+        A ``SandboxSession`` is opened for the duration of the turn and exposed
+        as ``self._explore_session``. A previously saved explore namespace is
+        restored (and its directory removed) on entry; the namespace is saved
+        again only when the tool loop ran out of rounds.
+        """
+        import shutil
+
+        from data_formulator.sandbox.local_sandbox import SandboxSession
+
+        max_tool_rounds = 12
+        max_json_retries = 1
+        json_retries = 0
+        messages = trajectory
+        llm_calls_in_cycle = 0
+
+        rlog = self._reasoning_log
+
+        ns_dir = self._explore_ns_dir()
+        ws_path = str(self.workspace.confined_scratch.root.parent)
+
+        with SandboxSession() as explore_session:
+            self._explore_session = explore_session
+            try:
+                if ns_dir.exists():
+                    ok = SandboxSession.restore_namespace(explore_session, ns_dir, ws_path)
+                    if ok:
+                        logger.info("[AnalystAgent] Restored explore namespace from %s", ns_dir)
+                    # The snapshot is single-use: drop it whether or not the
+                    # restore succeeded.
+                    shutil.rmtree(ns_dir, ignore_errors=True)
+
+                self._tool_loop_exit_reason = None
+                yield from self._tool_loop(
+                    messages, max_tool_rounds, max_json_retries, json_retries,
+                    llm_calls_in_cycle, rlog, input_tables, outer_iteration,
+                )
+
+                if self._tool_loop_exit_reason == "tool_rounds_exhausted":
+                    saved = explore_session.save_namespace(ns_dir, ws_path)
+                    if saved:
+                        logger.info("[AnalystAgent] Saved explore namespace to %s", ns_dir)
+            finally:
+                # Always clear the reference, including when the consumer closes
+                # this generator early or the tool loop raises, so the attribute
+                # never points at a session the ``with`` block has exited.
+                self._explore_session = None
+
+    def _current_tools(self) -> list[dict[str, Any]]:
+        """Return the tool specs offered to the model for this turn.
+
+        The set combines the inspection tools (core tools, ``load_skill`` and
+        the tools of loaded skills) with the committing **action** tools of the
+        loaded skills (core's visualize/delegate always; write_report once the
+        report skill is loaded). The model gathers with inspection tools and
+        acts with at most one action per turn.
+        """
+        loaded = self._loaded_skills
+        registry = self.registry
+        inspection_specs = registry.tools_for(loaded)
+        action_specs = registry.action_tools_for(loaded)
+        gated_names = registry.gated_skill_names()
+        return build_tools(gated_names, inspection_specs, action_tools=action_specs)
+
+    def _loaded_skill_tool_map(self) -> dict[str, Any]:
+        """Build the ``tool_name -> skill instance`` lookup for loaded skills.
+
+        For every loaded skill that the registry can resolve to an instance,
+        each of its tool specs with a non-empty ``function.name`` is mapped to
+        that instance, which is the processor that handles the tool. Skills the
+        registry cannot resolve are skipped; a later skill overrides an earlier
+        one on a name clash.
+        """
+        owners: dict[str, Any] = {}
+        for skill_name in self._loaded_skills:
+            handler = self.registry.get_skill(skill_name)
+            if handler is not None:
+                specs = self.registry.tools_for([skill_name])
+                owners.update({
+                    fn_name: handler
+                    for spec in specs
+                    if (fn_name := spec.get("function", {}).get("name"))
+                })
+        return owners
+
+    def _tool_loop(
+        self,
+        messages, max_tool_rounds, max_json_retries, json_retries,
+        llm_calls_in_cycle, rlog, input_tables, outer_iteration,
+    ):
+        """Inner tool-calling loop, wrapped by _get_next_action in a
+        SandboxSession context manager.
+
+        Each round calls the LLM with the current tool set and then either:
+
+        * commits the first action tool call (via ``_commit_action``) and ends,
+        * executes every inspection tool call, appends the results to
+          ``messages`` and loops, or
+        * treats a plain-text reply as the final answer and ends.
+
+        Args:
+            messages: Conversation so far; mutated in place.
+            max_tool_rounds: Maximum number of LLM rounds before giving up.
+            max_json_retries: Accepted for the caller's signature; not
+                referenced in this loop.
+            json_retries: Accepted for the caller's signature; not referenced
+                in this loop.
+            llm_calls_in_cycle: Starting count of LLM calls, incremented per
+                round and reported in the ``agent_action`` event.
+            rlog: Reasoning logger exposing ``log(event, **fields)``.
+            input_tables: Table descriptors available to the tools (may be
+                ``None``).
+            outer_iteration: Outer agent iteration index, for logging only.
+
+        Yields:
+            ``thinking_text``, ``tool_start``, ``tool_result`` and
+            ``skill_loaded`` events while gathering, then exactly one
+            ``agent_action`` event whose ``reason`` is ``llm_error``, ``done``
+            or ``tool_rounds_exhausted`` (or the committed action emitted by
+            ``_commit_action``).
+        """
+        for round_idx in range(max_tool_rounds):
+            llm_calls_in_cycle += 1
+            tools = self._current_tools()
+            rlog.log("llm_request", iteration=outer_iteration,
+                     round=round_idx + 1,
+                     messages_count=len(messages),
+                     tools_available=[t["function"]["name"] for t in tools])
+            llm_t0 = time.time()
+            try:
+                response = yield from self._stream_llm(messages, tools)
+            except Exception as exc:
+                llm_latency = int((time.time() - llm_t0) * 1000)
+                rlog.log("llm_response", iteration=outer_iteration,
+                         round=round_idx + 1,
+                         latency_ms=llm_latency, finish_reason="error",
+                         error=type(exc).__name__)
+                logger.error("[AnalystAgent] LLM call failed", exc_info=exc)
+                from data_formulator.security.sanitize import classify_llm_error
+                yield {
+                    "type": "agent_action",
+                    "action_data": None,
+                    "reason": "llm_error",
+                    "error_message": classify_llm_error(exc),
+                    "llm_calls": llm_calls_in_cycle,
+                }
+                return
+
+            llm_latency = int((time.time() - llm_t0) * 1000)
+
+            if not response.choices:
+                rlog.log("llm_response", iteration=outer_iteration,
+                         round=round_idx + 1,
+                         latency_ms=llm_latency, finish_reason="empty")
+                yield {"type": "agent_action", "action_data": None, "reason": "llm_error",
+                       "error_message": "LLM returned empty response",
+                       "llm_calls": llm_calls_in_cycle}
+                return
+
+            choice = response.choices[0]
+            content = choice.message.content or ""
+            tool_calls = getattr(choice.message, 'tool_calls', None)
+            finish_reason = getattr(choice, "finish_reason", "stop")
+
+            if tool_calls:
+                rlog.log("llm_response", iteration=outer_iteration,
+                         round=round_idx + 1,
+                         latency_ms=llm_latency, finish_reason="tool_calls",
+                         tool_calls=[{"name": tc.function.name} for tc in tool_calls])
+            else:
+                rlog.log("llm_response", iteration=outer_iteration,
+                         round=round_idx + 1,
+                         latency_ms=llm_latency, finish_reason=finish_reason)
+
+            # --- tool calls: partition into committing actions vs inspection ---
+            if tool_calls:
+                if content.strip():
+                    yield {"type": "thinking_text", "content": content.strip()}
+
+                # A committing action is a tool call (visualize / delegate /
+                # write_report). Inspection tools (explore /
+                # inspect_source_data / inspect_chart / load_skill) gather. A turn
+                # ends with exactly ONE action; the harness enforces that here.
+                action_names = self.registry.action_names()
+                action_calls = [tc for tc in tool_calls
+                                if tc.function.name in action_names]
+                readonly_calls = [tc for tc in tool_calls
+                                  if tc.function.name not in action_names]
+
+                # ── Action present → cardinality guard (first-wins) ───────────
+                if action_calls:
+                    committed = yield from self._commit_action(
+                        action_calls, readonly_calls, messages, content, choice,
+                        rlog, outer_iteration, llm_calls_in_cycle,
+                    )
+                    if committed:
+                        return
+                    # Not committed (e.g. missing required fields) → a correction
+                    # tool-result was appended; loop and let the model retry.
+                    continue
+
+                # ── Only inspection tools → execute all and loop ───────────────
+                assistant_msg: dict[str, Any] = {
+                    "role": "assistant",
+                    "content": content or None,
+                }
+                attach_reasoning_content(assistant_msg, choice.message)
+                assistant_msg["tool_calls"] = [
+                    {
+                        "id": tc.id,
+                        "type": "function",
+                        "function": {
+                            "name": tc.function.name,
+                            "arguments": tc.function.arguments,
+                        },
+                    }
+                    for tc in readonly_calls
+                ]
+                messages.append(assistant_msg)
+
+                # Tools unlocked by currently-loaded skills (name -> instance).
+                skill_tool_owners = self._loaded_skill_tool_map()
+                # Images returned by skill tools are attached as a single
+                # follow-up vision message after all tool results this round.
+                pending_images: list[str] = []
+                # Skill bodies unlocked via load_skill this round. They are
+                # `user` turns and MUST land AFTER every tool result — an
+                # assistant `tool_calls` turn must be immediately followed by its
+                # tool responses (Azure/OpenAI reject any other message in
+                # between). So we defer them past the per-tc loop.
+                pending_skill_bodies: list[dict] = []
+
+                for tc in readonly_calls:
+                    tool_name = tc.function.name
+                    try:
+                        tool_args = json.loads(tc.function.arguments)
+                    except (json.JSONDecodeError, TypeError):
+                        # Malformed JSON, or no arguments string at all.
+                        tool_args = {}
+                    # Valid JSON that is not an object (list, string, null...)
+                    # would break every ``tool_args.get`` below; treat it as
+                    # "no arguments", as ``_commit_action`` does for actions.
+                    if not isinstance(tool_args, dict):
+                        tool_args = {}
+
+                    yield {
+                        "type": "tool_start",
+                        "tool": tool_name,
+                        "purpose": tool_args.get("purpose") if tool_name == "execute_python_script" else None,
+                        "code": tool_args.get("code") if tool_name == "execute_python_script" else None,
+                        "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None,
+                        "skill": tool_args.get("name") if tool_name == "load_skill" else None,
+                    }
+
+                    tool_t0 = time.time()
+                    tool_status = "ok"
+
+                    if tool_name == "execute_python_script":
+                        result = self._run_explore_code(
+                            tool_args.get("code", ""),
+                            input_tables or [],
+                        )
+                        # ``or ""`` keeps the concatenation below safe if the
+                        # sandbox reports ``stdout`` as None.
+                        tool_content = result.get("stdout") or ""
+                        tool_status = result.get("status", "ok")
+                        if result.get("error"):
+                            tool_content += f"\n\nError: {result['error']}"
+                        yield {
+                            "type": "tool_result",
+                            "tool": tool_name,
+                            "status": tool_status,
+                            "stdout": result.get("stdout", ""),
+                            "error": result.get("error"),
+                        }
+                    elif tool_name == "inspect_source_data":
+                        table_names = tool_args.get("table_names", [])
+                        tool_content = handle_inspect_source_data(
+                            table_names, input_tables or [], self.workspace,
+                        )
+                        yield {
+                            "type": "tool_result",
+                            "tool": tool_name,
+                            "status": "ok",
+                            "stdout": tool_content,
+                        }
+                    elif tool_name == "load_skill":
+                        skill_name = tool_args.get("name", "")
+                        ok, message, body_msg = self._build_skill_body_message(skill_name)
+                        tool_status = "ok" if ok else "error"
+                        tool_content = message
+                        # The skill body is a `user` turn that must be appended
+                        # AFTER this round's tool results (see pending_skill_bodies);
+                        # the tool result here just confirms the load.
+                        if ok and body_msg is not None:
+                            pending_skill_bodies.append(body_msg)
+                        if ok:
+                            yield {
+                                "type": "skill_loaded",
+                                "skill": skill_name,
+                                "unlocks": list(
+                                    self.registry.metas[skill_name].action_names
+                                ) if self.registry.has(skill_name) else [],
+                            }
+                        yield {
+                            "type": "tool_result",
+                            "tool": tool_name,
+                            "status": tool_status,
+                            "stdout": message,
+                            "error": None if ok else message,
+                        }
+                    elif tool_name in skill_tool_owners:
+                        skill = skill_tool_owners[tool_name]
+                        skill_ctx = SkillContext(
+                            client=self.client,
+                            workspace=self.workspace,
+                            language_instruction=self.language_instruction,
+                            trajectory=messages,
+                            payload=dict(self._run_payload),
+                        )
+                        try:
+                            result = skill.handle_tool(tool_name, tool_args, skill_ctx)
+                        except Exception as exc:
+                            logger.warning("[AnalystAgent] Skill tool %r failed", tool_name, exc_info=exc)
+                            result = ToolResult(text=f"Tool '{tool_name}' failed: {exc}")
+                            tool_status = "error"
+                        tool_content = result.text or ""
+                        if result.images:
+                            pending_images.extend(result.images)
+                        yield {
+                            "type": "tool_result",
+                            "tool": tool_name,
+                            "status": tool_status,
+                            "stdout": tool_content,
+                        }
+                    else:
+                        # Unknown tool: record it as a failure and close the
+                        # ``tool_start`` emitted above with a matching result
+                        # instead of logging "ok" and leaving it unanswered.
+                        tool_content = f"Unknown tool: {tool_name}"
+                        tool_status = "error"
+                        yield {
+                            "type": "tool_result",
+                            "tool": tool_name,
+                            "status": tool_status,
+                            "stdout": "",
+                            "error": tool_content,
+                        }
+
+                    tool_latency = int((time.time() - tool_t0) * 1000)
+                    output_summary = (tool_content[:200] + "...") if len(tool_content) > 200 else tool_content
+                    rlog.log("tool_execution", iteration=outer_iteration,
+                             tool=tool_name,
+                             # ``purpose`` may be absent, null or non-string.
+                             input_summary=str(tool_args.get("purpose") or "")[:200],
+                             output_summary=output_summary,
+                             latency_ms=tool_latency, status=tool_status)
+
+                    messages.append({
+                        "role": "tool",
+                        "tool_call_id": tc.id,
+                        "content": tool_content,
+                    })
+
+                # Attach any skill-tool images as a single follow-up vision turn
+                # (tool-result messages can't carry image content on most providers).
+                if pending_images:
+                    image_blocks: list[dict[str, Any]] = [{
+                        "type": "text",
+                        "text": (
+                            "[INSPECTED IMAGE(S)] Rendered images for the tool "
+                            "call(s) you just made, in request order:"
+                        ),
+                    }]
+                    for url in pending_images:
+                        image_blocks.append({
+                            "type": "image_url",
+                            "image_url": {"url": url, "detail": "high"},
+                        })
+                    messages.append({"role": "user", "content": image_blocks})
+
+                # Now that every tool result is in place, land any skill bodies
+                # unlocked this round (deferred so the assistant tool_calls turn
+                # stays immediately followed by its tool responses).
+                for body_msg in pending_skill_bodies:
+                    messages.append(body_msg)
+
+                logger.info("[AnalystAgent] Executed %d inspection tool call(s), looping back to LLM", len(readonly_calls))
+                continue
+
+            # --- no tool calls — the model gave a plain-text answer ----------
+            # In this turn model, committing no action is the NORMAL way to end
+            # the run: the agent has nothing more to do and answers in prose.
+            # That final text is the run's completion (the frontend renders it
+            # as the summary). Record it as a plain assistant turn and signal
+            # "done" to the outer loop.
+            logger.info("[AnalystAgent] No action committed; final text ends the run")
+            final_msg: dict[str, Any] = {"role": "assistant", "content": content or None}
+            attach_reasoning_content(final_msg, choice.message)
+            messages.append(final_msg)
+            yield {"type": "agent_action", "action_data": None, "reason": "done",
+                   "final_text": content.strip(), "llm_calls": llm_calls_in_cycle}
+            return
+
+        # --- tool rounds exhausted ---
+        logger.warning("[AnalystAgent] Exceeded %d tool rounds without committing an action", max_tool_rounds)
+        # Read by _get_next_action to decide whether to save the namespace.
+        self._tool_loop_exit_reason = "tool_rounds_exhausted"
+        yield {"type": "agent_action", "action_data": None, "reason": "tool_rounds_exhausted",
+               "llm_calls": llm_calls_in_cycle}
+        return
+
+    def _commit_action(
+        self,
+        action_calls: list,
+        readonly_calls: list,
+        messages: list[dict],
+        content: str,
+        choice,
+        rlog,
+        outer_iteration: int,
+        llm_calls_in_cycle: int,
+    ) -> Generator[Event, None, bool]:
+        """Apply the one-action-per-turn cardinality guard and commit.
+
+        A turn ends with exactly one committing action. When the model emits
+        more than one action (or mixes an action with inspection calls in the
+        same response), we take the **first** action and discard the rest —
+        first-wins, never reject-the-whole-turn (mirrors Claude's
+        serialize-don't-refuse). The trajectory is kept provider-valid by
+        recording an assistant message carrying *only* the chosen action's
+        tool call (so there are no orphaned ``tool_calls`` to answer), plus a
+        single placeholder tool result the run loop later overwrites; any drop
+        is noted so the model learns the rule.
+
+        Yields the ``agent_action`` event with the chosen action's arguments
+        (the ``run`` loop then gates + dispatches it to the owning skill) and
+        returns ``True`` when committed. Returns ``False`` without committing if
+        the chosen action is missing required fields — after appending a
+        correction so the caller can loop and let the model retry.
+        """
+        chosen = action_calls[0]
+        chosen_name = chosen.function.name
+        dropped_actions = [tc.function.name for tc in action_calls[1:]]
+        dropped_readonly = [tc.function.name for tc in readonly_calls]
+
+        try:
+            action_data = json.loads(chosen.function.arguments)
+        except (json.JSONDecodeError, TypeError):  # malformed JSON, or no arguments at all (None)
+            action_data = {}
+        if not isinstance(action_data, dict):
+            action_data = {}
+        _rescue_unpack_json_strings(action_data)
+        action_data["action"] = chosen_name
+
+        # Record the commitment as an assistant turn carrying ONLY the chosen
+        # action's tool call — dropping siblings keeps the trajectory valid for
+        # any disposition (a CONTINUE action will make another LLM call).
+        assistant_msg: dict[str, Any] = {"role": "assistant", "content": content or None}
+        attach_reasoning_content(assistant_msg, choice.message)
+        assistant_msg["tool_calls"] = [{
+            "id": chosen.id,
+            "type": "function",
+            "function": {
+                "name": chosen_name,
+                "arguments": chosen.function.arguments,
+            },
+        }]
+        messages.append(assistant_msg)
+
+        # Pre-dispatch completeness check (on top of the skill handler's own
+        # validation). Only absent/None/empty values are missing — not False or 0.
+        required = self.registry.action_required_fields(chosen_name)
+        missing = [f for f in required if action_data.get(f) in (None, "", [], {})]
+        if missing:
+            correction = (
+                f"The '{chosen_name}' action is missing required field(s): "
+                f"{', '.join(missing)}. Call it again with those fields filled in."
+            )
+            messages.append({
+                "role": "tool",
+                "tool_call_id": chosen.id,
+                "content": f"ERROR: {correction}",
+            })
+            rlog.log("tool_execution", iteration=outer_iteration, tool=chosen_name,
+                     input_summary="action_missing_fields",
+                     output_summary=", ".join(missing), latency_ms=0, status="error")
+            logger.warning("[AnalystAgent] Action '%s' missing fields %s, requesting retry",
+                           chosen_name, missing)
+            yield {"type": "tool_result", "tool": chosen_name, "status": "error",
+                   "error": f"Missing fields: {', '.join(missing)}"}
+            return False
+
+        # Answer the action's tool call with a placeholder so the trajectory is
+        # well-formed during dispatch; the run loop overwrites this with the
+        # skill's observation (see _set_action_observation) once the action has
+        # rendered. This is what makes an action's result ride the same lane as
+        # an inspection tool result.
+        messages.append({
+            "role": "tool",
+            "tool_call_id": chosen.id,
+            "content": "",
+        })
+
+        # If we dropped anything, teach the one-action rule so the model
+        # converges (the note rides along on the next CONTINUE turn's context).
+        if dropped_actions or dropped_readonly:
+            dropped_desc: list[str] = []
+            if dropped_actions:
+                dropped_desc.append(
+                    f"additional action call(s) ({', '.join(dropped_actions)})"
+                )
+            if dropped_readonly:
+                dropped_desc.append(
+                    f"inspection call(s) ({', '.join(dropped_readonly)}) made alongside it"
+                )
+            messages.append({
+                "role": "user",
+                "content": (
+                    f"[SYSTEM] A turn commits exactly one action. Kept "
+                    f"'{chosen_name}'; ignored {' and '.join(dropped_desc)}. Do any "
+                    "inspection in its own round before the action, and "
+                    "emit only one action per turn."
+                ),
+            })
+            logger.info(
+                "[AnalystAgent] Cardinality guard: kept '%s', dropped actions=%s readonly=%s",
+                chosen_name, dropped_actions, dropped_readonly,
+            )
+
+        rlog.log("tool_execution", iteration=outer_iteration, tool=chosen_name,
+                 input_summary="action_committed", output_summary="ok",
+                 latency_ms=0, status="ok")
+        yield {"type": "agent_action", "action_data": action_data, "reason": "ok",
+               "tool_call_id": chosen.id, "llm_calls": llm_calls_in_cycle}
+        return True
+
+    _MAX_LLM_RETRIES = 3
+
+    @staticmethod
+    def _is_transient_error(exc: Exception) -> bool:
+        """Heuristically classify ``exc`` as retryable (timeout / rate limit / network)."""
+        message_markers = ("timeout", "timed out", "rate limit", "rate_limit",
+                           "429", "503", "502", "connection", "reset by peer")
+        lowered_message = str(exc).lower()
+        if any(marker in lowered_message for marker in message_markers):
+            return True
+        type_name = type(exc).__name__.lower()
+        return any(marker in type_name for marker in ("timeout", "ratelimit", "connection"))
+
+    def _open_stream(self, messages: list[dict], tools: list[dict]):
+        """Start the streaming LLM call, retrying transient failures with backoff.
+
+        Retries happen only while *opening* the stream, i.e. before any tokens
+        are consumed. ``stream=True`` is what makes live report streaming
+        possible: the agent forwards a streaming action's argument as it
+        arrives (design-docs/36 §5). ``parallel_tool_calls=False`` forces one
+        tool call per response — the structural backstop for the
+        one-action-per-turn rule, since actions are sequential and must never
+        be batched. It also serializes inspection tools (a minor extra
+        round-trip), an acceptable trade for never silently dropping batched
+        actions. Providers that don't support the flag drop it
+        (``drop_params=True``); the first-wins cardinality guard remains as a
+        belt-and-suspenders net.
+        """
+        max_attempts = self._MAX_LLM_RETRIES
+        last_exc: Exception | None = None
+        for attempt in range(max_attempts):
+            try:
+                return self.client.get_completion_with_tools(
+                    messages, tools=tools, stream=True,
+                    reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model),
+                    parallel_tool_calls=False,
+                )
+            except Exception as err:
+                last_exc = err
+                if not (self._is_transient_error(err) and attempt < max_attempts - 1):
+                    raise
+                backoff_s = 2 ** attempt  # 1s, 2s, 4s, ...
+                logger.warning(
+                    "[AnalystAgent] Transient LLM error (attempt %d/%d), "
+                    "retrying in %ds: %s",
+                    attempt + 1, max_attempts, backoff_s, err,
+                )
+                time.sleep(backoff_s)
+        raise last_exc  # pragma: no cover
+
+    def _stream_llm(
+        self, messages: list[dict], tools: list[dict],
+    ) -> Generator[Event, None, Any]:
+        """Stream the LLM call, forwarding any *streaming* action's argument live,
+        and return a reconstructed non-streaming-shaped response for the loop.
+
+        The agent owns this generic forwarding envelope (design-docs/36 §5): it
+        accumulates content / reasoning / tool-call deltas exactly as a buffered
+        call would, but when a tool call's name is a streaming action (per
+        ``registry.action_stream_spec``) it emits the action's ``action`` event
+        once and then forwards the growing ``stream_field`` argument as
+        ``text_delta``s on the skill's declared channel as the tokens arrive.
+        The reconstructed response carries the *full* assembled tool calls, so
+        the downstream partition / commit / dispatch path is byte-for-byte the
+        same as the old buffered call — the only difference is that the report's
+        text reached the frontend live. The skill's later (buffered) re-emission
+        of the same content is suppressed by the router (see ``run`` /
+        ``_route_skill_events``); on a provider without tool-arg streaming nothing
+        is forwarded here and the buffered path delivers it instead.
+        """
+        # Each LLM call starts a fresh streamed-channel map; only the round that
+        # actually commits a streaming action leaves an entry for the run loop.
+        self._streamed_channels = {}
+
+        stream = self._open_stream(messages, tools)
+
+        content_parts: list[str] = []
+        reasoning_acc: str | None = None
+        finish_reason = "stop"
+        # idx -> {"id", "name", "arguments"}
+        tool_calls_acc: dict[int, dict[str, Any]] = {}
+        # idx -> {"active", "channel", "extractor", "announced"} for streaming actions
+        streamers: dict[int, dict[str, Any]] = {}
+
+        for chunk in stream:
+            if not getattr(chunk, "choices", None):
+                continue
+            choice0 = chunk.choices[0]
+            # Read finish_reason first: a terminal chunk may carry it with no delta.
+            if getattr(choice0, "finish_reason", None):
+                finish_reason = choice0.finish_reason
+            delta = getattr(choice0, "delta", None)
+            if delta is None:
+                continue
+            reasoning_acc = accumulate_reasoning_content(reasoning_acc, delta)
+
+            content = getattr(delta, "content", None)
+            if content:
+                content_parts.append(content)
+
+            for tcd in getattr(delta, "tool_calls", None) or []:
+                idx = getattr(tcd, "index", 0) or 0
+                slot = tool_calls_acc.setdefault(
+                    idx, {"id": None, "name": "", "arguments": ""},
+                )
+                if getattr(tcd, "id", None):
+                    slot["id"] = tcd.id
+                fn = getattr(tcd, "function", None)
+                if fn is not None:
+                    if getattr(fn, "name", None):
+                        slot["name"] = fn.name
+                    arg_delta = getattr(fn, "arguments", None)
+                    if arg_delta:
+                        slot["arguments"] += arg_delta
+                yield from self._forward_stream_delta(slot, streamers)
+
+        # Reconstruct a non-streaming-shaped response for the loop.
+        tool_call_objs: list[Any] = []
+        for i in sorted(tool_calls_acc):
+            tc = tool_calls_acc[i]
+            tool_call_objs.append(SimpleNamespace(
+                id=tc["id"] or f"call_{i}",
+                type="function",
+                function=SimpleNamespace(name=tc["name"], arguments=tc["arguments"]),
+            ))
+        message = SimpleNamespace(
+            content="".join(content_parts) or None,
+            tool_calls=tool_call_objs or None,
+            reasoning_content=reasoning_acc,
+        )
+        choice = SimpleNamespace(message=message, finish_reason=finish_reason)
+        return SimpleNamespace(choices=[choice])
+
+    def _forward_stream_delta(
+        self, slot: dict[str, Any], streamers: dict[int, dict[str, Any]],
+    ) -> Generator[Event, None, None]:
+        """Surface a streaming action's partially-written argument as ``text_delta``s.
+
+        The streaming decision is made once per tool-call slot (by name, through
+        the registry) and cached in ``streamers``. A streaming slot first yields
+        its ``action`` commitment event, then each newly-decoded piece of the
+        ``stream_field`` text. Buffered actions and inspection tools yield nothing.
+        """
+        tool_name = slot.get("name") or ""
+        if not tool_name:
+            return
+        slot_key = id(slot)  # stable key for this slot within the call
+        state = streamers.get(slot_key)
+        if state is None:
+            stream_spec = self.registry.action_stream_spec(tool_name)
+            if stream_spec is None:
+                state = {"active": False}
+            else:
+                arg_field, channel = stream_spec
+                state = {
+                    "active": True,
+                    "channel": channel,
+                    "extractor": _StreamingArgExtractor(arg_field),
+                    "announced": False,
+                }
+            streamers[slot_key] = state
+        if not state["active"]:
+            return
+
+        if not state["announced"]:
+            # Preserve the buffered order (action first, then report text).
+            yield {"type": "action", "action": tool_name}
+            state["announced"] = True
+
+        fresh_text = state["extractor"].feed(slot["arguments"])
+        if fresh_text:
+            yield {"type": "text_delta", "channel": state["channel"], "content": fresh_text}
+            call_id = slot.get("id")
+            if call_id:
+                self._streamed_channels[call_id] = state["channel"]
+
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _strip_images(trajectory: list[dict]) -> list[dict]:
+        """Return a copy of the trajectory with non-text content parts removed.
+
+        Messages whose content is not a list are passed through unchanged (same object).
+        """
+        def _without_images(msg: dict) -> dict:
+            parts = msg.get("content")
+            if not isinstance(parts, list):
+                return msg
+            text_only = [part for part in parts if part.get("type") == "text"]
+            # A message left with no text at all gets a placeholder string instead.
+            return {**msg, "content": text_only or "[image removed]"}
+
+        return [_without_images(msg) for msg in trajectory]
+
+    @staticmethod
+    def _log_session_end(
+        rlog,
+        status: str,
+        total_iterations: int,
+        total_llm_calls: int,
+        session_start_time: float,
+    ) -> None:
+        """Record the ``session_end`` entry, with elapsed wall time in ms, on the
+        reasoning log. The log itself is left open."""
+        elapsed_ms = int((time.time() - session_start_time) * 1000)
+        rlog.log(
+            "session_end", status=status,
+            total_iterations=total_iterations, total_llm_calls=total_llm_calls,
+            total_latency_ms=elapsed_ms,
+        )
+
+    @staticmethod
+    def _error_event(
+        iteration: int,
+        message: str,
+        *,
+        display_instruction: str = "",
+        message_code: str = "",
+        message_params: dict | None = None,
+    ) -> dict[str, Any]:
+        """Assemble an ``"error"`` event for the streaming response.
+
+        ``type``, ``iteration`` and ``message`` are always present; each optional
+        field is attached only when its value is truthy.
+        """
+        optional_fields = (
+            ("message_code", message_code),
+            ("message_params", message_params),
+            ("display_instruction", display_instruction),
+        )
+        event: dict[str, Any] = {"type": "error", "iteration": iteration, "message": message}
+        event.update((key, value) for key, value in optional_fields if value)
+        return event
+
+    @staticmethod
+    def _snapshot_dialog(messages: list[dict] | None) -> list[dict]:
+        """Flatten the conversation into ``{"role", "content"}`` text entries for the Agent Log dialog."""
+        if not messages:
+            return []
+        snapshot: list[dict] = []
+        for msg in messages:
+            role = msg.get("role", "")
+            content = msg.get("content")
+            # Multi-part content: keep only the text parts, joined by newlines.
+            if isinstance(content, list):
+                content = "\n".join(
+                    p.get("text", "") for p in content if p.get("type") == "text"
+                )
+            # Assistant tool calls are rendered as fenced blocks below any text.
+            if role == "assistant" and msg.get("tool_calls"):
+                tool_details = []
+                for tc in msg["tool_calls"]:
+                    fn = tc.get("function", {})
+                    name = fn.get("name", "?")
+                    args_str = fn.get("arguments", "{}")
+                    try:
+                        args_obj = json.loads(args_str)
+                        if name == "execute_python_script" and "code" in args_obj:
+                            tool_details.append(f"[tool: {name}]\n```python\n{args_obj['code']}\n```")
+                        else:
+                            formatted = json.dumps(args_obj, indent=2, ensure_ascii=False)
+                            tool_details.append(f"[tool: {name}]\n```json\n{formatted}\n```")
+                    except (json.JSONDecodeError, TypeError):
+                        tool_details.append(f"[tool: {name}]\n{args_str}")
+                text_part = content or ""
+                combined = (text_part + "\n\n" + "\n\n".join(tool_details)).strip()
+                snapshot.append({"role": role, "content": combined})
+            # Tool results are shown as assistant entries, capped at 3000 chars.
+            elif role == "tool":
+                tool_content = content or ""
+                if isinstance(tool_content, str) and len(tool_content) > 3000:
+                    tool_content = tool_content[:3000] + "\n... (truncated)"
+                snapshot.append({"role": "assistant", "content": f"[tool result]\n{tool_content}"})
+            # Any other non-empty message; non-system strings are capped at 4000 chars.
+            elif content:
+                if role != "system" and isinstance(content, str) and len(content) > 4000:
+                    content = content[:4000] + "\n... (truncated)"
+                snapshot.append({"role": role, "content": content})
+        return snapshot
diff --git a/py-src/data_formulator/analyst/skills/__init__.py b/py-src/data_formulator/analyst/skills/__init__.py
new file mode 100644
index 00000000..b6b6bac5
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/__init__.py
@@ -0,0 +1,382 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Skill registry — discovery and eager instantiation of analyst skills.
+
+Each skill lives in its own sub-package under this directory and ships a
+``SKILL.md`` with YAML frontmatter (``name`` / ``description`` /
+``when_to_use`` / ``always_on`` / ``actions``). At startup the registry scans
+those frontmatter blocks to build a cheap, always-resident index (tier-1
+progressive disclosure) **and** imports each skill's Python code module so the
+skill instance is always available to the agent.
+
+The distinction is deliberate: a skill's code is always imported and callable;
+what ``load_skill(name)`` does is flip a *switch* that exposes the skill's
+tools, opens its action gate, and injects its ``SKILL.md`` body into context —
+i.e. it controls exposure to the model, not availability of the code.
+
+Convention for a skill code module: ``skills/<name>/skill.py`` exposing a
+``get_skill() -> Skill`` factory. A skill that ships only a ``SKILL.md`` (pure
+guidance, no code) is still discoverable — it simply has no tools or handlers.
+"""
+
+from __future__ import annotations
+
+import importlib
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from data_formulator.analyst.skills.base import (
+    Event,
+    Skill,
+    SkillContext,
+    SkillMeta,
+    ToolResult,
+)
+
+logger = logging.getLogger(__name__)
+
+SKILLS_DIR = Path(__file__).parent
+SKILL_DOC_NAME = "SKILL.md"
+TOOLS_FILE_NAME = "tools.json"
+
+_FM_PATTERN = re.compile(r"^---\s*\n(.*?)\n---\s*\n?", re.DOTALL)
+
+
+def _parse_front_matter(content: str) -> tuple[dict[str, Any], str]:
+    """Split ``content`` into ``(frontmatter_dict, body)``; any failure (no block,
+    YAML load error, non-mapping result) degrades to ``({}, content)``."""
+    fallback = ({}, content)
+    match = _FM_PATTERN.match(content)
+    if match is None:
+        return fallback
+    try:
+        import yaml  # local import — only needed when parsing
+
+        parsed = yaml.safe_load(match.group(1))
+    except Exception:
+        return fallback
+    return (parsed, content[match.end():]) if isinstance(parsed, dict) else fallback
+
+
+def _coerce_name_list(raw: Any) -> tuple[str, ...]:
+    """Normalize a frontmatter name list (``tools``/``actions``) to a tuple,
+    dropping blank entries and ``None`` items (a bare YAML ``-`` parses as null)."""
+    if isinstance(raw, str):
+        raw = [raw]
+    names = (str(a).strip() for a in raw if a is not None) if isinstance(raw, (list, tuple)) else ()
+    return tuple(n for n in names if n)
+
+
+def _meta_from_frontmatter(raw: dict[str, Any], fallback_name: str) -> SkillMeta:
+    """Build a ``SkillMeta`` from parsed frontmatter; ``name`` defaults to ``fallback_name``."""
+    def _text(key: str, default: str = "") -> str:
+        return str(raw.get(key) or default)
+    return SkillMeta(
+        name=_text("name", fallback_name), description=_text("description"),
+        when_to_use=_text("when_to_use"), always_on=bool(raw.get("always_on", False)),
+        tool_names=_coerce_name_list(raw.get("tools")), action_names=_coerce_name_list(raw.get("actions")),
+    )
+
+
+@dataclass
+class SkillRegistry:
+    """Index of discovered skills, keyed by skill name.
+
+    Holds three declarative things per skill, all resolved at build time:
+    the cheap frontmatter (``SkillMeta``), the eagerly-instantiated code module
+    (the *processor*: ``handle_tool`` / ``handle_action``), and the skill's
+    ``tools.json`` schemas (``tool_specs``). The doc *body* is read lazily.
+    """
+
+    metas: dict[str, SkillMeta] = field(default_factory=dict)
+    # Eagerly-instantiated skill code modules, keyed by name. A name present in
+    # ``metas`` but absent here is a guidance-only skill (SKILL.md, no code).
+    skills: dict[str, Skill] = field(default_factory=dict)
+    # Per-skill ``tools.json`` schemas, keyed by name: a flat list of standard
+    # OpenAI function-tool specs (``{"type":"function","function":{name,
+    # description,parameters}}``) covering BOTH inspection tools and committing
+    # actions; the frontmatter ``tools:`` / ``actions:`` lists decide which is which.
+    tool_specs: dict[str, list[dict[str, Any]]] = field(default_factory=dict)
+    _doc_paths: dict[str, Path] = field(default_factory=dict)
+    # ``(skill, tool)`` pairs already reported as drift, so the warning fires once.
+    _drift_warned: set[tuple[str, Any]] = field(default_factory=set, repr=False, compare=False)
+
+    def _specs_split(self, name: str) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+        """Partition a skill's ``tool_specs`` into ``(inspection_tools, actions)``
+        using its frontmatter ``tools:`` / ``actions:`` lists as the authority.
+
+        A spec whose function name is declared in ``actions:`` is a committing
+        action; everything else is an inspection tool. A spec named in *neither*
+        list is drift (present in ``tools.json`` but undeclared in ``SKILL.md``):
+        it is still treated as an inspection tool, and a warning is logged once
+        per ``(skill, tool)`` because this method is called repeatedly.
+        """
+        meta = self.metas.get(name)
+        action_set = set(meta.action_names) if meta else set()
+        tool_set = set(meta.tool_names) if meta else set()
+        tools: list[dict[str, Any]] = []
+        actions: list[dict[str, Any]] = []
+        for spec in self.tool_specs.get(name, ()):  # may be empty
+            fn = (spec.get("function") or {}).get("name")  # tolerate "function": null
+            if fn in action_set:
+                actions.append(spec)
+            else:
+                if fn not in tool_set and (name, fn) not in self._drift_warned:
+                    self._drift_warned.add((name, fn))
+                    logger.warning(
+                        "[skills] %s: tools.json declares %r but SKILL.md "
+                        "frontmatter lists it in neither tools: nor actions: "
+                        "— treating as an inspection tool.", name, fn,
+                    )
+                tools.append(spec)
+        return tools, actions
+
+    def names(self) -> list[str]:
+        return sorted(self.metas)
+
+    def list_metas(self) -> list[SkillMeta]:
+        return [self.metas[n] for n in self.names()]
+
+    def has(self, name: str) -> bool:
+        return name in self.metas
+
+    def gated_skill_names(self) -> list[str]:
+        """Skills that load on demand (not ``always_on``)."""
+        return [n for n in self.names() if not self.metas[n].always_on]
+
+    def action_owner(self, action: str) -> str | None:
+        """Return the first skill (in name order) whose frontmatter declares
+        ``action``, or ``None`` if no skill declares it."""
+        for name in self.names():
+            if action in self.metas[name].action_names:
+                return name
+        return None
+
+    def render_registry_block(self) -> str:
+        """Tier-1 progressive-disclosure listing for the base prompt.
+
+        One line per gated skill: name, the actions it unlocks, and a short
+        ``when_to_use``/``description``. Bodies are pulled on demand via
+        ``load_skill``; only this cheap index stays resident.
+        """
+        lines: list[str] = []
+        for name in self.gated_skill_names():
+            meta = self.metas[name]
+            blurb = (meta.when_to_use or meta.description or "").strip().replace("\n", " ")
+            unlocks = ", ".join(meta.action_names) if meta.action_names else "(no actions)"
+            lines.append(f"- **{name}** — unlocks `{unlocks}`. {blurb}")
+        return "\n".join(lines)
+
+    def load_body(self, name: str) -> str:
+        """Return the ``SKILL.md`` body (frontmatter stripped) for ``name``."""
+        path = self._doc_paths.get(name)
+        if not path or not path.exists():
+            raise KeyError(f"Unknown skill: {name!r}")
+        _, body = _parse_front_matter(path.read_text(encoding="utf-8"))
+        return body.strip()
+
+    def get_skill(self, name: str) -> Skill | None:
+        """Return the (eagerly-instantiated) skill code module, or ``None`` for
+        an unknown or guidance-only skill."""
+        return self.skills.get(name)
+
+    def tools_for(self, names) -> list[dict[str, Any]]:
+        """Merge the inspection tool specs contributed by the named (loaded) skills."""
+        out: list[dict[str, Any]] = []
+        for name in names:
+            out.extend(self._specs_split(name)[0])
+        return out
+
+    # ------------------------------------------------------------------
+    # Actions (design-docs/36): the committing tool calls a turn may end with.
+    # A skill's ``tools.json`` lists tools and actions together as standard
+    # function specs; the frontmatter ``actions:`` list says which are committing
+    # actions. The agent offers their tool specs and dispatches the chosen one.
+    # (Inspection tools gather; a committing action ends the turn.)
+    # ------------------------------------------------------------------
+
+    def action_tools_for(self, names) -> list[dict[str, Any]]:
+        """Render the committing-action tool specs unlocked by the named (loaded)
+        skills.
+
+        These are offered alongside the inspection tools each round; the agent
+        partitions the model's response by which tool names are committing
+        actions vs inspection tools.
+        """
+        out: list[dict[str, Any]] = []
+        for name in names:
+            out.extend(self._specs_split(name)[1])
+        return out
+
+    def action_required_fields(self, name: str) -> tuple[str, ...]:
+        """Return the required argument names for the action ``name`` (empty if
+        unknown), read from the action schema's ``parameters.required``. Used for
+        a cheap pre-dispatch completeness check."""
+        for skill_name in self.names():
+            for spec in self._specs_split(skill_name)[1]:
+                if (spec.get("function") or {}).get("name") == name:
+                    params = (spec.get("function") or {}).get("parameters") or {}
+                    return tuple(params.get("required") or ())
+        return ()
+
+    def action_names(self) -> set[str]:
+        """All committing-action names declared by any skill's frontmatter
+        ``actions:`` — the universe of committing tool names, used to partition a
+        response's tool calls into inspection tools vs committing actions."""
+        out: set[str] = set()
+        for meta in self.metas.values():
+            out.update(meta.action_names)
+        return out
+
+    def action_stream_spec(self, action: str) -> tuple[str, str] | None:
+        """Return ``(stream_field, stream_channel)`` for a *streaming* action, or
+        ``None`` for a buffered one.
+
+        Streaming is a property of the **loop**, not the schema (design-docs/36
+        §5): a skill declares which of its actions stream by exposing a
+        ``streaming_actions = {action: (field, channel)}`` mapping on its code
+        module (behaviour lives in code, not the JSON sent to the model). The
+        agent reads this to know whether to forward the action's argument live
+        on its declared channel as the model writes it. Today only the report
+        skill's ``write_report`` streams (its ``report`` field on the ``report``
+        channel)."""
+        for name in self.names():
+            skill = self.skills.get(name)
+            spec = getattr(skill, "streaming_actions", None)
+            if spec and action in spec:
+                stream_field, channel = spec[action]  # avoid shadowing dataclasses.field
+                return (str(stream_field), str(channel))
+        return None
+
+
+def _instantiate_skill(name: str) -> Skill | None:
+    """Import ``skills/<name>/skill.py`` and call ``get_skill()``; returns ``None`` for a
+    guidance-only skill (no ``skill.py``) or, with a logged warning, a broken one."""
+    module_path = f"{__name__}.{name}.skill"
+    try:
+        module = importlib.import_module(module_path)
+    except ModuleNotFoundError as exc:
+        # Guidance-only means skill.py itself is absent; a missing dependency *inside* it is a real failure.
+        if not (exc.name and f"{module_path}.".startswith(f"{exc.name}.")):
+            logger.warning("Failed to import skill module %s", module_path, exc_info=True)
+        return None  # guidance-only skill (SKILL.md, no skill.py), or a broken import
+    factory = getattr(module, "get_skill", None)
+    if not callable(factory):
+        logger.warning("Skill module %s is missing a get_skill() factory.", module_path)
+        return None
+    try:
+        return factory()
+    except Exception:
+        logger.warning("Failed to instantiate skill %r", name, exc_info=True)
+        return None
+
+
+def _load_tool_specs(skill_dir: Path) -> list[dict[str, Any]]:
+    """Read the skill's ``tools.json`` (next to ``SKILL.md``) into a list of specs.
+
+    The file is a flat JSON list of OpenAI function-tool specs for both
+    inspection tools and committing actions. A missing file, a parse failure,
+    or a non-list payload yields ``[]``; non-dict entries are dropped.
+    """
+    specs_path = skill_dir / TOOLS_FILE_NAME
+    if not specs_path.exists():
+        return []
+    try:
+        loaded = json.loads(specs_path.read_text(encoding="utf-8"))
+    except Exception:
+        logger.warning("Failed to parse %s", specs_path, exc_info=True)
+        return []
+    if not isinstance(loaded, list):
+        return []
+    return [entry for entry in loaded if isinstance(entry, dict)]
+
+
+def build_registry(skills_dir: Path | None = None) -> SkillRegistry:
+    """Scan ``skills_dir`` for ``<name>/SKILL.md``, build the index, eagerly
+    instantiate each skill's code module, and load its ``tools.json`` schemas."""
+    root = skills_dir or SKILLS_DIR
+    registry = SkillRegistry()
+    for child in sorted(root.iterdir()):
+        doc = child / SKILL_DOC_NAME
+        if not child.is_dir() or child.name.startswith((".", "_")) or not doc.exists():
+            continue
+        try:
+            raw, _ = _parse_front_matter(doc.read_text(encoding="utf-8"))
+        except Exception:
+            logger.warning("Failed to read SKILL.md for %s", child.name, exc_info=True)
+            continue
+        meta = _meta_from_frontmatter(raw, child.name)
+        registry.metas[meta.name] = meta
+        registry._doc_paths[meta.name] = doc
+        # Code is imported by *directory* name (skills/<dir>/skill.py); the
+        # frontmatter ``name`` may differ and is only the registry key.
+        instance = _instantiate_skill(child.name)
+        if instance is not None:
+            registry.skills[meta.name] = instance
+        registry.tool_specs[meta.name] = _load_tool_specs(child)
+    _warn_on_name_collisions(registry)
+    return registry
+
+
+def _warn_on_name_collisions(registry: SkillRegistry) -> None:
+    """Log a warning (never raise) for action or tool names declared by more than one skill.
+
+    Two checks run over every registered skill, in name order. Action names
+    come from each skill's frontmatter ``actions:`` list; a name seen under
+    several skills is reported together with the first declarer as its owner.
+    Tool names come from every ``tools.json`` spec — inspection tools and
+    committing actions alike, since both are exposed as function-calling tools
+    and so share one namespace. A clash means one skill silently shadows
+    another; this surfaces it at startup rather than letting it fail
+    mysteriously mid-run. The registry itself is not modified.
+    """
+    action_sources: dict[str, list[str]] = {}
+    tool_sources: dict[str, list[str]] = {}
+    for name in registry.names():
+        tools, actions = registry._specs_split(name)
+        for action in registry.metas[name].action_names:
+            action_sources.setdefault(action, []).append(name)
+        # Inspection tools and committing actions share one tool namespace.
+        for spec in (*tools, *actions):
+            tool_name = spec.get("function", {}).get("name")
+            if tool_name:
+                tool_sources.setdefault(tool_name, []).append(name)
+    # Report every name that collected more than one declaring skill.
+    for action, owners in action_sources.items():
+        if len(owners) > 1:
+            logger.warning(
+                "Action name collision: %r is declared by multiple skills (%s). "
+                "Only %r will own it; the rest are shadowed. Rename the action in "
+                "the conflicting SKILL.md frontmatter.",
+                action, ", ".join(owners), owners[0],
+            )
+    for tool_name, owners in tool_sources.items():
+        if len(owners) > 1:
+            logger.warning(
+                "Tool name collision: %r is provided by multiple skills (%s). "
+                "Function-calling tool names (inspection tools and committing "
+                "actions share one namespace) must be globally unique, so one "
+                "will shadow the others. Give each a distinct (e.g. "
+                "skill-prefixed) name.",
+                tool_name, ", ".join(owners),
+            )
+
+
+
+__all__ = [
+    # Re-exported skill substrate (defined in skills/base.py)
+    "Event",
+    "Skill",
+    "SkillContext",
+    "SkillMeta",
+    "ToolResult",
+    # Registry
+    "SkillRegistry",
+    "build_registry",
+    "SKILLS_DIR",
+]
diff --git a/py-src/data_formulator/analyst/skills/base.py b/py-src/data_formulator/analyst/skills/base.py
new file mode 100644
index 00000000..c542f5f2
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/base.py
@@ -0,0 +1,185 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Skill protocol and shared types for the analyst agent.
+
+A *skill* is a passive plugin the single analyst agent can switch on. It never
+runs its own agent loop; instead it contributes:
+  1. a ``SKILL.md`` doc (frontmatter + how-to body) — progressive disclosure,
+  2. zero or more **tools** the model may call once the skill is loaded,
+  3. zero or more **gated actions** it unlocks, and
+  4. **handlers** (``handle_tool`` / ``handle_action``) that perform any
+     compute / rendering and yield channel-tagged events.
+
+The shell stays skill-agnostic: it merges a loaded skill's tools into the
+model's tool list, opens the gate for its actions, routes tool calls to
+``handle_tool`` and emitted actions to ``handle_action``, and forwards whatever
+events come back. "Loading" a skill controls only *exposure to the model* — the
+skill's Python is always imported and callable.
+
+Two output channels, never crossed:
+  * **frontend** — a handler *yields* ``Event``s. A skill never yields straight
+    to the user; it yields to the **agent**, whose router (see the shell's
+    ``_route_skill_events``) is the single place that forwards / stamps /
+    enriches / could drop each event before it reaches the stream. Yielding is
+    how streaming works: the route consumes ``agent.run()`` as a synchronous
+    generator, so nested output must propagate up via ``yield from``.
+  * **agent loop** — a handler *returns* an ``observation`` string (or ``None``):
+    LLM-facing feedback that the shell appends to the trajectory as the action's
+    tool-call result, exactly like an inspection tool's output. There is no
+    control verdict — the agent simply reads the result and decides its next
+    move (commit another action, or stop by answering). A recoverable failure is
+    just an observation describing what went wrong; the agent re-decides freely.
+
+Frontend payloads therefore live in yielded events, never in the returned
+observation; the ``observation`` is LLM-facing trajectory text, never shown to
+the user.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any, Generator, Protocol, runtime_checkable
+
+# An ``Event`` is a channel-tagged dict yielded on the unified output stream.
+# See design-docs/35 §5. Examples:
+#   {"type": "text_delta", "channel": "report", "content": "..."}
+#   {"type": "tool_start", "tool": "inspect_chart", ...}
+#   {"type": "action", "action": "visualize", ...}
+#   {"type": "result", ...}
+#   {"type": "completion", ...}
+# A committing action (visualize / delegate / write_report) is dispatched from a
+# committing tool call and yields these same events; see design-docs/36.
+Event = dict[str, Any]
+
+
+@dataclass(frozen=True)
+class SkillMeta:
+    """A skill's frontmatter — the cheap, always-resident registry entry.
+
+    Mirrors Anthropic Agent Skills tier-1 disclosure: only ``name`` and
+    ``description`` (plus an optional ``when_to_use``) are kept resident in the
+    base prompt so the model knows *when* to reach for the skill; the body is
+    loaded on demand via the ``load_skill`` tool.
+    """
+
+    name: str
+    description: str
+    when_to_use: str = ""
+    # ``always_on`` skills (e.g. visualization) are pre-loaded and their actions
+    # are never gated. Everything else loads dynamically.
+    always_on: bool = False
+    # The inspection **tool** names this skill exposes (data gathering, no turn
+    # commit). Declared in the ``SKILL.md`` frontmatter (``tools: [inspect_chart]``)
+    # so the frontmatter is the complete, symmetric surface declaration; the
+    # matching JSON schemas live in ``tools.json``.
+    tool_names: tuple[str, ...] = ()
+    # The gated **action** names this skill unlocks once loaded. Declared in the
+    # ``SKILL.md`` frontmatter (``actions: [write_report]``) so the shell can
+    # build its legal-action set from tier-1 metadata alone — without importing
+    # the skill's code module.
+    action_names: tuple[str, ...] = ()
+
+
+@dataclass
+class SkillContext:
+    """Shared handles + per-turn state passed to a skill handler.
+
+    Carries the substrate a handler needs (LLM client, workspace, language
+    instruction) plus the live trajectory and any data the action operates on.
+    Skills read from here rather than reaching into the agent shell.
+    """
+
+    client: Any
+    workspace: Any
+    language_instruction: str = ""
+    # The running message trajectory (read/append as the handler streams).
+    trajectory: list[dict] = field(default_factory=list)
+    # Free-form per-turn payload (input tables, charts, etc.) the action needs.
+    payload: dict[str, Any] = field(default_factory=dict)
+    # Shell-provided execution substrate (sandbox-backed). Skills call back
+    # through this for raw compute that the loop owns — e.g.
+    # ``ctx.runtime.run_visualize_code(...)`` / ``run_explore_code(...)``. The
+    # shell sets it to itself; ``None`` in standalone unit tests.
+    runtime: Any = None
+
+
+@dataclass(frozen=True)
+class ToolResult:
+    """Return value of a skill's ``handle_tool``.
+
+    ``text`` is fed back to the model as the tool-result message. ``images``
+    are base64 data-URLs (e.g. a rendered chart) that the shell attaches as a
+    follow-up vision message, since tool-result messages cannot carry image
+    content on most providers.
+    """
+
+    text: str = ""
+    images: tuple[str, ...] = ()
+
+
+@runtime_checkable
+class Skill(Protocol):
+    """A passive plugin the agent shell exposes once its skill is *loaded*.
+
+    A skill never runs its own agent loop. It is a pure **processor**: two
+    handlers that perform any compute / rendering. Its *declarative* surface —
+    metadata (``SKILL.md`` frontmatter → ``SkillMeta``) and the inspection tool /
+    committing action *schemas* (``tools.json``) — lives in data files the
+    registry loads, not on the class. The frontmatter ``tools:`` / ``actions:``
+    lists decide which schemas are inspection tools vs committing actions.
+
+    The Python module is always imported and instantiated at registry build
+    time; "loading" a skill only controls *exposure to the model*, never the
+    availability of the code.
+    """
+
+    def handle_tool(
+        self,
+        name: str,
+        args: dict[str, Any],
+        ctx: SkillContext,
+    ) -> ToolResult:
+        """Execute an inspection tool the model called. ``name`` is one of this
+        skill's ``tools``; ``args`` is the parsed tool arguments. Parallel-safe;
+        returns text (and optional images) for the model to read."""
+        ...
+
+    def handle_action(
+        self,
+        action: str,
+        spec: dict[str, Any],
+        ctx: SkillContext,
+    ) -> Generator[Event, None, str | None]:
+        """Dispatch a committing **action** the model emitted as a tool call:
+        validate the arguments, run any compute / rendering, and yield
+        channel-tagged events as it goes (result / delegate / text_delta / …).
+        It then **returns** an ``observation`` string (or ``None``): LLM-facing
+        feedback the shell appends to the trajectory as the action's tool-call
+        result, exactly like an inspection tool's output.
+
+        There is no control verdict. The agent reads the observation and decides
+        its own next move — commit another action, or stop by giving its final
+        answer (a turn with no action ends the run). A recoverable failure is
+        just an observation describing what went wrong; the agent re-decides.
+
+        Yielded events go to the **agent**, not the frontend: the shell's router
+        forwards them (stamping ``iteration``, tracking steps) and is free to
+        transform or drop any of them. Frontend output therefore lives only in
+        these yields; the returned observation is never shown to the user.
+
+        ``action`` is one of the skill's frontmatter ``actions:`` names; ``spec``
+        is the parsed action tool-call arguments. Implement as a generator that
+        ``return``s the observation; the shell captures it via ``yield from`` /
+        ``StopIteration``.
+        """
+        ...
+
+
+__all__ = [
+    "Event",
+    "Skill",
+    "SkillContext",
+    "SkillMeta",
+    "ToolResult",
+]
diff --git a/py-src/data_formulator/analyst/skills/core/SKILL.md b/py-src/data_formulator/analyst/skills/core/SKILL.md
new file mode 100644
index 00000000..9820d65f
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/core/SKILL.md
@@ -0,0 +1,151 @@
+---
+name: core
+description: >-
+  The analyst's built-in capabilities: data-inspection tools and the
+  always-available actions (visualize, ask_user, delegate).
+when_to_use: Always loaded by default — this is the agent's baseline.
+always_on: true
+tools:
+  - execute_python_script
+  - inspect_source_data
+actions:
+  - visualize
+  - ask_user
+  - delegate
+---
+
+# Core capabilities
+
+This describes the built-in **inspection tools** you use to gather data and the
+always-available **actions** you take on it. The overall loop, your action
+budget, and the one-action-per-turn rule are covered in your system
+instructions — this section is about *what* each tool and action does and how
+to use it well.
+
+## Tools (for data gathering)
+
+- **execute_python_script(code)** — run a general-purpose Python script to
+  inspect data, compute stats, transform tables, or verify assumptions. Its
+  stdout is returned to you (use `print()`); the script is for *your* analysis
+  and its output is never shown to the user. pandas, numpy, duckdb, sklearn,
+  scipy are available. **Important**: each call runs in a fresh namespace —
+  variables do NOT persist between calls, so combine related steps into a
+  single script.
+- **inspect_source_data(table_names)** — get schema, stats, and sample rows for
+  source tables (cheaper than `execute_python_script` for basic inspection).
+- **load_skill(name)** — load a skill's instructions into context so you can use
+  the action it unlocks (see the Skills section of your system instructions).
+
+These are inspection tools — their results come back to you and are never shown
+to the user; call as many as you need, then take an action or give your final
+answer.
+
+You analyse data that is **already in the workspace**. If the user's question
+requires data that isn't present, do NOT try to find it yourself — use the
+`delegate` action targeting the Data Loading agent.
+
+The initial context already includes sample rows and statistics for each table.
+If the data is straightforward, go straight to the action without calling
+tools. Tool results are returned to you before you act.
+
+## Actions
+
+Call an action as a tool call when you want to act on the data. Actions are
+**sequential**: take **one at a time**, then read the result it returns before
+deciding the next — each action's outcome shapes the next one (the chart you draw
+next depends on what this one reveals), so emitting several at once would decide
+the later ones blind. After each result you choose what to do — take another
+action, or stop. **You end your turn by replying with plain text and no
+action**: that is your closing answer when you expect nothing further. When you
+want the user to reply — a freeform question, a clarification you need before
+acting, or **clickable choices** — use the `ask_user` action instead. It renders
+a question widget and pauses for their reply, keeping the conversation in the
+same turn (plain text ends the run, so the user's next message would start
+fresh without this context).
+
+**Be extremely concise.** Your plain-text replies — the closing answer that ends
+the run and any per-step commentary — are shown verbatim to the user and double
+as the artifact summary. Keep the closing answer to **one short sentence (≤20
+words)**: state the finding, not the process. Never narrate what you're about to
+do or recap the chart's axes; let the charts and report speak for themselves.
+
+### `visualize` — chart a transform
+
+Run code that produces a DataFrame and render it as a chart. You then observe the
+result and decide your next move.
+
+- `display_instruction` — ≤12 words; the question/hypothesis the chart
+  investigates (don't recap x/y/color — those are visible). Wrap a **column** in
+  `**…**` if it anchors the question.
+- `code` — Python producing a DataFrame assigned to `output_variable`.
+- `output_variable` — snake_case name the code assigns.
+- `chart` — `{chart_type, encodings:{x,y,…}, config:{}}` (chart_type from the
+  chart type reference).
+- `input_tables` — table names from [SOURCE TABLES] the code reads.
+- `field_metadata` — field → SemanticType; `field_display_names` — field →
+  human-readable label.
+
+### `ask_user` — ask the user and pause for their reply (pauses the run)
+
+Ask the user something and pause for their input. Reach for this on **any** turn
+where you want a reply — a freeform question, a clarification you need before
+acting, or an explanation you want them to react to. Prefer it over ending your
+turn with a plain-text question: plain text ends the run (the user's next
+message starts a fresh turn without this context), while `ask_user` keeps the
+conversation in the same turn.
+
+- `questions` — 1–3 items. Each is either a question that awaits an answer
+  (clarification) or a statement the user need not answer (explanation). A
+  question with no required answer and no options renders as a plain
+  explanation; offer chart-producing follow-ups as its `options`.
+- each question: `text` (wrap a **column** in `**…**`), `responseType`
+  (`single_choice` when offering `options`, else `free_text`), `required`
+  (`true` for a clarification the run depends on, `false` for an explanation /
+  optional follow-up), and `options` (plain-text choices, **at most 3** — just
+  the most likely answers; the user can always type a freeform reply, so don't
+  enumerate every case).
+
+This is **terminal**: the run pauses after it and resumes when the user replies.
+
+### `delegate` — hand off to a peer agent
+
+Hand off to a peer agent when the question needs work outside your scope.
+
+- `target` — `"data_loading"` (the user's question needs data not in the
+  workspace).
+- `options` — 1–2 seed prompts for the target agent; each becomes a one-click
+  button (label == seed prompt). If two, make them meaningfully distinct (e.g.
+  `'monthly orders 2024'` vs. `'orders by region 2024'`).
+- `message` — a short note to the user that you're handing off.
+
+Only delegate if the workspace tables genuinely can't cover the question.
+
+## Choosing what to do
+
+Classify the question first (silently) to pick the right move and calibrate
+effort:
+
+- *Conceptual / informational* (meaning, schema, what a field represents — no
+  chart needed): **answer directly in plain text** (no action).
+- *Ambiguous* (you genuinely can't tell what's being asked): ask the user
+  rather than guessing — use the `ask_user` action (freeform or with clickable
+  choices) so their reply resumes the same turn.
+- *Concrete* (one specific answer): **1 visualization**, then give your final
+  answer in plain text.
+- *Progressive* (a small sequence, e.g. "why did revenue drop?"): **2–3
+  visualizations**, then a closing plain-text answer tying them together.
+- *Open-ended* (explicit exploration): **3–5 visualizations** forming a
+  narrative, then a closing plain-text answer.
+- *Missing data* (needs tables not in the workspace):
+  `delegate(target="data_loading")`.
+- *Report / write-up request* (e.g. "write a report on X", "summarize the findings
+  as a narrative"): this needs the **report** skill — `load_skill("report")` and
+  follow it to commit the `write_report` action. **Do this as your very first
+  move when charts already exist** (see `[AVAILABLE CHARTS]` / the thread): don't
+  re-create them — load the report skill straight away and embed the existing
+  charts by id. Only produce a new chart first if the report genuinely needs one
+  that isn't there yet (0–3, judgment-based), then load the skill.
+
+When chaining visualizations, add the next chart only if it answers a gap *raised*
+by the previous one — not just another interesting angle. **Never** repeat a
+visualization already in the trajectory or in another thread.
diff --git a/py-src/data_formulator/analyst/skills/core/__init__.py b/py-src/data_formulator/analyst/skills/core/__init__.py
new file mode 100644
index 00000000..e546479a
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/core/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""core skill — always-on baseline tools + actions for the analyst.
+
+``SKILL.md`` holds the base prompt body (the shell formats it into the system
+message); ``skill.py`` exposes ``get_skill()`` (the executable handler).
+"""
diff --git a/py-src/data_formulator/analyst/skills/core/skill.py b/py-src/data_formulator/analyst/skills/core/skill.py
new file mode 100644
index 00000000..c15986c0
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/core/skill.py
@@ -0,0 +1,400 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""core skill — the analyst's always-on baseline capabilities.
+
+Every other skill is optional and gated; ``core`` is ``always_on`` and loaded
+automatically at the start of each run, so the agent is never truly empty. It
+contributes the built-in data-inspection **tools** (``execute_python_script`` /
+``inspect_source_data`` — ``load_skill`` is assembled by the shell because its
+enum is dynamic) and the always-available **actions** — the committing tool
+calls the agent acts with (``visualize`` / ``ask_user`` / ``delegate``; see
+``design-docs/36``).
+
+Each handler does *processing* (validate the action arguments, run/normalize,
+emit events) and **returns an observation string** that the shell appends to the
+trajectory as the action's tool-call result — exactly like an inspection tool.
+There is no control verdict: the agent reads the observation and decides its own
+next move (commit another action, or stop by giving its final answer — a turn
+with no action ends the run). The one exception is ``ask_user``: it puts a
+question widget to the user, which the agent cannot observe, so it **returns
+``None``** — the shell reads that as "no observation to continue from" and ends
+the run, pausing for the user's reply. Heavy execution substrate (sandbox-backed
+``run_visualize_code`` / ``run_explore_code``) lives on the shell and is reached
+via ``ctx.runtime``.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any, Generator
+
+from data_formulator.agents.agent_utils import generate_data_summary
+from data_formulator.agents.context import handle_inspect_source_data
+from data_formulator.security.code_signing import sign_result
+
+from data_formulator.analyst.skills.base import (
+    Event,
+    SkillContext,
+    ToolResult,
+)
+
+logger = logging.getLogger(__name__)
+
+# Valid targets for a ``delegate`` action. Report generation is NOT a delegate
+# target — it is the ``write_report`` action unlocked by the report skill.
+_DELEGATE_TARGETS: tuple[str, ...] = ("data_loading",)
+
+
+class CoreSkill:
+    """The core skill processor: the ``execute_python_script`` / ``inspect_source_data``
+    tool handlers and the ``visualize`` / ``ask_user`` / ``delegate`` action handlers.
+
+    Tool/action *schemas* live in ``core/tools.json`` and the skill's metadata
+    in ``SKILL.md`` frontmatter (``load_skill`` is assembled by the shell because
+    its enum is dynamic); this class is purely behaviour — it validates an
+    action's arguments and returns an observation string that the shell feeds
+    back as the action's tool-call result (or ``None`` for ``ask_user``, the one
+    terminal action that ends the run by pausing for the user). There is no
+    control verdict.
+    """
+
+    # ------------------------------------------------------------------
+    # Tools
+    # ------------------------------------------------------------------
+
+    def handle_tool(
+        self,
+        name: str,
+        args: dict[str, Any],
+        ctx: SkillContext,
+    ) -> ToolResult:
+        """Execute a core inspection tool by delegating to the shell runtime.
+
+        (In practice the shell's tool loop intercepts these inline — they need
+        loop-level sandbox state — but implementing them here keeps the skill
+        self-consistent and lets the shell route them generically if it stops
+        special-casing.)
+        """
+        input_tables = (ctx.payload or {}).get("input_tables") or []
+        if name == "execute_python_script":
+            result = ctx.runtime.run_explore_code(args.get("code", ""), input_tables)
+            text = result.get("stdout", "")
+            if result.get("error"):
+                text += f"\n\nError: {result['error']}"
+            return ToolResult(text=text)
+        if name == "inspect_source_data":
+            text = handle_inspect_source_data(
+                args.get("table_names", []), input_tables, ctx.workspace,
+            )
+            return ToolResult(text=text)
+        return ToolResult(text=f"core has no tool '{name}'.")
+
+    # ------------------------------------------------------------------
+    # Actions — dispatch (each committing tool call routes to one handler)
+    # ------------------------------------------------------------------
+
+    def handle_action(
+        self,
+        action: str,
+        spec: dict[str, Any],
+        ctx: SkillContext,
+    ) -> Generator[Event, None, str | None]:
+        if action == "visualize":
+            return (yield from self._handle_visualize(spec, ctx))
+        if action == "ask_user":
+            return (yield from self._handle_interact(spec, ctx))
+        if action == "delegate":
+            return (yield from self._handle_delegate(spec, ctx))
+        yield {
+            "type": "error",
+            "message": f"core cannot handle action '{action}'.",
+            "message_code": "agent.unknownAction",
+        }
+        return f"core cannot handle action '{action}'."
+
+    # ------------------------------------------------------------------
+    # visualize
+    # ------------------------------------------------------------------
+
+    def _handle_visualize(
+        self, action: dict[str, Any], ctx: SkillContext,
+    ) -> Generator[Event, None, str | None]:
+        code = action.get("code", "")
+        output_variable = action.get("output_variable", "result_df")
+        chart_spec = action.get("chart", {})
+        field_metadata = action.get("field_metadata", {})
+        field_display_names = action.get("field_display_names", {})
+        display_instruction = action.get("display_instruction", "")
+        step_index = int((ctx.payload or {}).get("completed_step_count", 0)) + 1
+
+        yield {
+            "type": "action",
+            "action": "visualize",
+            "thought": action.get("thought", ""),
+            "display_instruction": display_instruction,
+            "input_tables": action.get("input_tables", []),
+        }
+
+        viz_result = ctx.runtime.run_visualize_code(
+            code=code,
+            output_variable=output_variable,
+            chart_spec=chart_spec,
+            field_metadata=field_metadata,
+            field_display_names=field_display_names,
+            display_instruction=display_instruction,
+            messages=ctx.trajectory,
+        )
+
+        if viz_result["status"] != "ok":
+            error_msg = viz_result.get("error_message", "Unknown error")
+            observation = (
+                f"[OBSERVATION – Step {step_index} FAILED]\n\nError: {error_msg}"
+            )
+            yield {
+                "type": "error",
+                "message": error_msg,
+                "display_instruction": display_instruction,
+            }
+            # Recoverable: hand the error back and let the agent re-decide.
+            return observation
+
+        transform_result = viz_result["transform_result"]
+        sign_result(transform_result)
+        transformed_data = transform_result["content"]
+
+        # Register the chart so a same-run report (and inspect_chart) can
+        # reference it by its forwarded, run-stable id.
+        ctx.runtime.register_run_chart(transform_result, chart_spec)
+
+        yield {
+            "type": "result",
+            "status": "success",
+            "content": {
+                "question": display_instruction,
+                "result": transform_result,
+            },
+        }
+
+        observation = self._format_observation(
+            step_index=step_index,
+            display_instruction=display_instruction,
+            thought=action.get("thought", ""),
+            code=transform_result.get("code", ""),
+            data=transformed_data,
+            chart_id=transform_result.get("chart_id"),
+            workspace=ctx.workspace,
+        )
+        return observation
+
+    # ------------------------------------------------------------------
+    # ask_user — put question(s) to the user and pause (terminal)
+    # ------------------------------------------------------------------
+
+    def _handle_interact(
+        self, action: dict[str, Any], ctx: SkillContext,
+    ) -> Generator[Event, None, str | None]:
+        """Render a structured question/explanation widget and end the run.
+
+        ``ask_user`` is the one *terminal* action: the agent cannot observe its
+        own question, so there is nothing to feed back. On a valid payload it
+        yields the widget event and **returns ``None``** — the shell reads that
+        as "no observation to continue from" and stops the loop, waiting for the
+        user's reply (which starts a fresh turn). A malformed payload is instead
+        recoverable: it returns an error string so the agent can retry.
+        """
+        try:
+            payload = self._normalize_interact_action(action)
+        except ValueError:
+            msg = "ask_user action requires non-empty questions."
+            yield {
+                "type": "error",
+                "message": msg,
+                "message_code": "agent.parseActionFailed",
+            }
+            return msg
+        yield {
+            "type": "interact",
+            "thought": action.get("thought", ""),
+            **payload,
+        }
+        return None
+
+    # ------------------------------------------------------------------
+    # delegate — hand off to a peer agent
+    # ------------------------------------------------------------------
+
+    def _handle_delegate(
+        self, action: dict[str, Any], ctx: SkillContext,
+    ) -> Generator[Event, None, str | None]:
+        try:
+            payload = self._normalize_delegate_action(action)
+        except ValueError as exc:
+            msg = str(exc) or "delegate action requires target and options."
+            yield {
+                "type": "error",
+                "message": msg,
+                "message_code": "agent.parseActionFailed",
+            }
+            return msg
+        yield {
+            "type": "delegate",
+            "thought": action.get("thought", ""),
+            **payload,
+        }
+        return (
+            f"[DELEGATED to {payload['target']}] Handed off to the "
+            f"'{payload['target']}' agent; this run is complete."
+        )
+
+    # ------------------------------------------------------------------
+    # Observation formatting
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _format_observation(
+        step_index: int,
+        display_instruction: str,
+        thought: str,
+        code: str,
+        data: dict[str, Any],
+        workspace: Any,
+        chart_id: str | None = None,
+    ) -> str:
+        """Build the trajectory observation for a successful visualize step."""
+        data_summary = generate_data_summary(
+            [{
+                "name": data.get("virtual", {}).get("table_name", f"step_{step_index}"),
+                "rows": data["rows"],
+            }],
+            workspace=workspace,
+        )
+        chart_ref = ""
+        if chart_id:
+            chart_ref = (
+                f"\n\n**Chart id**: `{chart_id}` — to embed this chart in a report, "
+                f"write `![caption](chart://{chart_id})`; to read it again, pass this "
+                f"id to `inspect_chart`."
+            )
+        return (
+            f"[OBSERVATION – Step {step_index}]\n\n"
+            f"**Visualization**: {display_instruction}\n\n"
+            f"**Code**:\n```python\n{code}\n```\n\n"
+            f"**Transformed Data**:\n{data_summary}"
+            f"{chart_ref}"
+        )
+
+    # ------------------------------------------------------------------
+    # Action-argument normalizers (moved verbatim from the shell)
+    # ------------------------------------------------------------------
+
+    @classmethod
+    def _sanitize_clarification_options(cls, raw_options: Any) -> list[dict[str, Any]]:
+        """Clean up to three raw options into ``{label?, label_code?}`` dicts.
+
+        Strings and dicts are accepted; other entries and blank ones are dropped.
+        A ``None`` field counts as missing rather than the literal text "None".
+        """
+        if not isinstance(raw_options, list):
+            return []
+        options: list[dict[str, Any]] = []
+        for raw_option in raw_options[:3]:
+            if isinstance(raw_option, str):
+                label, label_code = raw_option.strip(), ""
+            elif isinstance(raw_option, dict):
+                label = str(raw_option.get("label") or "").strip()
+                label_code = str(raw_option.get("label_code") or "").strip()
+            else:
+                continue
+            fields = {"label": label, "label_code": label_code}
+            option = {key: text for key, text in fields.items() if text}
+            if option:
+                options.append(option)
+        return options
+
+    @classmethod
+    def _sanitize_clarification_questions(cls, raw_questions: Any) -> list[dict[str, Any]]:
+        """Clean up to three raw questions into ``ask_user`` widget question dicts.
+
+        Non-dict and blank entries are dropped; a ``None`` text counts as missing.
+        """
+        if not isinstance(raw_questions, list):
+            return []
+        questions: list[dict[str, Any]] = []
+        for raw_question in raw_questions[:3]:
+            if not isinstance(raw_question, dict):
+                continue
+            text = str(raw_question.get("text") or "").strip()
+            text_code = str(raw_question.get("text_code") or "").strip()
+            if not text and not text_code:
+                continue
+            options = cls._sanitize_clarification_options(raw_question.get("options"))
+            response_type = raw_question.get("responseType") or raw_question.get("response_type")
+            if response_type not in ("single_choice", "free_text"):
+                response_type = "single_choice" if options else "free_text"
+            required = bool(raw_question.get("required", True))
+            question: dict[str, Any] = {"responseType": response_type, "required": required}
+            texts = {"text": text, "text_code": text_code}
+            question.update({key: value for key, value in texts.items() if value})
+            if isinstance(raw_question.get("text_params"), dict):
+                question["text_params"] = raw_question["text_params"]
+            if options:
+                question["options"] = options
+            questions.append(question)
+        return questions
+
+    @classmethod
+    def _normalize_interact_action(cls, action: dict[str, Any]) -> dict[str, Any]:
+        """Normalize the ``ask_user`` action to ``{questions: [...]}``.
+
+        Subsumes the clarify + explain shapes:
+          * the native shape carries ``questions: [{text, options?, required?,
+            responseType?}, ...]`` — clarifications (required answers / options)
+            and explanations (a statement the user need not answer) side by side;
+          * for back-compat we also accept a bare ``explanation`` string (+ an
+            optional ``followups`` list rendered as that question's options), which
+            becomes one non-required question (free-text unless it has followups).
+        """
+        questions = cls._sanitize_clarification_questions(action.get("questions"))
+        # Back-compat explanation: a choice widget only when followups exist.
+        explanation = str(action.get("explanation") or "").strip()
+        if explanation:
+            followups = cls._sanitize_clarification_options(action.get("followups"))
+            explain_q: dict[str, Any] = {
+                "text": explanation,
+                "responseType": "single_choice" if followups else "free_text",
+                "required": False,
+            }
+            if followups:
+                explain_q["options"] = followups
+            questions.append(explain_q)
+        # Nothing usable survived sanitization: raise ValueError for the caller.
+        if not questions:
+            raise ValueError("ask_user action requires non-empty questions[]")
+        return {"questions": questions}
+
+    @classmethod
+    def _normalize_delegate_action(cls, action: dict[str, Any]) -> dict[str, Any]:
+        target = str(action.get("target", "")).strip()
+        if target not in _DELEGATE_TARGETS:
+            raise ValueError(
+                f"delegate action requires 'target' ∈ {_DELEGATE_TARGETS}, got {target!r}"
+            )
+        message = str(action.get("message") or "").strip()
+        raw_options = action.get("options")
+        cleaned: list[str] = []
+        if isinstance(raw_options, list):
+            for opt in raw_options:
+                if isinstance(opt, str) and opt.strip():
+                    cleaned.append(opt.strip())
+        if not cleaned:
+            raise ValueError("delegate action requires non-empty 'options[]'")
+        payload: dict[str, Any] = {"target": target, "options": cleaned[:2]}
+        if message:
+            payload["message"] = message
+        return payload
+
+
+def get_skill() -> CoreSkill:
+    """Factory used by the registry's eager instantiation."""
+    return CoreSkill()
diff --git a/py-src/data_formulator/analyst/skills/core/tools.json b/py-src/data_formulator/analyst/skills/core/tools.json
new file mode 100644
index 00000000..e8a60209
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/core/tools.json
@@ -0,0 +1,163 @@
+[
+  {
+    "type": "function",
+    "function": {
+      "name": "execute_python_script",
+      "description": "Execute a general-purpose Python script in the sandbox. Here you use it to inspect data, compute statistics, transform tables, or verify assumptions before you act — write results to stdout with print() and that output is returned to you (it is NOT shown to the user). The script is for your own analysis, not for producing the final visualization. pandas, numpy, duckdb, sklearn, scipy are available.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "purpose": {
+            "type": "string",
+            "description": "One-sentence description of what this script does and why (shown to user as progress)."
+          },
+          "code": {
+            "type": "string",
+            "description": "Python script to execute. Use print() to surface output."
+          }
+        },
+        "required": ["purpose", "code"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "inspect_source_data",
+      "description": "Get a detailed summary of one or more source tables — schema, field-level statistics, and sample rows.  Cheaper than execute_python_script for basic data inspection.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "table_names": {
+            "type": "array",
+            "items": { "type": "string" },
+            "description": "List of table names from [SOURCE TABLES] to inspect."
+          }
+        },
+        "required": ["table_names"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "visualize",
+      "description": "Commit a data transform + chart: run code producing a DataFrame and render it. The agent observes the result and continues.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "thought": {
+            "type": "string",
+            "description": "Brief rationale for this visualization (not shown to the user)."
+          },
+          "display_instruction": {
+            "type": "string",
+            "description": "≤12 words. State the question or hypothesis the chart investigates — don't recap the chart spec (x/y/color/split are already visible). Wrap a **column** in ** ** if it anchors the question."
+          },
+          "input_tables": {
+            "type": "array",
+            "items": { "type": "string" },
+            "description": "Table names from [SOURCE TABLES] that the code reads."
+          },
+          "code": {
+            "type": "string",
+            "description": "Python code producing a DataFrame assigned to output_variable."
+          },
+          "output_variable": {
+            "type": "string",
+            "description": "snake_case name of the DataFrame variable the code assigns."
+          },
+          "chart": {
+            "type": "object",
+            "description": "Chart spec: {chart_type, encodings:{x,y,...}, config:{}}. chart_type from the chart type reference."
+          },
+          "field_metadata": {
+            "type": "object",
+            "description": "Map of field name -> SemanticType for the output columns."
+          },
+          "field_display_names": {
+            "type": "object",
+            "description": "Map of field name -> human-readable display name for chart axes and table headers."
+          }
+        },
+        "required": ["code", "output_variable", "chart"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ask_user",
+      "description": "Ask the user something and pause for their reply — the run resumes in the same turn with their answer in context. Use this for ANY turn where you want the user to respond: a freeform question, a clarification you need before acting, or an explanation you want them to react to. Freeform is fine (no clickable options required). Prefer this over ending your turn with a plain-text question: plain text ends the run and the user's next message starts a fresh turn without this context, whereas ask_user keeps the conversation going. Reserve plain text (no action) for your final answer when you expect nothing further.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "thought": {
+            "type": "string",
+            "description": "Brief rationale (not shown to the user)."
+          },
+          "questions": {
+            "type": "array",
+            "description": "One or more things to put to the user. Each is either a question that awaits an answer (clarification) or a statement the user need not answer (explanation). A question with no required answer and no options renders as a plain explanation. Ask at most 3.",
+            "items": {
+              "type": "object",
+              "properties": {
+                "text": {
+                  "type": "string",
+                  "description": "The question or explanation. For an explanation, keep it to 1–3 grounded sentences. Wrap a **column** in ** ** to highlight it."
+                },
+                "responseType": {
+                  "type": "string",
+                  "enum": ["single_choice", "free_text"],
+                  "description": "single_choice when options are offered; free_text when the user types a custom answer."
+                },
+                "required": {
+                  "type": "boolean",
+                  "description": "false for an explanation / optional follow-up; true for a clarification the run depends on."
+                },
+                "options": {
+                  "type": "array",
+                  "items": { "type": "string" },
+                  "description": "Plain-text choices, at most 3. Keep them to the few most likely answers — the user can always type a freeform reply, so don't try to enumerate every case. For a clarification these are answers; for an explanation these are short chart-producing follow-up prompts the user might click next (≤8 words each, phrased as the user would say them)."
+                }
+              },
+              "required": ["text"]
+            }
+          }
+        },
+        "required": ["questions"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "delegate",
+      "description": "Hand off to a peer agent (e.g. data loading) when the question needs work outside this agent's scope, then end the run.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "thought": {
+            "type": "string",
+            "description": "Brief rationale (not shown to the user)."
+          },
+          "target": {
+            "type": "string",
+            "enum": ["data_loading"],
+            "description": "The peer agent to hand off to."
+          },
+          "message": {
+            "type": "string",
+            "description": "Short note to the user that you're handing off, e.g. 'I'll hand this to the data loading agent — pick a search:'."
+          },
+          "options": {
+            "type": "array",
+            "items": { "type": "string" },
+            "description": "1–2 seed prompts for the target agent. Each becomes a one-click button (label == seed prompt); if two, make them meaningfully distinct."
+          }
+        },
+        "required": ["target", "options"]
+      }
+    }
+  }
+]
diff --git a/py-src/data_formulator/analyst/skills/report/SKILL.md b/py-src/data_formulator/analyst/skills/report/SKILL.md
new file mode 100644
index 00000000..1397fcd5
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/report/SKILL.md
@@ -0,0 +1,118 @@
+---
+name: report
+description: >-
+  Turn an exploration (threads, findings, charts) into a single Markdown
+  report — note, blog post, executive summary, KPI dashboard, slide brief, or
+  multi-section analytical report, with embedded charts.
+when_to_use: >-
+  The user asks to write up / summarize / report on what they explored, or
+  wants a shareable narrative document built from the charts and findings in
+  the data thread. Not for producing a single new chart (use visualize).
+always_on: false
+tools:
+  - inspect_chart
+actions:
+  - write_report
+---
+
+# Skill: Report writing
+
+You are a data journalist / analyst who creates insightful, well-organized
+reports based on data explorations. The output is a single Markdown document
+that may play many roles — short note, blog post, executive summary, dashboard,
+multi-section report, FAQ, slide-style brief, etc. Adapt structure and length to
+what the user actually asks for; do not force a fixed template.
+
+## Emitting the report (the `write_report` action)
+
+First inspect whatever charts and data you need (see below), then write the
+entire report and commit it by **calling the `write_report` tool** — it is the
+committing action that ends this turn. Its `report` argument carries the
+**full Markdown** of the finished report:
+
+- `report` — the complete report in Markdown: headings, prose, tables, and
+  embedded charts via `![caption](chart://chart_id)`.
+
+Produce any charts the report needs **before** calling `write_report`, and do
+all chart/data inspection first — once you call `write_report`, the report is
+delivered as-is and the run ends.
+
+## Context available to you
+- **[PRIMARY TABLE(S)]** / **[OTHER AVAILABLE TABLES]**: Lightweight schema of datasets.
+- **[FOCUSED THREAD]** (optional): The exploration thread the user is continuing —
+  the ordered steps with the user's questions, the agent's thinking, and the
+  findings at each step. This is the spine of the story you are telling.
+- **[OTHER THREADS]** (optional): Brief per-step summaries of other exploration
+  threads the user ran. These are additional findings worth weaving in.
+- **[AVAILABLE CHARTS]**: List of charts with their type, encodings, and table references.
+
+## Ground the report in the exploration
+The thread context is your most important input. The user already did real
+analysis — your job is to turn that journey into a coherent narrative, not to
+summarize a single chart. Before writing:
+- Read the FOCUSED THREAD and OTHER THREADS to understand the full set of
+  questions asked and findings reached.
+- Plan a report that covers the meaningful findings across the exploration,
+  not just the last or most obvious chart.
+
+## Inspecting charts and data
+You have two inspection tools available the whole time: `inspect_chart` and
+`inspect_source_data`. Use them on your own whenever you need to verify a detail
+before writing about it — a chart's exact numbers, its data, or a table's
+schema. `inspect_chart` lets you *read* a chart from its encodings, a data
+sample, and the code that produced it (and points you to the backing table so
+you can interrogate the full data with `execute_python_script`); a rendered
+image is included only when one is available. Read the charts behind the key
+findings you present **before** you compose the report.
+
+## Write the report
+Write the complete report in Markdown and pass it as the `report` argument of the
+`write_report` tool. Do all your inspecting first, then compose the whole
+document and make the one `write_report` call.
+
+### Embedding charts (REQUIRED FORMAT — do not change this)
+To embed a chart image, use markdown image syntax with a `chart://` URL:
+  ![Caption describing the chart](chart://chart_id)
+
+Example: `![Monthly trade balance trend](chart://chart-123)`
+
+The chart_id must match one from [AVAILABLE CHARTS]. Place each chart embed on
+its own line (it renders as a block). Embed any given chart at most once.
+Keep captions short — one line describing what the chart shows.
+
+### Tables
+For data tables, write standard markdown tables directly:
+| date | value |
+| --- | --- |
+| 2020-01 | -43.5 |
+
+### Style & structure — adapt to the user's request
+The user may ask for any of:
+- a short note or social-style summary (a few sentences, one or two charts),
+- a blog post / narrative report (intro → findings → takeaway),
+- an executive summary (key numbers up top, then context),
+- a KPI dashboard / multi-section overview (headings per topic, multiple charts
+  arranged with short commentary between them),
+- a slide-style brief (compact sections with bullet points and embedded charts),
+- a deeper analytical report with sub-sections, methodology notes, and caveats.
+
+Pick the structure that fits the request and the available material. Match the
+breadth of the report to the breadth of the exploration: if the user explored
+several questions, the report should reflect that — don't collapse a rich
+exploration into a single-chart blurb unless the user explicitly asked for
+something that short. Reasonable defaults if the user is vague:
+- Start with a `# Title` that reflects the topic.
+- Group related findings under `##` (and `###` if useful) headings, typically
+  one section per key finding / thread.
+- Around each embedded chart, briefly explain what it shows and the key insight.
+- Use bullets / short paragraphs / tables where they help; don't pad.
+- Close with a brief takeaway or summary section if the report is more than a
+  few paragraphs. For very short outputs (notes, single-chart blurbs), a closing
+  summary is optional.
+
+### Guardrails
+- Write in Markdown. Keep prose tight; let the data and charts carry the weight.
+- Stay faithful to the data — do not invent numbers, comparisons, or causation
+  that the data does not actually support.
+- It is fine to flag uncertainty ("based on the sample shown…") when appropriate.
+- Embed every chart you discuss; don't reference a chart in prose without showing it.
diff --git a/py-src/data_formulator/analyst/skills/report/__init__.py b/py-src/data_formulator/analyst/skills/report/__init__.py
new file mode 100644
index 00000000..18189ba1
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/report/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""report skill — streams a Markdown report from an exploration.
+
+``SKILL.md`` holds the instructions/action contract; ``skill.py`` exposes
+``get_skill()`` (the executable handler, ported from ``agent_report_gen.py``).
+"""
diff --git a/py-src/data_formulator/analyst/skills/report/skill.py b/py-src/data_formulator/analyst/skills/report/skill.py
new file mode 100644
index 00000000..d94be32f
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/report/skill.py
@@ -0,0 +1,221 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""report skill — turns an exploration into a Markdown report.
+
+The analyst shell decides to write a report (the ``write_report`` **action**),
+then dispatches here. The model assembles the report in the **main agent loop**:
+it loads this skill, inspects whatever charts/data it needs via the
+skill-private ``inspect_chart`` tool (plus the always-on ``inspect_source_data``),
+and then emits ``write_report`` — a committing tool call carrying the **full
+Markdown** in its ``report`` argument.
+
+``write_report`` is the one *streaming* action (``stream_field="report"`` on
+the ``report`` channel — declared via ``streaming_actions`` below). When the
+model writes the report as that argument, the **agent loop** forwards it live as
+incremental ``report``-channel ``text_delta``s as the tokens arrive
+(design-docs/36 §5: the agent owns the generic forwarding envelope, the skill
+stays declarative). This handler is then the buffered *fallback*: if the report
+was not streamed (e.g. a provider without tool-arg streaming), it validates the
+report, runs defense-in-depth cleanup, and yields the whole report as a single
+``report``-channel event. Either way the emitted events are identical in shape —
+live streaming is just the *same* event with more, smaller deltas, so the shell
+and frontend contract is unchanged. ``write_report`` does not end the run on its
+own — the shell feeds the returned observation back and the agent stops on the
+next turn by committing no action.
+
+  - ``{"type": "action", "action": "write_report"}``              — commitment
+  - ``{"type": "text_delta", "channel": "report", "content": …}`` — report prose
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+from typing import Any, Generator
+
+import pandas as pd
+
+from data_formulator.analyst.skills.base import (
+    Event,
+    SkillContext,
+    ToolResult,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ── Leaked-tool-syntax stripping (defense in depth) ───────────────────────
+
+_LEAK_SPECIAL_TOKEN = re.compile(r"<\|[^|>]*\|>")
+_LEAK_TOOLCALL = re.compile(
+    r"(?:\bcommentary\b\s*)?\bto\s*=\s*functions\.[A-Za-z0-9_]+"
+    r"[\s\S]*?\{[\s\S]*?\}",
+)
+
+
+def _strip_leaked_tool_syntax(text: str) -> str:
+    """Scrub leaked tool-call headers (plus the JSON args that follow them) and
+    ``<|…|>`` special tokens out of ``text``; anything else is returned as-is."""
+    # Tool-call spans go first, then whatever special tokens are still left.
+    without_calls = _LEAK_TOOLCALL.sub("", text)
+    return _LEAK_SPECIAL_TOKEN.sub("", without_calls)
+
+
+# ---------------------------------------------------------------------------
+# Skill
+# ---------------------------------------------------------------------------
+
+
+class ReportWritingSkill:
+    """The report skill processor: the ``inspect_chart`` tool handler and the
+    ``write_report`` action handler.
+
+    Tool/action *schemas* live in ``report/tools.json`` and the skill's
+    metadata in ``SKILL.md`` frontmatter; this class is purely behaviour. The
+    ``write_report`` action streams its ``report`` argument on the ``report``
+    channel; the agent loop owns that forwarding envelope and this handler is
+    the buffered fallback (see ``handle_action``).
+    """
+
+    # Streaming declaration (design-docs/36 §5): ``write_report`` streams its
+    # ``report`` argument live on the ``report`` channel. The agent reads this
+    # via ``registry.action_stream_spec`` to forward the argument as the model
+    # writes it; behaviour (which arg, which channel) lives here in code, not in
+    # the JSON schema sent to the model.
+    streaming_actions = {"write_report": ("report", "report")}
+
+    # ------------------------------------------------------------------
+    # Tool handler (inspection, called by the shell's tool loop)
+    # ------------------------------------------------------------------
+
+    def handle_tool(
+        self,
+        name: str,
+        args: dict[str, Any],
+        ctx: SkillContext,
+    ) -> ToolResult:
+        if name != "inspect_chart":
+            return ToolResult(text=f"report has no tool '{name}'.")
+        charts: list[dict[str, Any]] = (ctx.payload or {}).get("charts") or []
+        text, images = self._handle_inspect_chart(args.get("chart_ids", []), charts)
+        return ToolResult(text=text, images=tuple(images))
+
+    # ------------------------------------------------------------------
+    # Action handler (buffered fallback — delivers the finished report)
+    #
+    # When the agent loop streamed the ``report`` argument live, it already
+    # emitted the ``action`` + ``report``-channel ``text_delta`` events and
+    # suppresses the duplicates this handler yields below; this handler still
+    # runs to validate and return the observation. On a provider without
+    # tool-arg streaming nothing was forwarded, so these yields are what the
+    # frontend receives — the same events, buffered.
+    # ------------------------------------------------------------------
+
+    def handle_action(
+        self,
+        action: str,
+        spec: dict[str, Any],
+        ctx: SkillContext,
+    ) -> Generator[Event, None, str | None]:
+        if action != "write_report":
+            yield {
+                "type": "error",
+                "message": f"report cannot handle action '{action}'.",
+                "message_code": "agent.unknownAction",
+            }
+            return f"report cannot handle action '{action}'."
+
+        report = str(spec.get("report") or "").strip()
+        if not report:
+            msg = "write_report action requires a non-empty 'report'."
+            yield {
+                "type": "error",
+                "message": msg,
+                "message_code": "agent.parseActionFailed",
+            }
+            return msg
+
+        # Announce the commitment (mirrors how visualize emits an action event).
+        yield {
+            "type": "action",
+            "action": "write_report",
+        }
+
+        # Buffered delivery: emit the whole report as a single ``report``-channel
+        # event. Streaming later is the same event with more, smaller deltas.
+        yield {
+            "type": "text_delta",
+            "channel": "report",
+            "content": _strip_leaked_tool_syntax(report),
+        }
+
+        return "[REPORT DELIVERED] The report was written and shown to the user."
+
+    def _handle_inspect_chart(
+        self,
+        chart_ids: list[str],
+        charts: list[dict[str, Any]],
+    ) -> tuple[str, list[str]]:
+        """Inspect charts by *reading their data*, not by rendering them.
+
+        The agent "reads" a chart from its encodings + sample rows (+ the code
+        that produced it), which it can further interrogate with
+        ``execute_python_script``. This avoids fragile server-side rasterization
+        and the multi-modal round-trip. A rendered image is attached **only when
+        one is already supplied** (``chart_image`` — e.g. a pre-existing chart's
+        cached PNG forwarded by the frontend); run-created charts carry none.
+
+        Returns ``(text_summary, image_urls)`` where ``image_urls`` is the list
+        of optional base64 PNG data URLs. Images are returned separately so the
+        caller can attach them as a follow-up vision message — tool-result
+        messages cannot carry image content on most providers.
+
+        Tolerant of malformed input: a string ``chart_ids`` is treated as one id,
+        entries without ``chart_id`` do not raise, non-dict ``chart_data`` is skipped.
+        """
+        results = []
+        image_urls: list[str] = []
+        for chart_id in ([chart_ids] if isinstance(chart_ids, str) else chart_ids or []):
+            chart = next((c for c in charts if c.get("chart_id") == chart_id), None)
+            if not chart:
+                results.append(f"Chart {chart_id}: not found")
+                continue
+            parts = [f"Chart: {chart_id}"]
+            parts.append(f"  Type: {chart.get('chart_type', 'Unknown')}")
+
+            encodings = chart.get("encodings", {})
+            if encodings:
+                enc_str = ", ".join(f"{k}: {v}" for k, v in encodings.items() if v)
+                parts.append(f"  Encodings: {enc_str}")
+
+            if chart.get("code"):
+                parts.append(f"  Code:\n```python\n{chart['code']}\n```")
+
+            chart_data = chart.get("chart_data")
+            if isinstance(chart_data, dict) and chart_data.get("rows"):
+                df = pd.DataFrame(chart_data["rows"])
+                parts.append(f"  Data ({len(df)} rows, {len(df.columns)} cols):")
+                parts.append(f"  Columns: {', '.join(map(str, df.columns))}")
+                parts.append(f"  Sample:\n{df.head(5).to_string()}")
+                if chart_data.get("name"):
+                    parts.append(
+                        f"  To analyze the full chart data, run execute_python_script "
+                        f"against table '{chart_data['name']}'."
+                    )
+
+            # Image is strictly optional: only a frontend-supplied render is used.
+            image = chart.get("chart_image")
+            if image:
+                image_urls.append(image)
+                parts.append("  [Chart image attached below for visual confirmation]")
+            else:
+                parts.append("  [No image — read the chart from its encodings + data above]")
+            results.append("\n".join(parts))
+
+        return "\n\n".join(results), image_urls
+
+
+def get_skill() -> ReportWritingSkill:
+    """Return a fresh ``ReportWritingSkill``; the registry calls this factory eagerly."""
+    return ReportWritingSkill()
diff --git a/py-src/data_formulator/analyst/skills/report/tools.json b/py-src/data_formulator/analyst/skills/report/tools.json
new file mode 100644
index 00000000..19e6e66c
--- /dev/null
+++ b/py-src/data_formulator/analyst/skills/report/tools.json
@@ -0,0 +1,37 @@
+[
+  {
+    "type": "function",
+    "function": {
+      "name": "inspect_chart",
+      "description": "Read one or more charts. Returns each chart's encodings, a sample of its data, and the transformation code that created it so you can reason about what it shows (use execute_python_script on the backing table for the full data). A rendered PNG is included only when one is available.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "chart_ids": {
+            "type": "array",
+            "items": { "type": "string" },
+            "description": "List of chart IDs from [AVAILABLE CHARTS] to inspect."
+          }
+        },
+        "required": ["chart_ids"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "write_report",
+      "description": "Deliver a Markdown report and end the run. `report` is the full report text (embed charts with ![caption](chart://chart_id)).",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "report": {
+            "type": "string",
+            "description": "The full Markdown report text. Embed charts with ![caption](chart://chart_id) referencing IDs from [AVAILABLE CHARTS]."
+          }
+        },
+        "required": ["report"]
+      }
+    }
+  }
+]
diff --git a/py-src/data_formulator/analyst/tools.py b/py-src/data_formulator/analyst/tools.py
new file mode 100644
index 00000000..5713105d
--- /dev/null
+++ b/py-src/data_formulator/analyst/tools.py
@@ -0,0 +1,151 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Inspection tools for the analyst agent.
+
+Tools are parallel-safe, internal, side-effect-free capabilities the agent may
+call freely within a turn to gather information before committing to a single
+user-visible action. See ``design-docs/35`` §4.1.
+
+  - ``execute_python_script`` — run a general-purpose Python script in the
+    sandbox to inspect/compute (stdout returned).
+  - ``inspect_source_data`` — schema + stats + sample rows for source tables.
+  - ``load_skill`` — pull a skill's ``SKILL.md`` body into context, unlocking
+    its gated actions (progressive disclosure; reading a doc is read-only).
+
+``inspect_chart`` is a skill-private tool used by report-style skills and is
+contributed by those skills rather than living in the always-on tool set.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+EXECUTE_PYTHON_SCRIPT_TOOL: dict[str, Any] = {
+    "type": "function",
+    "function": {
+        "name": "execute_python_script",
+        "description": (
+            "Execute a general-purpose Python script in the sandbox. Here you "
+            "use it to inspect data, compute statistics, or verify assumptions "
+            "before you act — print() to stdout, which is returned to you and is "
+            "not shown to the user. pandas, numpy, duckdb, sklearn, scipy are available."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "purpose": {
+                    "type": "string",
+                    "description": "One-sentence description of what this script does and why (shown to user as progress).",
+                },
+                "code": {
+                    "type": "string",
+                    "description": "Python script to execute. Use print() to surface output.",
+                },
+            },
+            "required": ["purpose", "code"],
+        },
+    },
+}
+
+INSPECT_SOURCE_DATA_TOOL: dict[str, Any] = {
+    "type": "function",
+    "function": {
+        "name": "inspect_source_data",
+        "description": (
+            "Get a detailed summary of one or more source tables — schema, "
+            "field-level statistics, and sample rows.  Cheaper than "
+            "execute_python_script for basic data inspection."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "table_names": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "List of table names from [SOURCE TABLES] to inspect.",
+                },
+            },
+            "required": ["table_names"],
+        },
+    },
+}
+
+
+def build_load_skill_tool(skill_names: list[str]) -> dict[str, Any]:
+    """Return the function-calling schema for the ``load_skill`` tool.
+
+    Its single required ``name`` argument is a string; when ``skill_names`` is
+    non-empty it is additionally restricted to those names via ``enum``.
+    Loading a skill pulls its ``SKILL.md`` body into context and unlocks the
+    gated actions it declares — a read-only, idempotent step, hence a tool.
+    """
+    summary = (
+        "Load a skill's instructions into context so you can use the "
+        "actions it unlocks. Call this BEFORE emitting a gated action "
+        "(e.g. load_skill('report') before write_report)."
+    )
+    name_arg: dict[str, Any] = {
+        "type": "string",
+        "description": "The skill to load (unlocks the actions it declares).",
+        **({"enum": list(skill_names)} if skill_names else {}),
+    }
+    parameters = {
+        "type": "object",
+        "properties": {"name": name_arg},
+        "required": ["name"],
+    }
+    return {
+        "type": "function",
+        "function": {
+            "name": "load_skill", "description": summary, "parameters": parameters,
+        },
+    }
+
+
+def build_tools(
+    skill_names: list[str],
+    extra_tools: list[dict[str, Any]] | None = None,
+    action_tools: list[dict[str, Any]] | None = None,
+) -> list[dict[str, Any]]:
+    """Build the per-turn list of tool schemas handed to the LLM.
+
+    The result is ordered in three groups (see ``design-docs/36``):
+
+      1. ``extra_tools`` — the inspection tools (``execute_python_script`` /
+         ``inspect_source_data`` / a loaded skill's own tools) contributed by
+         the always-on ``core`` skill and any loaded skills. Parallel-safe,
+         non-committing.
+      2. ``load_skill`` — the progressive-disclosure switch; included only
+         when ``skill_names`` is non-empty, with its ``name`` enum built from
+         those names.
+      3. ``action_tools`` — the committing surfaces a turn may end with
+         (e.g. ``visualize`` / ``delegate``, plus ``write_report`` once the
+         report skill is loaded). The agent, not this function, enforces the
+         one-per-turn cardinality guard.
+
+    Duplicate function names are dropped, keeping the first occurrence, as a
+    safety net (a clash is also warned at registry-build time).
+    """
+    ordered = [*(extra_tools or [])]
+    if skill_names:
+        ordered.append(build_load_skill_tool(skill_names))
+    ordered += action_tools or []
+
+    unique: list[dict[str, Any]] = []
+    taken: set[str] = set()
+    for schema in ordered:
+        fn_name = schema.get("function", {}).get("name", "")
+        # Schemas without a function name are never treated as duplicates.
+        if not fn_name or fn_name not in taken:
+            taken.add(fn_name)
+            unique.append(schema)
+    return unique
+
+
+__all__ = [
+    "EXECUTE_PYTHON_SCRIPT_TOOL",
+    "INSPECT_SOURCE_DATA_TOOL",
+    "build_load_skill_tool",
+    "build_tools",
+]
diff --git a/py-src/data_formulator/routes/agents.py b/py-src/data_formulator/routes/agents.py
index cdbf8808..b9e546f0 100644
--- a/py-src/data_formulator/routes/agents.py
+++ b/py-src/data_formulator/routes/agents.py
@@ -39,6 +39,7 @@
 from data_formulator.knowledge.store import KnowledgeStore
 
 from data_formulator.agents.data_agent import DataAgent
+from data_formulator.analyst.agent import AnalystAgent
 from data_formulator.agents.agent_language import build_language_instruction
 from data_formulator.security.sanitize import classify_llm_error, sanitize_error_message
 from data_formulator.error_handler import json_ok, stream_preflight_error, classify_and_wrap_llm_error
@@ -531,6 +532,138 @@ def generate():
     )
 
 
+@agent_bp.route('/analyst-streaming', methods=['GET', 'POST'])
+def analyst_streaming():
+    """Unified AnalystAgent streaming endpoint (design-docs/35 + /36).
+
+    Parallel to ``/data-agent-streaming`` while the unified agent is validated
+    end-to-end; the legacy data-agent and report routes stay live and untouched.
+    The single ``AnalystAgent`` subsumes both data exploration and report
+    writing: it gathers with inspection tools, commits one action per turn
+    (``visualize`` / ``interact`` / ``delegate`` / ``write_report``), and streams
+    the report live on the ``report`` channel (same ``text_delta`` event the
+    frontend already routes).
+
+    Streams newline-delimited JSON. Terminal events: ``completion`` (the run
+    finished or hit its budget), ``interact`` (a question widget pauses the run),
+    and ``error``. To resume after ``interact`` the client sends ``trajectory``
+    (from the event) plus ``user_question`` (the assembled reply).
+
+    The JSON body must be an object carrying ``model`` and ``input_tables``;
+    every other field is optional. Malformed requests are rejected before
+    streaming starts via ``stream_preflight_error``; exceptions raised while
+    the agent runs are emitted in-stream through ``stream_error_event``.
+    """
+    from data_formulator.error_handler import stream_error_event
+
+    if not request.is_json:
+        return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "Invalid request format"))
+
+    content = request.get_json()
+    if not isinstance(content, dict):
+        return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "Invalid request format"))
+
+    identity_id = get_identity_id()
+    if not identity_id:
+        return stream_preflight_error(AppError(ErrorCode.AUTH_REQUIRED, "Identity ID required"))
+
+    # ``model`` and ``input_tables`` are indexed unconditionally below; reject a
+    # request that omits them up front instead of failing on a bare KeyError.
+    missing = [key for key in ("model", "input_tables") if key not in content]
+    if missing:
+        return stream_preflight_error(AppError(
+            ErrorCode.INVALID_REQUEST, f"Missing required field(s): {', '.join(missing)}"))
+
+    client = get_client(content['model'])
+    workspace = get_workspace(identity_id)
+
+    input_tables = content["input_tables"]
+    user_question = content.get("user_question", "")
+    max_iterations = content.get("max_iterations", 5)
+    max_repair_attempts = content.get("max_repair_attempts", 1)
+    agent_exploration_rules = content.get("agent_exploration_rules", "")
+    agent_coding_rules = content.get("agent_coding_rules", "")
+    focused_thread = content.get("focused_thread", None)
+    other_threads = content.get("other_threads", None)
+    primary_tables = content.get("primary_tables", None)
+    attached_images = content.get("attached_images", None)
+    charts = content.get("charts", None)
+    resume_trajectory = content.get("trajectory", None)
+    completed_step_count = content.get("completed_step_count", 0)
+
+    # A resume must carry the user's reply: it is what gets appended to the trajectory.
+    if resume_trajectory is not None and not str(user_question or "").strip():
+        return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "user_question is required to resume after interaction"))
+
+    logger.setLevel(logging.INFO)
+    logger.info("# analyst-streaming request")
+    logger.debug("== input tables ===>")
+    for table in input_tables:
+        logger.debug(f"===> Table: {table['name']}")
+    logger.debug(f"== user question ===> {user_question}")
+    if attached_images:
+        logger.info(f"== attached_images ===> {len(attached_images)} image(s), sizes: {[len(img) for img in attached_images]}")
+
+    language_instruction = get_language_instruction(mode="full")
+
+    def generate():
+        try:
+            agent = AnalystAgent(
+                client=client,
+                workspace=workspace,
+                agent_exploration_rules=agent_exploration_rules,
+                agent_coding_rules=agent_coding_rules,
+                language_instruction=language_instruction,
+                max_iterations=max_iterations,
+                max_repair_attempts=max_repair_attempts,
+                identity_id=identity_id,
+            )
+
+            trajectory = None
+            if resume_trajectory:
+                # Append the user's reply (already assembled by the frontend
+                # from option clicks + any typed instructions) as a normal user
+                # message; the LLM correlates the selections back to the
+                # questions in the immediately preceding assistant turn.
+                trajectory = list(resume_trajectory)
+                trajectory.append({
+                    "role": "user",
+                    "content": user_question,
+                })
+                logger.debug("== resuming after interaction ===>")
+
+            for event in agent.run(
+                input_tables=input_tables,
+                user_question=user_question,
+                focused_thread=focused_thread,
+                other_threads=other_threads,
+                trajectory=trajectory,
+                completed_step_count=completed_step_count,
+                primary_tables=primary_tables,
+                attached_images=attached_images,
+                charts=charts,
+            ):
+                yield json.dumps(event, ensure_ascii=False) + '\n'
+
+                # ``completion`` and ``interact`` are terminal: stop consuming the agent.
+                if event.get("type") in ("completion", "interact"):
+                    break
+
+        except Exception as e:
+            logger.error("Error in analyst-streaming", exc_info=e)
+            yield stream_error_event(classify_and_wrap_llm_error(e))
+        finally:
+            # Always restore the module logger's level. A bare statement after
+            # the try block is skipped when the stream is closed early, because
+            # GeneratorExit is not an ``Exception`` subclass.
+            logger.setLevel(logging.WARNING)
+
+    return Response(
+        stream_with_context(_with_warnings(generate())),
+        mimetype='application/x-ndjson',
+    )
+
+
 @agent_bp.route('/refine-data', methods=['GET', 'POST'])
 def refine_data():
     if not request.is_json:
diff --git a/src/app/App.tsx b/src/app/App.tsx
index 22d76510..b63c0986 100644
--- a/src/app/App.tsx
+++ b/src/app/App.tsx
@@ -162,6 +162,32 @@ const TopNavButton: FC<{ to: string; label: string; selected: boolean }> = ({ to
     </Button>
 );
 
+// Dev-only toggle to route the data-agent chat through the unified
+// AnalystAgent (design-docs/35/36). Source of truth is localStorage
+// (`df_useAnalystAgent`), which `exploreFromChat` reads fresh per run, so the
+// switch stays in sync without any shared store wiring.
+const AnalystAgentToggle: FC = () => {
+    const storageKey = 'df_useAnalystAgent';
+    const [enabled, setEnabled] = useState(() => localStorage.getItem(storageKey) === '1');
+    // Persist the flag as '1' while on; drop the key entirely when off.
+    const handleToggle = (event: { target: { checked: boolean } }) => {
+        const checked = event.target.checked;
+        setEnabled(checked);
+        if (checked) {
+            localStorage.setItem(storageKey, '1');
+        } else {
+            localStorage.removeItem(storageKey);
+        }
+    };
+    const caption = (
+        <Typography sx={{ fontSize: '0.7rem', color: 'text.secondary' }}>Analyst</Typography>
+    );
+    const toggle = <Switch size="small" checked={enabled} onChange={handleToggle} />;
+    return (
+        <FormControlLabel sx={{ ml: 0.5, mr: 0.5 }} control={toggle} label={caption} />
+    );
+};
+
 declare module '@mui/material/styles' {
     interface PaletteColor {
         bgcolor?: string;
@@ -838,6 +864,7 @@ const AppShell: FC = () => {
                         )}
                         {isAppPage && (
                             <Box sx={{ display: 'flex', ml: 'auto', fontSize: 14, alignItems: 'center' }}>
+                                <AnalystAgentToggle />
                                 <LanguageSwitcher />
                                 <ConfigDialog />
                                 <Divider orientation="vertical" variant="middle" flexItem />
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index f08fec27..8482786c 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -177,6 +177,11 @@ export interface DataFormulatorState {
 
     focusedId: FocusedId;
 
+    // Draft id of a completed plain-text answer surfaced as an explanation card
+    // above the chat box. Auto-set when a Q&A run completes, cleared when the
+    // user focuses another item or sends a followup (see `setFocused`).
+    focusedAnswerDraftId: string | undefined;
+
     viewMode: 'editor' | 'report';
 
     chartSynthesisInProgress: string[];
@@ -290,6 +295,7 @@ const initialState: DataFormulatorState = {
 
     focusedDataCleanBlockId: undefined,
     focusedId: undefined,
+    focusedAnswerDraftId: undefined,
 
     viewMode: 'editor',
 
@@ -890,6 +896,7 @@ export const dataFormulatorSlice = createSlice({
                 conceptShelfItems: saved.conceptShelfItems || [],
                 focusedDataCleanBlockId: saved.focusedDataCleanBlockId || undefined,
                 focusedId: saved.focusedId || undefined,
+                focusedAnswerDraftId: undefined,
                 config: { ...initialState.config, ...(saved.config || {}) },
                 dataCleanBlocks: saved.dataCleanBlocks || [],
                 dataLoadingChatMessages: saved.dataLoadingChatMessages || [],
@@ -1669,6 +1676,8 @@ export const dataFormulatorSlice = createSlice({
         setFocused: (state, action: PayloadAction<FocusedId>) => {
             const payload = action.payload;
             state.focusedId = payload;
+            // Focusing any concrete item dismisses a lingering answer card.
+            state.focusedAnswerDraftId = undefined;
 
             if (payload?.type === 'chart' && state.viewMode == 'report') {
                 state.viewMode = 'editor';
@@ -1687,6 +1696,9 @@ export const dataFormulatorSlice = createSlice({
         setFocusedDataCleanBlockId: (state, action: PayloadAction<{blockId: string, itemId: number} | undefined>) => {
             state.focusedDataCleanBlockId = action.payload;
         },
+        setFocusedAnswer: (state, action: PayloadAction<string | undefined>) => {
+            state.focusedAnswerDraftId = action.payload;
+        },
         changeChartRunningStatus: (state, action: PayloadAction<{chartId: string, status: boolean}>) => {
             if (action.payload.status) {
                 state.chartSynthesisInProgress = [...new Set([...state.chartSynthesisInProgress, action.payload.chartId])]
@@ -1853,13 +1865,16 @@ export const dataFormulatorSlice = createSlice({
                 state.viewMode = 'editor';
             }
         },
-        updateGeneratedReportContent: (state, action: PayloadAction<{ id: string; content: string; status?: GeneratedReport['status']; title?: string }>) => {
-            const { id, content, status, title } = action.payload;
+        updateGeneratedReportContent: (state, action: PayloadAction<{ id: string; content: string; status?: GeneratedReport['status']; title?: string; triggerTableId?: string }>) => {
+            const { id, content, status, title, triggerTableId } = action.payload;
             const report = state.generatedReports.find(r => r.id === id);
             if (report) {
                 report.content = content;
                 if (title) report.title = title;
                 if (status) report.status = status;
+                // Re-anchor the report to the latest table produced during the
+                // run so it renders against the newest thread item (like charts).
+                if (triggerTableId) report.triggerTableId = triggerTableId;
                 // Once real report text starts streaming, switch the indicator to
                 // the "writing" phase. When generation ends, clear transient state.
                 if (content) report.generatingPhase = 'writing';
diff --git a/src/app/utils.tsx b/src/app/utils.tsx
index 7688bd80..cd10b87f 100644
--- a/src/app/utils.tsx
+++ b/src/app/utils.tsx
@@ -34,6 +34,7 @@ export function getUrls() {
         DERIVE_DATA: `/api/agent/derive-data`,
         REFINE_DATA: `/api/agent/refine-data`,
         DATA_AGENT_STREAMING: `/api/agent/data-agent-streaming`,
+        ANALYST_STREAMING: `/api/agent/analyst-streaming`,
 
         // these functions involves database
         UPLOAD_DB_FILE: `/api/tables/upload-db-file`,
diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index aea3505d..2e6c1de6 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -434,6 +434,8 @@
     "creatingChart": "creating chart...",
     "inspectingData": "inspecting source data...",
     "inspectedData": "inspected source data",
+    "inspectingChart": "reading chart...",
+    "loadingSkill": "loading skill: {{skill}}...",
     "rulesLoaded": "Reading rules: {{rules}}",
     "knowledgeLoaded": "Reading knowledge: {{knowledge}}",
     "searching": "searching...",
@@ -558,6 +560,7 @@
     "clarificationQuestionLabel": "{{index}}.",
     "optionalClarification": "(optional)",
     "freeTextClarificationPlaceholder": "Type your answer...",
+    "customAnswerPlaceholder": "Or type your own answer...",
     "freeTextClarificationHint": "Type your answer in the chat box below.",
     "directClarificationLabel": "Or explain your choice directly:",
     "directClarificationPlaceholder": "Describe what you want the agent to do...",
diff --git a/src/i18n/locales/zh/common.json b/src/i18n/locales/zh/common.json
index 354c3bbc..bf8bed1c 100644
--- a/src/i18n/locales/zh/common.json
+++ b/src/i18n/locales/zh/common.json
@@ -433,6 +433,8 @@
     "runningCode": "运行代码中...",
     "creatingChart": "生成图表中...",
     "inspectingData": "检查数据源中...",
+    "inspectingChart": "读取图表中...",
+    "loadingSkill": "加载技能：{{skill}}...",
     "inspectedData": "已检查数据源",
     "rulesLoaded": "读取规则：{{rules}}",
     "knowledgeLoaded": "读取知识：{{knowledge}}",
@@ -609,6 +611,7 @@
     "clarificationQuestionLabel": "{{index}}.",
     "optionalClarification": "（可选）",
     "freeTextClarificationPlaceholder": "输入你的回答...",
+    "customAnswerPlaceholder": "或输入你自己的回答...",
     "freeTextClarificationHint": "请在下方聊天框中输入你的回答。",
     "directClarificationLabel": "或直接说明你的选择：",
     "directClarificationPlaceholder": "描述你希望 Agent 怎么做...",
diff --git a/src/lib/agents-chart/vegalite/templates/bar.ts b/src/lib/agents-chart/vegalite/templates/bar.ts
index a5c23352..7e4a6040 100644
--- a/src/lib/agents-chart/vegalite/templates/bar.ts
+++ b/src/lib/agents-chart/vegalite/templates/bar.ts
@@ -3,6 +3,7 @@
 
 import { ChartTemplateDef, ChartPropertyDef, EncodingActionDef } from '../../core/types';
 import { makeSortAction } from '../../core/encoding-actions';
+import { snapToBoundHeuristic } from '../../core/field-semantics';
 import {
     defaultBuildEncodings, setMarkProp, adjustBarMarks, adjustRectTiling,
     detectBandedAxisFromSemantics, detectBandedAxisForceDiscrete,
@@ -375,6 +376,17 @@ export const heatmapDef: ChartTemplateDef = {
                 spec.encoding.color.scale.domain = [-sym, sym];
                 spec.encoding.color.scale.domainMid = 0;
             } else if (intrinsicDomain) {
+                // Sequential color with a known intrinsic domain (e.g. a
+                // Percentage field with [0, 100]). Don't force the full
+                // theoretical range — that washes out the scale when every
+                // value is concentrated low (all cells look pale because the
+                // legend stretches to 100%). Snap to an intrinsic bound only
+                // when the data actually approaches it; otherwise fit the
+                // color domain to the observed data range. Mirrors the
+                // snap-to-bound behaviour already used on the x/y axes.
+                const snapped = snapToBoundHeuristic(intrinsicDomain, colorVals);
+                effectiveMin = snapped?.min ?? observedMin;
+                effectiveMax = snapped?.max ?? observedMax;
                 spec.encoding.color.scale.domain = [effectiveMin, effectiveMax];
             }
         }
diff --git a/src/views/AgentPausePanel.tsx b/src/views/AgentPausePanel.tsx
index f902752e..4ea481c6 100644
--- a/src/views/AgentPausePanel.tsx
+++ b/src/views/AgentPausePanel.tsx
@@ -8,19 +8,21 @@
  * differ only in their body content and the callback wired to the primary
  * action:
  *
- *  - `ClarificationPanel` — agent asks a question (warning palette).
- *  - `ExplanationPanel`   — agent gives an answer with follow-ups (info palette).
+ *  - `ClarificationPanel` — agent asks a question.
+ *  - `ExplanationPanel`   — agent gives an answer with follow-ups.
  *    (rendered by `ClarificationPanel` with `variant="explain"`)
  *  - `DelegatePanel`      — agent recommends handing off to a peer
  *                           agent (Data Loading or Report Gen).
  *
+ * Clarify and explain share one softly tinted chrome, each in its own hue
+ * (clarify = warning, explain = info); the submit button stays primary.
  * Keeping them in one file makes shared styling/layout tweaks (header
  * spacing, palette use, collapse animation) trivial to maintain.
  */
 
 import React, { FC, ReactNode, useEffect, useRef, useState } from 'react';
 import {
-    Box, Collapse, IconButton, Tooltip, Typography, useTheme,
+    Box, Collapse, IconButton, InputAdornment, TextField, Tooltip, Typography, useTheme,
 } from '@mui/material';
 import { alpha } from '@mui/material/styles';
 import SearchIcon from '@mui/icons-material/Search';
@@ -28,6 +30,8 @@ import DescriptionOutlinedIcon from '@mui/icons-material/DescriptionOutlined';
 import DeleteOutlineIcon from '@mui/icons-material/DeleteOutline';
 import UnfoldLessIcon from '@mui/icons-material/UnfoldLess';
 import UnfoldMoreIcon from '@mui/icons-material/UnfoldMore';
+import ArrowForwardRoundedIcon from '@mui/icons-material/ArrowForwardRounded';
+import CheckRoundedIcon from '@mui/icons-material/CheckRounded';
 import { useTranslation } from 'react-i18next';
 import { useDispatch } from 'react-redux';
 import { dfActions } from '../app/dfSlice';
@@ -37,7 +41,7 @@ import {
     ClarificationResponse,
     DelegateTarget,
 } from '../components/ComponentType';
-import { renderFieldHighlights } from './InteractionEntryCard';
+import { renderFieldHighlights, CompactMarkdown } from './InteractionEntryCard';
 
 // ---------------------------------------------------------------------------
 // Shared shell
@@ -89,21 +93,16 @@ const AgentPauseShell: FC<AgentPauseShellProps> = ({
     const theme = useTheme();
     const [minimized, setMinimized] = useState(false);
 
-    // Chrome is either neutral greyscale (no accent) or a soft tinted fill
-    // in the variant's semantic hue (clarify=warning, explain/suggest=primary).
-    // The tint is intentionally faint so the panel sits quietly above the
-    // chat input — interactive affordances (option chips, CTAs) still carry
-    // the strongest color.
-    const tinted = !!accentColor;
-    const panelBg = tinted
-        ? alpha(accentColor!, 0.05)
-        : alpha(theme.palette.text.primary, 0.03);
-    const panelBorder = tinted
-        ? alpha(accentColor!, 0.18)
-        : alpha(theme.palette.text.primary, 0.10);
-    const panelHover = tinted
-        ? alpha(accentColor!, 0.09)
-        : alpha(theme.palette.text.primary, 0.04);
+    // Chrome is a soft tinted fill in the accent hue. When no explicit accent
+    // is given the panel falls back to a faint wash of the theme's SECONDARY
+    // color — a different, theme-derived hue from the primary blue used by the
+    // chat affordances, so the panel reads as its own subtle surface without
+    // echoing the primary CTA color. Interactive affordances (chips, CTAs)
+    // keep the primary hue for the strongest color.
+    const fillAccent = accentColor ?? theme.palette.secondary.main;
+    const panelBg = alpha(fillAccent, 0.05);
+    const panelBorder = alpha(fillAccent, 0.18);
+    const panelHover = alpha(fillAccent, 0.09);
     const primaryColor = theme.palette.primary.main;
 
     // Reset minimize when the underlying pause changes so a brand-new
@@ -226,9 +225,9 @@ const AgentPauseShell: FC<AgentPauseShellProps> = ({
 interface ClarificationPanelProps {
     questions: ClarificationQuestion[];
     /**
-     * 'clarify' (default) — agent is asking the user a question (warning palette).
+     * 'clarify' (default) — agent is asking the user a question.
      * 'explain'           — agent gave an answer; options are suggested chart
-     *                       follow-ups the user can click (info palette).
+     *                       follow-ups the user can click.
      */
     variant?: 'clarify' | 'explain';
     /**
@@ -239,7 +238,16 @@ interface ClarificationPanelProps {
      * (e.g. after all questions are answered).
      */
     selectedAnswers?: Record<number, ClarificationResponse>;
-    onSelectAnswer?: (questionIndex: number, response: ClarificationResponse) => void;
+    /**
+     * Record an answer for a question. `autoSubmit` (default true) lets the
+     * caller distinguish an explicit pick (an option click — which may
+     * auto-submit the whole panel once every question is answered) from an
+     * implicit one (text recorded live as the user types — which stores the
+     * answer but must never trigger a submission).
+     */
+    onSelectAnswer?: (questionIndex: number, response: ClarificationResponse, autoSubmit?: boolean) => void;
+    /** Clear a question's recorded answer (e.g. the user edits its field). */
+    onClearAnswer?: (questionIndex: number) => void;
     onSubmit: (responses: ClarificationResponse[]) => void;
     onCancel: () => void;
 }
@@ -249,6 +257,7 @@ export const ClarificationPanel: FC<ClarificationPanelProps> = ({
     variant = 'clarify',
     selectedAnswers,
     onSelectAnswer,
+    onClearAnswer,
     onSubmit,
     onCancel,
 }) => {
@@ -256,7 +265,31 @@ export const ClarificationPanel: FC<ClarificationPanelProps> = ({
     const { t } = useTranslation();
     const submittedRef = useRef(false);
 
-    useEffect(() => { submittedRef.current = false; }, [questions]);
+    // Freeform replies typed directly inside the panel, keyed by the question
+    // they answer. A question's own index holds its typed text; the sentinel
+    // key -1 holds the explain variant's panel-level custom-followup override.
+    const [freeTexts, setFreeTexts] = useState<Record<number, string>>({});
+
+    useEffect(() => { submittedRef.current = false; setFreeTexts({}); }, [questions]);
+
+    const setFreeText = (key: number, value: string) =>
+        setFreeTexts(prev => ({ ...prev, [key]: value }));
+
+    const isExplain = variant === 'explain';
+    // Two-context color scheme, kept gentle. Each pause carries its own
+    // semantic hue (clarify=warning, explain=info) so the two read as distinct
+    // moments — but the hue only tints the quiet chrome (panel wash, border,
+    // chips, focus underline, field highlights, badge). The strong CTA (submit
+    // button) stays in the neutral brand primary so a clarify panel never
+    // shouts in aggressive amber. `chromeAccent` = the variant hue, used
+    // everywhere except the submit button (`submitAccent`).
+    const chromeAccent = isExplain
+        ? theme.palette.info.main
+        : theme.palette.warning.main;
+    const badgeAccent = chromeAccent;
+    const submitAccent = theme.palette.primary.main;
+    // Field highlights (`**name**` tokens) follow the variant hue too.
+    const accentColor = chromeAccent;
 
     const submitResponses = (responses: ClarificationResponse[]) => {
         if (responses.length === 0 || submittedRef.current) return;
@@ -264,13 +297,185 @@ export const ClarificationPanel: FC<ClarificationPanelProps> = ({
         onSubmit(responses);
     };
 
+    // A question counts as answered once it has either a clicked option (held
+    // by the parent in `selectedAnswers`) OR non-empty typed text. No explicit
+    // "confirm" step — typing is the answer. Submit enables when every
+    // question is answered. Explain additionally accepts a panel-level custom
+    // followup (sentinel -1).
+    const isAnswered = (idx: number) =>
+        !!selectedAnswers?.[idx] || (freeTexts[idx] || '').trim().length > 0;
+    const allQuestionsAnswered = questions.every((_q, idx) => isAnswered(idx));
+    const explainOverrideTyped = (freeTexts[-1] || '').trim().length > 0;
+    const canSubmit = isExplain ? (allQuestionsAnswered || explainOverrideTyped) : allQuestionsAnswered;
+
+    // A clarify panel auto-submits (on the click that completes it) only when
+    // EVERY answer is a clicked option — a pure "click your way through" flow.
+    // The moment any text answer is in play (a free_text question, or the user
+    // typed into a single_choice's "type your own" field), we show an explicit
+    // shared submit button instead, so a stray option click can never sweep up
+    // an unfinished typed answer. The button belongs to the panel, not a row.
+    const hasFreeTextQuestion = !isExplain && questions.some(q => q.responseType === 'free_text');
+    const anyTextTyped = questions.some((_q, idx) => (freeTexts[idx] || '').trim().length > 0);
+    const showPanelSubmit = !isExplain && (hasFreeTextQuestion || anyTextTyped);
+
+    // Gather the reply: each question's clicked option, else its typed
+    // free-text; plus (explain only) the optional panel-level custom override.
+    // The backend formats these by index, so the correlation stays intact.
+    const handlePanelSubmit = () => {
+        const responses: ClarificationResponse[] = [];
+        questions.forEach((_q, idx) => {
+            const sel = selectedAnswers?.[idx];
+            if (sel) {
+                responses.push(sel);
+            } else {
+                const typed = (freeTexts[idx] || '').trim();
+                if (typed) responses.push({ question_index: idx, answer: typed, source: 'free_text' });
+            }
+        });
+        if (isExplain) {
+            const custom = (freeTexts[-1] || '').trim();
+            if (custom) responses.push({ question_index: -1, answer: custom, source: 'freeform' });
+        }
+        submitResponses(responses);
+    };
+
+    // Typing in a question's field IS the answer — recorded live (never
+    // auto-submitting). Clearing the text removes the answer; a prior option
+    // pick is invalidated the moment the user starts typing.
+    const recordFreeText = (idx: number, value: string) => {
+        setFreeText(idx, value);
+        const typed = value.trim();
+        if (typed) {
+            onSelectAnswer?.(idx, { question_index: idx, answer: typed, source: 'free_text' }, false);
+        } else {
+            onClearAnswer?.(idx);
+        }
+    };
+
+    // Shared muted standard-input chrome for all freeform fields.
+    const freeTextSx = {
+        '& .MuiInput-root': {
+            fontSize: 11,
+            color: theme.palette.text.secondary,
+            '&:before': { borderBottomColor: alpha(theme.palette.text.primary, 0.1) },
+            '&:hover:not(.Mui-disabled):before': { borderBottomColor: alpha(theme.palette.text.primary, 0.25) },
+            '&:after': { borderBottomColor: alpha(chromeAccent, 0.6) },
+        },
+        '& .MuiInput-input::placeholder': {
+            color: theme.palette.text.disabled,
+            opacity: 0.7,
+            fontSize: 11,
+        },
+    } as const;
+
+    // Per-question freeform field with a lightweight inline "answered" check.
+    // free_text questions use it as their sole input; single_choice questions
+    // use it as a "type your own instead" companion beneath the chips.
+    // `trailing` docks a control (the panel submit button) to the right of the
+    // input on the same line — used on the last clarify question so submit
+    // shares the row instead of taking its own.
+    const renderQuestionField = (idx: number, placeholder: string, trailing?: ReactNode) => {
+        // A small inline check appears once the user has typed an answer (an
+        // option click is already self-evident from the highlighted chip). It
+        // sits at the end of the input line via an InputAdornment for tight
+        // spacing rather than floating in its own column.
+        const hasTypedAnswer = (freeTexts[idx] || '').trim().length > 0;
+        return (
+            <Box sx={{ display: 'flex', alignItems: 'flex-end', gap: '8px', pr: '4px' }}>
+                <Box sx={{ flex: '0 1 auto', width: '100%', maxWidth: 320 }}>
+                    <TextField
+                        value={freeTexts[idx] || ''}
+                        onChange={(e) => recordFreeText(idx, e.target.value)}
+                        onKeyDown={(e) => {
+                            if (e.key === 'Enter' && !e.shiftKey) {
+                                e.preventDefault();
+                                if (canSubmit) handlePanelSubmit();
+                            }
+                        }}
+                        placeholder={placeholder}
+                        variant="standard"
+                        multiline
+                        maxRows={4}
+                        fullWidth
+                        slotProps={{
+                            input: {
+                                endAdornment: hasTypedAnswer ? (
+                                    <InputAdornment position="end">
+                                        <CheckRoundedIcon sx={{ fontSize: 14, color: alpha(chromeAccent, 0.7) }} />
+                                    </InputAdornment>
+                                ) : undefined,
+                            },
+                        }}
+                        sx={freeTextSx}
+                    />
+                </Box>
+                {trailing && <Box sx={{ flexShrink: 0, mb: '2px', ml: 'auto' }}>{trailing}</Box>}
+            </Box>
+        );
+    };
+
+    // Shared panel-level submit button. Shown whenever a text answer is in
+    // play (otherwise the panel auto-submits on the completing option click).
+    // Muted outline until every question is answered, then fills with the
+    // accent and becomes clickable.
+    const panelSubmitButton = (
+        <Tooltip title={t('chartRec.submitClarification')}>
+            <span>
+                <IconButton
+                    size="small"
+                    disabled={!canSubmit}
+                    onClick={handlePanelSubmit}
+                    sx={{
+                        width: 26, height: 26, flexShrink: 0,
+                        color: canSubmit ? theme.palette.common.white : alpha(theme.palette.text.primary, 0.3),
+                        backgroundColor: canSubmit ? submitAccent : 'transparent',
+                        border: `1px solid ${canSubmit ? submitAccent : alpha(theme.palette.text.primary, 0.2)}`,
+                        '&:hover': { backgroundColor: canSubmit ? alpha(submitAccent, 0.85) : alpha(theme.palette.text.primary, 0.06) },
+                        '&.Mui-disabled': {
+                            color: alpha(theme.palette.text.primary, 0.3),
+                            border: `1px solid ${alpha(theme.palette.text.primary, 0.2)}`,
+                        },
+                    }}
+                >
+                    <ArrowForwardRoundedIcon sx={{ fontSize: 16 }} />
+                </IconButton>
+            </span>
+        </Tooltip>
+    );
+
+    // Explain's panel-level override keeps a plain field (no per-question
+    // confirm): clicking a followup or typing here + the panel submit button
+    // is the flow.
+    const renderOverrideInput = (placeholder: string) => (
+        <TextField
+            value={freeTexts[-1] || ''}
+            onChange={(e) => setFreeText(-1, e.target.value)}
+            onKeyDown={(e) => {
+                if (e.key === 'Enter' && !e.shiftKey) {
+                    e.preventDefault();
+                    if (canSubmit) handlePanelSubmit();
+                }
+            }}
+            placeholder={placeholder}
+            variant="standard"
+            multiline
+            maxRows={4}
+            fullWidth
+            sx={freeTextSx}
+        />
+    );
+
     /**
-     * Handle a single answer (option click or free-text Enter). When the
-     * parent provides `onSelectAnswer`, the panel defers to it and the
-     * parent decides when to actually submit. Otherwise we fall back to
-     * the legacy "click = submit immediately" behavior.
+     * Handle a clicked option. When the parent provides `onSelectAnswer` the
+     * panel defers to it (the parent decides when to submit); otherwise we
+     * fall back to the legacy "click = submit immediately" behavior.
      */
     const handleAnswer = (response: ClarificationResponse) => {
+        // Clicking an option supersedes any text typed for this question, so
+        // clear the field to keep the answer unambiguous.
+        if ((freeTexts[response.question_index] || '').length > 0) {
+            setFreeText(response.question_index, '');
+        }
         if (onSelectAnswer) {
             onSelectAnswer(response.question_index, response);
             return;
@@ -278,24 +483,14 @@ export const ClarificationPanel: FC<ClarificationPanelProps> = ({
         submitResponses([response]);
     };
 
-    const isExplain = variant === 'explain';
-    // Per-variant accent color drives both the panel chrome (bg/border) and
-    // the option chip affordances (border, hover, selection) so a clarify
-    // panel reads entirely in the warning hue and an explain panel entirely
-    // in the primary hue — no cross-color clashes between chrome and chips.
-    const chromeAccent = isExplain
-        ? theme.palette.primary.main
-        : theme.palette.warning.main;
-    // Field highlights (`**name**` tokens in question/option text) also use
-    // the variant accent so the underline color matches the panel.
-    const accentColor = chromeAccent;
     const title = t(isExplain ? 'chartRec.explanationTitle' : 'chartRec.clarificationTitle');
 
+
     return (
         <AgentPauseShell
             icon={<AgentToyIcon
                 variant={isExplain ? 'explain' : 'clarify'}
-                sx={{ fontSize: 16, color: chromeAccent }}
+                sx={{ fontSize: 16, color: badgeAccent }}
             />}
             accentColor={chromeAccent}
             title={title}
@@ -306,8 +501,21 @@ export const ClarificationPanel: FC<ClarificationPanelProps> = ({
             onCancel={onCancel}
             resetKey={questions}
         >
-            <Box sx={{ display: 'flex', flexDirection: 'column', gap: '4px', pb: '8px' }}>
-                {questions.map((question, questionIndex) => (
+            <Box sx={{ display: 'flex', flexDirection: 'column', gap: questions.length > 1 ? '14px' : '4px', pb: '8px' }}>
+                {questions.map((question, questionIndex) => {
+                    // free_text → freeform field only. single_choice → chips
+                    // PLUS a "type your own" freeform companion. explain keeps
+                    // its lightweight clickable-followups display (no per-question
+                    // freeform; the user types custom followups in the override).
+                    const isFreeTextOnly = !isExplain && question.responseType === 'free_text';
+                    const showChips = isExplain || question.responseType !== 'free_text';
+                    // Dock the shared submit button to the right of the LAST
+                    // clarify question's input (only when a text answer is in
+                    // play; pure-choice panels auto-submit and need no button).
+                    const isLast = questionIndex === questions.length - 1;
+                    const fieldTrailing = (!isExplain && isLast && showPanelSubmit)
+                        ? panelSubmitButton : undefined;
+                    return (
                     <Box key={questionIndex} sx={{ display: 'flex', flexDirection: 'column', gap: '4px', pl: '20px' }}>
                         {/* Text portion is height-bounded and scrollable so very
                             long explanations don't push options off-screen.
@@ -327,74 +535,105 @@ export const ClarificationPanel: FC<ClarificationPanelProps> = ({
                             </Typography>
                         </Box>
 
-                        {question.responseType === 'free_text' ? (
-                            // Free-text questions don't render their own input.
-                            // The user types the answer in the main chat box
-                            // below and hits Send (or Enter).
-                            <Typography sx={{
-                                fontSize: 10,
-                                color: theme.palette.text.disabled,
-                                fontStyle: 'italic',
-                                mt: '2px',
-                            }}>
-                                {t('chartRec.freeTextClarificationHint')}
-                            </Typography>
+                        {isFreeTextOnly ? (
+                            // A free_text question is answered right here, with
+                            // its own input directly beneath the text.
+                            renderQuestionField(questionIndex, t('chartRec.freeTextClarificationPlaceholder'), fieldTrailing)
                         ) : (
                             <Box sx={{ display: 'flex', flexDirection: 'column', gap: '4px' }}>
-                                {isExplain && (question.options || []).length > 0 && (
-                                    <Typography sx={{
-                                        fontSize: 10,
-                                        color: theme.palette.text.disabled,
-                                        fontStyle: 'italic',
-                                        mt: '2px',
-                                    }}>
-                                        {t('chartRec.explanationFollowupsLabel')}
-                                    </Typography>
+                                {showChips && (question.options || []).length > 0 && (
+                                    <>
+                                        {isExplain && (
+                                            <Typography sx={{
+                                                fontSize: 10,
+                                                color: theme.palette.text.disabled,
+                                                fontStyle: 'italic',
+                                                mt: '2px',
+                                            }}>
+                                                {t('chartRec.explanationFollowupsLabel')}
+                                            </Typography>
+                                        )}
+                                        <Box sx={{ display: 'flex', flexWrap: 'wrap', gap: '4px' }}>
+                                            {(question.options || []).map((option, optionIndex) => {
+                                                const selected = selectedAnswers?.[questionIndex];
+                                                const isSelected = selected?.source === 'option' && selected.answer === option.label;
+                                                return (
+                                                    <Box key={optionIndex} sx={{ position: 'relative', overflow: 'hidden', borderRadius: '6px' }}>
+                                                        <Typography
+                                                            component="button"
+                                                            type="button"
+                                                            onClick={() => handleAnswer({
+                                                                question_index: questionIndex,
+                                                                answer: option.label,
+                                                                source: 'option',
+                                                            })}
+                                                            sx={{
+                                                                position: 'relative', zIndex: 1,
+                                                                px: '8px', py: '4px',
+                                                                borderRadius: '6px',
+                                                                border: `1px solid ${isSelected ? alpha(accentColor, 0.6) : alpha(theme.palette.text.primary, 0.12)}`,
+                                                                backgroundColor: isSelected ? alpha(accentColor, 0.12) : theme.palette.background.paper,
+                                                                cursor: 'pointer',
+                                                                fontSize: 11,
+                                                                fontWeight: isSelected ? 600 : 400,
+                                                                display: 'inline-block',
+                                                                whiteSpace: 'normal',
+                                                                wordBreak: 'break-word',
+                                                                lineHeight: 1.4,
+                                                                color: theme.palette.text.primary,
+                                                                textAlign: 'left',
+                                                                fontFamily: theme.typography.fontFamily,
+                                                                '&:hover': { backgroundColor: alpha(accentColor, isSelected ? 0.16 : 0.08) },
+                                                            }}
+                                                        >
+                                                            {renderFieldHighlights(option.label, accentColor)}
+                                                        </Typography>
+                                                    </Box>
+                                                );
+                                            })}
+                                        </Box>
+                                    </>
                                 )}
-                                <Box sx={{ display: 'flex', flexWrap: 'wrap', gap: '4px' }}>
-                                    {(question.options || []).map((option, optionIndex) => {
-                                        const selected = selectedAnswers?.[questionIndex];
-                                        const isSelected = !!selected && selected.answer === option.label;
-                                        return (
-                                            <Box key={optionIndex} sx={{ position: 'relative', overflow: 'hidden', borderRadius: '6px' }}>
-                                                <Typography
-                                                    component="button"
-                                                    type="button"
-                                                    onClick={() => handleAnswer({
-                                                        question_index: questionIndex,
-                                                        answer: option.label,
-                                                        source: 'option',
-                                                    })}
-                                                    sx={{
-                                                        position: 'relative', zIndex: 1,
-                                                        px: '8px', py: '4px',
-                                                        borderRadius: '6px',
-                                                        border: `1px solid ${isSelected ? alpha(accentColor, 0.6) : alpha(theme.palette.text.primary, 0.12)}`,
-                                                        backgroundColor: isSelected ? alpha(accentColor, 0.12) : theme.palette.background.paper,
-                                                        cursor: 'pointer',
-                                                        fontSize: 11,
-                                                        fontWeight: isSelected ? 600 : 400,
-                                                        display: 'inline-block',
-                                                        whiteSpace: 'normal',
-                                                        wordBreak: 'break-word',
-                                                        lineHeight: 1.4,
-                                                        color: theme.palette.text.primary,
-                                                        textAlign: 'left',
-                                                        fontFamily: theme.typography.fontFamily,
-                                                        '&:hover': { backgroundColor: alpha(accentColor, isSelected ? 0.16 : 0.08) },
-                                                    }}
-                                                >
-                                                    {renderFieldHighlights(option.label, accentColor)}
-                                                </Typography>
-                                            </Box>
-                                        );
-                                    })}
-                                </Box>
+                                {/* single_choice questions also accept a typed
+                                    answer (chips are shortcuts, not the only
+                                    option). explain has no per-question freeform. */}
+                                {!isExplain && renderQuestionField(questionIndex, t('chartRec.customAnswerPlaceholder'), fieldTrailing)}
                             </Box>
                         )}
                     </Box>
-                ))}
+                    );
+                })}
             </Box>
+
+            {/* Footer. Explain keeps a row: panel-level custom-followup input +
+                submit. Clarify docks its shared submit button inline to the
+                right of the last question's input (above), so no footer row. */}
+            {isExplain && (
+                <Box sx={{
+                    display: 'flex', alignItems: 'center', gap: '6px',
+                    pl: '20px', pr: '4px', pb: '8px',
+                }}>
+                    {renderOverrideInput(t('chartRec.customAnswerPlaceholder'))}
+                    <Tooltip title={t('chartRec.submitClarification')}>
+                        <span>
+                            <IconButton
+                                size="small"
+                                disabled={!canSubmit}
+                                onClick={handlePanelSubmit}
+                                sx={{
+                                    width: 26, height: 26, flexShrink: 0,
+                                    color: theme.palette.common.white,
+                                    backgroundColor: canSubmit ? submitAccent : alpha(theme.palette.text.primary, 0.18),
+                                    '&:hover': { backgroundColor: canSubmit ? alpha(submitAccent, 0.85) : alpha(theme.palette.text.primary, 0.18) },
+                                    '&.Mui-disabled': { color: theme.palette.common.white, backgroundColor: alpha(theme.palette.text.primary, 0.18) },
+                                }}
+                            >
+                                <ArrowForwardRoundedIcon sx={{ fontSize: 16 }} />
+                            </IconButton>
+                        </span>
+                    </Tooltip>
+                </Box>
+            )}
         </AgentPauseShell>
     );
 };
@@ -544,3 +783,52 @@ export const DelegatePanel: FC<DelegatePanelProps> = ({
         </AgentPauseShell>
     );
 };
+
+// ---------------------------------------------------------------------------
+// ExplanationPanel
+// ---------------------------------------------------------------------------
+
+interface ExplanationPanelProps {
+    /** The agent's plain-text answer (markdown) to display read-only. */
+    content: string;
+    /** Dismiss the panel (the user is done reading the answer). */
+    onCancel: () => void;
+}
+
+/**
+ * Read-only display of a completed plain-text answer, surfaced above the
+ * chat box when the user clicks that answer's collapsed trace in the data
+ * thread. Reuses the `explain` pause chrome (primary accent + AgentToyIcon)
+ * but carries no inputs or actions — it's purely "here's what I said",
+ * dismissible by the header's delete button or by focusing another item.
+ */
+export const ExplanationPanel: FC<ExplanationPanelProps> = ({ content, onCancel }) => {
+    const theme = useTheme();
+    const { t } = useTranslation();
+
+    return (
+        <AgentPauseShell
+            icon={<AgentToyIcon
+                variant="explain"
+                sx={{ fontSize: 16, color: theme.palette.primary.main }}
+            />}
+            accentColor={theme.palette.primary.main}
+            title={t('chartRec.explanationTitle')}
+            minimizedPreview={content}
+            dismissTooltip={t('chartRec.dismissExplanation')}
+            minimizeTooltip={t('chartRec.minimizeClarification')}
+            expandTooltip={t('chartRec.expandClarification')}
+            onCancel={onCancel}
+            resetKey={content}
+        >
+            <Box sx={{
+                maxHeight: 'clamp(120px, 32vh, 360px)',
+                overflowY: 'auto',
+                pb: '8px', pl: '20px', pr: '8px',
+                fontSize: 12,
+            }}>
+                <CompactMarkdown content={content} color={theme.palette.text.primary} />
+            </Box>
+        </AgentPauseShell>
+    );
+};
diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx
index 90280098..25abdfc5 100644
--- a/src/views/DataThread.tsx
+++ b/src/views/DataThread.tsx
@@ -83,8 +83,8 @@ import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline';
 
 import SmartToyOutlinedIcon from '@mui/icons-material/SmartToyOutlined';
 import { AgentToyIcon } from './AgentToyIcon';
-import AutoAwesomeIcon from '@mui/icons-material/AutoAwesome';
 import ArticleIcon from '@mui/icons-material/Article';
+import AutoAwesomeIcon from '@mui/icons-material/AutoAwesome';
 import TerminalIcon from '@mui/icons-material/Terminal';
 import ErrorOutlineIcon from '@mui/icons-material/ErrorOutline';
 import WarningAmberIcon from '@mui/icons-material/WarningAmber';
@@ -150,13 +150,13 @@ const LiveStatus: React.FC<{ startTime?: number; resetKey?: string }> = ({ start
  *  ThinkingBanner — rather than right-flushed in a separate column.
  *  The timer resets whenever the active step changes so it shows the time
  *  spent on the **current** action, not the cumulative wait. */
-export const ThinkingStepsBanner = (steps: string[], sx?: SxProps, startTime?: number) => {
+export const ThinkingStepsBanner = (steps: string[], sx?: SxProps, startTime?: number, active: boolean = true) => {
     const activeStep = steps.length > 0 ? steps[steps.length - 1] : '';
     return (
         <Box sx={{ ...sx }}>
             <PlanStepsView
                 steps={steps}
-                activeLastStep
+                activeLastStep={active}
                 trailing={startTime != null ? <LiveStatus startTime={startTime} resetKey={activeStep} /> : undefined}
             />
         </Box>
@@ -1178,7 +1178,7 @@ let SingleThreadGroupView: FC<{
     });
 
     // Build a flat sequence of timeline items: [trigger, table, charts, trigger, table, charts, ...]
-    type TimelineItem = { key: string; element: React.ReactNode; type: 'used-table' | 'trigger' | 'table' | 'chart' | 'leaf-trigger' | 'leaf-table' | 'report' | 'merge'; highlighted: boolean; tableId?: string; chartType?: string; isRunning?: boolean; isClarifying?: boolean; isCompleted?: boolean; interactionEntry?: InteractionEntry; reportId?: string; stepLabel?: string; gutterIcon?: React.ReactNode };
+    type TimelineItem = { key: string; element: React.ReactNode; type: 'used-table' | 'trigger' | 'table' | 'chart' | 'leaf-trigger' | 'leaf-table' | 'artifact' | 'merge'; highlighted: boolean; tableId?: string; chartType?: string; isRunning?: boolean; isClarifying?: boolean; isCompleted?: boolean; interactionEntry?: InteractionEntry; reportId?: string; stepLabel?: string; gutterIcon?: React.ReactNode };
     let timelineItems: TimelineItem[] = [];
 
     // Each running/clarifying draft should produce at most ONE banner per
@@ -1413,7 +1413,7 @@ let SingleThreadGroupView: FC<{
                     type: triggerType,
                     highlighted,
                     isRunning,
-                    element: ThinkingStepsBanner(planLines, { px: 1, py: 0.5 }, isRunning ? lastUserTs : undefined),
+                    element: ThinkingStepsBanner(planLines, { px: 1, py: 0.5 }, isRunning ? lastUserTs : undefined, isRunning),
                 });
                 return;
             }
@@ -1437,7 +1437,7 @@ let SingleThreadGroupView: FC<{
                         type: triggerType,
                         highlighted,
                         isRunning: false,
-                        element: ThinkingStepsBanner(priorLines, { px: 1, py: 0.5 }),
+                        element: ThinkingStepsBanner(priorLines, { px: 1, py: 0.5 }, undefined, false),
                     });
                 }
             }
@@ -1474,13 +1474,21 @@ let SingleThreadGroupView: FC<{
                 );
             } else {
                 const runningAction = runningAgentTableIds.get(tableId);
-                const message = runningAction?.description || t('dataThread.working');
+                // `description` is the running plan: steps joined by STEP_SEP
+                // ('\x1E'), which renders invisibly. Split it back into discrete
+                // steps and render through the per-step banner (icons + ✓), the
+                // same way the interaction-present path does — otherwise the
+                // steps collapse into one run-on blob.
+                const planLines = (runningAction?.description || '')
+                    .split('\x1E').map(s => s.trim()).filter(Boolean);
                 timelineItems.push({
                     key: `agent-running-${tableId}`,
                     type: 'chart',
                     highlighted,
                     isRunning: true,
-                    element: ThinkingBanner(message, { px: 1, py: 0.5 }, true, true),
+                    element: planLines.length > 0
+                        ? ThinkingStepsBanner(planLines, { px: 1, py: 0.5 })
+                        : ThinkingBanner(t('dataThread.working'), { px: 1, py: 0.5 }, true, true),
                 });
             }
         } else if (clarifyAgentTableIds.has(tableId)) {
@@ -1545,80 +1553,65 @@ let SingleThreadGroupView: FC<{
             });
         }
     };
-
-    // Push report cards triggered from the given table
+    // Push report artifacts triggered from the given table. A report is just
+    // another *output card* of the run — treated exactly like a table/chart
+    // card: the run's question (the triggering instruction) and the agent's
+    // closing summary are rendered ONCE by the thread machinery (trigger entry
+    // above, after-table summary below), so the report card never re-renders
+    // them (that would duplicate the run's opening instruction).
     const pushReportItems = (tableId: string, highlighted: boolean) => {
         const reports = reportsByTriggerTable.get(tableId);
         if (!reports) return;
         for (const report of reports) {
-                const isFocused = focusedId?.type === 'report' && focusedId.reportId === report.id;
-                const isGenerating = report.status === 'generating';
-                const selectedClassName = isFocused ? 'selected-report-card' : '';
-                timelineItems.push({
-                    key: `report-${report.id}`,
-                    type: 'report',
-                    reportId: report.id,
-                    highlighted: highlighted || isFocused,
-                    element: (
-                        <Card className={`data-thread-card ${selectedClassName}`} elevation={0}
-                            sx={{
-                                width: '100%',
-                                backgroundColor: theme.palette.secondary.bgcolor,
-                                ...ComponentBorderStyle,
-                                ...(highlighted ? { borderLeft: '2px solid', borderLeftColor: 'secondary.main' } : {}),
-                                borderRadius: '6px',
-                                cursor: 'pointer',
-                            }}
-                            onClick={() => {
-                                dispatch(dfActions.setFocused({ type: 'report', reportId: report.id }));
-                            }}
-                        >
-                            <Box sx={{ margin: '0px', display: 'flex', minWidth: 0, alignItems: 'center',
-                                '& .report-delete-btn': { opacity: 0, transition: 'opacity 0.15s' },
-                                '&:hover .report-delete-btn': { opacity: 1 },
+            const isFocused = focusedId?.type === 'report' && focusedId.reportId === report.id;
+            const rowHL = highlighted || isFocused;
+            const isGenerating = report.status === 'generating';
+            const gutterIcon = isGenerating
+                ? <CircularProgress size={12} thickness={5} sx={{ color: theme.palette.secondary.main }} />
+                : <ArticleIcon sx={{ width: 14, height: 14, color: rowHL ? theme.palette.secondary.main : 'rgba(0,0,0,0.3)' }} />;
+            const card = (
+                <Card className={`data-thread-card ${isFocused ? 'selected-report-card' : ''}`} elevation={0}
+                    sx={{
+                        width: '100%', backgroundColor: theme.palette.secondary.bgcolor,
+                        ...ComponentBorderStyle,
+                        ...(rowHL ? { borderLeft: '2px solid', borderLeftColor: 'secondary.main' } : {}),
+                        borderRadius: '6px', cursor: 'pointer',
+                    }}
+                    onClick={() => dispatch(dfActions.setFocused({ type: 'report', reportId: report.id }))}
+                >
+                    <Box sx={{ margin: '0px', display: 'flex', minWidth: 0, alignItems: 'center',
+                        '& .report-delete-btn': { opacity: 0, transition: 'opacity 0.15s' },
+                        '&:hover .report-delete-btn': { opacity: 1 },
+                    }}>
+                        <Box sx={{ margin: '4px 8px 4px 6px', minWidth: 0, flex: 1 }}>
+                            <Typography sx={{
+                                fontSize: 11, fontWeight: 500, color: 'text.primary',
+                                display: '-webkit-box', WebkitLineClamp: 2, WebkitBoxOrient: 'vertical',
+                                overflow: 'hidden', wordBreak: 'break-all',
                             }}>
-                                <Box sx={{ margin: '4px 8px 4px 6px', minWidth: 0, flex: 1 }}>
-                                    <Typography sx={{
-                                        fontSize: 11,
-                                        fontWeight: 500,
-                                        color: 'text.primary',
-                                        display: '-webkit-box',
-                                        WebkitLineClamp: 2,
-                                        WebkitBoxOrient: 'vertical',
-                                        overflow: 'hidden',
-                                        wordBreak: 'break-all',
-                                    }}>
-                                        {report.title || t('report.untitled')}
-                                    </Typography>
-                                    {isGenerating && (
-                                        <Typography sx={{
-                                            fontSize: 9,
-                                            color: 'text.disabled',
-                                            lineHeight: 1.3,
-                                            mt: 0.25,
-                                        }}>
-                                            {t('report.composing')}
-                                        </Typography>
-                                    )}
-                                </Box>
-                                <Tooltip title={t('dataThread.deleteReport')}>
-                                    <IconButton
-                                        className="report-delete-btn"
-                                        size="small"
-                                        color="error"
-                                        sx={{ p: 0.5, mr: 0.5, '&:hover': { transform: 'scale(1.15)' } }}
-                                        onClick={(e) => {
-                                            e.stopPropagation();
-                                            dispatch(dfActions.deleteGeneratedReport(report.id));
-                                        }}
-                                    >
-                                        <DeleteIcon sx={{ fontSize: 16 }} />
-                                    </IconButton>
-                                </Tooltip>
-                            </Box>
-                        </Card>
-                    ),
-                });
+                                {report.title || t('report.untitled')}
+                            </Typography>
+                            {isGenerating && (
+                                <Typography sx={{ fontSize: 9, color: 'text.disabled', lineHeight: 1.3, mt: 0.25 }}>
+                                    {t('report.composing')}
+                                </Typography>
+                            )}
+                        </Box>
+                        <Tooltip title={t('dataThread.deleteReport')}>
+                            <IconButton className="report-delete-btn" size="small" color="error"
+                                sx={{ p: 0.5, mr: 0.5, '&:hover': { transform: 'scale(1.15)' } }}
+                                onClick={(e) => { e.stopPropagation(); dispatch(dfActions.deleteGeneratedReport(report.id)); }}
+                            >
+                                <DeleteIcon sx={{ fontSize: 16 }} />
+                            </IconButton>
+                        </Tooltip>
+                    </Box>
+                </Card>
+            );
+            timelineItems.push({
+                key: `report-${report.id}`, type: 'artifact', highlighted: rowHL,
+                reportId: report.id, gutterIcon, element: card,
+            });
         }
     };
 
@@ -1686,16 +1679,19 @@ let SingleThreadGroupView: FC<{
         // Add table card and its charts
         pushTableAndChartItems(tableId, tableElementList[i], 'table', isHighlighted);
 
-        // After-table entries (e.g. summary)
+        // Add report cards anchored to this table. Reports are output cards of
+        // the run (like charts), so they sit with the other outputs, BEFORE the
+        // run's closing summary.
+        pushReportItems(tableId, isHighlighted);
+
+        // After-table entries (e.g. summary). The run's closing summary is the
+        // final word and must follow the LAST artifact (table, chart, or
+        // report), so it is pushed after pushReportItems.
         const afterTable = afterTableMap.get(tableId);
         if (afterTable && afterTable.length > 0) {
             pushInteractionEntries(afterTable, tableId, 'trigger', isHighlighted, 'interaction-after');
         }
 
-        // Add report cards anchored to charts of this table — placed after the
-        // summary block so the report/chat node follows the agent's summary.
-        pushReportItems(tableId, isHighlighted);
-
         // Running or clarifying agent state
         pushAgentDraftItems(tableId, 'trigger', isHighlighted);
     });
@@ -1724,16 +1720,19 @@ let SingleThreadGroupView: FC<{
 
         pushTableAndChartItems(lt.id, _buildTableCard(lt.id), 'leaf-table', isHL);
 
-        // After-table entries (e.g. summary)
+        // Add report cards anchored to this leaf table. Reports are output cards
+        // of the run (like charts), so they sit with the other outputs, BEFORE
+        // the run's closing summary.
+        pushReportItems(lt.id, isHL);
+
+        // After-table entries (e.g. summary). The run's closing summary is the
+        // final word and must follow the LAST artifact (table, chart, or
+        // report), so it is pushed after pushReportItems.
         const leafAfterEntries = leafAfterTableMap.get(lt.id);
         if (leafAfterEntries && leafAfterEntries.length > 0) {
             pushInteractionEntries(leafAfterEntries, lt.id, 'leaf-trigger', isHL, 'leaf-after');
         }
 
-        // Add report cards anchored to charts of this leaf table — placed after
-        // the summary block so the report/chat node follows the agent's summary.
-        pushReportItems(lt.id, isHL);
-
         // Running or clarifying agent state
         pushAgentDraftItems(lt.id, 'leaf-trigger', isHL);
     });
@@ -1786,13 +1785,10 @@ let SingleThreadGroupView: FC<{
             ? theme.palette.primary.main
             : 'rgba(0,0,0,0.15)';
 
-        // For report items, show an article icon or spinner if generating
-        if (item.type === 'report') {
-            const report = item.reportId ? generatedReports.find(r => r.id === item.reportId) : undefined;
-            if (report?.status === 'generating') {
-                return <CircularProgress size={12} thickness={5} sx={{ color: theme.palette.secondary.main }} />;
-            }
-            return <ArticleIcon sx={{ width: 14, height: 14, color: item.highlighted ? theme.palette.secondary.main : 'rgba(0,0,0,0.3)' }} />;
+        // Artifact output rows (reports today, future skill outputs) carry
+        // their own precomputed gutter dot from the artifact factory.
+        if (item.type === 'artifact') {
+            return item.gutterIcon ?? <Box sx={{ width: DOT_SIZE, height: DOT_SIZE, borderRadius: '50%', backgroundColor: color }} />;
         }
 
         // For running agent items, show a spinner instead of a dot
diff --git a/src/views/ReportView.tsx b/src/views/ReportView.tsx
index 339c4373..1e0ec9fc 100644
--- a/src/views/ReportView.tsx
+++ b/src/views/ReportView.tsx
@@ -516,7 +516,7 @@ ${styles}
             const timer = setTimeout(() => loadReport(focusedReportId), 800);
             return () => clearTimeout(timer);
         }
-    }, [focusedReportId, charts, tables]);
+    }, [focusedReportId, charts, tables, chartThumbnails]);
 
     // Keep local content in sync with Redux during streaming (status === 'generating')
     useEffect(() => {
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index a1c0cf70..bd8642f8 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -28,6 +28,7 @@ import { resolveRecommendedChart, getUrls, getTriggers, translateBackend } from
 import { streamRequest } from '../app/apiClient';
 import { getErrorMessage } from '../app/errorCodes';
 import { persistEphemeralDerivedTable } from '../app/tableThunks';
+import { getCachedChart } from '../app/chartCache';
 import { Chart, ClarificationResponse, DictTable, FieldItem, createDictTable, InteractionEntry } from "../components/ComponentType";
 import { normalizeClarifyEvent, formatClarificationResponses } from '../app/clarification';
 
@@ -44,11 +45,20 @@ import { borderColor, transition } from '../app/tokens';
 import { Theme } from '@mui/material/styles';
 import { useTranslation } from 'react-i18next';
 import { shouldAutoFocusGeneratedChart } from '../app/agentInteractionPolicy';
-import { ClarificationPanel, DelegatePanel } from './AgentPausePanel';
+import { ClarificationPanel, DelegatePanel, ExplanationPanel } from './AgentPausePanel';
 
 const AgentWorkingOverlay: FC<{ message?: string; elapsed?: number; theme: Theme; onCancel?: () => void; color?: 'primary' | 'warning' }> = ({ message, elapsed, theme, onCancel, color = 'primary' }) => {
     const { t } = useTranslation();
-    const latestMessage = message || t('dataThread.thinking');
+    // `message` is the running plan: steps joined by the STEP_SEP control char
+    // ('\x1E'), which renders invisibly and would otherwise collapse every step
+    // into one run-on blob. This overlay is a compact status, so show only the
+    // latest (active) step rather than the whole accumulated trace.
+    const latestStep = (message ?? '')
+        .split('\x1E')
+        .map(s => s.trim())
+        .filter(Boolean)
+        .pop();
+    const latestMessage = latestStep || t('dataThread.thinking');
     const elapsedSuffix = elapsed != null && elapsed > 0 ? ` (${elapsed}s)` : '';
     const progressColor = color === 'warning' ? theme.palette.warning.main : theme.palette.primary.main;
     return (
@@ -386,6 +396,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     kind: 'clarification' as const,
                     questions: entry.clarificationQuestions || null,
                     variant: entry.role === 'explain' ? 'explain' as const : 'clarify' as const,
+                    content: entry.content || '',
                 };
             }
         }
@@ -594,6 +605,45 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             max_iterations: 10,
         };
 
+        // ── Dev toggle: route through the unified AnalystAgent (design-35/36) ──
+        // Set localStorage `df_useAnalystAgent` = '1' to opt in. The unified
+        // agent can also write reports inside the same run, so we ship the
+        // available charts (same shape the report agent gets) for the report
+        // skill's inspect_chart. Additive: the legacy data agent ignores them.
+        const useAnalyst = localStorage.getItem('df_useAnalystAgent') === '1';
+        const streamUrl = useAnalyst ? getUrls().ANALYST_STREAMING : getUrls().DATA_AGENT_STREAMING;
+        const availableCharts = useAnalyst
+            ? charts
+                .filter(c => c.chartType !== 'Table' && c.chartType !== 'Auto')
+                .filter(c => tables.some(t => t.id === c.tableRef))
+                .map(c => {
+                    const tbl = tables.find(t => t.id === c.tableRef);
+                    const encodings: Record<string, string> = {};
+                    if (c.encodingMap) {
+                        for (const [ch, enc] of Object.entries(c.encodingMap)) {
+                            if ((enc as any)?.fieldID) {
+                                const field = conceptShelfItems.find(f => f.id === (enc as any).fieldID);
+                                if (field) encodings[ch] = field.name;
+                            }
+                        }
+                    }
+                    return {
+                        chart_id: c.id,
+                        chart_type: c.chartType,
+                        encodings,
+                        table_ref: tbl?.virtual?.tableId || c.tableRef,
+                        code: tbl?.derive?.code || '',
+                        chart_data: tbl ? { name: tbl.virtual?.tableId || tbl.id, rows: tbl.rows.slice(0, 50) } : undefined,
+                        // Optional rendered image: the agent reads charts from
+                        // data + encodings, but a cached PNG (when available)
+                        // lets it visually confirm a pre-existing chart. Prefer
+                        // the downscaled thumbnail to keep the request lean.
+                        chart_image: chartThumbnails[c.id] || getCachedChart(c.id)?.thumbnailDataUrl || undefined,
+                    };
+                })
+            : [];
+        if (useAnalyst) requestBody.charts = availableCharts;
+
         if (isResume) {
             // Resume: just send the assembled prompt as user_question. The
             // backend appends it to the trajectory as a normal user message.
@@ -683,7 +733,91 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         let thinkingSteps: string[] = [];
         let pendingThought: string = '';
 
+        // ── Live report streaming (AnalystAgent only) ──
+        // The unified agent can write a report inside the same run: it emits an
+        // `action`(write_report) commitment followed by `text_delta` events on
+        // channel "report". We create a GeneratedReport on first signal, switch
+        // to the report view, and stream the markdown in — mirroring the
+        // standalone reportFromChat coalescing (90ms flush so Tiptap re-parses
+        // ~10×/sec instead of per-token).
+        let reportId: string | null = null;
+        let accumulatedReportMarkdown = '';
+        let reportLastDispatched = '';
+        let reportFlushTimer: ReturnType<typeof setTimeout> | null = null;
+        // Ids of charts created during THIS run, adopted from the backend's
+        // forwarded chart_id. Merged into the report's selectedChartIds so a
+        // same-run report can embed them via chart://<id>.
+        const runCreatedChartIds: string[] = [];
+        const reportFlushNow = () => {
+            if (reportFlushTimer) { clearTimeout(reportFlushTimer); reportFlushTimer = null; }
+            if (!reportId || accumulatedReportMarkdown === reportLastDispatched) return;
+            reportLastDispatched = accumulatedReportMarkdown;
+            const titleMatch = accumulatedReportMarkdown.match(/^#\s+(.+)$/m);
+            dispatch(dfActions.updateGeneratedReportContent({
+                id: reportId,
+                content: accumulatedReportMarkdown,
+                title: titleMatch ? titleMatch[1].trim() : undefined,
+            }));
+        };
+        const reportScheduleFlush = () => {
+            if (reportFlushTimer) return;
+            reportFlushTimer = setTimeout(() => { reportFlushTimer = null; reportFlushNow(); }, 90);
+        };
+        const ensureReport = () => {
+            if (reportId) return reportId;
+            const newId = `report-${Date.now()}`;
+            const inProgressReport: GeneratedReport = {
+                id: newId,
+                content: '',
+                selectedChartIds: Array.from(new Set([
+                    ...availableCharts.map(c => c.chart_id),
+                    ...runCreatedChartIds,
+                ])),
+                createdAt: Date.now(),
+                status: 'generating',
+                prompt: agentPrompt,
+                triggerTableId: focusedTableId,
+            };
+            dispatch(dfActions.saveGeneratedReport(inProgressReport));
+            dispatch(dfActions.setFocused({ type: 'report', reportId: newId }));
+            dispatch(dfActions.setViewMode('report'));
+            reportId = newId;
+            return newId;
+        };
+
         const processStreamingResult = async (result: any) => {
+            // ── interact: the unified agent's clarify/explain pause ──
+            // Alias to the legacy clarify path (same questions[] shape + the
+            // backend now stamps trajectory/completed_step_count for resume).
+            if (result.type === "interact") {
+                result = { ...result, type: "clarify" };
+            }
+
+            // ── report streaming (AnalystAgent only) ──
+            // write_report commitment → create the report + switch view.
+            if (result.type === "action" && result.action === "write_report") {
+                ensureReport();
+                // Flush any buffered agent reasoning as its own step first, so
+                // it reads as a discrete prior step rather than running into the
+                // "outputting write_report" line (mirrors the tool_start flush).
+                if (pendingThought) {
+                    thinkingSteps.push(pendingThought);
+                    pendingThought = '';
+                }
+                thinkingSteps.push(t('dataThread.producingAction', { action: 'write_report' }));
+                if (currentDraftId) {
+                    dispatch(dfActions.updateDraftRunningPlan({ draftId: currentDraftId, plan: thinkingSteps.join(STEP_SEP) }));
+                }
+                return;
+            }
+            // report-channel markdown deltas → stream into the report content.
+            if (result.type === "text_delta" && result.channel === "report") {
+                ensureReport();
+                accumulatedReportMarkdown += result.content || '';
+                reportScheduleFlush();
+                return;
+            }
+
             // ── context_info: show injected rules/knowledge at the top ──
             // Rendered as already-completed tool-style steps (✓ prefix) so they
             // visually match the rest of the agent's tool-call timeline.
@@ -725,7 +859,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     thinkingSteps.push(pendingThought);
                     pendingThought = '';
                 }
-                if (result.tool === "explore") {
+                if (result.tool === "explore" || result.tool === "execute_python_script") {
                     const purpose = result.purpose || '';
                     if (purpose) {
                         thinkingSteps.push(t('dataThread.runningCode') + ' ' + purpose);
@@ -737,6 +871,10 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 } else if (result.tool === "inspect_source_data") {
                     const tableNames = result.table_names?.join(', ') || '';
                     thinkingSteps.push(t('dataThread.inspectingData') + (tableNames ? ` ${tableNames}` : ''));
+                } else if (result.tool === "inspect_chart") {
+                    thinkingSteps.push(t('dataThread.inspectingChart'));
+                } else if (result.tool === "load_skill") {
+                    thinkingSteps.push(t('dataThread.loadingSkill', { skill: result.skill || '' }));
                 } else if (result.tool === "search_data_tables" || result.tool === "search_knowledge") {
                     const query = result.query || '';
                     thinkingSteps.push(t('dataThread.searching') + (query ? ` "${query}"` : ''));
@@ -907,6 +1045,16 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
 
                 const currentConcepts = [...conceptShelfItems.filter(c => names.includes(c.name)), ...allNewConcepts, ...conceptsToAdd];
                 let newChart = resolveRecommendedChart(refinedGoal, currentConcepts, candidateTable);
+                // Adopt the backend's forwarded chart_id so the agent and the
+                // frontend share one id (it can embed/inspect this chart in the
+                // same run). Guard against an id that somehow already exists.
+                const forwardedChartId = transformResult.chart_id;
+                if (forwardedChartId
+                    && !charts.some(c => c.id === forwardedChartId)
+                    && !createdCharts.some(c => c.id === forwardedChartId)) {
+                    newChart.id = forwardedChartId;
+                }
+                runCreatedChartIds.push(newChart.id);
                 // Mark as unread by default; cleared below if we auto-focus it
                 // (i.e. it's the first artifact this run) or by setFocused when
                 // the user clicks the card.
@@ -1073,11 +1221,28 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
 
             // ── completion: final summary ──
             if (result.type === "completion") {
+                const rawSummary = result.content?.summary || "";
+                const summary = result.status === "max_iterations"
+                    ? translateBackend(rawSummary, result.content?.summary_code) || t('chartRec.maxIterationsReached')
+                    : rawSummary;
+                // Finalize any report streamed during this run.
+                if (reportId) {
+                    reportFlushNow();
+                    const titleMatch = accumulatedReportMarkdown.match(/^#\s+(.+)$/m);
+                    dispatch(dfActions.updateGeneratedReportContent({
+                        id: reportId,
+                        content: accumulatedReportMarkdown,
+                        status: 'completed',
+                        title: titleMatch ? titleMatch[1].trim() : undefined,
+                        // Anchor the report to the latest table created this run
+                        // so it attaches to the newest thread item, like charts.
+                        triggerTableId: lastCreatedTableId || undefined,
+                    }));
+                }
                 if (lastCreatedTableId) {
-                    const rawSummary = result.content?.summary || "";
-                    const summary = result.status === "max_iterations"
-                        ? translateBackend(rawSummary, result.content?.summary_code) || t('chartRec.maxIterationsReached')
-                        : rawSummary;
+                    // A table was created (lastCreatedTableId is set), so there
+                    // is a thread item to anchor to. The closing answer renders
+                    // once as that table's after-summary entry, like a chart's.
                     if (summary) {
                         const entry: InteractionEntry = {
                             from: 'data-agent', to: 'user', role: 'summary',
@@ -1087,6 +1252,34 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                         };
                         dispatch(dfActions.appendTriggerInteraction({ tableId: lastCreatedTableId, entries: [entry] }));
                     }
+                } else if (summary && currentDraftId) {
+                    // Pure Q&A run — the agent committed no action and answered in
+                    // plain text (e.g. the user just asked a question). There's no
+                    // table to anchor to. Treat the closing answer as an `explain`
+                    // pause: the draft enters the pause phase with the answer text,
+                    // so it surfaces in the explanation panel above the chat box
+                    // and the user can reply to continue the conversation (resume).
+                    const priorSteps = thinkingSteps.filter(s => s.trim()).join('\n');
+                    thinkingSteps = [];
+                    pendingThought = '';
+                    dispatch(dfActions.updateDraftRunningPlan({ draftId: currentDraftId, plan: '' }));
+
+                    const pauseEntry: InteractionEntry = {
+                        from: 'data-agent', to: 'user', role: 'explain',
+                        plan: priorSteps || result.content?.thought || undefined,
+                        content: summary,
+                        timestamp: Date.now(),
+                    };
+                    dispatch(dfActions.appendDraftInteraction({ draftId: currentDraftId, entry: pauseEntry }));
+                    dispatch(dfActions.updateDeriveStatus({ nodeId: currentDraftId, status: 'clarifying' }));
+                    dispatch(dfActions.updateDraftClarification({ draftId: currentDraftId, pendingClarification: {
+                        trajectory: result.trajectory || result.content?.trajectory || [],
+                        completedStepCount: result.completed_step_count || result.content?.completed_step_count || 0,
+                        lastCreatedTableId,
+                    }}));
+                    // Keep the node; clear the handle so handleCompletion's draft
+                    // cleanup doesn't remove the pause we just persisted.
+                    currentDraftId = null;
                 }
             }
         };
@@ -1097,6 +1290,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             setIsChatFormulating(false);
             agentAbortRef.current = null;
             clearTimeout(timeoutId);
+            if (reportFlushTimer) { clearTimeout(reportFlushTimer); reportFlushTimer = null; }
 
             // Clean up any remaining draft (the last step created a new draft that was never filled)
             if (currentDraftId) {
@@ -1114,7 +1308,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
 
         (async () => {
             try {
-                for await (const data of streamRequest(getUrls().DATA_AGENT_STREAMING, {
+                for await (const data of streamRequest(streamUrl, {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
                     body: messageBody,
@@ -1153,7 +1347,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
 
                     allResults.push(data);
                     await processStreamingResult(data);
-                    if (data.type === "completion" || data.type === "clarify" || data.type === "explain" || data.type === "delegate") {
+                    if (data.type === "completion" || data.type === "clarify" || data.type === "explain" || data.type === "interact" || data.type === "delegate") {
                         handleCompletion();
                         return;
                     }
@@ -1191,7 +1385,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 }
             }
         })();
-    }, [focusedTableId, tables, draftNodes, activeModel, config, conceptShelfItems, dispatch, t, attachedImages, attachedFiles]);
+    }, [focusedTableId, tables, draftNodes, activeModel, config, conceptShelfItems, charts, dispatch, t, attachedImages, attachedFiles]);
 
     // ── Report generation via report agent ──────────────────────────
 
@@ -1511,11 +1705,13 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         return chatPrompt.trim().length > 0;
     }, [chatPrompt, focusedTableId]);
 
-    // Handle a single clicked option (or free-text Enter) inside the
-    // ClarificationPanel. We just record the selection by question index
-    // — the chat box is NOT mutated. Auto-submit fires only when EVERY
-    // question is answered.
-    const handleSelectAnswer = useCallback((questionIndex: number, response: ClarificationResponse) => {
+    // Handle a single clicked option (or confirmed free-text) inside the
+    // ClarificationPanel. We record the selection by question index — the
+    // chat box is NOT mutated. `autoSubmit` (default true) lets an explicit
+    // confirm (option click / check button / Enter) fire the whole panel once
+    // every question is answered; an implicit confirm (blur auto-record)
+    // passes false so it records but never submits.
+    const handleSelectAnswer = useCallback((questionIndex: number, response: ClarificationResponse, autoSubmit: boolean = true) => {
         const questions = clarificationQuestions?.questions;
         if (!questions || !pendingClarification) return;
         if (clarifySubmittedRef.current === pendingClarification.draftId) return;
@@ -1523,16 +1719,31 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         const newAnswers = { ...clarifyAnswers, [questionIndex]: response };
         setClarifyAnswers(newAnswers);
 
-        // Auto-submit only when ALL questions are answered. Otherwise we
-        // wait for the user to either answer the rest or hit Send manually.
+        // Auto-submit only when EVERY question is answered by a clicked option
+        // AND this was an explicit confirm. If any answer is typed text, we
+        // never auto-fire — the user submits via the shared panel button — so a
+        // stray option click can't sweep up an unfinished typed answer.
         const allAnswered = questions.every((_q, idx) => !!newAnswers[idx]);
-        if (allAnswered) {
+        const allOptions = questions.every((_q, idx) => newAnswers[idx]?.source === 'option');
+        if (allAnswered && allOptions && autoSubmit) {
             clarifySubmittedRef.current = pendingClarification.draftId;
             const responses: ClarificationResponse[] = questions.map((_q, idx) => newAnswers[idx]);
             resumeFromClarification(responses);
         }
     }, [clarificationQuestions, pendingClarification, clarifyAnswers, resumeFromClarification]);
 
+    // Clear a question's recorded answer (the user started editing its field,
+    // which invalidates a prior option pick or confirmed free-text reply).
+    const handleClearAnswer = useCallback((questionIndex: number) => {
+        setClarifyAnswers(prev => {
+            if (!(questionIndex in prev)) return prev;
+            const next = { ...prev };
+            delete next[questionIndex];
+            return next;
+        });
+    }, []);
+
+
     const cancelAgent = useCallback(() => {
         if (agentAbortRef.current) {
             agentAbortRef.current.abort();
@@ -1624,10 +1835,20 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     variant={clarificationQuestions.variant}
                     selectedAnswers={clarifyAnswers}
                     onSelectAnswer={handleSelectAnswer}
+                    onClearAnswer={handleClearAnswer}
                     onSubmit={resumeFromClarification}
                     onCancel={cancelAgent}
                 />
             )}
+            {clarificationQuestions?.kind === 'clarification' && !clarificationQuestions.questions && clarificationQuestions.variant === 'explain' && clarificationQuestions.content && pendingClarification && !isChatFormulating && (
+                // Plain-text closing answer surfaced as an explanation pause:
+                // read-only, no questions. The user can still type a followup
+                // in the chat box below (which resumes the conversation).
+                <ExplanationPanel
+                    content={clarificationQuestions.content}
+                    onCancel={cancelAgent}
+                />
+            )}
             {clarificationQuestions?.kind === 'delegate' && pendingClarification && !isChatFormulating && (
                 <DelegatePanel
                     target={clarificationQuestions.target}
diff --git a/tests/frontend/unit/views/ClarificationPanel.test.tsx b/tests/frontend/unit/views/ClarificationPanel.test.tsx
index 86274f34..c0648d30 100644
--- a/tests/frontend/unit/views/ClarificationPanel.test.tsx
+++ b/tests/frontend/unit/views/ClarificationPanel.test.tsx
@@ -16,6 +16,8 @@ vi.mock('react-i18next', () => ({
         'chartRec.clarificationQuestionLabel': `${params?.index}.`,
         'chartRec.optionalClarification': '(optional)',
         'chartRec.freeTextClarificationPlaceholder': 'Type your answer...',
+        'chartRec.customAnswerPlaceholder': 'Or type your own answer...',
+        'chartRec.confirmAnswer': 'Confirm answer',
         'chartRec.freeTextClarificationHint': 'Type your answer in the chat box below.',
       };
       return labels[key] || key;
@@ -82,7 +84,7 @@ describe('ClarificationPanel', () => {
     expect(onSubmit).not.toHaveBeenCalled();
   });
 
-  it('shows a chat-box hint for free-text questions and renders no input', () => {
+  it('renders an inline input under a free-text question and submits it tagged to that question', () => {
     const onSubmit = vi.fn();
 
     render(
@@ -96,8 +98,116 @@ describe('ClarificationPanel', () => {
       />,
     );
 
-    expect(screen.getByText('Type your answer in the chat box below.')).toBeInTheDocument();
-    expect(screen.queryByPlaceholderText('Type your answer...')).toBeNull();
+    // No "use the chat box" hint anymore — the panel is self-contained.
+    expect(screen.queryByText('Type your answer in the chat box below.')).toBeNull();
+
+    // The input sits inline under the question (its own answer field), not the
+    // choice-only override.
+    const input = screen.getByPlaceholderText('Type your answer...');
+    expect(input).toBeInTheDocument();
+    expect(screen.queryByPlaceholderText('Or type your own answer...')).toBeNull();
+
+    // Empty input → nothing to submit yet.
     expect(onSubmit).not.toHaveBeenCalled();
+
+    fireEvent.change(input, { target: { value: 'Focus on 2024.' } });
+    fireEvent.keyDown(input, { key: 'Enter' });
+
+    // Tagged to the question it answers (index 0), not a generic freeform blob.
+    expect(onSubmit).toHaveBeenCalledWith([{
+      question_index: 0,
+      answer: 'Focus on 2024.',
+      source: 'free_text',
+    }]);
+  });
+
+  it('lets a single-choice question take a typed answer instead of a chip', () => {
+    const onSubmit = vi.fn();
+
+    render(
+      <ClarificationPanel
+        questions={[{
+          text: 'Which metric?',
+          responseType: 'single_choice',
+          options: [{ label: 'Revenue' }],
+        }]}
+        onSubmit={onSubmit}
+        onCancel={vi.fn()}
+      />,
+    );
+
+    // single_choice now offers BOTH the chip and its own freeform field.
+    expect(screen.getByRole('button', { name: 'Revenue' })).toBeInTheDocument();
+    const input = screen.getByPlaceholderText('Or type your own answer...');
+
+    fireEvent.change(input, { target: { value: 'Actually, profit margin.' } });
+    fireEvent.keyDown(input, { key: 'Enter' });
+
+    // Tagged to question 0 as a free_text answer (not a generic -1 override).
+    expect(onSubmit).toHaveBeenCalledWith([{
+      question_index: 0,
+      answer: 'Actually, profit margin.',
+      source: 'free_text',
+    }]);
+  });
+
+  it('supersedes a selected option when the user types a custom answer', () => {
+    const onSelectAnswer = vi.fn();
+    const onClearAnswer = vi.fn();
+
+    render(
+      <ClarificationPanel
+        questions={[{
+          text: 'Which metric?',
+          responseType: 'single_choice',
+          options: [{ label: 'Revenue' }],
+        }]}
+        selectedAnswers={{ 0: { question_index: 0, answer: 'Revenue', source: 'option' } }}
+        onSelectAnswer={onSelectAnswer}
+        onClearAnswer={onClearAnswer}
+        onSubmit={vi.fn()}
+        onCancel={vi.fn()}
+      />,
+    );
+
+    const input = screen.getByPlaceholderText('Or type your own answer...');
+    fireEvent.change(input, { target: { value: 'profit margin' } });
+
+    // Typing records a free_text answer (autoSubmit=false) that overrides the
+    // prior option pick.
+    expect(onSelectAnswer).toHaveBeenCalledWith(
+      0,
+      { question_index: 0, answer: 'profit margin', source: 'free_text' },
+      false,
+    );
+
+    // Clearing the field removes the answer entirely.
+    fireEvent.change(input, { target: { value: '' } });
+    expect(onClearAnswer).toHaveBeenCalledWith(0);
+  });
+
+  it('records a typed answer live and submits it on Enter', () => {
+    const onSubmit = vi.fn();
+
+    render(
+      <ClarificationPanel
+        questions={[{
+          text: 'Anything else?',
+          responseType: 'free_text',
+        }]}
+        onSubmit={onSubmit}
+        onCancel={vi.fn()}
+      />,
+    );
+
+    const input = screen.getByPlaceholderText('Type your answer...');
+    fireEvent.change(input, { target: { value: 'Focus on 2024.' } });
+    fireEvent.keyDown(input, { key: 'Enter' });
+
+    expect(onSubmit).toHaveBeenCalledWith([{
+      question_index: 0,
+      answer: 'Focus on 2024.',
+      source: 'free_text',
+    }]);
   });
 });

From e846fcb39676fd88cb6d1072dffeb534143b26ce Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Wed, 10 Jun 2026 21:24:39 -0700
Subject: [PATCH 19/29] dfSlice: remove focusedAnswerDraftId state and setFocusedAnswer reducer

---
 src/app/dfSlice.tsx | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index 8482786c..aca29fe3 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -177,11 +177,6 @@ export interface DataFormulatorState {
 
     focusedId: FocusedId;
 
-    // Draft id of a completed plain-text answer surfaced as an explanation card
-    // above the chat box. Auto-set when a Q&A run completes, cleared when the
-    // user focuses another item or sends a followup (see `setFocused`).
-    focusedAnswerDraftId: string | undefined;
-
     viewMode: 'editor' | 'report';
 
     chartSynthesisInProgress: string[];
@@ -295,7 +290,6 @@ const initialState: DataFormulatorState = {
 
     focusedDataCleanBlockId: undefined,
     focusedId: undefined,
-    focusedAnswerDraftId: undefined,
 
     viewMode: 'editor',
 
@@ -896,7 +890,6 @@ export const dataFormulatorSlice = createSlice({
                 conceptShelfItems: saved.conceptShelfItems || [],
                 focusedDataCleanBlockId: saved.focusedDataCleanBlockId || undefined,
                 focusedId: saved.focusedId || undefined,
-                focusedAnswerDraftId: undefined,
                 config: { ...initialState.config, ...(saved.config || {}) },
                 dataCleanBlocks: saved.dataCleanBlocks || [],
                 dataLoadingChatMessages: saved.dataLoadingChatMessages || [],
@@ -1676,8 +1669,6 @@ export const dataFormulatorSlice = createSlice({
         setFocused: (state, action: PayloadAction<FocusedId>) => {
             const payload = action.payload;
             state.focusedId = payload;
-            // Focusing any concrete item dismisses a lingering answer card.
-            state.focusedAnswerDraftId = undefined;
 
             if (payload?.type === 'chart' && state.viewMode == 'report') {
                 state.viewMode = 'editor';
@@ -1696,9 +1687,6 @@ export const dataFormulatorSlice = createSlice({
         setFocusedDataCleanBlockId: (state, action: PayloadAction<{blockId: string, itemId: number} | undefined>) => {
             state.focusedDataCleanBlockId = action.payload;
         },
-        setFocusedAnswer: (state, action: PayloadAction<string | undefined>) => {
-            state.focusedAnswerDraftId = action.payload;
-        },
         changeChartRunningStatus: (state, action: PayloadAction<{chartId: string, status: boolean}>) => {
             if (action.payload.status) {
                 state.chartSynthesisInProgress = [...new Set([...state.chartSynthesisInProgress, action.payload.chartId])]

From d210a3f46f9a3b6e84bffac7a0fea5adca0814fe Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 12 Jun 2026 00:40:15 -0700
Subject: [PATCH 20/29] cleaning up: remove legacy agent modules, ChartRecBox, and related tests

---
 py-src/data_formulator/agent_config.py        |    3 +-
 py-src/data_formulator/agents/__init__.py     |    7 -
 .../data_formulator/agents/agent_data_rec.py  |  400 ----
 .../agents/agent_data_transform.py            |  462 ----
 .../agents/agent_interactive_explore.py       |  343 ---
 .../agents/agent_report_gen.py                |  585 -----
 .../agents/chart_creation_guide.py            |  153 --
 py-src/data_formulator/agents/data_agent.py   | 1930 -----------------
 py-src/data_formulator/analyst/agent.py       |   20 +-
 .../analyst/skills/core/SKILL.md              |  117 +
 py-src/data_formulator/routes/agents.py       |  460 +---
 src/app/App.tsx                               |   29 -
 src/app/dfSlice.tsx                           |   14 +-
 src/app/useFormulateData.ts                   |  615 ------
 src/app/utils.tsx                             |    4 -
 src/i18n/locales/en/common.json               |   12 +-
 src/i18n/locales/zh/common.json               |   12 +-
 src/views/ChartRecBox.tsx                     |  863 --------
 src/views/DataThread.tsx                      |  176 +-
 src/views/EncodingShelfCard.tsx               |  577 +----
 src/views/SimpleChartRecBox.tsx               |  440 +---
 src/views/VisualizationView.tsx               |   13 +-
 .../test_agent_knowledge_integration.py       |  278 ---
 .../agents/test_data_agent_clarification.py   |  241 --
 .../agents/test_duckdb_notes_prompt.py        |   32 +-
 .../test_interactive_explore_context.py       |  127 --
 .../test_api_error_protocol_contract.py       |    8 +-
 .../routes/test_agent_diagnostics_wiring.py   |   12 +-
 .../routes/test_derive_data_repair_loop.py    |  387 ----
 29 files changed, 416 insertions(+), 7904 deletions(-)
 delete mode 100644 py-src/data_formulator/agents/agent_data_rec.py
 delete mode 100644 py-src/data_formulator/agents/agent_data_transform.py
 delete mode 100644 py-src/data_formulator/agents/agent_interactive_explore.py
 delete mode 100644 py-src/data_formulator/agents/agent_report_gen.py
 delete mode 100644 py-src/data_formulator/agents/chart_creation_guide.py
 delete mode 100644 py-src/data_formulator/agents/data_agent.py
 delete mode 100644 src/app/useFormulateData.ts
 delete mode 100644 src/views/ChartRecBox.tsx
 delete mode 100644 tests/backend/agents/test_agent_knowledge_integration.py
 delete mode 100644 tests/backend/agents/test_data_agent_clarification.py
 delete mode 100644 tests/backend/agents/test_interactive_explore_context.py
 delete mode 100644 tests/backend/routes/test_derive_data_repair_loop.py

diff --git a/py-src/data_formulator/agent_config.py b/py-src/data_formulator/agent_config.py
index 67dbbe31..3e4c2b51 100644
--- a/py-src/data_formulator/agent_config.py
+++ b/py-src/data_formulator/agent_config.py
@@ -48,8 +48,7 @@
     # ── Heavy: code-gen, multi-step, tool-using ─────────────────────────────
     "data_transform":      "low",      # generates Python transform scripts
     "data_rec":            "low",      # chart / transformation recommendation
-    "data_agent":          "low",      # multi-step exploration agent
-    "report_gen":          "low",      # narrative + inspect/embed tools
+    "analyst":             "low",      # unified multi-step exploration + report agent
     "interactive_explore": "low",      # exploration idea agent
     "data_loading_chat":   "low",      # conversational data loading w/ tools
 
diff --git a/py-src/data_formulator/agents/__init__.py b/py-src/data_formulator/agents/__init__.py
index e602fd67..d5e439a7 100644
--- a/py-src/data_formulator/agents/__init__.py
+++ b/py-src/data_formulator/agents/__init__.py
@@ -1,22 +1,15 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT License.
 
-from data_formulator.agents.agent_data_transform import DataTransformationAgent
-from data_formulator.agents.agent_data_rec import DataRecAgent
-
 from data_formulator.agents.agent_data_load import DataLoadAgent
 from data_formulator.agents.agent_sort_data import SortDataAgent
 from data_formulator.agents.agent_simple import SimpleAgents
-from data_formulator.agents.agent_interactive_explore import InteractiveExploreAgent
 from data_formulator.agents.agent_chart_insight import ChartInsightAgent
 from data_formulator.agents.agent_chart_restyle import ChartRestyleAgent
 
 __all__ = [
-    "DataTransformationAgent",
-    "DataRecAgent",
     "DataLoadAgent",
     "SortDataAgent",
-    "InteractiveExploreAgent",
     "ChartInsightAgent",
     "ChartRestyleAgent",
 ]
diff --git a/py-src/data_formulator/agents/agent_data_rec.py b/py-src/data_formulator/agents/agent_data_rec.py
deleted file mode 100644
index 8bd9f054..00000000
--- a/py-src/data_formulator/agents/agent_data_rec.py
+++ /dev/null
@@ -1,400 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-import json
-import time
-
-from data_formulator.agent_config import reasoning_effort_for
-from data_formulator.agents.agent_utils import extract_json_objects, extract_code_from_gpt_response, generate_data_summary, supplement_missing_block, ensure_output_variable_in_code, compose_system_prompt
-from data_formulator.agents.agent_diagnostics import AgentDiagnostics
-from data_formulator.datalake.parquet_utils import df_to_safe_records
-from data_formulator.security.sanitize import sanitize_error_message
-
-import pandas as pd
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-_AGENT_ID = "data_rec"
-
-from data_formulator.agents.chart_creation_guide import (
-    SHARED_ENVIRONMENT,
-    SHARED_SEMANTIC_TYPE_REFERENCE,
-    SHARED_CHART_REFERENCE,
-    SHARED_STATISTICAL_ANALYSIS,
-    SHARED_DUCKDB_NOTES,
-)
-
-# =============================================================================
-# DataRecAgent system prompt
-# =============================================================================
-
-SYSTEM_PROMPT = f'''You are a data scientist who recommends data and visualizations.
-Given [CONTEXT] (dataset summaries) and [GOAL] (user intent), recommend a transformed dataset and visualization, then write a Python script to produce it.
-
-{SHARED_ENVIRONMENT}
-
-You will produce two outputs: a JSON spec (```json```) and a Python script (```python```). No extra text.
-
-**Step 1: JSON spec** — infer user intent and recommend a visualization.
-
-```json
-{{{{
-    "display_instruction": "", // short verb phrase (<12 words) capturing computation intent. Bold **column names** (semantic matches count). For follow-ups, describe only the new part.
-    "input_tables": [...],   // table names from [CONTEXT] to use
-    "output_fields": [...],  // desired output fields (include intermediate fields)
-    "chart": {{{{
-        "chart_type": "",    // from [CHART TYPE REFERENCE]
-        "encodings": {{{{}}}},   // visual channels → output field names
-        "config": {{{{}}}}       // optional styling
-    }}}},
-    "field_metadata": {{{{     // semantic type for each encoding field
-        "<field>": "Category"    // from [SEMANTIC TYPE REFERENCE]
-    }}}},
-    "output_variable": ""   // descriptive snake_case name (e.g. "sales_by_region"), not "result_df"
-}}}}
-```
-
-**Data format rules:**
-- Output must be tidy (one field per visual channel, like VegaLite/ggplot2).
-- For multiple similar columns: reshape to long format (only same semantic type in one column).
-- For derived metrics: compute new fields (correlation, difference, profit, etc.).
-- Keep encodings to 2–3 channels (x, y, color/size). Add facet only when needed.
-
-{SHARED_SEMANTIC_TYPE_REFERENCE}
-
-{SHARED_CHART_REFERENCE}
-
-{SHARED_STATISTICAL_ANALYSIS}
-
-**Step 2: Python script** — transform input data to produce a DataFrame with all "output_fields". Keep it simple and readable. The script MUST assign the final result to the variable named in `"output_variable"` from Step 1.
-
-**Datetime handling:**
-- Year → number. Year-month / year-month-day → string ("2020-01" / "2020-01-01").
-- Hour alone → number. Hour:min or h:m:s → string. Never return raw datetime objects.
-
-{SHARED_DUCKDB_NOTES}'''
-
-
-def _combine_rules(text_rules: str, knowledge_rules: list[dict]) -> str:
-    """Merge text rules and knowledge-file rules into a single string."""
-    parts = []
-    if text_rules and text_rules.strip():
-        parts.append(text_rules.strip())
-    for rule in knowledge_rules:
-        parts.append(f"### {rule['title']}\n{rule['body']}")
-    return "\n\n".join(parts)
-
-
-class DataRecAgent(object):
-
-    def __init__(self, client, workspace, system_prompt=None, agent_coding_rules="", language_instruction="", max_display_rows=10000, model_info=None, knowledge_store=None):
-        self.client = client
-        self.workspace = workspace
-        self.max_display_rows = max_display_rows
-        self._model_info = model_info or {}
-        self._agent_coding_rules = agent_coding_rules
-        self._language_instruction = language_instruction
-
-        knowledge_rules = knowledge_store.load_always_apply_rules() if knowledge_store else []
-        combined_rules = _combine_rules(agent_coding_rules, knowledge_rules)
-
-        if system_prompt is not None:
-            self._base_prompt = system_prompt
-        else:
-            self._base_prompt = SYSTEM_PROMPT
-
-        # Insert language instruction early (after role definition, before technical
-        # sections) so the LLM's "last impression" remains chart/code rules,
-        # reducing recency-bias interference on chart-type selection.
-        self.system_prompt = compose_system_prompt(
-            self._base_prompt,
-            agent_coding_rules=combined_rules if system_prompt is None else "",
-            language_instruction=language_instruction,
-            language_marker="**About the execution environment:**",
-        )
-
-        self._diag = AgentDiagnostics(
-            agent_name="DataRecAgent",
-            model_info=self._model_info,
-            base_system_prompt=self._base_prompt,
-            agent_coding_rules=self._agent_coding_rules,
-            language_instruction=self._language_instruction,
-            assembled_system_prompt=self.system_prompt,
-        )
-
-    def process_gpt_response(self, input_tables, messages, response, t_llm=None):
-        """Process GPT response to handle Python code execution"""
-        t_start = time.time()
-        t_exec_total = 0.0
-
-        if isinstance(response, Exception):
-            raw_error = str(getattr(response, "body", response))
-            safe_error = sanitize_error_message(raw_error)
-            result = {'status': 'other error', 'content': safe_error,
-                      'diagnostics': self._diag.for_error(messages, error=safe_error)}
-            return [result]
-
-        candidates = []
-        for choice in response.choices:
-
-            logger.debug("\n=== Data recommendation result ===>\n")
-            logger.debug(choice.message.content + "\n")
-
-            # --- Parse JSON spec and Python code ---
-            json_blocks = extract_json_objects(choice.message.content + "\n")
-            refined_goal = None
-            for jb in json_blocks:
-                if isinstance(jb, dict):
-                    refined_goal = jb
-                    break
-            code_blocks = extract_code_from_gpt_response(choice.message.content + "\n", "python")
-
-            # If only one block was produced, request the missing one
-            refined_goal, code_blocks, _supplement_content, t_supplement = supplement_missing_block(
-                self.client, messages, choice.message.content,
-                refined_goal, code_blocks, prefix="[DataRecAgent]"
-            )
-
-            # Apply fallbacks for missing JSON
-            json_fallback_used = refined_goal is None
-            if refined_goal is None:
-                refined_goal = {'output_fields': [], 'chart': {'chart_type': "", 'encodings': {}, 'config': {}}, 'output_variable': 'result_df'}
-                logger.warning(
-                    "[DataRecAgent] JSON spec parsing failed — using fallback defaults. "
-                    f"Response snippet: {choice.message.content[:300]!r}"
-                )
-            output_variable = refined_goal.get('output_variable', 'result_df') or 'result_df'
-            logger.info(f"[DataRecAgent] extracted output_variable={output_variable!r}")
-
-            # Diagnostics tracking
-            import re as _re
-            _diag_code = code_blocks[-1] if code_blocks else None
-            _diag_output_var_in_code = bool(
-                _diag_code and output_variable
-                and _re.search(rf'(?:^|\n)\s*{_re.escape(output_variable)}\s*=(?!=)', _diag_code)
-            )
-            _diag_sandbox_mode = None
-            _diag_exec = {"status": None}
-            _diag_code_patched = False
-
-            if len(code_blocks) > 0:
-                code = code_blocks[-1]
-
-                if output_variable and not _diag_output_var_in_code:
-                    code, was_patched, detected_var = ensure_output_variable_in_code(code, output_variable)
-                    _diag_code_patched = was_patched
-                    if was_patched:
-                        logger.info(
-                            f"[DataRecAgent] output_variable {output_variable!r} not in code — "
-                            f"patched: appended `{output_variable} = {detected_var}`"
-                        )
-                    else:
-                        logger.warning(
-                            f"[DataRecAgent] output_variable {output_variable!r} not in code "
-                            f"and auto-patch found no candidate variable."
-                        )
-
-                try:
-                    from data_formulator.sandbox import create_sandbox
-
-                    try:
-                        from flask import current_app
-                        sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local')
-                    except (ImportError, RuntimeError):
-                        sandbox_mode = 'local'
-                    _diag_sandbox_mode = sandbox_mode
-
-                    t_exec_start = time.time()
-                    sandbox = create_sandbox(sandbox_mode)
-                    execution_result = sandbox.run_python_code(
-                        code=code,
-                        workspace=self.workspace,
-                        output_variable=output_variable,
-                    )
-                    t_exec_total += time.time() - t_exec_start
-
-                    if execution_result['status'] != 'ok':
-                        diagnostics = execution_result.get("diagnostics", {})
-                        raw_exec_error = diagnostics.get(
-                            "safe_detail",
-                            execution_result.get('content', execution_result.get('error_message', 'Unknown error')),
-                        )
-                        safe_exec_error = sanitize_error_message(raw_exec_error)
-                    else:
-                        safe_exec_error = None
-                    _diag_exec = {
-                        "status": execution_result['status'],
-                        "error_message": safe_exec_error,
-                        "available_dataframes": execution_result.get('df_names', []),
-                    }
-
-                    if execution_result['status'] == 'ok':
-                        full_df = execution_result['content']
-                        row_count = len(full_df)
-
-                        output_table_name = self.workspace.get_fresh_name(f"d-{output_variable}")
-                        self.workspace.write_parquet(full_df, output_table_name)
-
-                        if row_count > self.max_display_rows:
-                            query_output = full_df.head(self.max_display_rows)
-                        else:
-                            query_output = full_df
-                        query_output = query_output.loc[:, ~query_output.columns.duplicated()]
-
-                        result = {
-                            "status": "ok",
-                            "code": code,
-                            "content": {
-                                'rows': df_to_safe_records(query_output),
-                                'virtual': {
-                                    'table_name': output_table_name,
-                                    'row_count': row_count
-                                }
-                            },
-                        }
-                    else:
-                        result = {
-                            'status': 'error',
-                            'code': code,
-                            'content': safe_exec_error or 'Unknown error'
-                        }
-
-                except Exception as e:
-                    logger.exception('Error occurred during code execution')
-                    safe_error = sanitize_error_message(f"{type(e).__name__}: {e}")
-                    result = {
-                        'status': 'other error',
-                        'code': code,
-                        'content': "Unexpected error during code execution.",
-                        'content_code': 'agent.unexpectedError'
-                    }
-                    _diag_exec = {"status": "exception", "error_message": safe_error}
-            else:
-                result = {'status': 'error', 'code': "", 'content': "No code block found in the response. The model is unable to generate code to complete the task.", 'content_code': 'agent.noCodeBlock'}
-
-            _effective_content = choice.message.content
-            if _supplement_content:
-                _effective_content += "\n\n" + _supplement_content
-            result['dialog'] = [*messages, {"role": choice.message.role, "content": _effective_content}]
-            result['agent'] = 'DataRecAgent'
-            result['refined_goal'] = refined_goal
-
-            # --- Build diagnostics ---
-            usage = getattr(response, 'usage', None)
-            result['diagnostics'] = self._diag.for_response(
-                messages,
-                raw_content=choice.message.content,
-                finish_reason=getattr(choice, 'finish_reason', None),
-                json_spec=refined_goal,
-                json_fallback_used=json_fallback_used,
-                code_found=len(code_blocks) > 0,
-                code=_diag_code,
-                output_variable=output_variable,
-                output_variable_in_code=_diag_output_var_in_code,
-                code_patched=_diag_code_patched,
-                supplemented=_supplement_content is not None,
-                sandbox_mode=_diag_sandbox_mode,
-                exec_status=_diag_exec.get("status"),
-                exec_error=_diag_exec.get("error_message"),
-                exec_df_names=_diag_exec.get("available_dataframes"),
-                t_llm=t_llm or 0,
-                t_supplement=t_supplement,
-                t_exec=t_exec_total,
-                prompt_tokens=getattr(usage, 'prompt_tokens', None) if usage else None,
-                completion_tokens=getattr(usage, 'completion_tokens', None) if usage else None,
-            )
-
-            candidates.append(result)
-
-        t_total = time.time() - t_start
-        t_llm_val = t_llm or 0.0
-
-        logger.debug("=== Recommendation Candidates ===>")
-        for candidate in candidates:
-            for key, value in candidate.items():
-                if key in ['dialog', 'content', 'diagnostics']:
-                    logger.debug(f"##{key}:\n{str(value)[:1000]}...")
-                else:
-                    logger.debug(f"## {key}:\n{value}")
-
-        usage = getattr(response, 'usage', None)
-        usage_str = ""
-        if usage:
-            usage_str = f" | tokens: in={getattr(usage, 'prompt_tokens', None)}, out={getattr(usage, 'completion_tokens', None)}"
-        logger.info(f"[DataRecAgent] timing: llm={t_llm_val:.3f}s, supplement={t_supplement:.3f}s, exec={t_exec_total:.3f}s, total={t_total + t_llm_val:.3f}s{usage_str}")
-        return candidates
-
-    def run(self, input_tables, description, n=1, prev_messages: list[dict] = [], primary_tables=None):
-        """
-        Args:
-            input_tables: list[dict], each dict contains 'name' (table name in workspace) and 'rows'
-            description: str, the description of what the user wants
-            n: int, the number of candidates
-            prev_messages: list[dict], the previous messages
-            primary_tables: list[str], names of the primary (focused) tables for context prioritization
-        """
-        table_names = [t.get('name', '?') for t in input_tables]
-        logger.info(f"[DataRecAgent] run start | tables={table_names} | primary={primary_tables}")
-
-        # Generate data summary with file references
-        data_summary = generate_data_summary(
-            input_tables,
-            workspace=self.workspace,
-            primary_tables=primary_tables,
-        )
-
-        user_query = f"[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}"
-        if len(prev_messages) > 0:
-            user_query = f"The user wants a new recommendation based off the following updated context and goal:\n\n[CONTEXT]\n\n{data_summary}\n\n[GOAL]\n\n{description}"
-
-        logger.debug(user_query)
-
-        # Filter out system messages from prev_messages
-        filtered_prev_messages = [msg for msg in prev_messages if msg.get("role") != "system"]
-
-        messages = [{"role":"system", "content": self.system_prompt},
-                    *filtered_prev_messages,
-                    {"role":"user","content": user_query}]
-
-        t_llm_start = time.time()
-        response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-        t_llm = time.time() - t_llm_start
-
-        candidates = self.process_gpt_response(input_tables, messages, response, t_llm=t_llm)
-        status = candidates[0].get('status', '?') if candidates else 'empty'
-        logger.info(f"[DataRecAgent] run done | status={status}")
-        return candidates
-
-
-    def followup(self, input_tables, dialog, latest_data_sample, new_instruction: str, n=1):
-        """
-        Followup recommendation based on previous dialog and new instruction.
-
-        Args:
-            input_tables: list of input tables
-            dialog: previous conversation history
-            latest_data_sample: sample of the latest transformation result
-            new_instruction: new user instruction for followup
-            n: number of candidates
-        """
-        logger.debug(f"GOAL: \n\n{new_instruction}")
-        logger.info(f"[DataRecAgent] followup start")
-
-        # Format sample data
-        sample_data_str = pd.DataFrame(latest_data_sample).head(10).to_string() + '\n......'
-
-        # Replace the old system prompt with the current one so that
-        # conversations continued from older threads pick up prompt changes.
-        updated_dialog = [{"role": "system", "content": self.system_prompt}, *dialog[1:]]
-
-        messages = [*updated_dialog,
-                    {"role":"user",
-                    "content": f"This is the result from the latest transformation:\n\n{sample_data_str}\n\nUpdate the Python script above based on the following instruction:\n\n{new_instruction}"}]
-
-        t_llm_start = time.time()
-        response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-        t_llm = time.time() - t_llm_start
-
-        return self.process_gpt_response(input_tables, messages, response, t_llm=t_llm)
diff --git a/py-src/data_formulator/agents/agent_data_transform.py b/py-src/data_formulator/agents/agent_data_transform.py
deleted file mode 100644
index bdd842d2..00000000
--- a/py-src/data_formulator/agents/agent_data_transform.py
+++ /dev/null
@@ -1,462 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-import json
-import time
-
-from data_formulator.agent_config import reasoning_effort_for
-from data_formulator.agents.agent_utils import extract_json_objects, extract_code_from_gpt_response, supplement_missing_block, ensure_output_variable_in_code, compose_system_prompt
-from data_formulator.agents.agent_diagnostics import AgentDiagnostics
-from data_formulator.datalake.parquet_utils import df_to_safe_records
-from data_formulator.security.sanitize import sanitize_error_message
-from data_formulator.agents.chart_creation_guide import (
-    SHARED_ENVIRONMENT,
-    SHARED_SEMANTIC_TYPE_REFERENCE,
-    SHARED_CHART_REFERENCE,
-    SHARED_STATISTICAL_ANALYSIS,
-    SHARED_DUCKDB_NOTES,
-)
-import pandas as pd
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-_AGENT_ID = "data_transform"
-
-SYSTEM_PROMPT = f'''You are a data scientist who transforms data for visualization.
-Given [CONTEXT] (dataset summaries) and [GOAL] (user intent + chart spec), refine the goal and write a Python script to produce the transformed data.
-
-The user's [GOAL] includes a "chart" object (chart_type, encodings, config) describing the desired visualization and a natural language "goal".
-
-{SHARED_ENVIRONMENT}
-
-You will produce two outputs: a JSON spec (```json```) and a Python script (```python```). No extra text.
-
-**Step 1: JSON spec** — refine the user's goal and finalize the chart.
-
-Check if the user's "chart" (chart_type + encodings) is sufficient for their "goal":
-- If encodings are sufficient, copy them.
-- If encodings are missing fields, add minimal fields needed (aim for ≤3 channels: x, y, color/size).
-- If encodings can be optimized, reorder for better visualization.
-- If the user says "use B instead of A" while A is in encodings, update accordingly.
-- For lat/lon data, use "latitude"/"longitude" as channel names, not "x"/"y".
-- The user's chart_type may not be in [CHART TYPE REFERENCE] (e.g., "Radar Chart", "Bump Chart"). Preserve it as-is and infer valid encodings from channel names in the input.
-
-```json
-{{{{
-    "input_tables": [...],       // table names from [CONTEXT]. Table 1 = currently viewed — prioritize it.
-    "detailed_instruction": "",  // elaborated user instruction with details
-    "display_instruction": "", // short verb phrase (<12 words) capturing computation intent. Bold **column names** (semantic matches count). For follow-ups, describe only the new part.
-    "output_fields": [...],      // desired output fields (include intermediate fields)
-    "chart": {{{{
-        "chart_type": "",        // from [CHART TYPE REFERENCE], or keep the user's chart_type as-is if not listed
-        "encodings": {{{{}}}},       // visual channels → output field names
-        "config": {{{{}}}}           // optional styling
-    }}}},
-    "field_metadata": {{{{         // semantic type for each encoding field
-        "<field>": "Category"        // from [SEMANTIC TYPE REFERENCE]
-    }}}},
-    "output_variable": "",       // descriptive snake_case name (e.g. "sales_by_region"), not "result_df"
-    "reason": ""                 // why this refinement is made
-}}}}
-```
-
-{SHARED_SEMANTIC_TYPE_REFERENCE}
-
-{SHARED_CHART_REFERENCE}
-
-{SHARED_STATISTICAL_ANALYSIS}
-
-**Step 2: Python script** — transform input data to produce a DataFrame with all "output_fields". Keep it simple and readable. The script MUST assign the final result to the variable named in `"output_variable"` from Step 1.
-
-**Datetime handling:**
-- Year → number. Year-month / year-month-day → string ("2020-01" / "2020-01-01").
-- Hour alone → number. Hour:min or h:m:s → string. Never return raw datetime objects.
-
-{SHARED_DUCKDB_NOTES}'''
-
-
-def _combine_rules(text_rules: str, knowledge_rules: list[dict]) -> str:
-    """Merge text rules and knowledge-file rules into a single string."""
-    parts = []
-    if text_rules and text_rules.strip():
-        parts.append(text_rules.strip())
-    for rule in knowledge_rules:
-        parts.append(f"### {rule['title']}\n{rule['body']}")
-    return "\n\n".join(parts)
-
-
-class DataTransformationAgent(object):
-
-    def __init__(self, client, workspace, system_prompt=None, agent_coding_rules="", language_instruction="", max_display_rows=10000, model_info=None, knowledge_store=None):
-        self.client = client
-        self.workspace = workspace
-        self.max_display_rows = max_display_rows
-        self._model_info = model_info or {}
-        self._agent_coding_rules = agent_coding_rules
-        self._language_instruction = language_instruction
-
-        knowledge_rules = knowledge_store.load_always_apply_rules() if knowledge_store else []
-        combined_rules = _combine_rules(agent_coding_rules, knowledge_rules)
-
-        if system_prompt is not None:
-            self._base_prompt = system_prompt
-        else:
-            self._base_prompt = SYSTEM_PROMPT
-
-        self.system_prompt = compose_system_prompt(
-            self._base_prompt,
-            agent_coding_rules=combined_rules if system_prompt is None else "",
-            language_instruction=language_instruction,
-            language_marker="**About the execution environment:**",
-        )
-
-        self._diag = AgentDiagnostics(
-            agent_name="DataTransformationAgent",
-            model_info=self._model_info,
-            base_system_prompt=self._base_prompt,
-            agent_coding_rules=self._agent_coding_rules,
-            language_instruction=self._language_instruction,
-            assembled_system_prompt=self.system_prompt,
-        )
-
-    def process_gpt_response(self, response, messages, t_llm=None):
-        """Process GPT response to handle Python code execution"""
-        t_start = time.time()
-        t_exec_total = 0.0
-
-        if isinstance(response, Exception):
-            raw_error = str(getattr(response, "body", response))
-            safe_error = sanitize_error_message(raw_error)
-            result = {'status': 'other error', 'content': safe_error,
-                      'diagnostics': self._diag.for_error(messages, error=safe_error)}
-            return [result]
-
-        candidates = []
-        for choice in response.choices:
-            logger.debug("=== Python script result ===>")
-            logger.debug(choice.message.content + "\n")
-
-            # --- Parse JSON spec and Python code ---
-            json_blocks = extract_json_objects(choice.message.content + "\n")
-            refined_goal = None
-            for jb in json_blocks:
-                if isinstance(jb, dict):
-                    refined_goal = jb
-                    break
-            code_blocks = extract_code_from_gpt_response(choice.message.content + "\n", "python")
-
-            # If only one block was produced, request the missing one
-            refined_goal, code_blocks, _supplement_content, t_supplement = supplement_missing_block(
-                self.client, messages, choice.message.content,
-                refined_goal, code_blocks, prefix="[DataTransformAgent]"
-            )
-
-            # Apply fallbacks for missing JSON
-            json_fallback_used = refined_goal is None
-            if refined_goal is None:
-                refined_goal = {'chart': {'chart_type': '', 'encodings': {}, 'config': {}}, 'instruction': '', 'reason': '', 'output_variable': 'result_df'}
-                logger.warning(
-                    "[DataTransformAgent] JSON spec parsing failed — using fallback defaults. "
-                    f"Response snippet: {choice.message.content[:300]!r}"
-                )
-            output_variable = refined_goal.get('output_variable', 'result_df') or 'result_df'
-            logger.info(f"[DataTransformAgent] extracted output_variable={output_variable!r}")
-
-            import re as _re
-            _diag_code = code_blocks[-1] if code_blocks else None
-            _diag_output_var_in_code = bool(
-                _diag_code and output_variable
-                and _re.search(rf'(?:^|\n)\s*{_re.escape(output_variable)}\s*=(?!=)', _diag_code)
-            )
-            _diag_sandbox_mode = None
-            _diag_exec = {"status": None}
-            _diag_code_patched = False
-
-            if len(code_blocks) > 0:
-                code = code_blocks[-1]
-
-                if output_variable and not _diag_output_var_in_code:
-                    code, was_patched, detected_var = ensure_output_variable_in_code(code, output_variable)
-                    _diag_code_patched = was_patched
-                    if was_patched:
-                        logger.info(
-                            f"[DataTransformAgent] output_variable {output_variable!r} not in code — "
-                            f"patched: appended `{output_variable} = {detected_var}`"
-                        )
-                    else:
-                        logger.warning(
-                            f"[DataTransformAgent] output_variable {output_variable!r} not in code "
-                            f"and auto-patch found no candidate variable."
-                        )
-
-                try:
-                    from data_formulator.sandbox import create_sandbox
-
-                    try:
-                        from flask import current_app
-                        sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local')
-                    except (ImportError, RuntimeError):
-                        sandbox_mode = 'local'
-                    _diag_sandbox_mode = sandbox_mode
-
-                    t_exec_start = time.time()
-                    sandbox = create_sandbox(sandbox_mode)
-                    execution_result = sandbox.run_python_code(
-                        code=code,
-                        workspace=self.workspace,
-                        output_variable=output_variable,
-                    )
-                    t_exec_total += time.time() - t_exec_start
-
-                    if execution_result['status'] != 'ok':
-                        diagnostics = execution_result.get("diagnostics", {})
-                        raw_exec_error = diagnostics.get(
-                            "safe_detail",
-                            execution_result.get('content', execution_result.get('error_message', 'Unknown error')),
-                        )
-                        safe_exec_error = sanitize_error_message(raw_exec_error)
-                    else:
-                        safe_exec_error = None
-                    _diag_exec = {
-                        "status": execution_result['status'],
-                        "error_message": safe_exec_error,
-                        "available_dataframes": execution_result.get('df_names', []),
-                    }
-
-                    if execution_result['status'] == 'ok':
-                        full_df = execution_result['content']
-                        row_count = len(full_df)
-
-                        output_table_name = self.workspace.get_fresh_name(f"d-{output_variable}")
-                        self.workspace.write_parquet(full_df, output_table_name)
-
-                        if row_count > self.max_display_rows:
-                            query_output = full_df.head(self.max_display_rows)
-                        else:
-                            query_output = full_df
-                        query_output = query_output.loc[:, ~query_output.columns.duplicated()]
-
-                        result = {
-                            "status": "ok",
-                            "code": code,
-                            "content": {
-                                'rows': df_to_safe_records(query_output),
-                                'virtual': {
-                                    'table_name': output_table_name,
-                                    'row_count': row_count
-                                }
-                            },
-                        }
-                    else:
-                        result = {
-                            'status': 'error',
-                            'code': code,
-                            'content': safe_exec_error or 'Unknown error'
-                        }
-
-                except Exception as e:
-                    logger.exception('Error occurred during code execution')
-                    safe_error = sanitize_error_message(f"{type(e).__name__}: {e}")
-                    result = {
-                        'status': 'error',
-                        'code': code,
-                        'content': "An error occurred during code execution."
-                    }
-                    _diag_exec = {"status": "exception", "error_message": safe_error}
-
-            else:
-                result = {'status': 'error', 'code': "", 'content': "No code block found in the response. The model is unable to generate code to complete the task.", 'content_code': 'agent.noCodeBlock'}
-
-            _effective_content = choice.message.content
-            if _supplement_content:
-                _effective_content += "\n\n" + _supplement_content
-            result['dialog'] = [*messages, {"role": choice.message.role, "content": _effective_content}]
-            result['agent'] = 'DataTransformationAgent'
-            result['refined_goal'] = refined_goal
-
-            # --- Build diagnostics ---
-            usage = getattr(response, 'usage', None)
-            result['diagnostics'] = self._diag.for_response(
-                messages,
-                raw_content=choice.message.content,
-                finish_reason=getattr(choice, 'finish_reason', None),
-                json_spec=refined_goal,
-                json_fallback_used=json_fallback_used,
-                code_found=len(code_blocks) > 0,
-                code=_diag_code,
-                output_variable=output_variable,
-                output_variable_in_code=_diag_output_var_in_code,
-                code_patched=_diag_code_patched,
-                supplemented=_supplement_content is not None,
-                sandbox_mode=_diag_sandbox_mode,
-                exec_status=_diag_exec.get("status"),
-                exec_error=_diag_exec.get("error_message"),
-                exec_df_names=_diag_exec.get("available_dataframes"),
-                t_llm=t_llm or 0,
-                t_supplement=t_supplement,
-                t_exec=t_exec_total,
-                prompt_tokens=getattr(usage, 'prompt_tokens', None) if usage else None,
-                completion_tokens=getattr(usage, 'completion_tokens', None) if usage else None,
-            )
-
-            candidates.append(result)
-
-        t_total = time.time() - t_start
-        t_llm_val = t_llm or 0.0
-
-        logger.debug("=== Transform Candidates ===>")
-        for candidate in candidates:
-            for key, value in candidate.items():
-                if key in ['dialog', 'content', 'diagnostics']:
-                    logger.debug(f"##{key}:\n{str(value)[:1000]}...")
-                else:
-                    logger.debug(f"## {key}:\n{value}")
-
-        usage = getattr(response, 'usage', None)
-        usage_str = ""
-        if usage:
-            usage_str = f" | tokens: in={getattr(usage, 'prompt_tokens', None)}, out={getattr(usage, 'completion_tokens', None)}"
-        logger.info(f"[DataTransformAgent] timing: llm={t_llm_val:.3f}s, supplement={t_supplement:.3f}s, exec={t_exec_total:.3f}s, total={t_total + t_llm_val:.3f}s{usage_str}")
-        return candidates
-
-
-    def run(self, input_tables, description, prev_messages: list[dict] = [], n=1,
-             current_visualization=None, expected_visualization=None):
-        """Args:
-            input_tables: list[dict], each dict contains 'name' (table name in workspace)
-            description: str, the description of the data transformation
-            prev_messages: list[dict], the previous messages
-            n: int, the number of candidates
-            current_visualization: dict or None, contains chart_spec and optional chart_image for complete charts
-            expected_visualization: dict or None, contains chart_spec for incomplete charts
-        """
-        table_names = [t.get('name', '?') for t in input_tables]
-        logger.info(f"[DataTransformAgent] run start | tables={table_names}")
-
-        # Generate data summary with file references
-        from data_formulator.agents.agent_utils import generate_data_summary
-        data_summary = generate_data_summary(input_tables, workspace=self.workspace)
-
-        # Build visualization context section
-        vis_section = ""
-        if current_visualization:
-            vis_section = f"\n\n[CURRENT VISUALIZATION] This is the current visualization the user has:\n\n{json.dumps(current_visualization.get('chart_spec', {}), indent=4, ensure_ascii=False)}"
-        elif expected_visualization:
-            vis_section = f"\n\n[EXPECTED VISUALIZATION] This is the visualization expected by the user:\n\n{json.dumps(expected_visualization.get('chart_spec', {}), indent=4, ensure_ascii=False)}"
-
-        # Order: context → visualization → goal
-        if len(prev_messages) > 0:
-            user_query = f"The user wants a new transformation based off the following updated context and goal:\n\n[CONTEXT]\n\n{data_summary}{vis_section}\n\n[GOAL]\n\n{description}"
-        else:
-            user_query = f"[CONTEXT]\n\n{data_summary}{vis_section}\n\n[GOAL]\n\n{description}"
-
-        logger.debug(user_query)
-
-        # Filter out system messages from prev_messages
-        filtered_prev_messages = [msg for msg in prev_messages if msg.get("role") != "system"]
-
-        # Build user message content: include chart image if available
-        chart_image = current_visualization.get('chart_image') if current_visualization else None
-        has_image = bool(chart_image)
-        logger.info(f"[DataTransformAgent] run LLM call | messages={1 + len(filtered_prev_messages) + 1}, has_image={has_image}")
-        try:
-            if chart_image:
-                user_content = [
-                    {"type": "text", "text": user_query},
-                    {"type": "image_url", "image_url": {"url": chart_image, "detail": "low"}}
-                ]
-            else:
-                user_content = user_query
-
-            messages = [{"role":"system", "content": self.system_prompt},
-                        *filtered_prev_messages,
-                        {"role":"user","content": user_content}]
-
-            t_llm_start = time.time()
-            response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-            t_llm = time.time() - t_llm_start
-        except Exception as e:
-            # Fallback to text-only if model doesn't support images
-            logger.warning(f"Image-based completion failed, falling back to text-only: {e}")
-            messages = [{'role':'system', 'content': self.system_prompt},
-                        *filtered_prev_messages,
-                        {'role':'user','content': user_query}]
-            t_llm_start = time.time()
-            response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-            t_llm = time.time() - t_llm_start
-
-        candidates = self.process_gpt_response(response, messages, t_llm=t_llm)
-        status = candidates[0].get('status', '?') if candidates else 'empty'
-        logger.info(f"[DataTransformAgent] run done | status={status}")
-        return candidates
-
-
-    def followup(self, input_tables, dialog, latest_data_sample, new_instruction: str, n=1,
-                 current_visualization=None, expected_visualization=None):
-        """
-        Followup transformation based on previous dialog and new instruction.
-
-        Args:
-            input_tables: list of input tables
-            dialog: previous conversation history
-            latest_data_sample: sample of the latest transformation result
-            new_instruction: new user instruction for followup
-            n: number of candidates
-            current_visualization: dict or None, contains chart_spec and optional chart_image for complete charts
-            expected_visualization: dict or None, contains chart_spec for incomplete charts
-        """
-        if not new_instruction or not new_instruction.strip():
-            new_instruction = "Update the transformation based on the updated visualization context."
-
-        logger.debug(f"GOAL: \n\n{new_instruction}")
-        logger.info(f"[DataTransformAgent] followup start")
-
-        updated_dialog = [{"role":"system", "content": self.system_prompt}, *dialog[1:]]
-
-        # Format sample data
-        sample_data_str = pd.DataFrame(latest_data_sample).head(10).to_string() + '\n......'
-
-        # Build visualization context section
-        vis_section = ""
-        if current_visualization:
-            vis_section = f"\n\n[CURRENT VISUALIZATION] This is the current visualization the user has:\n\n{json.dumps(current_visualization.get('chart_spec', {}), indent=4, ensure_ascii=False)}"
-        elif expected_visualization:
-            vis_section = f"\n\n[EXPECTED VISUALIZATION] This is the visualization expected by the user:\n\n{json.dumps(expected_visualization.get('chart_spec', {}), indent=4, ensure_ascii=False)}"
-
-        # Order: data sample → visualization → instruction
-        followup_text = f"This is the result from the latest transformation:\n\n{sample_data_str}{vis_section}\n\nUpdate the Python script above based on the following instruction:\n\n{new_instruction}"
-
-        logger.debug(followup_text)
-
-        # Build user message content: include chart image if available
-        chart_image = current_visualization.get('chart_image') if current_visualization else None
-        has_image = bool(chart_image)
-        logger.info(f"[DataTransformAgent] followup LLM call | messages={len(updated_dialog) + 1}, has_image={has_image}")
-        try:
-            if chart_image:
-                user_content = [
-                    {"type": "text", "text": followup_text},
-                    {"type": "image_url", "image_url": {"url": chart_image, "detail": "low"}}
-                ]
-            else:
-                user_content = followup_text
-
-            messages = [*updated_dialog, {"role":"user", "content": user_content}]
-
-            t_llm_start = time.time()
-            response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-            t_llm = time.time() - t_llm_start
-        except Exception as e:
-            # Fallback to text-only if model doesn't support images
-            logger.warning(f"Image-based completion failed, falling back to text-only: {e}")
-            messages = [*updated_dialog, {'role':'user', 'content': followup_text}]
-            t_llm_start = time.time()
-            response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-            t_llm = time.time() - t_llm_start
-
-        candidates = self.process_gpt_response(response, messages, t_llm=t_llm)
-        status = candidates[0].get('status', 'unknown') if candidates else 'empty'
-        logger.info(f"[DataTransformAgent] followup done | status={status}")
-        return candidates
diff --git a/py-src/data_formulator/agents/agent_interactive_explore.py b/py-src/data_formulator/agents/agent_interactive_explore.py
deleted file mode 100644
index 0f5f90fb..00000000
--- a/py-src/data_formulator/agents/agent_interactive_explore.py
+++ /dev/null
@@ -1,343 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-import json
-import logging
-import time
-
-import pandas as pd
-
-from data_formulator.agent_config import reasoning_effort_for
-from data_formulator.agents.agent_utils import (
-    attach_reasoning_content,
-    extract_json_objects,
-    generate_data_summary,
-)
-from data_formulator.agents.agent_language import inject_language_instruction
-from data_formulator.agents.context import (
-    build_focused_thread_context,
-    build_lightweight_table_context,
-    build_peripheral_thread_context,
-    handle_inspect_source_data,
-)
-
-logger = logging.getLogger(__name__)
-
-_AGENT_ID = "interactive_explore"
-
-# ── Tool definition (inspect only) ────────────────────────────────────────
-
-INSPECT_TOOL = {
-    "type": "function",
-    "function": {
-        "name": "inspect_source_data",
-        "description": (
-            "Get a detailed summary of one or more source tables — schema, "
-            "field-level statistics, and sample rows. Call this before suggesting "
-            "questions if you need to understand a table's contents."
-        ),
-        "parameters": {
-            "type": "object",
-            "properties": {
-                "table_names": {
-                    "type": "array",
-                    "items": {"type": "string"},
-                    "description": "List of table names to inspect.",
-                },
-            },
-            "required": ["table_names"],
-        },
-    },
-}
-
-# ── Intent tags ───────────────────────────────────────────────────────────
-
-INTENT_TAGS = ['deep-dive', 'pivot', 'broaden', 'cross-data', 'statistical']
-
-# ── System prompt ─────────────────────────────────────────────────────────
-
-SYSTEM_PROMPT = '''You are a data exploration expert who suggests interesting questions to help users explore their datasets.
-
-The user message contains tiered context:
-- **[PRIMARY TABLE(S)]** / **[OTHER AVAILABLE TABLES]**: Compact dataset context with schema, metadata descriptions, representative field values, numeric stats, and bounded sample rows.
-- **[FOCUSED THREAD]** (optional): The exploration thread the user is continuing — each step shows what was asked, what was created, and what chart was made.
-- **[OTHER THREADS]** (optional): Brief summaries of other exploration threads in the workspace.
-- **[CURRENT CHART]** (optional): Image of the chart the user is currently viewing.
-- **[START QUESTION]** (optional): A seed question from the user for context.
-
-Your task is to suggest 4 exploration questions that users can follow to gain insights from their data.
-
-Guidelines:
-1. Suggest interesting analytical questions that can uncover new insights from the data.
-2. Use a diverse language style (questions, statements, etc).
-3. If there are multiple datasets, consider relationships between them.
-4. CONCISENESS: questions should be concise and to the point.
-5. QUESTION QUALITY:
-    - If no exploration thread is provided, start with high-level overview questions.
-    - If a thread exists, build on it — do not repeat questions already explored.
-    - If the current analysis is already very specialized, suggest broadening or pivoting rather than drilling deeper into a tiny subset.
-    - Leverage other tables in the workspace to suggest cross-data questions.
-6. DIVERSITY: each question MUST have a different intent tag. Cover diverse exploration directions:
-    - `deep-dive`: Zoom in — refine, filter, drill down, focus on outliers or sub-dimensions.
-    - `pivot`: Same data, different analytical angle — change the metric, aggregation, or chart type.
-    - `broaden`: Zoom out — higher-level view, remove filters, return to an earlier table.
-    - `cross-data`: Bring in another workspace table not yet used in this thread. Only suggest when other tables are available.
-    - `statistical`: Apply a statistical technique — forecasting, regression, clustering, anomaly detection.
-7. VISUALIZATION: each question should be visualizable with a chart.
-8. FORMATTING: for each question, include:
-    - `text`: The full question text.
-    - `goal`: A concise summary (<10 words) with **bold** keywords for key attributes/metrics.
-    - `tag`: One of: `deep-dive`, `pivot`, `broaden`, `cross-data`, `statistical`.
-
-Output a list of JSON objects, one per line (NDJSON format). Each line must be valid JSON with NO prefix:
-
-{"type": "question", "text": ..., "goal": ..., "tag": ...}
-{"type": "question", "text": ..., "goal": ..., "tag": ...}
-...
-'''
-
-class InteractiveExploreAgent(object):
-
-    def __init__(self, client, workspace, agent_exploration_rules="", language_instruction="", knowledge_store=None):
-        self.client = client
-        self.agent_exploration_rules = agent_exploration_rules
-        self.workspace = workspace
-        self.language_instruction = language_instruction
-        self._knowledge_store = knowledge_store
-
-    def run(self, input_tables, start_question=None,
-            focused_thread=None, other_threads=None,
-            primary_tables=None,
-            current_chart=None,
-            # Legacy params — kept for backward compatibility
-            exploration_thread=None, current_data_sample=None,
-            enable_inspect_round=False,
-            **kwargs):
-        """
-        Suggest exploration questions for a dataset or exploration thread.
-
-        Args:
-            input_tables: List of dataset objects with name, rows, description
-            start_question: Optional seed question for context
-            focused_thread: Rich thread context (list of step dicts from frontend)
-            other_threads: Peripheral thread summaries
-            primary_tables: List of primary table names for prioritization
-            current_chart: PNG data URL of the current visualization
-            exploration_thread: Legacy — flat list of tables (used if focused_thread not provided)
-            current_data_sample: Legacy — raw rows (ignored when focused_thread is provided)
-            enable_inspect_round: Optional fallback for unusual cases where an
-                extra inspect_source_data tool round is explicitly requested.
-        """
-
-        # ── Progress: context building ─────────────────────────────────
-        yield {"type": "progress", "phase": "building_context"}
-
-        # ── Build tiered context ──────────────────────────────────────
-        t_ctx = time.time()
-
-        context = build_lightweight_table_context(
-            input_tables, self.workspace, primary_tables=primary_tables,
-        )
-
-        if focused_thread:
-            context += "\n\n" + build_focused_thread_context(focused_thread)
-        elif exploration_thread:
-            # Legacy fallback: build a simple thread summary from flat table list
-            thread_summary = generate_data_summary(
-                [{
-                    'name': table.get('name', f'Table {i}'),
-                    'rows': table.get('rows', []),
-                } for i, table in enumerate(exploration_thread, 1)],
-                self.workspace,
-                table_name_prefix="Thread Table",
-            )
-            context += f"\n\n[EXPLORATION THREAD]\n\n{thread_summary}"
-
-        if other_threads:
-            context += "\n\n" + build_peripheral_thread_context(other_threads)
-
-        if current_data_sample and not focused_thread:
-            context += f"\n\n[CURRENT DATA SAMPLE]\n\n{pd.DataFrame(current_data_sample).head(10).to_string()}"
-
-        if start_question:
-            context += f"\n\n[START QUESTION]\n\n{start_question}"
-
-        # ── Inject relevant workflows from knowledge store ──────────
-        if self._knowledge_store:
-            try:
-                query = start_question or ""
-                table_names = [t.get("name", "") for t in input_tables if t.get("name")]
-                search_query = " ".join([query] + table_names[:5]).strip()
-                if search_query:
-                    relevant = self._knowledge_store.search(
-                        search_query, categories=["workflows"], max_results=3,
-                    )
-                    if relevant:
-                        knowledge_block = "[RELEVANT KNOWLEDGE]\n"
-                        for item in relevant:
-                            knowledge_block += f"\n### {item['title']}\n{item['snippet']}\n"
-                        context += f"\n\n{knowledge_block}"
-            except Exception:
-                logger.warning("Failed to search knowledge experiences", exc_info=True)
-
-        # ── Build system prompt ───────────────────────────────────────
-        system_prompt = SYSTEM_PROMPT
-
-        if self.agent_exploration_rules and self.agent_exploration_rules.strip():
-            system_prompt += "\n\n[AGENT EXPLORATION RULES]\n\n" + self.agent_exploration_rules.strip() + "\n\nPlease follow the above agent exploration rules when suggesting questions."
-
-        if self._knowledge_store:
-            system_prompt += self._knowledge_store.format_rules_block()
-
-        system_prompt = inject_language_instruction(system_prompt, self.language_instruction)
-
-        ctx_elapsed = time.time() - t_ctx
-        logger.info(
-            "[InteractiveExploreAgent] context: %d chars (~%d tokens), "
-            "tables=%d, primary=%s, built in %.2fs",
-            len(context), len(context) // 4,
-            len(input_tables),
-            primary_tables,
-            ctx_elapsed,
-        )
-        logger.debug("Interactive explore agent input: %s", context)
-        logger.info("[InteractiveExploreAgent] run start")
-
-        # ── Build initial messages ────────────────────────────────────
-        if current_chart:
-            user_content = [
-                {"type": "text", "text": context},
-                {"type": "image_url", "image_url": {"url": current_chart, "detail": "low"}}
-            ]
-        else:
-            user_content = context
-
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_content},
-        ]
-
-        # ── Optional inspect_source_data fallback ─────────────────────
-        if enable_inspect_round:
-            messages = self._run_inspect_round(messages, input_tables)
-
-        # ── Progress: generating ──────────────────────────────────────
-        yield {"type": "progress", "phase": "generating"}
-
-        # ── Stream the final response ─────────────────────────────────
-        try:
-            stream = self.client.get_completion(messages=messages, stream=True, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-        except Exception as e:
-            # If image fails, retry without it
-            if current_chart:
-                messages[1] = {"role": "user", "content": context}
-                stream = self.client.get_completion(messages=messages, stream=True)
-            else:
-                raise
-
-        t_llm = time.time()
-        first_token = False
-        for part in stream:
-            if hasattr(part, 'choices') and len(part.choices) > 0:
-                delta = part.choices[0].delta
-                if hasattr(delta, 'content') and delta.content:
-                    if not first_token:
-                        first_token = True
-                        logger.info(
-                            "[InteractiveExploreAgent] TTFB: %.2fs",
-                            time.time() - t_llm,
-                        )
-                    yield delta.content
-
-        logger.info(
-            "[InteractiveExploreAgent] LLM total: %.2fs, run done",
-            time.time() - t_llm,
-        )
-
-    def _run_inspect_round(self, messages, input_tables):
-        """Run one non-streaming LLM call with the inspect_source_data tool.
-
-        If the model calls the tool, execute it and append the result.
-        If the model produces text without tool calls, skip (the main
-        streaming call will generate the final output).
-
-        Returns the updated messages list.
-        """
-        max_rounds = 3
-        tools = [INSPECT_TOOL]
-
-        for _ in range(max_rounds):
-            try:
-                response = self._call_llm_with_tools(messages, tools)
-            except Exception as e:
-                logger.warning(f"[InteractiveExploreAgent] Inspect round failed: {e}")
-                from data_formulator.error_handler import collect_stream_warning
-                collect_stream_warning(
-                    "Data inspection round failed — results may be less accurate",
-                    detail=str(e),
-                    message_code="INSPECT_ROUND_FAILED",
-                )
-                break
-
-            if not response or not response.choices:
-                break
-
-            choice = response.choices[0]
-            content = choice.message.content or ""
-            tool_calls = getattr(choice.message, 'tool_calls', None)
-
-            if not tool_calls:
-                # No tool call — model is ready to answer.
-                # Don't append its text; we'll re-stream for the final response.
-                break
-
-            # Append assistant message with tool calls
-            assistant_msg = {
-                "role": "assistant",
-                "content": content or None,
-                "tool_calls": [
-                    {
-                        "id": tc.id,
-                        "type": "function",
-                        "function": {
-                            "name": tc.function.name,
-                            "arguments": tc.function.arguments,
-                        },
-                    }
-                    for tc in tool_calls
-                ],
-            }
-            attach_reasoning_content(assistant_msg, choice.message)
-            messages.append(assistant_msg)
-
-            # Execute each tool call
-            for tc in tool_calls:
-                tool_name = tc.function.name
-                try:
-                    tool_args = json.loads(tc.function.arguments)
-                except json.JSONDecodeError:
-                    tool_args = {}
-
-                if tool_name == "inspect_source_data":
-                    table_names = tool_args.get("table_names", [])
-                    tool_content = handle_inspect_source_data(
-                        table_names, input_tables, self.workspace
-                    )
-                else:
-                    tool_content = f"Unknown tool: {tool_name}"
-
-                messages.append({
-                    "role": "tool",
-                    "tool_call_id": tc.id,
-                    "content": tool_content,
-                })
-
-            logger.info(f"[InteractiveExploreAgent] Inspect round: executed {len(tool_calls)} tool call(s)")
-
-        return messages
-
-    def _call_llm_with_tools(self, messages, tools):
-        """Non-streaming LLM call with tool definitions."""
-        return self.client.get_completion_with_tools(
-            messages, tools=tools, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model),
-        )
\ No newline at end of file
diff --git a/py-src/data_formulator/agents/agent_report_gen.py b/py-src/data_formulator/agents/agent_report_gen.py
deleted file mode 100644
index 540c5a0c..00000000
--- a/py-src/data_formulator/agents/agent_report_gen.py
+++ /dev/null
@@ -1,585 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-"""Report generation agent with tool-calling for inspect + embed.
-
-Single agentic loop:
-  - Each round is a streaming LLM call with the inspection tools available.
-    The agent calls inspect_chart / inspect_source_data to gather information
-    whenever it needs it; the results (and rendered chart images) are fed back
-    as context and the loop continues.
-  - When the agent stops calling tools and starts writing prose, that prose IS
-    the report — it streams token-by-token to the user, with charts embedded
-    inline via ![caption](chart://chart_id) markdown links.
-  - Because the tool channel stays available throughout, the agent uses real
-    tool calls instead of leaking tool-call syntax into the report text.
-"""
-
-import json
-import logging
-import re
-from typing import Any, Generator
-
-import pandas as pd
-
-from data_formulator.agent_config import reasoning_effort_for
-from data_formulator.agents.agent_utils import (
-    accumulate_reasoning_content,
-    generate_data_summary,
-)
-from data_formulator.agents.agent_language import inject_language_instruction
-from data_formulator.datalake.parquet_utils import df_to_safe_records
-from data_formulator.agents.context import (
-    build_focused_thread_context,
-    build_lightweight_table_context,
-    build_peripheral_thread_context,
-    handle_inspect_source_data,
-)
-from data_formulator.workflows.create_vl_plots import (
-    assemble_vegailte_chart,
-    coerce_field_type,
-    resolve_field_type,
-    spec_to_base64,
-    field_metadata_to_semantic_types,
-)
-
-logger = logging.getLogger(__name__)
-
-_AGENT_ID = "report_gen"
-
-# ── Tool definitions ──────────────────────────────────────────────────────
-
-INSPECT_TOOLS = [
-    {
-        "type": "function",
-        "function": {
-            "name": "inspect_chart",
-            "description": (
-                "Get the visualization image and underlying data for one or more charts. "
-                "Returns the chart image (PNG), a sample of the chart's data, "
-                "and the transformation code that created it."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "chart_ids": {
-                        "type": "array",
-                        "items": {"type": "string"},
-                        "description": "List of chart IDs from [AVAILABLE CHARTS] to inspect.",
-                    },
-                },
-                "required": ["chart_ids"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "inspect_source_data",
-            "description": (
-                "Get a detailed summary of one or more source tables — schema, "
-                "field-level statistics, and sample rows."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "table_names": {
-                        "type": "array",
-                        "items": {"type": "string"},
-                        "description": "List of table names to inspect.",
-                    },
-                },
-                "required": ["table_names"],
-            },
-        },
-    },
-]
-
-
-# ── System prompt ─────────────────────────────────────────────────────────
-
-SYSTEM_PROMPT = """\
-You are a data journalist / analyst who creates insightful, well-organized reports
-based on data explorations. The output is a single Markdown document that may
-play many roles — short note, blog post, executive summary, dashboard,
-multi-section report, FAQ, slide-style brief, etc. Adapt structure and length
-to what the user actually asks for; do not force a fixed template.
-
-The user message contains context about the workspace:
-- **[PRIMARY TABLE(S)]** / **[OTHER AVAILABLE TABLES]**: Lightweight schema of datasets.
-- **[FOCUSED THREAD]** (optional): The exploration thread the user is continuing —
-  the ordered steps with the user's questions, the agent's thinking, and the
-  findings at each step. This is the spine of the story you are telling.
-- **[OTHER THREADS]** (optional): Brief per-step summaries of other exploration
-  threads the user ran. These are additional findings worth weaving in.
-- **[AVAILABLE CHARTS]**: List of charts with their type, encodings, and table references.
-
-## Ground the report in the exploration
-The thread context is your most important input. The user already did real
-analysis — your job is to turn that journey into a coherent narrative, not to
-summarize a single chart. Before writing:
-- Read the FOCUSED THREAD and OTHER THREADS to understand the full set of
-  questions asked and findings reached.
-- Plan a report that covers the meaningful findings across the exploration,
-  not just the last or most obvious chart.
-
-## Inspecting charts and data
-You have two tools available the whole time: `inspect_chart` and
-`inspect_source_data`. Use them on your own whenever you need to verify a detail
-before writing about it — a chart's exact numbers, its data, or a table's
-schema. `inspect_chart` returns the chart's rendered image, a data sample, and
-the code that produced it. Check the charts behind the key findings you present.
-
-## Write the report
-Write the report directly in markdown — your prose streams straight to the
-reader. Inspect whatever you need as you go.
-
-### Embedding charts (REQUIRED FORMAT — do not change this)
-To embed a chart image, use markdown image syntax with a `chart://` URL:
-  ![Caption describing the chart](chart://chart_id)
-
-Example: `![Monthly trade balance trend](chart://chart-123)`
-
-The chart_id must match one from [AVAILABLE CHARTS]. Place each chart embed on
-its own line (it renders as a block). You can embed the same chart at most
-once. Captions are short — one line describing what the chart shows.
-
-### Tables
-For data tables, write standard markdown tables directly:
-| date | value |
-| --- | --- |
-| 2020-01 | -43.5 |
-
-### Style & structure — adapt to the user's request
-The user may ask for any of:
-- a short note or social-style summary (a few sentences, one or two charts),
-- a blog post / narrative report (intro → findings → takeaway),
-- an executive summary (key numbers up top, then context),
-- a KPI dashboard / multi-section overview (headings per topic, multiple charts
-  arranged with short commentary between them),
-- a slide-style brief (compact sections with bullet points and embedded charts),
-- a deeper analytical report with sub-sections, methodology notes, and caveats.
-
-Pick the structure that fits the request and the available material. Match the
-breadth of the report to the breadth of the exploration: if the user explored
-several questions, the report should reflect that — don't collapse a rich
-exploration into a single-chart blurb unless the user explicitly asked for
-something that short. Reasonable defaults if the user is vague:
-- Start with a `# Title` that reflects the topic.
-- Group related findings under `##` (and `###` if useful) headings, typically
-  one section per key finding / thread.
-- Around each embedded chart, briefly explain what it shows and the key insight.
-- Use bullets / short paragraphs / tables where they help; don't pad.
-- Close with a brief takeaway or summary section if the report is more than a
-  few paragraphs. For very short outputs (notes, single-chart blurbs), a closing
-  summary is optional.
-
-### Guardrails
-- Write in Markdown. Keep prose tight; let the data and charts carry the weight.
-- Stay faithful to the data — do not invent numbers, comparisons, or causation
-  that the data does not actually support.
-- It is fine to flag uncertainty ("based on the sample shown…") when appropriate.
-- Embed every chart you discuss; don't reference a chart in prose without showing it.
-"""
-
-
-# Defense-in-depth: keeping the tool channel available across the whole loop
-# means the model normally uses real tool calls instead of writing tool-call
-# syntax as text. But some harmony / gpt-oss style models still occasionally leak
-# their tool-call channel into the text stream (e.g. "to=functions.inspect_chart
-# ... json {\"chart_ids\": [...]}"), sometimes with degenerate spam tokens. As a
-# cheap last line of defense we strip the obvious leak markers out of each
-# streamed delta before it reaches the report.
-_LEAK_SPECIAL_TOKEN = re.compile(r"<\|[^|>]*\|>")
-_LEAK_TOOLCALL = re.compile(
-    r"(?:\bcommentary\b\s*)?\bto\s*=\s*functions\.[A-Za-z0-9_]+"
-    r"[\s\S]*?\{[\s\S]*?\}",
-)
-
-
-def _strip_leaked_tool_syntax(text: str) -> str:
-    """Remove leaked harmony special tokens and tool-call headers (with their
-    trailing JSON args) from a streamed report delta. Clean prose is untouched."""
-    text = _LEAK_TOOLCALL.sub("", text)
-    text = _LEAK_SPECIAL_TOKEN.sub("", text)
-    return text
-
-
-class ReportGenAgent:
-    """Tool-calling report generation agent with a single streaming loop."""
-
-    def __init__(self, client, workspace, language_instruction=""):
-        self.client = client
-        self.workspace = workspace
-        self.language_instruction = language_instruction
-
-    def run(
-        self,
-        input_tables: list[dict[str, Any]],
-        charts: list[dict[str, Any]],
-        user_prompt: str = "Create a report summarizing the exploration.",
-        focused_thread: list[dict[str, Any]] | None = None,
-        other_threads: list[dict[str, Any]] | None = None,
-        primary_tables: list[str] | None = None,
-    ) -> Generator[dict[str, Any], None, None]:
-        """Generate a report via a single tool-calling loop.
-
-        Yields SSE-style dicts:
-            {"type": "text_delta", "content": "..."}
-            {"type": "embed_chart", "chart_id": "...", "caption": "..."}
-            {"type": "embed_table", "table_id": "...", ...}
-
-        Args:
-            input_tables: Source table objects with name (rows optional for lightweight mode)
-            charts: Chart descriptors: {chart_id, chart_type, encodings, table_ref, code?, chart_data?, chart_image?}
-            user_prompt: The user's report request
-            focused_thread: Rich thread context (from buildFocusedThread)
-            other_threads: Peripheral thread summaries
-            primary_tables: List of primary table names for prioritization
-        """
-        # Build context
-        context = build_lightweight_table_context(
-            input_tables, self.workspace, primary_tables=primary_tables,
-        )
-        if focused_thread:
-            context += "\n\n" + build_focused_thread_context(focused_thread)
-        if other_threads:
-            context += "\n\n" + build_peripheral_thread_context(other_threads)
-
-        # Build available charts section
-        if charts:
-            chart_lines = ["[AVAILABLE CHARTS]"]
-            for c in charts:
-                enc_str = ", ".join(f"{k}: {v}" for k, v in c.get("encodings", {}).items() if v)
-                chart_lines.append(
-                    f"  - {c['chart_id']}: {c.get('chart_type', 'Unknown')} "
-                    f"({enc_str}) → table: {c.get('table_ref', '?')}"
-                )
-            context += "\n\n" + "\n".join(chart_lines)
-
-        # Build system prompt
-        system_prompt = SYSTEM_PROMPT
-        system_prompt = inject_language_instruction(system_prompt, self.language_instruction)
-
-        write_instruction = (
-            "Write a report in markdown that covers the key findings across the "
-            "exploration — don't reduce it to a single chart unless the request "
-            "explicitly asks for something that brief. Pull up whatever charts or "
-            "data you need to look at as you go (this happens automatically and "
-            "is invisible to the reader), and embed each chart you discuss with "
-            "![caption](chart://chart_id)."
-        )
-        messages: list[dict] = [
-            {"role": "system", "content": system_prompt},
-            {
-                "role": "user",
-                "content": f"{context}\n\n[USER REQUEST]\n\n{user_prompt}\n\n{write_instruction}",
-            },
-        ]
-
-        # Single agentic loop: the model inspects via tool calls as needed, then
-        # streams the report. Tools stay available throughout, so it uses the
-        # real tool channel instead of leaking tool-call syntax as text.
-        yield from self._run_agent_loop(messages, charts, input_tables)
-
-    # ------------------------------------------------------------------
-    # Agentic loop: inspect-as-needed, then stream the report
-    # ------------------------------------------------------------------
-
-    def _run_agent_loop(
-        self,
-        messages: list[dict],
-        charts: list[dict[str, Any]],
-        input_tables: list[dict[str, Any]],
-    ) -> Generator[dict[str, Any], None, None]:
-        """Single streaming tool-calling loop.
-
-        Each round is a streaming LLM call with the inspect tools available. If
-        the model emits tool calls, we execute them (attaching rendered chart
-        images) and loop. When the model stops calling tools and just writes
-        prose, that prose IS the report and streams straight to the user.
-        Because the tool channel stays available the whole time, the model never
-        has to fall back to writing tool-call syntax as text.
-        """
-        max_rounds = 6
-
-        for round_idx in range(max_rounds):
-            try:
-                stream = self._call_llm_streaming(messages, tools=INSPECT_TOOLS)
-            except Exception as e:
-                logger.error(f"[ReportAgent] LLM call failed: {e}")
-                yield {"type": "text_delta", "content": f"Error generating report: {e}"}
-                return
-
-            text_parts: list[str] = []
-            reasoning_acc: str | None = None
-            tool_calls_acc: dict[int, dict[str, Any]] = {}
-
-            for chunk in stream:
-                if not chunk.choices:
-                    continue
-                delta = chunk.choices[0].delta
-                reasoning_acc = accumulate_reasoning_content(reasoning_acc, delta)
-
-                content = getattr(delta, "content", None)
-                if content:
-                    text_parts.append(content)
-                    cleaned = _strip_leaked_tool_syntax(content)
-                    if cleaned:
-                        yield {"type": "text_delta", "content": cleaned}
-
-                for tcd in getattr(delta, "tool_calls", None) or []:
-                    idx = getattr(tcd, "index", 0) or 0
-                    slot = tool_calls_acc.setdefault(
-                        idx, {"id": None, "name": "", "arguments": ""}
-                    )
-                    if getattr(tcd, "id", None):
-                        slot["id"] = tcd.id
-                    fn = getattr(tcd, "function", None)
-                    if fn is not None:
-                        if getattr(fn, "name", None):
-                            slot["name"] = fn.name
-                        if getattr(fn, "arguments", None):
-                            slot["arguments"] += fn.arguments
-
-            # No tool calls this round → the model wrote the report. Done.
-            if not tool_calls_acc:
-                return
-
-            # Inspection round: record the tool calls, execute them, then loop.
-            ordered = [tool_calls_acc[i] for i in sorted(tool_calls_acc)]
-            for i, tc in enumerate(ordered):
-                if not tc["id"]:
-                    tc["id"] = f"call_{round_idx}_{i}"
-
-            assistant_msg: dict[str, Any] = {
-                "role": "assistant",
-                "content": "".join(text_parts) or None,
-                "tool_calls": [
-                    {
-                        "id": tc["id"],
-                        "type": "function",
-                        "function": {
-                            "name": tc["name"],
-                            "arguments": tc["arguments"] or "{}",
-                        },
-                    }
-                    for tc in ordered
-                ],
-            }
-            if reasoning_acc:
-                assistant_msg["reasoning_content"] = reasoning_acc
-            messages.append(assistant_msg)
-
-            # Chart images can't ride along in tool-result messages on most
-            # providers, so we collect them and attach them as a single
-            # follow-up vision message after all tool results.
-            pending_images: list[str] = []
-            for tc in ordered:
-                tool_name = tc["name"]
-                try:
-                    tool_args = json.loads(tc["arguments"] or "{}")
-                except json.JSONDecodeError:
-                    tool_args = {}
-
-                # Tell the frontend what the agent is doing (start/end), the
-                # same way the data agent streams tool_start / tool_result.
-                yield {
-                    "type": "tool_start",
-                    "tool": tool_name,
-                    "chart_ids": tool_args.get("chart_ids") if tool_name == "inspect_chart" else None,
-                    "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None,
-                }
-
-                if tool_name == "inspect_chart":
-                    tool_content, image_urls = self._handle_inspect_chart(
-                        tool_args.get("chart_ids", []), charts
-                    )
-                    pending_images.extend(image_urls)
-                elif tool_name == "inspect_source_data":
-                    tool_content = handle_inspect_source_data(
-                        tool_args.get("table_names", []),
-                        input_tables,
-                        self.workspace,
-                    )
-                else:
-                    tool_content = f"Unknown tool: {tool_name}"
-
-                yield {"type": "tool_result", "tool": tool_name, "status": "ok"}
-
-                messages.append({
-                    "role": "tool",
-                    "tool_call_id": tc["id"],
-                    "content": tool_content,
-                })
-
-            # Attach rendered chart images so the agent can visually inspect
-            # them before deciding what to embed.
-            if pending_images:
-                image_blocks: list[dict[str, Any]] = [{
-                    "type": "text",
-                    "text": (
-                        "[INSPECTED CHART IMAGE(S)] Rendered images for the "
-                        "charts you just inspected, in request order:"
-                    ),
-                }]
-                for url in pending_images:
-                    image_blocks.append({
-                        "type": "image_url",
-                        "image_url": {"url": url, "detail": "high"},
-                    })
-                messages.append({"role": "user", "content": image_blocks})
-
-            logger.info(
-                f"[ReportAgent] Round {round_idx + 1}: executed "
-                f"{len(ordered)} tool call(s)"
-            )
-
-        logger.warning("[ReportAgent] Tool-call rounds exhausted without a report")
-
-    # ------------------------------------------------------------------
-    # Tool handlers
-    # ------------------------------------------------------------------
-
-    def _handle_inspect_chart(
-        self,
-        chart_ids: list[str],
-        charts: list[dict[str, Any]],
-    ) -> tuple[str, list[str]]:
-        """Inspect charts: return a text summary plus rendered chart images.
-
-        Returns ``(text_summary, image_urls)`` where ``image_urls`` is a list of
-        base64 PNG data URLs (one per chart that could be rendered). Images are
-        returned separately so the caller can attach them as a follow-up vision
-        message — tool-result messages cannot carry image content on most
-        providers.
-        """
-        results = []
-        image_urls: list[str] = []
-        for chart_id in chart_ids:
-            chart = next((c for c in charts if c["chart_id"] == chart_id), None)
-            if not chart:
-                results.append(f"Chart {chart_id}: not found")
-                continue
-
-            parts = [f"Chart: {chart_id}"]
-            parts.append(f"  Type: {chart.get('chart_type', 'Unknown')}")
-
-            encodings = chart.get("encodings", {})
-            if encodings:
-                enc_str = ", ".join(f"{k}: {v}" for k, v in encodings.items() if v)
-                parts.append(f"  Encodings: {enc_str}")
-
-            if chart.get("code"):
-                parts.append(f"  Code:\n```python\n{chart['code']}\n```")
-
-            # Data sample
-            chart_data = chart.get("chart_data")
-            if chart_data and chart_data.get("rows"):
-                df = pd.DataFrame(chart_data["rows"])
-                parts.append(f"  Data ({len(df)} rows, {len(df.columns)} cols):")
-                parts.append(f"  Columns: {', '.join(df.columns.tolist())}")
-                parts.append(f"  Sample:\n{df.head(5).to_string()}")
-
-            # Render the chart image server-side, on demand. We prefer a
-            # frontend-supplied thumbnail; otherwise we render from the chart
-            # data + encodings so the agent can actually see what it embeds.
-            image = chart.get("chart_image") or self._render_chart_image(chart)
-            if image:
-                image_urls.append(image)
-                parts.append("  [Chart image attached below for visual inspection]")
-            else:
-                parts.append("  [Chart image unavailable — reason about it from data + encodings]")
-
-            results.append("\n".join(parts))
-
-        return "\n\n".join(results), image_urls
-
-    def _render_chart_image(self, chart: dict[str, Any]) -> str | None:
-        """Render a chart to a base64 PNG data URL from its data + encodings.
-
-        Mirrors the DataAgent thumbnail path: resolve field types from the
-        chart's sample data, assemble a Vega-Lite spec, and rasterize it.
-        Returns ``None`` if there is not enough information to render.
-        """
-        chart_data = chart.get("chart_data") or {}
-        rows = chart_data.get("rows")
-        if not rows:
-            return None
-
-        chart_type = chart.get("chart_type", "Bar Chart")
-        raw_encodings = chart.get("encodings", {}) or {}
-        try:
-            df = pd.DataFrame(rows)
-            if df.empty:
-                return None
-
-            encodings: dict[str, dict[str, str]] = {}
-            for channel, field in raw_encodings.items():
-                if field and field in df.columns:
-                    field_type = resolve_field_type(df[field], field)
-                    field_type = coerce_field_type(chart_type, channel, field_type)
-                    encodings[channel] = {"field": field, "type": field_type}
-
-            if not encodings:
-                return None
-
-            spec = assemble_vegailte_chart(df, chart_type, encodings)
-            return spec_to_base64(spec) if spec else None
-        except Exception as e:
-            logger.warning(f"[ReportAgent] Chart render error for {chart.get('chart_id')}: {e}")
-            return None
-
-
-    def _resolve_table_data(
-        self,
-        table_id: str,
-        input_tables: list[dict[str, Any]],
-        charts: list[dict[str, Any]],
-        columns: list[str] | None = None,
-        max_rows: int = 10,
-        sort_by: str | None = None,
-    ) -> dict[str, Any]:
-        """Resolve table data for embed_table — check both source tables and chart data tables."""
-        # Check input tables
-        table = next((t for t in input_tables if t.get("name") == table_id), None)
-
-        # Check chart data tables
-        if not table:
-            for c in charts:
-                cd = c.get("chart_data", {})
-                if cd.get("name") == table_id:
-                    table = cd
-                    break
-
-        if not table or not table.get("rows"):
-            return {"columns": [], "rows": []}
-
-        try:
-            df = pd.DataFrame(table["rows"])
-            if sort_by and sort_by in df.columns:
-                df = df.sort_values(sort_by, ascending=False)
-            if columns:
-                valid_cols = [c for c in columns if c in df.columns]
-                if valid_cols:
-                    df = df[valid_cols]
-            df = df.head(max_rows)
-            return {
-                "columns": df.columns.tolist(),
-                "rows": df_to_safe_records(df),
-            }
-        except Exception as e:
-            logger.error(f"[ReportAgent] resolve_table_data error: {e}")
-            return {"columns": [], "rows": []}
-
-    # ------------------------------------------------------------------
-    # LLM call helpers
-    # ------------------------------------------------------------------
-
-    def _call_llm_streaming(self, messages: list[dict], tools: list[dict] | None = None):
-        """Streaming LLM call with optional tool definitions."""
-        if tools:
-            return self.client.get_completion_with_tools(
-                messages, tools=tools, stream=True, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model),
-            )
-        return self.client.get_completion(messages, stream=True, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
diff --git a/py-src/data_formulator/agents/chart_creation_guide.py b/py-src/data_formulator/agents/chart_creation_guide.py
deleted file mode 100644
index 20a6ab94..00000000
--- a/py-src/data_formulator/agents/chart_creation_guide.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-"""Chart creation guide and shared prompt fragments.
-
-This module is the canonical source of truth for the prompt fragments
-that describe how the agent should write code, choose chart types,
-annotate semantic types, run statistical analyses, and use DuckDB.
-
-The individual ``SHARED_*`` fragments are imported by ``DataRecAgent``
-and ``DataTransformationAgent`` (which weave them into their system
-prompts) and are also composed into ``CHART_CREATION_GUIDE``, the
-single block injected lazily on the first ``visualize`` tool call.
-"""
-
-
-SHARED_ENVIRONMENT = '''**About the execution environment:**
-- You can use BOTH DuckDB SQL and pandas operations in the same script
-- The script will run in the workspace data directory (all data files are in the current directory)
-- Each table in [CONTEXT] has a **file path** (e.g., `student_exam.parquet`, `sales.csv`). Use EXACTLY that path to load data:
-    - `.parquet`: `pd.read_parquet('file.parquet')` or DuckDB `read_parquet('file.parquet')`
-    - `.csv`: `pd.read_csv('file.csv')` or DuckDB `read_csv_auto('file.csv')`
-    - `.json`: `pd.read_json('file.json')`
-    - `.xlsx`/`.xls`: `pd.read_excel('file.xlsx')`
-    - `.txt`: `pd.read_csv('file.txt', sep='\\t')`
-- **IMPORTANT:** Use the exact filename from the context — do NOT change the file extension or assume all files are parquet.
-- **Allowed libraries:** pandas, numpy, duckdb, math, datetime, json, statistics, collections, re, sklearn, scipy, random, itertools, functools, operator, time
-- **Not allowed:** matplotlib, plotly, seaborn, requests, subprocess, os, sys, io, or any other library not listed above.
-- File system access (open, write) and network access are also forbidden.
-
-**When to use DuckDB vs pandas:**
-- **Prefer plain pandas** for most tasks — it's simpler and more readable.
-- Only use DuckDB when the dataset is very large and you need efficient SQL aggregations, filtering, joins, or window functions.
-- You can combine both: DuckDB for initial loading/filtering on large files, then pandas for complex operations.
-
-**Code structure:** standalone script (no function wrapper), imports at top. **CRITICAL:** The final result DataFrame MUST be assigned to the exact variable name you specified in `"output_variable"` in the JSON spec — the system uses this name to extract the result. For example, if your output_variable is `sales_by_region`, the script must contain `sales_by_region = ...`.'''
-
-
-SHARED_SEMANTIC_TYPE_REFERENCE = '''**[SEMANTIC TYPE REFERENCE]**
-
-Choose the most specific type that fits. Only annotate fields used in chart encodings.
-
-| Category | Types |
-|---|---|
-| Temporal | DateTime, Date, Time, Timestamp, Year, Quarter, Month, Week, Day, Hour, YearMonth, YearQuarter, YearWeek, Decade, Duration |
-| Monetary measures | Amount, Price |
-| Physical measures | Quantity, Temperature |
-| Proportion | Percentage |
-| Signed/diverging | Profit, PercentageChange, Sentiment, Correlation |
-| Generic measures | Count, Number |
-| Discrete numeric | Rank, Score |
-| Identifier | ID |
-| Geographic | Latitude, Longitude, Country, State, City, Region, Address, ZipCode |
-| Entity names | Category, Name |
-| Coded categorical | Status, Boolean, Direction |
-| Binned ranges | Range |
-| Fallback | Unknown |
-
-Key guidelines:
-- Use **Amount** for summed monetary totals, **Price** for per-unit prices, **Profit** for values that can be negative.
-- Use **Temperature** (not Quantity) for temperature — it has special diverging behavior.
-- Use **Year** (not Number) for columns like "year" with values 2020, 2021.'''
-
-
-SHARED_CHART_REFERENCE = '''**[CHART TYPE REFERENCE]**
-
-The `chart_type` value in the `visualize` action MUST be one of the names listed
-in the first column below (exact spelling, including capitalization). When a row
-lists multiple names, pick whichever fits the "when to use" hint best.
-
-| chart_type | encodings | config | when to use |
-|---|---|---|---|
-| Scatter Plot | x, y, color, size, facet | opacity (0.1–1.0) | Relationships between two quantitative fields |
-| Regression | x, y, color, size, facet | regressionMethod ("linear","log","exp","pow","quad","poly"), polyOrder (2–10) | Trend line over scatter; one line per color group |
-| Bar Chart / Lollipop Chart / Waterfall Chart | x, y, color, facet | — | Bar: default categorical comparison. Lollipop: cleaner for ranked lists / sparse categories. Waterfall: cumulative gain/loss, each bar starts where the previous ended |
-| Grouped Bar Chart | x, y, group, facet | — | Side-by-side bars across a second categorical dimension |
-| Histogram / Density Plot | x, color, facet | — | Distribution of one quantitative field. Histogram: discrete bins, auto-binned. Density Plot: smooth KDE curve |
-| Boxplot | x, y, color, facet | — | Distribution summary (median/quartiles/outliers) by category |
-| Ranged Dot Plot | x, y, color, facet | — | Min–max range or two-point comparison per category |
-| Line Chart | x, y, color, strokeDash, facet | interpolate ("linear","monotone","step") | Trends over an ordered (usually temporal) x-axis |
-| Area Chart | x, y, color, facet | — | Magnitude over ordered x; auto-stacks when color is set |
-| Pie Chart | size, color, facet | innerRadius (0–100; 0=pie, >0=donut) | Part-of-whole with ≤7 categories. Wedge value goes on **size**, not **theta** |
-| Radar Chart | x, y, color, facet | — | Multi-metric profile/comparison; x = metric name, y = value, color = entity (long-form data) |
-| Heatmap | x, y, color, facet | colorScheme ("viridis","blues","reds","oranges","greens","blueorange","redblue") | Matrix / 2D density; color encodes the quantitative cell value |
-| Bar Table | x, y, color, facet | — | Ranked horizontal table with inline bars; one row per category. y = category, x = value |
-| KPI Card | metric, value, goal | — | "Big number" dashboard tile(s); one row per tile. `value` must be pre-aggregated; `goal` is optional |
-| Candlestick Chart | x, open, high, low, close, facet | — | OHLC financial data |
-| World Map | longitude, latitude, color, size | projection ("mercator","equalEarth","naturalEarth1","orthographic"), projectionCenter ([lon,lat]) | Geographic points/regions on a world projection |
-| US Map | longitude, latitude, color, size | — (fixed albersUsa) | US-only points/regions (albersUsa projection) |
-
-**Critical chart rules:**
-- **Scatter Plot**: use config opacity (0.1–1.0) for dense data instead of encoding opacity.
-- **Regression**: trend line is automatic — do NOT compute regression coefficients/predictions in Python. Use `color` to get separate trend lines per group.
-- **Bar Chart**: x=categorical, y=quantitative (vertical bars). Swap x↔y for horizontal bars. Same-x rows are auto-stacked when `color` is set.
-- **Grouped Bar Chart**: use the `group` channel (not `color`) for side-by-side bars.
-- **Histogram**: do NOT pre-bin in Python — pass the raw quantitative field on `x` and the chart bins automatically. Pre-aggregating gives wrong bin widths.
-- **Line Chart**: use `strokeDash` to differentiate line styles (e.g. actual vs forecast).
-- **Pie Chart**: use the `size` channel (not `theta`) for wedge values. Avoid when >7–8 categories.
-- **Radar Chart**: data must be long-form — one row per (entity, metric, value). If your data is wide-form (one column per metric), melt it first in the Python step.
-- **Bar Table**: y is the category column to rank; x is the quantitative value driving bar length. Don't sort in Python — the template sorts.
-- **KPI Card**: channels are `metric`, `value`, `goal` (not x/y). One DataFrame row = one tile. The `value` column must already contain the final number to display (aggregate upstream in the Python step).
-- **Candlestick Chart**: requires `open`, `high`, `low`, `close` columns.
-- **World Map / US Map**: channel names are `longitude` / `latitude`, not `x` / `y`.
-- **facet**: available for nearly all chart types; use a low-cardinality categorical field.
-- All fields in `encodings` must also appear in `output_fields`. Typically use 2–3 channels (x, y, color/size).'''
-
-
-SHARED_STATISTICAL_ANALYSIS = '''**Statistical analysis guide:**
-- **Regression**: use chart_type "Regression" — the trend line is automatic, do NOT compute regression values in Python code. Configure method via `{"regressionMethod": "linear"}` (options: "linear", "log", "exp", "pow", "quad", "poly"; for poly add `{"polyOrder": 3}`).
-- **Forecasting**: compute predicted future values in Python. Use Line Chart with strokeDash to distinguish actual vs forecast, and color for series grouping.
-- **Clustering**: compute cluster assignments in Python. Output [x, y, cluster_id]. Use Scatter Plot with color → cluster_id.'''
-
-
-SHARED_DUCKDB_NOTES = '''**DuckDB notes:**
-- Escape single quotes with '' (not \\')
-- No Unicode escapes (\\u0400); use character ranges directly: [а-яА-Я]
-- Cast date columns explicitly: `CAST(col AS DATE)`, `CAST(col AS TIMESTAMP)`
-- For complex datetime operations, load data first then use pandas datetime functions
-- Critical identifier quoting rule:
-  * If a table/column name contains non-ASCII characters (e.g., Chinese, Japanese, Korean, Cyrillic, etc.), spaces, or punctuation,
-    you MUST wrap it in double quotes, e.g. SELECT "金额" FROM "客户表".
-  * Never output placeholder identifiers like your_table_name, your_column, your_condition.'''
-
-
-CHART_CREATION_GUIDE = f"""\
-## Chart Creation Guide
-
-The following reference material applies when you call the `visualize` tool.
-
-### A. Code Execution Rules
-
-{SHARED_ENVIRONMENT}
-
-{SHARED_DUCKDB_NOTES}
-
-**Datetime handling:**
-- `date` columns contain date-only values (YYYY-MM-DD). `datetime` columns contain date+time (ISO 8601).
-- `time` columns contain time-only values (HH:mm:ss). `duration` columns are time intervals.
-- Year → number. Year-month / year-month-day → string ("2020-01" / "2020-01-01").
-- Hour alone → number. Hour:min or h:m:s → string. Never return raw datetime objects.
-
-### B. Chart Type Reference
-
-{SHARED_CHART_REFERENCE}
-
-### C. Semantic Type Reference
-
-{SHARED_SEMANTIC_TYPE_REFERENCE}
-
-### D. Statistical Analysis Guide
-
-{SHARED_STATISTICAL_ANALYSIS}
-"""
diff --git a/py-src/data_formulator/agents/data_agent.py b/py-src/data_formulator/agents/data_agent.py
deleted file mode 100644
index 8c35f783..00000000
--- a/py-src/data_formulator/agents/data_agent.py
+++ /dev/null
@@ -1,1930 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-"""Hybrid data exploration agent (Option A with tool-calling for data inspection).
-
-Architecture:
-  - **Tools** (explore, inspect_source_data): Called via OpenAI tool-calling
-    API within a single LLM turn.  The agent gathers data silently — these
-    are internal to the agent and not surfaced to the user.
-  - **Actions** (visualize, clarify, explain, summary, delegate): Structured
-    JSON output in the LLM's text response.  These are externalized to the
-    user — each one ends the current turn and produces visible output.
-
-The server-side while loop handles one action per iteration:
-  1. Call LLM (with tools) → agent may call tools internally
-  2. Parse the structured JSON action from the text response
-  3. Execute the action (sandbox, chart assembly, etc.)
-  4. Append rich observation to trajectory
-  5. Repeat or terminate
-"""
-
-import json
-import logging
-import time
-import uuid
-from pathlib import Path
-from typing import Any, Generator
-
-import pandas as pd
-
-from data_formulator.agent_config import reasoning_effort_for
-from data_formulator.agents.agent_utils import (
-    attach_reasoning_content,
-    ensure_output_variable_in_code,
-    extract_json_objects,
-    generate_data_summary,
-)
-from data_formulator.agents.context import (
-    build_focused_thread_context,
-    build_lightweight_table_context,
-    build_peripheral_thread_context,
-    handle_inspect_source_data,
-)
-from data_formulator.agents.client_utils import Client
-from data_formulator.datalake.parquet_utils import df_to_safe_records
-from data_formulator.agents.chart_creation_guide import CHART_CREATION_GUIDE
-from data_formulator.security.code_signing import sign_result
-from data_formulator.workflows.create_vl_plots import (
-    assemble_vegailte_chart,
-    coerce_field_type,
-    resolve_field_type,
-    spec_to_base64,
-    field_metadata_to_semantic_types,
-)
-
-logger = logging.getLogger(__name__)
-
-_AGENT_ID = "data_agent"
-
-# ── Weak-model rescue helpers ─────────────────────────────────────────────
-# When a weaker LLM calls visualize/clarify/explain/summary as a tool instead
-# of outputting JSON in text, these helpers validate and normalise the args
-# so the action can be rescued without wasting rounds.
-
-_ACTION_REQUIRED_FIELDS: dict[str, list[str]] = {
-    "visualize": ["code", "output_variable", "chart"],
-    "clarify": ["questions"],
-    "explain": ["explanation"],
-    "summary": ["summary"],
-    "delegate": ["target", "options"],
-}
-
-# Valid targets for a `delegate` action.
-_DELEGATE_TARGETS: tuple[str, ...] = ("data_loading", "report_gen")
-
-
-def _rescue_unpack_json_strings(data: dict) -> None:
-    """In-place: parse values that are JSON-encoded strings back to objects.
-
-    Weak models sometimes double-serialise nested fields, e.g.
-    ``"chart": "{\\"chart_type\\": \\"Scatter Plot\\"}"`` instead of a dict.
-    """
-    for key in ("chart", "input_tables", "questions", "options", "followups", "field_metadata", "field_display_names"):
-        val = data.get(key)
-        if isinstance(val, str) and val.strip()[:1] in ("{", "["):
-            try:
-                data[key] = json.loads(val)
-            except (json.JSONDecodeError, ValueError):
-                pass
-
-
-def _rescue_validate_action(data: dict) -> list[str]:
-    """Return list of missing required fields for the action, or [] if valid."""
-    action = data.get("action", "")
-    required = _ACTION_REQUIRED_FIELDS.get(action, [])
-    return [f for f in required if not data.get(f)]
-
-
-# ── Tool definitions (OpenAI function-calling format) ─────────────────────
-# These are internal tools the agent can use freely within a turn to
-# gather data before committing to a user-visible action.
-
-TOOLS = [
-    {
-        "type": "function",
-        "function": {
-            "name": "explore",
-            "description": (
-                "Run Python code to inspect data, compute statistics, or verify "
-                "assumptions.  Use print() to see results — stdout is returned. "
-                "pandas, numpy, duckdb, sklearn, scipy are available."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "purpose": {
-                        "type": "string",
-                        "description": "One-sentence description of what this code does and why (shown to user as progress).",
-                    },
-                    "code": {
-                        "type": "string",
-                        "description": "Python code to execute. Use print() to see output.",
-                    },
-                },
-                "required": ["purpose", "code"],
-            },
-        },
-    },
-    {
-        "type": "function",
-        "function": {
-            "name": "inspect_source_data",
-            "description": (
-                "Get a detailed summary of one or more source tables — schema, "
-                "field-level statistics, and sample rows.  Cheaper than explore() "
-                "for basic data inspection."
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "table_names": {
-                        "type": "array",
-                        "items": {"type": "string"},
-                        "description": "List of table names from [SOURCE TABLES] to inspect.",
-                    },
-                },
-                "required": ["table_names"],
-            },
-        },
-    },
-    # TODO(knowledge): The agent-callable knowledge tools (`search_knowledge`,
-    # `read_knowledge`) were removed along with the automatic up-front
-    # injection (see _build_initial_messages). Reintroduce a single, unified
-    # knowledge-access design here when we revisit it — see the TODO block in
-    # _build_initial_messages for the intended shape.
-]
-
-
-# ── System prompt ─────────────────────────────────────────────────────────
-
-SYSTEM_PROMPT = '''\
-You are an autonomous data exploration agent.
-
-Your goal is to help the user answer their question by creating one or more
-data visualizations.  You operate in a loop.
-
-## Tools (internal — for data gathering)
-
-You have tools you can call to gather data:
-
-- **explore(code)** — run Python code to inspect data, compute stats, etc.
-  **Important**: each call runs in a fresh namespace — variables do NOT
-  persist between calls.  Combine all related operations (loading,
-  transforming, printing) into a single explore() call.
-- **inspect_source_data(table_names)** — get schema, stats, and sample rows
-  for source tables (cheaper than explore for basic inspection).
-
-You analyse data that is **already in the workspace**.  If the user's
-question requires data that isn't present, do NOT try to find it yourself —
-emit a `delegate` action targeting the Data Loading agent and the user
-can hand off in one click.
-
-The initial context already includes sample rows and statistics for each
-table.  If the data is straightforward, proceed directly to your action
-without calling tools.  Tool results are returned to you before you
-produce your action.  Tools are NOT shown to the user.
-
-## Actions (external — shown to the user)
-
-After gathering data (or immediately if the data is clear), output
-**exactly one action** as a JSON object in your text response.  Actions
-are shown to the user and end the current turn.
-
-⚠ **CRITICAL**: `visualize`, `clarify`, `explain`, `summary`, and
-`delegate` are **actions**, NOT tools.  Never call them via
-function/tool calling — they MUST appear as a JSON object in your **text
-reply**.  Only the items listed in the Tools section above (`explore`,
-`inspect_source_data`) may be invoked as tool calls.
-
-### `visualize`
-```json
-{{
-    "action": "visualize",
-    "display_instruction": "<≤12 words. State the question or hypothesis the chart investigates — don't recap the chart spec (x/y/color/split are already visible). Bold a **column** if it anchors the question. ✗ 'Plotting price over time, split by fuel, to see trends'>",
-    "input_tables": ["<table names from [SOURCE TABLES] that the code reads>"],
-    "code": "<Python code producing a DataFrame assigned to output_variable>",
-    "output_variable": "<snake_case variable name>",
-    "chart": {{
-        "chart_type": "<from chart type reference>",
-        "encodings": {{"x": "<field>", "y": "<field>", ...}},
-        "config": {{}}
-    }},
-    "field_metadata": {{"<field>": "<SemanticType>", ...}},
-    "field_display_names": {{"<field>": "<human-readable display name for chart axes and table headers>", ...}}
-}}
-```
-
-### `clarify`
-```json
-{{
-    "action": "clarify",
-    "questions": [
-        {{
-            "text": "<a polite, concise question>",
-            "responseType": "single_choice",
-            "options": ["<option label>", "<option label>"]
-        }}
-    ]
-}}
-```
-
-For clarification, always output `questions[]`. If there is one ambiguity,
-include one question. If there are multiple independent ambiguities, include
-multiple questions. Each question must own its own `options[]`; each option
-is a plain text label. Use `"responseType": "free_text"`
-only when the user needs to type a custom answer. Ask at most 3 questions.
-
-### `explain`
-```json
-{{
-    "action": "explain",
-    "explanation": "<a short, friendly answer in 1–3 sentences. Stay grounded in what the data actually shows; admit when something is unknown. Avoid long lectures.>",
-    "followups": [
-        "<a short visualization question the user might click next, phrased as something the user would say. Each followup should be a chart-producing question that naturally builds on the explanation, e.g. 'Plot **revenue** by region', 'Show monthly trend of **sign_ups**'.>"
-    ]
-}}
-```
-
-Use `explain` when the user is asking a conceptual / clarifying question
-about the data, the schema, the meaning of a field, or any informational
-exchange that does **not** require producing a chart right now. Keep the
-explanation concise (1–3 sentences). Followups are optional (≤4 items,
-≤8 words each) and must be visualization-oriented prompts — clicking one
-should lead to a `visualize` action on the next turn. Omit `followups`
-entirely if no useful chart-producing follow-ups exist.
-
-**Column-name emphasis:** in `explain.explanation`, `followups[]`, and
-`clarify.questions[].text` / `options[]`, you may wrap a column name in
-`**…**` to render it as a highlighted token in the UI.
-
-### `summary`
-```json
-{{
-    "action": "summary",
-    "summary": "<one sentence (≤ 25 words) summarizing the key finding>"
-}}
-```
-
-Use `summary` to end the run after visualization(s) with a one-sentence
-closing remark on the key finding. This is the standard close for any
-question you answer with charts.
-
-### `delegate`
-```json
-{{
-    "action": "delegate",
-    "target": "data_loading" | "report_gen",
-    "message": "<short note to the user that you're handing off, e.g. 'I'll hand this to the data loading agent — pick a search:'>",
-    "options": ["<seed prompt for the target agent>"]
-}}
-```
-
-Use `delegate` to hand off to a peer agent. Each option is a seed prompt for
-the target agent.
-
-Valid `target` values:
-- **`data_loading`** — the user's question needs data that isn't in the
-  workspace. Each option becomes a one-click button (the string is both the
-  button label and the seed prompt). Provide 1–2 short search phrases (e.g.
-  `'monthly orders 2024'`); if two, make them meaningfully distinct (e.g.
-  different search angles). Prefer `clarify` if the workspace tables might
-  already cover the question.
-- **`report_gen`** — the user wants a narrative report or write-up over
-  the charts already produced. This hand-off is **automatic** (no button —
-  the user is not asked to choose), so provide **exactly one** option: a
-  single, well-formed seed prompt for the report agent. 
-  Elaborate that one prompt from the conversation context —
-  name the subject, the angle/focus the user asked for, and which findings or
-  charts it should cover (e.g. `'Write a report on 2024 regional sales,
-  focusing on why the West region outperformed, covering the revenue-by-region
-  and monthly-trend charts'`).
-
-## Understanding your context
-
-{{context_guide}}
-
-## Decision guidelines
-
-- **Classify the question first** (silently) to calibrate effort, not as a hard rule:
-  - *Conceptual / informational* (asking about meaning, schema, what a field represents, why something is the way it is — no chart needed): use `explain`.
-  - *Concrete* (one specific answer, e.g. "avg price by region", "which sold most"): **1 visualization** → `summary` (one-line takeaway).
-  - *Progressive* (one question best answered by a small sequence, e.g. "why did revenue drop?", "compare regions"): **2–3 visualizations** → `summary` to tie them together.
-  - *Open-ended* (explicit exploration, e.g. "explore", "overview", "what's interesting"): **3–5 visualizations** forming a narrative → `summary` to tie them together.
-  - *Hand-off needed* — use `delegate` as the terminal action when the request is better served by a peer agent:
-    - *Missing data* (the user's question needs tables not in the workspace): `delegate(target="data_loading")` with a short search phrase as `prompt`.
-    - *Report request* (e.g. "create a report about X", "write up the findings", "summarize Y as a narrative"): produce any charts the report needs (0–3, judgment-based — if the workspace already has relevant charts you may delegate immediately), then end with `delegate(target="report_gen")`.
-- **After each chart**, continue only if the next chart answers a gap *raised* by the previous one — not just another interesting angle. Otherwise close out (`summary`, or `delegate` for hand-off cases) and let the user ask for more.
-- If ambiguous, `clarify`.
-- **Never** repeat a visualization already in the trajectory or in another thread.
-- {max_iterations} visualizations is a **hard ceiling**, not a target.
-
-{agent_exploration_rules}
-'''
-
-
-# ---------------------------------------------------------------------------
-# Agent
-# ---------------------------------------------------------------------------
-
-
-class DataAgent:
-    """Structured JSON data exploration agent."""
-
-    def __init__(
-        self,
-        client: Client,
-        workspace,
-        agent_exploration_rules: str = "",
-        agent_coding_rules: str = "",
-        language_instruction: str = "",
-        max_iterations: int = 5,
-        max_repair_attempts: int = 2,
-        identity_id: str | None = None,
-    ):
-        self.client = client
-        self.workspace = workspace
-        self.agent_exploration_rules = agent_exploration_rules
-        self.agent_coding_rules = agent_coding_rules
-        self.language_instruction = language_instruction
-        self.max_iterations = max_iterations
-        self.max_repair_attempts = max_repair_attempts
-
-        from data_formulator.agents.reasoning_log import (
-            ReasoningLogger, _NullReasoningLogger,
-        )
-        self._session_id = uuid.uuid4().hex[:12]
-        if identity_id:
-            try:
-                self._reasoning_log = ReasoningLogger(
-                    identity_id, "DataAgent", self._session_id,
-                )
-            except Exception:
-                logger.warning("Failed to initialise ReasoningLogger", exc_info=True)
-                self._reasoning_log = _NullReasoningLogger()
-        else:
-            self._reasoning_log = _NullReasoningLogger()
-
-        self._knowledge_store = None
-        self._injected_knowledge: list[dict[str, Any]] = []
-        self._injected_rules: list[str] = []
-        _user_home = getattr(workspace, "user_home", None)
-        if _user_home:
-            try:
-                from data_formulator.knowledge.store import KnowledgeStore
-                self._knowledge_store = KnowledgeStore(_user_home)
-            except Exception:
-                logger.warning("Failed to initialise KnowledgeStore", exc_info=True)
-
-    # ------------------------------------------------------------------
-    # Helpers
-    # ------------------------------------------------------------------
-
-    def _explore_ns_dir(self) -> Path:
-        """Directory for cross-turn namespace serialisation."""
-        return self.workspace.confined_scratch.root / "_explore_ns"
-
-    # ------------------------------------------------------------------
-    # Public API
-    # ------------------------------------------------------------------
-
-    def run(
-        self,
-        input_tables: list[dict[str, Any]],
-        user_question: str,
-        focused_thread: list[dict[str, Any]] | None = None,
-        other_threads: list[dict[str, Any]] | None = None,
-        trajectory: list[dict] | None = None,
-        completed_step_count: int = 0,
-        primary_tables: list[str] | None = None,
-        attached_images: list[str] | None = None,
-    ) -> Generator[dict[str, Any], None, None]:
-        """Run the structured exploration loop.
-
-        Yields event dicts with ``type`` in:
-            ``"action"``      – the agent's chosen action (for UI)
-            ``"result"``      – a visualization result (data + chart)
-            ``"explore_result"`` – explore code output
-            ``"clarify"``     – clarification question (loop pauses)
-            ``"explain"``     – conversational explanation (loop pauses)
-            ``"delegate"``    – hand-off to a peer agent (loop terminates)
-            ``"completion"``  – final summary (loop terminates)
-            ``"error"``       – error information
-        """
-        rlog = self._reasoning_log
-        session_start_time = time.time()
-        total_llm_calls = 0
-        completed_steps: list[dict[str, Any]] = []
-        iteration = completed_step_count
-        final_status = "max_iterations"
-
-        try:
-            rlog.log(
-                "session_start",
-                agent="DataAgent",
-                session_id=self._session_id,
-                user_question=user_question,
-                input_tables=[t.get("name", "") for t in input_tables],
-                model=self.client.model,
-                rules_injected=[
-                    r for r in [self.agent_exploration_rules, self.agent_coding_rules] if r
-                ],
-                knowledge_injected=[],
-            )
-
-            if trajectory is None:
-                ns_dir = self._explore_ns_dir()
-                if ns_dir.exists():
-                    import shutil
-                    shutil.rmtree(ns_dir, ignore_errors=True)
-
-                trajectory = self._build_initial_messages(
-                    input_tables, user_question, focused_thread, other_threads,
-                    primary_tables=primary_tables,
-                    attached_images=attached_images,
-                )
-                rlog.log(
-                    "context_built",
-                    system_prompt_tokens=len(trajectory[0].get("content", "")) // 4 if trajectory else 0,
-                    user_msg_tokens=len(str(trajectory[1].get("content", ""))) // 4 if len(trajectory) > 1 else 0,
-                    total_tables=len(input_tables),
-                    primary_tables=primary_tables or [],
-                    knowledge_rules_injected=self._injected_rules,
-                    knowledge_injected=self._injected_knowledge,
-                )
-
-                if self._injected_rules or self._injected_knowledge:
-                    yield {
-                        "type": "context_info",
-                        "rules_injected": self._injected_rules,
-                        "knowledge_injected": [
-                            {"category": k["category"], "title": k["title"]}
-                            for k in self._injected_knowledge
-                        ],
-                    }
-
-            action_retry_budget = 1  # one extra chance when the LLM fails to produce an action
-
-            while iteration < self.max_iterations:
-                iteration += 1
-
-                # --- THINK: call LLM with tools, get action ---------------
-                t_start = time.time()
-                action = None
-                action_reason = "ok"
-                action_error = ""
-                for event in self._get_next_action(trajectory, input_tables, outer_iteration=iteration):
-                    if event.get("type") == "agent_action":
-                        action = event.get("action_data")
-                        action_reason = event.get("reason", "ok")
-                        action_error = event.get("error_message", "")
-                        total_llm_calls += event.get("llm_calls", 0)
-                    else:
-                        yield event
-                logger.info("[DataAgent] iteration %d total=%.2fs reason=%s",
-                            iteration, time.time() - t_start, action_reason)
-
-                if action is None:
-                    # ① tool rounds exhausted → pause and let the user decide
-                    if action_reason == "tool_rounds_exhausted":
-                        steps_desc = "\n".join(
-                            f"  • {s['display_instruction']}" for s in completed_steps
-                        ) or "(none yet)"
-                        final_status = "clarify_exhausted"
-                        yield {
-                            "type": "clarify",
-                            "iteration": iteration,
-                            "thought": "",
-                            "questions": [
-                                {
-                                    "text": (
-                                        "I've been exploring extensively but haven't reached "
-                                        "a conclusion yet.\n\nCompleted steps so far:\n"
-                                        f"{steps_desc}\n\n"
-                                        "How would you like to proceed?"
-                                    ),
-                                    "text_code": "agent.clarifyExhausted",
-                                    "text_params": {"steps": steps_desc},
-                                    "responseType": "single_choice",
-                                    "required": True,
-                                    "options": [
-                                        {
-                                            "label": "Continue exploring",
-                                            "label_code": "agent.clarifyOptionContinue",
-                                        },
-                                        {
-                                            "label": "Simplify the task",
-                                            "label_code": "agent.clarifyOptionSimplify",
-                                        },
-                                        {
-                                            "label": "Summarize what you have so far",
-                                            "label_code": "agent.clarifyOptionSummary",
-                                        },
-                                    ],
-                                }
-                            ],
-                            "trajectory": self._strip_images(trajectory),
-                            "completed_step_count": len(completed_steps),
-                        }
-                        self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-                        return
-
-                    # ② LLM API error (already retried in _call_llm) → fatal
-                    if action_reason == "llm_error":
-                        final_status = "llm_error"
-                        yield self._error_event(
-                            iteration,
-                            action_error or "LLM API error",
-                            message_code="agent.llmApiError",
-                        )
-                        break
-
-                    # ③ json_parse_failed or unknown → retry once with context
-                    if action_retry_budget > 0:
-                        action_retry_budget -= 1
-                        logger.info("[DataAgent] action=None (reason=%s), retrying "
-                                    "(%d retries left)", action_reason, action_retry_budget)
-                        steps_summary = "\n".join(
-                            f"  - Step {i + 1}: {s['display_instruction']}"
-                            for i, s in enumerate(completed_steps)
-                        ) or "  (no completed steps)"
-                        trajectory.append({
-                            "role": "user",
-                            "content": (
-                                "[SYSTEM] Your previous response could not be parsed. "
-                                "Here is what was already completed:\n"
-                                f"{steps_summary}\n\n"
-                                "Please output a JSON action object "
-                                "(visualize / clarify / explain / summary / delegate) "
-                                "to continue."
-                            ),
-                        })
-                        continue
-
-                    final_status = "parse_failed"
-                    yield self._error_event(
-                        iteration,
-                        action_error or "Failed to parse agent action from LLM response",
-                        message_code="agent.parseActionFailed",
-                    )
-                    break
-
-                action_type = action.get("action")
-                logger.info(f"[DataAgent] Iteration {iteration}: action={action_type}")
-
-                # --- ACT (only user-visible actions reach here) --------
-                if action_type == "clarify":
-                    rlog.log("action_execution", action="clarify", status="ok",
-                             iteration=iteration)
-                    final_status = "clarify"
-                    try:
-                        clarify_payload = self._normalize_clarify_action(action)
-                    except ValueError:
-                        final_status = "parse_failed"
-                        yield self._error_event(
-                            iteration,
-                            "Clarify action requires non-empty questions.",
-                            message_code="agent.parseActionFailed",
-                        )
-                        self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-                        return
-                    yield {
-                        "type": "clarify",
-                        "iteration": iteration,
-                        "thought": action.get("thought", ""),
-                        **clarify_payload,
-                        "trajectory": self._strip_images(trajectory),
-                        "completed_step_count": len(completed_steps),
-                    }
-                    self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-                    return
-
-                elif action_type == "explain":
-                    rlog.log("action_execution", action="explain", status="ok",
-                             iteration=iteration)
-                    final_status = "explain"
-                    try:
-                        explain_payload = self._normalize_explain_action(action)
-                    except ValueError:
-                        final_status = "parse_failed"
-                        yield self._error_event(
-                            iteration,
-                            "Explain action requires a non-empty explanation.",
-                            message_code="agent.parseActionFailed",
-                        )
-                        self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-                        return
-                    yield {
-                        "type": "explain",
-                        "iteration": iteration,
-                        "thought": action.get("thought", ""),
-                        **explain_payload,
-                        "trajectory": self._strip_images(trajectory),
-                        "completed_step_count": len(completed_steps),
-                    }
-                    self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-                    return
-
-                elif action_type == "summary":
-                    rlog.log("action_execution", action="summary", status="ok",
-                             iteration=iteration, total_steps=len(completed_steps))
-                    final_status = "success"
-                    yield {
-                        "type": "completion",
-                        "iteration": iteration,
-                        "status": "success",
-                        "content": {
-                            "thought": action.get("thought", ""),
-                            "summary": action.get("summary", ""),
-                            "total_steps": len(completed_steps),
-                        },
-                    }
-                    self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-                    return
-
-                elif action_type == "delegate":
-                    rlog.log("action_execution", action="delegate", status="ok",
-                             iteration=iteration)
-                    final_status = "delegate"
-                    try:
-                        delegate_payload = self._normalize_delegate_action(action)
-                    except ValueError as exc:
-                        final_status = "parse_failed"
-                        yield self._error_event(
-                            iteration,
-                            str(exc) or "delegate action requires non-empty target and options.",
-                            message_code="agent.parseActionFailed",
-                        )
-                        self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-                        return
-                    yield {
-                        "type": "delegate",
-                        "iteration": iteration,
-                        "thought": action.get("thought", ""),
-                        **delegate_payload,
-                        "trajectory": self._strip_images(trajectory),
-                        "completed_step_count": len(completed_steps),
-                    }
-                    self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-                    return
-
-                elif action_type == "visualize":
-                    code = action.get("code", "")
-                    output_variable = action.get("output_variable", "result_df")
-                    chart_spec = action.get("chart", {})
-                    field_metadata = action.get("field_metadata", {})
-                    field_display_names = action.get("field_display_names", {})
-                    display_instruction = action.get("display_instruction", "")
-
-                    yield {
-                        "type": "action",
-                        "iteration": iteration,
-                        "action": "visualize",
-                        "thought": action.get("thought", ""),
-                        "display_instruction": display_instruction,
-                        "input_tables": action.get("input_tables", []),
-                    }
-
-                    viz_result = self._execute_visualize(
-                        code=code,
-                        output_variable=output_variable,
-                        chart_spec=chart_spec,
-                        field_metadata=field_metadata,
-                        field_display_names=field_display_names,
-                        display_instruction=display_instruction,
-                        input_tables=input_tables,
-                        messages=trajectory,
-                        outer_iteration=iteration,
-                    )
-                    total_llm_calls += viz_result.get("repair_llm_calls", 0)
-
-                    if viz_result["status"] != "ok":
-                        error_msg = viz_result.get("error_message", "Unknown error")
-                        rlog.log("action_execution", action="visualize", status="error",
-                                 iteration=iteration, error=error_msg)
-                        observation = f"[OBSERVATION – Step {len(completed_steps) + 1} FAILED]\n\nError: {error_msg}"
-                        trajectory.append({"role": "user", "content": observation})
-                        yield self._error_event(iteration, error_msg, display_instruction=display_instruction)
-                        continue
-
-                    transform_result = viz_result["transform_result"]
-                    sign_result(transform_result)
-                    transformed_data = transform_result["content"]
-                    output_rows = len(transformed_data.get("rows", []))
-                    chart_type = chart_spec.get("chart_type", "")
-                    rlog.log("action_execution", action="visualize", status="ok",
-                             iteration=iteration, output_rows=output_rows,
-                             chart_type=chart_type)
-
-                    completed_steps.append({
-                        "display_instruction": display_instruction,
-                        "code": transform_result.get("code", ""),
-                    })
-
-                    yield {
-                        "type": "result",
-                        "iteration": iteration,
-                        "status": "success",
-                        "content": {
-                            "question": display_instruction,
-                            "result": transform_result,
-                        },
-                    }
-
-                    observation_msg = self._format_observation(
-                        step_index=len(completed_steps),
-                        display_instruction=display_instruction,
-                        thought=action.get("thought", ""),
-                        code=transform_result.get("code", ""),
-                        data=transformed_data,
-                        chart_image=None,
-                    )
-                    trajectory.append(observation_msg)
-
-                else:
-                    trajectory.append({
-                        "role": "user",
-                        "content": (
-                            f"[ERROR] Unknown action '{action_type}'. "
-                            "Please choose one of: visualize, clarify, explain, summary, delegate."
-                        ),
-                    })
-                    yield self._error_event(iteration, f"Unknown action: {action_type}", message_code="agent.unknownAction")
-
-            # Exhausted max iterations (or break from error)
-            self._log_session_end(rlog, final_status, iteration, total_llm_calls, session_start_time)
-            if final_status == "max_iterations":
-                yield {
-                    "type": "completion",
-                    "iteration": iteration,
-                    "status": "max_iterations",
-                    "content": {
-                        "summary": "Reached the maximum number of exploration steps.",
-                        "summary_code": "agent.maxIterationsSummary",
-                        "total_steps": len(completed_steps),
-                    },
-                }
-        finally:
-            rlog.close()
-
-    @classmethod
-    def _sanitize_clarification_options(cls, raw_options: Any) -> list[dict[str, Any]]:
-        """Normalize clarify/explain option payloads.
-
-        Accepts either bare strings (the new simplified shape) or
-        ``{label, label_code?}`` dicts (legacy). Output is always a list of
-        ``{label, label_code?}`` dicts — no ids. Position in the list is the
-        only stable handle, used by the response payload.
-        """
-        if not isinstance(raw_options, list):
-            return []
-
-        options: list[dict[str, Any]] = []
-        for raw_option in raw_options[:6]:
-            if isinstance(raw_option, str):
-                label = raw_option.strip()
-                label_code = ""
-            elif isinstance(raw_option, dict):
-                label = str(raw_option.get("label", "")).strip()
-                label_code = str(raw_option.get("label_code", "")).strip()
-            else:
-                continue
-
-            if not label and not label_code:
-                continue
-
-            option: dict[str, Any] = {}
-            if label:
-                option["label"] = label
-            if label_code:
-                option["label_code"] = label_code
-            options.append(option)
-
-        return options
-
-    @classmethod
-    def _sanitize_clarification_questions(cls, raw_questions: Any) -> list[dict[str, Any]]:
-        if not isinstance(raw_questions, list):
-            return []
-
-        questions: list[dict[str, Any]] = []
-        for raw_question in raw_questions[:3]:
-            if not isinstance(raw_question, dict):
-                continue
-
-            text = str(raw_question.get("text", "")).strip()
-            text_code = str(raw_question.get("text_code", "")).strip()
-            if not text and not text_code:
-                continue
-
-            options = cls._sanitize_clarification_options(raw_question.get("options"))
-            response_type = raw_question.get("responseType") or raw_question.get("response_type")
-            if response_type not in ("single_choice", "free_text"):
-                response_type = "single_choice" if options else "free_text"
-
-            question: dict[str, Any] = {
-                "responseType": response_type,
-                "required": bool(raw_question.get("required", True)),
-            }
-            if text:
-                question["text"] = text
-            if text_code:
-                question["text_code"] = text_code
-            if isinstance(raw_question.get("text_params"), dict):
-                question["text_params"] = raw_question["text_params"]
-            if options:
-                question["options"] = options
-            questions.append(question)
-
-        return questions
-
-    @classmethod
-    def _normalize_clarify_action(cls, action: dict[str, Any]) -> dict[str, Any]:
-        questions = cls._sanitize_clarification_questions(action.get("questions"))
-        if not questions:
-            raise ValueError("clarify action requires non-empty questions[]")
-        return {"questions": questions}
-
-    @classmethod
-    def _normalize_explain_action(cls, action: dict[str, Any]) -> dict[str, Any]:
-        """Normalize an explain action into the same shape as clarify.
-
-        The frontend reuses the clarify pipeline (one question whose ``text``
-        is the explanation and whose ``options`` are clickable followups), so
-        we emit a ``questions[]`` payload here. The followups are optional
-        visualization-leading prompts; clicking one is equivalent to typing
-        that prompt as the next user message.
-        """
-        explanation = str(action.get("explanation", "")).strip()
-        if not explanation:
-            raise ValueError("explain action requires a non-empty 'explanation'")
-
-        options = cls._sanitize_clarification_options(action.get("followups"))
-        question: dict[str, Any] = {
-            "text": explanation,
-            "responseType": "single_choice",
-            "required": False,
-        }
-        if options:
-            question["options"] = options
-        return {"questions": [question]}
-
-    @classmethod
-    def _normalize_delegate_action(cls, action: dict[str, Any]) -> dict[str, Any]:
-        """Normalize a delegate action.
-
-        The agent emits this when it wants to hand off to a peer agent
-        (e.g. the Data Loading agent when the workspace lacks needed
-        data, or the Report Gen agent when the user wants a written
-        report).  The frontend renders each option as a one-click
-        handoff card.
-
-        Shape: ``{target, message?, options: [str, ...]}`` with 1–2
-        options.
-        """
-        target = str(action.get("target", "")).strip()
-        if target not in _DELEGATE_TARGETS:
-            raise ValueError(
-                f"delegate action requires 'target' ∈ {_DELEGATE_TARGETS}, got {target!r}"
-            )
-
-        message = str(action.get("message") or "").strip()
-
-        raw_options = action.get("options")
-        cleaned: list[str] = []
-        if isinstance(raw_options, list):
-            for opt in raw_options:
-                if isinstance(opt, str):
-                    text = opt.strip()
-                    if text:
-                        cleaned.append(text)
-
-        if not cleaned:
-            raise ValueError("delegate action requires non-empty 'options[]'")
-
-        # Cap at 2 — keep the user choice cognitively light.
-        cleaned = cleaned[:2]
-
-        payload: dict[str, Any] = {"target": target, "options": cleaned}
-        if message:
-            payload["message"] = message
-        return payload
-
-    # ------------------------------------------------------------------
-    # Visualize execution (with repair)
-    # ------------------------------------------------------------------
-
-    def _execute_visualize(
-        self,
-        code: str,
-        output_variable: str,
-        chart_spec: dict,
-        field_metadata: dict,
-        field_display_names: dict,
-        display_instruction: str,
-        input_tables: list[dict[str, Any]],
-        messages: list[dict],
-        outer_iteration: int = 0,
-    ) -> dict[str, Any]:
-        """Execute a visualize action with repair retries.
-
-        Returns a dict with at least ``status`` and, on success,
-        ``transform_result``.  Also includes ``repair_llm_calls`` —
-        the number of LLM calls made during repair attempts so that
-        the caller can accumulate them into ``total_llm_calls``.
-        """
-        viz_result = self._run_visualize_code(
-            code=code,
-            output_variable=output_variable,
-            chart_spec=chart_spec,
-            field_metadata=field_metadata,
-            field_display_names=field_display_names,
-            display_instruction=display_instruction,
-            messages=messages,
-        )
-
-        rlog = self._reasoning_log
-        repair_llm_calls = 0
-        attempt = 0
-        while viz_result["status"] != "ok" and attempt < self.max_repair_attempts:
-            attempt += 1
-            error_msg = viz_result.get("error_message", "Unknown error")
-            logger.warning(f"[DataAgent] Repair attempt {attempt}/{self.max_repair_attempts}: {error_msg}")
-
-            # Mutate the canonical `messages` list so the dialog snapshot
-            # captures the repair turn just like any other tool round.
-            # The agent therefore sees one continuous conversation across
-            # the original visualize and any repairs, not a forked copy.
-            messages.append({
-                "role": "user",
-                "content": (
-                    f"[CODE ERROR]\n\n{error_msg}\n\n"
-                    "Please fix the code and output a new visualize action."
-                ),
-            })
-            repair_action = None
-            for evt in self._get_next_action(
-                messages, input_tables,
-                outer_iteration=outer_iteration,
-            ):
-                if evt.get("type") == "agent_action":
-                    repair_action = evt.get("action_data")
-                    repair_llm_calls += evt.get("llm_calls", 0)
-            if repair_action and repair_action.get("action") == "visualize":
-                viz_result = self._run_visualize_code(
-                    code=repair_action.get("code", code),
-                    output_variable=repair_action.get("output_variable", output_variable),
-                    chart_spec=repair_action.get("chart", chart_spec),
-                    field_metadata=repair_action.get("field_metadata", field_metadata),
-                    field_display_names=repair_action.get("field_display_names", field_display_names),
-                    display_instruction=display_instruction,
-                    messages=messages,
-                )
-                rlog.log("repair_attempt", attempt=attempt,
-                         original_error=error_msg[:200],
-                         status=viz_result["status"])
-            else:
-                rlog.log("repair_attempt", attempt=attempt,
-                         original_error=error_msg[:200],
-                         status="repair_failed")
-                break
-
-        viz_result["repair_llm_calls"] = repair_llm_calls
-        return viz_result
-
-    def _run_explore_code(
-        self,
-        code: str,
-        input_tables: list[dict[str, Any]],
-    ) -> dict[str, Any]:
-        """Run explore code in sandbox, capturing stdout.
-
-        When called inside ``_get_next_action``, uses the shared
-        ``SandboxSession`` so that variables persist across calls.
-        Falls back to a one-shot subprocess otherwise.
-        """
-        # Wrap code: capture stdout
-        capture_code = (
-            "import io as _io, sys as _sys, pandas as _pd\n"
-            "_old_stdout = _sys.stdout\n"
-            "_sys.stdout = _captured = _io.StringIO()\n"
-            "\n"
-            f"{code}\n"
-            "\n"
-            "_sys.stdout = _old_stdout\n"
-            "_pack = {\n"
-            "    'stdout': _captured.getvalue(),\n"
-            "}\n"
-        )
-
-        try:
-            with self.workspace.local_dir() as local_path:
-                import os as _os
-                workspace_path = _os.path.abspath(str(local_path))
-                allowed_objects = {"_pack": None}
-
-                session = getattr(self, "_explore_session", None)
-                if session is not None:
-                    raw = session.execute(capture_code, allowed_objects, workspace_path)
-                else:
-                    from data_formulator.sandbox import create_sandbox
-                    try:
-                        from flask import current_app
-                        sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local')
-                    except (ImportError, RuntimeError):
-                        sandbox_mode = 'local'
-                    sandbox = create_sandbox(sandbox_mode)
-                    raw = sandbox._run_in_warm_subprocess(
-                        capture_code, allowed_objects, workspace_path
-                    )
-
-            if raw.get("status") == "ok":
-                allowed = raw.get("allowed_objects") or {}
-                if not isinstance(allowed, dict):
-                    allowed = {}
-                pack = allowed.get("_pack", {})
-                stdout = pack.get("stdout", "") if isinstance(pack, dict) else ""
-                if not isinstance(stdout, str):
-                    stdout = str(stdout)
-                if len(stdout) > 8000:
-                    stdout = stdout[:8000] + "\n... (truncated)"
-                return {"status": "ok", "stdout": stdout}
-            else:
-                return {
-                    "status": "error",
-                    "error": raw.get("error_message", raw.get("content", "Unknown error")),
-                    "stdout": "",
-                }
-        except Exception as e:
-            logger.error("[DataAgent] Sandbox execution error", exc_info=e)
-            return {"status": "error", "error": "Code execution failed", "stdout": ""}
-
-    def _run_visualize_code(
-        self,
-        code: str,
-        output_variable: str,
-        chart_spec: dict,
-        field_metadata: dict,
-        field_display_names: dict,
-        display_instruction: str,
-        messages: list[dict] | None = None,
-    ) -> dict[str, Any]:
-        """Run visualize code in sandbox and assemble chart."""
-        from data_formulator.sandbox import create_sandbox
-
-        try:
-            from flask import current_app
-            sandbox_mode = current_app.config.get('CLI_ARGS', {}).get('sandbox', 'local')
-            max_display_rows = current_app.config['CLI_ARGS'].get('max_display_rows', 5000)
-        except (ImportError, RuntimeError):
-            sandbox_mode = 'local'
-            max_display_rows = 5000
-
-        # Patch output_variable if needed
-        code, was_patched, detected_var = ensure_output_variable_in_code(code, output_variable)
-        if was_patched:
-            logger.info(f"[DataAgent] patched output_variable: {output_variable} = {detected_var}")
-
-        sandbox = create_sandbox(sandbox_mode)
-
-        try:
-            execution_result = sandbox.run_python_code(
-                code=code,
-                workspace=self.workspace,
-                output_variable=output_variable,
-            )
-
-            if execution_result['status'] != 'ok':
-                error_message = execution_result.get('content', 'Unknown error')
-                return {"status": "error", "error_message": str(error_message)}
-
-            full_df = execution_result['content']
-            row_count = len(full_df)
-
-            # Validate that all encoding fields exist in the output DataFrame
-            chart_encodings = chart_spec.get("encodings", {})
-            missing_fields = [
-                f"{channel}: '{field}'"
-                for channel, field in chart_encodings.items()
-                if field and field not in full_df.columns
-            ]
-            if missing_fields:
-                available = list(full_df.columns)
-                return {
-                    "status": "error",
-                    "error_message": (
-                        f"Chart encoding fields not found in output DataFrame: "
-                        f"{', '.join(missing_fields)}. "
-                        f"Available columns: {available}"
-                    ),
-                    "error_code": "agent.fieldsNotFound",
-                    "error_params": {
-                        "missing": ", ".join(missing_fields),
-                        "available": str(available),
-                    },
-                }
-
-            if row_count == 0:
-                return {
-                    "status": "error",
-                    "error_message": "Output DataFrame is empty (0 rows). Check filters or data loading.",
-                    "error_code": "agent.emptyDataframe",
-                }
-
-            output_table_name = self.workspace.get_fresh_name(f"d-{output_variable}")
-            self.workspace.write_parquet(full_df, output_table_name)
-
-            if row_count > max_display_rows:
-                query_output = full_df.head(max_display_rows)
-            else:
-                query_output = full_df
-            query_output = query_output.loc[:, ~query_output.columns.duplicated()]
-
-            # Skip chart image generation for agent observation (avoids rendering
-            # discrepancy between server-side matplotlib and frontend Vega-Lite).
-            # User-submitted images (attached_images) and focused thread chart
-            # thumbnails (rendered by the frontend) are still passed through.
-
-            # Build refined_goal for frontend compatibility
-            refined_goal = {
-                "display_instruction": display_instruction,
-                "output_variable": output_variable,
-                "output_fields": list(query_output.columns),
-                "chart": chart_spec,
-                "field_metadata": field_metadata,
-                "field_display_names": field_display_names or {},
-            }
-
-            transform_result = {
-                "status": "ok",
-                "code": code,
-                "content": {
-                    "rows": df_to_safe_records(query_output),
-                    "virtual": {
-                        "table_name": output_table_name,
-                        "row_count": row_count,
-                    },
-                },
-                "refined_goal": refined_goal,
-                "dialog": self._snapshot_dialog(messages),
-                "agent": "DataAgent",
-            }
-
-            return {
-                "status": "ok",
-                "transform_result": transform_result,
-            }
-
-        except Exception as e:
-            logger.error("[DataAgent] Visualize execution error", exc_info=e)
-            return {"status": "error", "error_message": "Visualization execution failed"}
-
-    def _create_chart(
-        self,
-        data: dict[str, Any],
-        chart_spec: dict[str, Any],
-        field_metadata: dict[str, Any] | None = None,
-    ) -> str | None:
-        """Create a chart and return a base64 PNG string for observation feedback."""
-        chart_type = chart_spec.get("chart_type", "Bar Chart")
-        chart_encodings = chart_spec.get("encodings", {})
-        chart_config = chart_spec.get("config", {})
-
-        try:
-            df = pd.DataFrame(data["rows"])
-            if df.empty:
-                return None
-
-            encodings = {}
-            for channel, field in chart_encodings.items():
-                if field and field in df.columns:
-                    field_type = resolve_field_type(df[field], field)
-                    field_type = coerce_field_type(chart_type, channel, field_type)
-                    encodings[channel] = {"field": field, "type": field_type}
-
-            spec = assemble_vegailte_chart(
-                df, chart_type, encodings, config=chart_config,
-                semantic_types=field_metadata_to_semantic_types(field_metadata),
-            )
-            return spec_to_base64(spec) if spec else None
-        except Exception as e:
-            logger.error(f"[DataAgent] Chart creation error: {e}")
-            return None
-
-    # ------------------------------------------------------------------
-    # Message construction
-    # ------------------------------------------------------------------
-
-    def _build_system_prompt(
-        self,
-        has_primary_tables: bool = False,
-        has_focused_thread: bool = False,
-        has_other_threads: bool = False,
-        has_attached_images: bool = False,
-    ) -> str:
-        rules_block = ""
-        if self.agent_exploration_rules and self.agent_exploration_rules.strip():
-            rules_block = (
-                "\n## Additional exploration rules\n\n"
-                + self.agent_exploration_rules.strip()
-                + "\n\nPlease follow the above rules when exploring data."
-            )
-
-        # Build context guide dynamically based on what's actually present
-        context_lines = []
-        if has_primary_tables:
-            context_lines.append(
-                "- **[PRIMARY TABLE(S)]**: The table(s) the user is focused on. "
-                "Prioritize these, but freely use other available tables if needed."
-            )
-            context_lines.append(
-                "- **[OTHER AVAILABLE TABLES]**: Additional tables in the workspace."
-            )
-        else:
-            context_lines.append(
-                "- **[AVAILABLE TABLES]**: All tables in the workspace."
-            )
-        context_lines.append(
-            "  Use `inspect_source_data` to get detailed stats and sample rows. "
-            "Use `explore` for custom computations."
-        )
-        if has_focused_thread:
-            context_lines.append(
-                "- **[FOCUSED THREAD]**: The thread the user is continuing. "
-                "Build on this — do not repeat visualizations already created here."
-            )
-        if has_other_threads:
-            context_lines.append(
-                "- **[OTHER THREADS]**: Brief summaries of other exploration threads in this workspace. "
-            )
-        if has_attached_images:
-            context_lines.append(
-                "- **[USER ATTACHMENT(S)]**: Image(s) provided by the user. "
-                "Refer to these when relevant to the user's question."
-            )
-        context_guide = "\n".join(context_lines)
-
-        prompt = SYSTEM_PROMPT.format(
-            max_iterations=self.max_iterations,
-            agent_exploration_rules=rules_block,
-            context_guide=context_guide,
-        )
-
-        # Inject alwaysApply rules RIGHT AFTER the core prompt, BEFORE
-        # technical reference material (chart guide, coding rules).
-        # This placement ensures the LLM sees user rules early, while
-        # they are still in the high-attention window.
-        if self._knowledge_store:
-            knowledge_rules = self._knowledge_store.load_always_apply_rules()
-            self._injected_rules = [r["title"] for r in knowledge_rules]
-            prompt += self._knowledge_store.format_rules_block(knowledge_rules)
-        else:
-            self._injected_rules = []
-
-        # Append technical reference material after user rules
-        prompt += "\n\n" + CHART_CREATION_GUIDE
-        if self.agent_coding_rules and self.agent_coding_rules.strip():
-            prompt += (
-                "\n\n## Agent Coding Rules\n\n"
-                + self.agent_coding_rules.strip()
-            )
-
-        if self.language_instruction:
-            prompt = prompt + "\n\n" + self.language_instruction
-        return prompt
-
-    def _build_initial_messages(
-        self,
-        input_tables: list[dict[str, Any]],
-        user_question: str,
-        focused_thread: list[dict[str, Any]] | None = None,
-        other_threads: list[dict[str, Any]] | None = None,
-        primary_tables: list[str] | None = None,
-        attached_images: list[str] | None = None,
-    ) -> list[dict]:
-        """Build the initial messages with 3-tier context.
-
-        Tier 1: Source tables (lightweight — column names + types + row count)
-        Tier 2: Focused thread (detailed — per-step interaction history)
-        Tier 3: Peripheral threads (minimal — one-line per step)
-        """
-        # Tier 1: Always lightweight schema — agent uses inspect_source_data
-        # tool for details on tables it needs
-        table_summaries = self._build_lightweight_table_context(input_tables, primary_tables=primary_tables)
-
-        # Tier 2: Focused thread (detailed)
-        focused_block = ""
-        if focused_thread:
-            focused_block = self._build_focused_thread_context(focused_thread)
-
-        # Tier 3: Peripheral threads (minimal)
-        peripheral_block = ""
-        if other_threads:
-            peripheral_block = self._build_peripheral_thread_context(other_threads)
-
-        # Use [SOURCE TABLES] when no tiering, omit section header when tiered
-        # (the tiers already have their own headers)
-        if primary_tables:
-            user_content = f"{table_summaries}\n\n"
-        else:
-            user_content = f"[AVAILABLE TABLES]\n\n{table_summaries}\n\n"
-        if focused_block:
-            user_content += f"{focused_block}\n\n"
-        if peripheral_block:
-            user_content += f"{peripheral_block}\n\n"
-
-        # Search and inject relevant knowledge (workflows + non-alwaysApply rules)
-        #
-        # TODO(knowledge): Both knowledge-access paths are intentionally
-        # disabled for now:
-        #   1. (controlled) the automatic up-front injection that used to run
-        #      `self._search_relevant_knowledge(user_question, table_names)`
-        #      here and append a `[RELEVANT KNOWLEDGE]` block to the user msg.
-        #   2. (uncontrolled) the agent-callable `search_knowledge` /
-        #      `read_knowledge` tools (definitions + dispatch + handlers).
-        # Reason: lexical keyword search over rules/workflows injected unclear,
-        # often-irrelevant context and added agent burden without a clear win.
-        #
-        # When we revisit this, design ONE coherent retrieval path rather than
-        # two competing ones. Open questions to settle first:
-        #   - relevance: replace the keyword `_match_score` with semantic /
-        #     embedding search (or a hybrid) so matches are actually on-topic;
-        #   - trigger: decide controlled (deterministic pre-inject, bounded)
-        #     vs. tool-driven (agent asks on demand) — pick one, not both;
-        #   - budget: hard cap how many entries + tokens land in the prompt;
-        #   - scope: keep alwaysApply rules (injected below) separate — those
-        #     are an explicit user opt-in, not search.
-        # `_injected_knowledge` stays an empty list so the reasoning log and
-        # context_info payloads keep their shape.
-        self._injected_knowledge = []
-        # Inject alwaysApply rules into user message for better visibility
-        # (rules in system prompt are often ignored; rules in user message have higher impact)
-        if self._knowledge_store:
-            always_apply_rules = self._knowledge_store.load_always_apply_rules()
-            if always_apply_rules:
-                rules_text = "\n\n".join([f"### {r['title']}\n{r['body']}" for r in always_apply_rules])
-                user_content += f"[USER RULES - MUST FOLLOW]\n\n{rules_text}\n\n"
-
-        user_content += f"[USER QUESTION]\n\n{user_question}"
-
-        # Check if any step in the focused thread has a chart thumbnail
-        # (the focused leaf's chart image for visual context)
-        chart_thumbnail = None
-        if focused_thread:
-            for step in focused_thread:
-                if step.get("chart_thumbnail"):
-                    chart_thumbnail = step["chart_thumbnail"]
-
-        # Build system prompt with context-aware guide
-        system_prompt = self._build_system_prompt(
-            has_primary_tables=bool(primary_tables),
-            has_focused_thread=bool(focused_thread),
-            has_other_threads=bool(other_threads),
-            has_attached_images=bool(attached_images),
-        )
-
-        # Determine if we need multimodal content (chart thumbnail or user-attached images)
-        has_images = (chart_thumbnail and chart_thumbnail.startswith("data:")) or (attached_images and len(attached_images) > 0)
-
-        if has_images:
-            content_parts: list[dict] = [{"type": "text", "text": user_content}]
-            if chart_thumbnail and chart_thumbnail.startswith("data:"):
-                content_parts.append({"type": "text", "text": "\n[CURRENT CHART] (the chart the user is currently viewing):"})
-                content_parts.append({"type": "image_url", "image_url": {"url": chart_thumbnail, "detail": "low"}})
-            if attached_images:
-                label = "[USER ATTACHMENT]" if len(attached_images) == 1 else "[USER ATTACHMENTS]"
-                content_parts.append({"type": "text", "text": f"\n{label} (image(s) provided by the user):"})
-                for img in attached_images:
-                    if img.startswith("data:"):
-                        content_parts.append({"type": "image_url", "image_url": {"url": img, "detail": "low"}})
-            return [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": content_parts},
-            ]
-        else:
-            return [
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_content},
-            ]
-
-    def _build_focused_thread_context(
-        self, focused_thread: list[dict[str, Any]]
-    ) -> str:
-        return build_focused_thread_context(focused_thread)
-
-    def _build_peripheral_thread_context(
-        self, other_threads: list[dict[str, Any]]
-    ) -> str:
-        return build_peripheral_thread_context(other_threads)
-
-    def _build_lightweight_table_context(
-        self, input_tables: list[dict[str, Any]], primary_tables: list[str] | None = None
-    ) -> str:
-        return build_lightweight_table_context(
-            input_tables,
-            self.workspace,
-            primary_tables,
-        )
-
-    # ------------------------------------------------------------------
-    # LLM interaction (with internal tool-calling loop)
-    # ------------------------------------------------------------------
-
-    def _get_next_action(
-        self,
-        trajectory: list[dict],
-        input_tables: list[dict[str, Any]] | None = None,
-        outer_iteration: int = 0,
-    ) -> Generator[dict[str, Any], None, None]:
-        """Call the LLM with tools, handle tool calls internally, then
-        parse the structured JSON action from the text response.
-
-        Yields:
-            - ``{"type": "tool_start", "tool": ..., ...}`` for each tool call
-            - ``{"type": "tool_result", "tool": ..., ...}`` for each tool result
-            - ``{"type": "agent_action", "action_data": dict, "reason": ...,
-                "llm_calls": int}`` as the final yield.
-              ``action_data`` is *None* on failure;
-              ``reason`` is one of ``"ok"``, ``"json_parse_failed"``,
-              ``"llm_error"``, ``"tool_rounds_exhausted"``.
-              ``llm_calls`` is the number of LLM calls made in this cycle.
-        """
-        max_tool_rounds = 12
-        max_json_retries = 1
-        json_retries = 0
-        messages = trajectory
-        llm_calls_in_cycle = 0
-
-        rlog = self._reasoning_log
-
-        from data_formulator.sandbox.local_sandbox import SandboxSession
-        ns_dir = self._explore_ns_dir()
-        ws_path = str(self.workspace.confined_scratch.root.parent)
-
-        with SandboxSession() as explore_session:
-            self._explore_session = explore_session
-
-            if ns_dir.exists():
-                ok = SandboxSession.restore_namespace(explore_session, ns_dir, ws_path)
-                if ok:
-                    logger.info("[DataAgent] Restored explore namespace from %s", ns_dir)
-                import shutil
-                shutil.rmtree(ns_dir, ignore_errors=True)
-
-            self._tool_loop_exit_reason = None
-            yield from self._tool_loop(
-                messages, max_tool_rounds, max_json_retries, json_retries,
-                llm_calls_in_cycle, rlog, input_tables, outer_iteration,
-            )
-
-            if self._tool_loop_exit_reason == "tool_rounds_exhausted":
-                saved = explore_session.save_namespace(ns_dir, ws_path)
-                if saved:
-                    logger.info("[DataAgent] Saved explore namespace to %s", ns_dir)
-
-            self._explore_session = None
-
-    def _tool_loop(
-        self,
-        messages, max_tool_rounds, max_json_retries, json_retries,
-        llm_calls_in_cycle, rlog, input_tables, outer_iteration,
-    ):
-        """Inner tool-calling loop, extracted so _get_next_action can wrap
-        it in a SandboxSession context manager."""
-        for round_idx in range(max_tool_rounds):
-            llm_calls_in_cycle += 1
-            rlog.log("llm_request", iteration=outer_iteration,
-                     round=round_idx + 1,
-                     messages_count=len(messages),
-                     tools_available=[t["function"]["name"] for t in TOOLS])
-            llm_t0 = time.time()
-            try:
-                response = self._call_llm(messages)
-            except Exception as exc:
-                llm_latency = int((time.time() - llm_t0) * 1000)
-                rlog.log("llm_response", iteration=outer_iteration,
-                         round=round_idx + 1,
-                         latency_ms=llm_latency, finish_reason="error",
-                         error=type(exc).__name__)
-                logger.error("[DataAgent] LLM call failed", exc_info=exc)
-                from data_formulator.security.sanitize import classify_llm_error
-                yield {
-                    "type": "agent_action",
-                    "action_data": None,
-                    "reason": "llm_error",
-                    "error_message": classify_llm_error(exc),
-                    "llm_calls": llm_calls_in_cycle,
-                }
-                return
-
-            llm_latency = int((time.time() - llm_t0) * 1000)
-
-            if not response.choices:
-                rlog.log("llm_response", iteration=outer_iteration,
-                         round=round_idx + 1,
-                         latency_ms=llm_latency, finish_reason="empty")
-                yield {"type": "agent_action", "action_data": None, "reason": "llm_error",
-                       "error_message": "LLM returned empty response",
-                       "llm_calls": llm_calls_in_cycle}
-                return
-
-            choice = response.choices[0]
-            content = choice.message.content or ""
-            tool_calls = getattr(choice.message, 'tool_calls', None)
-            finish_reason = getattr(choice, "finish_reason", "stop")
-
-            if tool_calls:
-                rlog.log("llm_response", iteration=outer_iteration,
-                         round=round_idx + 1,
-                         latency_ms=llm_latency, finish_reason="tool_calls",
-                         tool_calls=[{"name": tc.function.name} for tc in tool_calls])
-            else:
-                rlog.log("llm_response", iteration=outer_iteration,
-                         round=round_idx + 1,
-                         latency_ms=llm_latency, finish_reason=finish_reason)
-
-            # --- tool calls: execute and loop back ---
-            if tool_calls:
-                if content.strip():
-                    yield {"type": "thinking_text", "content": content.strip()}
-
-                assistant_msg: dict[str, Any] = {
-                    "role": "assistant",
-                    "content": content or None,
-                }
-                attach_reasoning_content(assistant_msg, choice.message)
-                assistant_msg["tool_calls"] = [
-                    {
-                        "id": tc.id,
-                        "type": "function",
-                        "function": {
-                            "name": tc.function.name,
-                            "arguments": tc.function.arguments,
-                        },
-                    }
-                    for tc in tool_calls
-                ]
-                messages.append(assistant_msg)
-
-                for tc in tool_calls:
-                    tool_name = tc.function.name
-                    try:
-                        tool_args = json.loads(tc.function.arguments)
-                    except json.JSONDecodeError:
-                        tool_args = {}
-
-                    yield {
-                        "type": "tool_start",
-                        "tool": tool_name,
-                        "purpose": tool_args.get("purpose") if tool_name == "explore" else None,
-                        "code": tool_args.get("code") if tool_name == "explore" else None,
-                        "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None,
-                    }
-
-                    tool_t0 = time.time()
-                    tool_status = "ok"
-
-                    if tool_name == "explore":
-                        result = self._run_explore_code(
-                            tool_args.get("code", ""),
-                            input_tables or [],
-                        )
-                        tool_content = result.get("stdout", "")
-                        tool_status = result.get("status", "ok")
-                        if result.get("error"):
-                            tool_content += f"\n\nError: {result['error']}"
-                        yield {
-                            "type": "tool_result",
-                            "tool": tool_name,
-                            "status": tool_status,
-                            "stdout": result.get("stdout", ""),
-                            "error": result.get("error"),
-                        }
-                    elif tool_name == "inspect_source_data":
-                        table_names = tool_args.get("table_names", [])
-                        tool_content = handle_inspect_source_data(
-                            table_names, input_tables or [], self.workspace,
-                        )
-                        yield {
-                            "type": "tool_result",
-                            "tool": tool_name,
-                            "status": "ok",
-                            "stdout": tool_content,
-                        }
-                    elif tool_name in ("visualize", "clarify", "explain", "summary", "delegate", "action"):
-                        action_data = dict(tool_args)
-                        if "action" not in action_data:
-                            real_name = tool_name if tool_name != "action" else action_data.get("type", "summary")
-                            action_data["action"] = real_name
-
-                        _rescue_unpack_json_strings(action_data)
-
-                        missing = _rescue_validate_action(action_data)
-                        if missing:
-                            tool_content = (
-                                f"ERROR: '{action_data['action']}' is an ACTION, not a tool. "
-                                f"Output it as a JSON object in your text reply. "
-                                f"Also, these required fields are missing: {', '.join(missing)}."
-                            )
-                            logger.warning("[DataAgent] Action-as-tool with missing fields %s, sending correction", missing)
-                            yield {
-                                "type": "tool_result",
-                                "tool": tool_name,
-                                "status": "error",
-                                "error": f"Missing fields: {', '.join(missing)}",
-                            }
-                        else:
-                            logger.info("[DataAgent] Rescued action '%s' from tool call (weak-model fallback)", action_data.get("action"))
-                            tool_content = "ok"
-                            messages.append({
-                                "role": "tool",
-                                "tool_call_id": tc.id,
-                                "content": tool_content,
-                            })
-                            rlog.log("tool_execution", iteration=outer_iteration,
-                                     tool=tool_name,
-                                     input_summary="rescued_as_action",
-                                     output_summary="ok",
-                                     latency_ms=0, status="ok")
-                            yield {"type": "agent_action", "action_data": action_data,
-                                   "reason": "ok", "llm_calls": llm_calls_in_cycle}
-                            return
-                    else:
-                        tool_content = f"Unknown tool: {tool_name}"
-
-                    tool_latency = int((time.time() - tool_t0) * 1000)
-                    output_summary = (tool_content[:200] + "...") if len(tool_content) > 200 else tool_content
-                    rlog.log("tool_execution", iteration=outer_iteration,
-                             tool=tool_name,
-                             input_summary=tool_args.get("purpose", "")[:200],
-                             output_summary=output_summary,
-                             latency_ms=tool_latency, status=tool_status)
-
-                    messages.append({
-                        "role": "tool",
-                        "tool_call_id": tc.id,
-                        "content": tool_content,
-                    })
-
-                logger.info("[DataAgent] Executed %d tool call(s), looping back to LLM", len(tool_calls))
-                continue
-
-            # --- no tool calls — parse JSON action from text ---
-            logger.debug("[DataAgent] Raw LLM response:\n%s", content)
-            json_blocks = extract_json_objects(content)
-            if json_blocks:
-                messages.append({"role": "assistant", "content": content})
-                yield {"type": "agent_action", "action_data": json_blocks[0], "reason": "ok",
-                       "llm_calls": llm_calls_in_cycle}
-                return
-
-            # --- JSON parse failed — focused retry (ask LLM to reformat only) ---
-            if json_retries < max_json_retries:
-                json_retries += 1
-                logger.warning("[DataAgent] No JSON found (retry %d/%d), asking LLM to reformat",
-                               json_retries, max_json_retries)
-                retry_assistant_msg: dict[str, Any] = {"role": "assistant", "content": content}
-                attach_reasoning_content(retry_assistant_msg, choice.message)
-                messages.append(retry_assistant_msg)
-                messages.append({
-                    "role": "user",
-                    "content": (
-                        "[FORMAT ERROR] Your previous response did not contain a valid JSON action. "
-                        "Please output ONLY a JSON object with one of these actions: "
-                        "visualize, clarify, explain, summary, or delegate. Do NOT repeat your analysis — "
-                        "just reformat your conclusion as JSON."
-                    ),
-                })
-                continue
-
-            logger.warning("[DataAgent] JSON parse failed after retries: %s", content[:200])
-            yield {"type": "agent_action", "action_data": None, "reason": "json_parse_failed",
-                   "llm_calls": llm_calls_in_cycle}
-            return
-
-        # --- tool rounds exhausted ---
-        logger.warning("[DataAgent] Exceeded %d tool rounds without producing an action", max_tool_rounds)
-        self._tool_loop_exit_reason = "tool_rounds_exhausted"
-        yield {"type": "agent_action", "action_data": None, "reason": "tool_rounds_exhausted",
-               "llm_calls": llm_calls_in_cycle}
-        return
-
-    _MAX_LLM_RETRIES = 3
-
-    @staticmethod
-    def _is_transient_error(exc: Exception) -> bool:
-        msg = str(exc).lower()
-        if any(kw in msg for kw in (
-            "timeout", "timed out", "rate limit", "rate_limit",
-            "429", "503", "502", "connection", "reset by peer",
-        )):
-            return True
-        name = type(exc).__name__.lower()
-        return any(kw in name for kw in ("timeout", "ratelimit", "connection"))
-
-    def _call_llm(self, messages: list[dict]):
-        """Call the LLM with tool definitions (non-streaming).
-
-        Retries up to ``_MAX_LLM_RETRIES`` times on transient errors
-        (timeout, rate-limit, connection reset) with exponential back-off.
-        """
-        last_exc: Exception | None = None
-        for attempt in range(self._MAX_LLM_RETRIES):
-            try:
-                return self._call_llm_once(messages)
-            except Exception as e:
-                last_exc = e
-                if self._is_transient_error(e) and attempt < self._MAX_LLM_RETRIES - 1:
-                    wait = 2 ** attempt
-                    logger.warning(
-                        "[DataAgent] Transient LLM error (attempt %d/%d), "
-                        "retrying in %ds: %s",
-                        attempt + 1, self._MAX_LLM_RETRIES, wait, e,
-                    )
-                    time.sleep(wait)
-                    continue
-                raise
-        raise last_exc  # pragma: no cover
-
-    def _call_llm_once(self, messages: list[dict]):
-        """Single LLM call (no retry)."""
-        return self.client.get_completion_with_tools(
-            messages, tools=TOOLS, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model),
-        )
-
-    # ------------------------------------------------------------------
-    # Observation formatting
-    # ------------------------------------------------------------------
-
-    def _format_observation(
-        self,
-        step_index: int,
-        display_instruction: str,
-        thought: str,
-        code: str,
-        data: dict[str, Any],
-        chart_image: str | None,
-    ) -> dict:
-        """Format a rich observation for the trajectory.
-
-        Includes data summary, code, and optionally the chart image
-        so the agent can make informed decisions about the next step.
-        """
-        data_summary = generate_data_summary(
-            [{"name": data.get("virtual", {}).get("table_name", f"step_{step_index}"),
-              "rows": data["rows"]}],
-            workspace=self.workspace,
-        )
-
-        text = (
-            f"[OBSERVATION – Step {step_index}]\n\n"
-            f"**Visualization**: {display_instruction}\n\n"
-            f"**Code**:\n```python\n{code}\n```\n\n"
-            f"**Transformed Data**:\n{data_summary}"
-        )
-
-        if chart_image:
-            content: list[dict[str, Any]] = [
-                {"type": "text", "text": text + "\n\n**Chart**:"},
-            ]
-            if chart_image.startswith("data:") or chart_image.startswith("http"):
-                content.append({
-                    "type": "image_url",
-                    "image_url": {"url": chart_image, "detail": "low"},
-                })
-            return {"role": "user", "content": content}
-
-        return {"role": "user", "content": text}
-
-    # ------------------------------------------------------------------
-    # Knowledge helpers
-    # ------------------------------------------------------------------
-    #
-    # TODO(knowledge): The data agent's knowledge access is disabled for now.
-    # Removed together:
-    #   - _search_relevant_knowledge()  (controlled up-front auto-injection)
-    #   - _handle_search_knowledge()    (uncontrolled `search_knowledge` tool)
-    #   - _handle_read_knowledge()      (uncontrolled `read_knowledge` tool)
-    # KnowledgeStore.search()/read() still exist and are used elsewhere
-    # (e.g. the Knowledge panel + alwaysApply rule injection). When we bring
-    # agent knowledge back, add a single unified retrieval entry point here
-    # rather than re-adding both competing paths. See the TODO block in
-    # _build_initial_messages for the design questions to settle first.
-
-    # ------------------------------------------------------------------
-    # Helpers
-    # ------------------------------------------------------------------
-
-    @staticmethod
-    def _strip_images(trajectory: list[dict]) -> list[dict]:
-        """Return a copy of the trajectory with image_url blocks removed."""
-        stripped: list[dict] = []
-        for msg in trajectory:
-            content = msg.get("content")
-            if isinstance(content, list):
-                text_parts = [p for p in content if p.get("type") == "text"]
-                if text_parts:
-                    stripped.append({**msg, "content": text_parts})
-                else:
-                    stripped.append({**msg, "content": "[image removed]"})
-            else:
-                stripped.append(msg)
-        return stripped
-
-    @staticmethod
-    def _log_session_end(
-        rlog,
-        status: str,
-        total_iterations: int,
-        total_llm_calls: int,
-        session_start_time: float,
-    ) -> None:
-        """Write ``session_end`` to the reasoning log.
-
-        Does **not** close the log — the ``finally`` block in ``run()``
-        handles that so the fd is released even on unexpected exceptions.
-        """
-        rlog.log(
-            "session_end",
-            status=status,
-            total_iterations=total_iterations,
-            total_llm_calls=total_llm_calls,
-            total_latency_ms=int((time.time() - session_start_time) * 1000),
-        )
-
-    @staticmethod
-    def _error_event(
-        iteration: int,
-        message: str,
-        *,
-        display_instruction: str = "",
-        message_code: str = "",
-        message_params: dict | None = None,
-    ) -> dict[str, Any]:
-        """Build an ``"error"`` event dict for the streaming response."""
-        event: dict[str, Any] = {
-            "type": "error",
-            "iteration": iteration,
-            "message": message,
-        }
-        if message_code:
-            event["message_code"] = message_code
-        if message_params:
-            event["message_params"] = message_params
-        if display_instruction:
-            event["display_instruction"] = display_instruction
-        return event
-
-    @staticmethod
-    def _snapshot_dialog(messages: list[dict] | None) -> list[dict]:
-        """Snapshot the conversation for the Agent Log dialog.
-
-        Handles plain text, multimodal content, tool_calls on assistant
-        messages, and tool result messages.
-        """
-        if not messages:
-            return []
-        snapshot: list[dict] = []
-        for msg in messages:
-            role = msg.get("role", "")
-            content = msg.get("content")
-
-            # Flatten multimodal content to text-only
-            if isinstance(content, list):
-                content = "\n".join(
-                    p.get("text", "") for p in content if p.get("type") == "text"
-                )
-
-            # Assistant messages with tool_calls — show tool call details
-            if role == "assistant" and msg.get("tool_calls"):
-                tool_details = []
-                for tc in msg["tool_calls"]:
-                    fn = tc.get("function", {})
-                    name = fn.get("name", "?")
-                    args_str = fn.get("arguments", "{}")
-                    try:
-                        args_obj = json.loads(args_str)
-                        if name == "explore" and "code" in args_obj:
-                            tool_details.append(f"[tool: {name}]\n```python\n{args_obj['code']}\n```")
-                        else:
-                            formatted = json.dumps(args_obj, indent=2, ensure_ascii=False)
-                            tool_details.append(f"[tool: {name}]\n```json\n{formatted}\n```")
-                    except (json.JSONDecodeError, TypeError):
-                        tool_details.append(f"[tool: {name}]\n{args_str}")
-                text_part = content or ""
-                combined = (text_part + "\n\n" + "\n\n".join(tool_details)).strip()
-                snapshot.append({"role": role, "content": combined})
-
-            # Tool result messages
-            elif role == "tool":
-                tool_content = content or ""
-                if isinstance(tool_content, str) and len(tool_content) > 3000:
-                    tool_content = tool_content[:3000] + "\n... (truncated)"
-                snapshot.append({"role": "assistant", "content": f"[tool result]\n{tool_content}"})
-
-            # Regular messages (system, user, assistant without tool_calls)
-            elif content:
-                if role != "system" and isinstance(content, str) and len(content) > 4000:
-                    content = content[:4000] + "\n... (truncated)"
-                snapshot.append({"role": role, "content": content})
-        return snapshot
diff --git a/py-src/data_formulator/analyst/agent.py b/py-src/data_formulator/analyst/agent.py
index e83c4432..51e8010c 100644
--- a/py-src/data_formulator/analyst/agent.py
+++ b/py-src/data_formulator/analyst/agent.py
@@ -52,7 +52,6 @@
     handle_inspect_source_data,
 )
 from data_formulator.agents.client_utils import Client
-from data_formulator.agents.chart_creation_guide import CHART_CREATION_GUIDE
 from data_formulator.datalake.parquet_utils import df_to_safe_records
 
 from data_formulator.analyst.skills import (
@@ -810,6 +809,15 @@ def _route_skill_events(
         *buffered* re-emission of the same content — its ``action`` event and the
         ``text_delta`` on that channel — is dropped here so the frontend sees the
         content exactly once (design-docs/36 §5).
+
+        Recoverable errors: every ``error`` event a skill yields is paired with a
+        returned observation string (e.g. visualize's "chart fields not found",
+        a malformed ``ask_user`` payload). That observation is fed back to the
+        agent as the action's tool-call result, so the agent sees the failure and
+        self-corrects on the next iteration. These are *internal* retry signals,
+        not user-facing failures, so they are dropped here and never streamed to
+        the frontend. Only fatal, run-ending errors (LLM API failures) are
+        emitted directly by ``run`` outside this router and do reach the client.
         """
         suppress_channel = self._suppress_stream_channel
         try:
@@ -817,9 +825,12 @@ def _route_skill_events(
             while True:
                 ev.setdefault("iteration", iteration)
                 etype = ev.get("type")
-                drop = bool(suppress_channel) and (
-                    etype == "action"
-                    or (etype == "text_delta" and ev.get("channel") == suppress_channel)
+                drop = (
+                    etype == "error"
+                    or (bool(suppress_channel) and (
+                        etype == "action"
+                        or (etype == "text_delta" and ev.get("channel") == suppress_channel)
+                    ))
                 )
                 if not drop:
                     if etype == "result":
@@ -1188,7 +1199,6 @@ def _build_system_prompt(
         else:
             self._injected_rules = []
 
-        prompt += "\n\n" + CHART_CREATION_GUIDE
         if self.agent_coding_rules and self.agent_coding_rules.strip():
             prompt += (
                 "\n\n## Agent Coding Rules\n\n"
diff --git a/py-src/data_formulator/analyst/skills/core/SKILL.md b/py-src/data_formulator/analyst/skills/core/SKILL.md
index 9820d65f..6faafff0 100644
--- a/py-src/data_formulator/analyst/skills/core/SKILL.md
+++ b/py-src/data_formulator/analyst/skills/core/SKILL.md
@@ -149,3 +149,120 @@ effort:
 When chaining visualizations, add the next chart only if it answers a gap *raised*
 by the previous one — not just another interesting angle. **Never** repeat a
 visualization already in the trajectory or in another thread.
+
+## Chart Creation Guide
+
+The following reference material applies when you call the `visualize` tool.
+
+### A. Code Execution Rules
+
+**About the execution environment:**
+- You can use BOTH DuckDB SQL and pandas operations in the same script
+- The script will run in the workspace data directory (all data files are in the current directory)
+- Each table in [CONTEXT] has a **file path** (e.g., `student_exam.parquet`, `sales.csv`). Use EXACTLY that path to load data:
+    - `.parquet`: `pd.read_parquet('file.parquet')` or DuckDB `read_parquet('file.parquet')`
+    - `.csv`: `pd.read_csv('file.csv')` or DuckDB `read_csv_auto('file.csv')`
+    - `.json`: `pd.read_json('file.json')`
+    - `.xlsx`/`.xls`: `pd.read_excel('file.xlsx')`
+    - `.txt`: `pd.read_csv('file.txt', sep='\t')`
+- **IMPORTANT:** Use the exact filename from the context — do NOT change the file extension or assume all files are parquet.
+- **Allowed libraries:** pandas, numpy, duckdb, math, datetime, json, statistics, collections, re, sklearn, scipy, random, itertools, functools, operator, time
+- **Not allowed:** matplotlib, plotly, seaborn, requests, subprocess, os, sys, io, or any other library not listed above.
+- File system access (open, write) and network access are also forbidden.
+
+**When to use DuckDB vs pandas:**
+- **Prefer plain pandas** for most tasks — it's simpler and more readable.
+- Only use DuckDB when the dataset is very large and you need efficient SQL aggregations, filtering, joins, or window functions.
+- You can combine both: DuckDB for initial loading/filtering on large files, then pandas for complex operations.
+
+**Code structure:** standalone script (no function wrapper), imports at top. **CRITICAL:** The final result DataFrame MUST be assigned to the exact variable name you specified in `"output_variable"` — the system uses this name to extract the result. For example, if your output_variable is `sales_by_region`, the script must contain `sales_by_region = ...`.
+
+**DuckDB notes:**
+- Escape a single quote inside a SQL string literal by doubling it ('' — not \')
+- Do not use Unicode escape sequences (e.g. \u0400); write the characters or character ranges directly, e.g. [а-яА-Я]
+- Cast date columns explicitly: `CAST(col AS DATE)`, `CAST(col AS TIMESTAMP)`
+- For complex datetime operations, load data first then use pandas datetime functions
+- Critical identifier quoting rule:
+  * If a table/column name contains non-ASCII characters (e.g., Chinese, Japanese, Korean, Cyrillic, etc.), spaces, or punctuation,
+    you MUST wrap it in double quotes, e.g. SELECT "金额" FROM "客户表".
+  * Never output placeholder identifiers like your_table_name, your_column, your_condition.
+
+**Datetime handling:**
+- `date` columns contain date-only values (YYYY-MM-DD). `datetime` columns contain date+time (ISO 8601).
+- `time` columns contain time-only values (HH:mm:ss). `duration` columns are time intervals.
+- Output format for date parts: year alone → number; year-month / year-month-day → string ("2020-01" / "2020-01-01").
+- Output format for time parts: hour alone → number; hour:minute or hour:minute:second → string. Never return raw datetime objects.
+
+### B. Chart Type Reference
+
+The `chart_type` value in the `visualize` action MUST be exactly one of the names
+listed in the first column below (exact spelling, including capitalization). When a
+row lists several names separated by "/", each one is a separate chart type — use only the single name that best fits the "when to use" hint, never the combined cell text.
+
+| chart_type | encodings | config | when to use |
+|---|---|---|---|
+| Scatter Plot | x, y, color, size, facet | opacity (0.1–1.0) | Relationships between two quantitative fields |
+| Regression | x, y, color, size, facet | regressionMethod ("linear","log","exp","pow","quad","poly"), polyOrder (2–10) | Trend line over scatter; one line per color group |
+| Bar Chart / Lollipop Chart / Waterfall Chart | x, y, color, facet | — | Bar: default categorical comparison. Lollipop: cleaner for ranked lists / sparse categories. Waterfall: cumulative gain/loss, each bar starts where the previous ended |
+| Grouped Bar Chart | x, y, group, facet | — | Side-by-side bars across a second categorical dimension |
+| Histogram / Density Plot | x, color, facet | — | Distribution of one quantitative field. Histogram: discrete bins, auto-binned. Density Plot: smooth KDE curve |
+| Boxplot | x, y, color, facet | — | Distribution summary (median/quartiles/outliers) by category |
+| Ranged Dot Plot | x, y, color, facet | — | Min–max range or two-point comparison per category |
+| Line Chart | x, y, color, strokeDash, facet | interpolate ("linear","monotone","step") | Trends over an ordered (usually temporal) x-axis |
+| Area Chart | x, y, color, facet | — | Magnitude over ordered x; auto-stacks when color is set |
+| Pie Chart | size, color, facet | innerRadius (0–100; 0=pie, >0=donut) | Part-of-whole with ≤7 categories. Wedge value goes on **size**, not **theta** |
+| Radar Chart | x, y, color, facet | — | Multi-metric profile/comparison; x = metric name, y = value, color = entity (long-form data) |
+| Heatmap | x, y, color, facet | colorScheme — sequential ("viridis","blues","reds","oranges","greens") or diverging ("blueorange","redblue") | Matrix / 2D density; color encodes the quantitative cell value |
+| Bar Table | x, y, color, facet | — | Ranked horizontal table with inline bars; one row per category. y = category, x = value |
+| KPI Card | metric, value, goal | — | "Big number" dashboard tile(s); one row per tile. `value` must be pre-aggregated; `goal` is optional |
+| Candlestick Chart | x, open, high, low, close, facet | — | OHLC financial data |
+| World Map | longitude, latitude, color, size | projection ("mercator","equalEarth","naturalEarth1","orthographic"), projectionCenter ([lon,lat]) | Geographic points/regions on a world projection |
+| US Map | longitude, latitude, color, size | — (fixed albersUsa) | US-only points/regions (albersUsa projection) |
+
+**Critical chart rules:**
+- **Scatter Plot**: use config opacity (0.1–1.0) for dense data instead of encoding opacity.
+- **Regression**: trend line is automatic — do NOT compute regression coefficients/predictions in Python. Use `color` to get separate trend lines per group.
+- **Bar Chart**: x=categorical, y=quantitative (vertical bars). Swap x↔y for horizontal bars. Same-x rows are auto-stacked when `color` is set.
+- **Grouped Bar Chart**: use the `group` channel (not `color`) for side-by-side bars.
+- **Histogram**: do NOT pre-bin in Python — pass the raw quantitative field on `x` and the chart bins automatically. Pre-aggregating gives wrong bin widths.
+- **Line Chart**: use `strokeDash` to differentiate line styles (e.g. actual vs forecast).
+- **Pie Chart**: use the `size` channel (not `theta`) for wedge values. Avoid when there are more than 7 categories.
+- **Radar Chart**: data must be long-form — one row per (entity, metric, value). If your data is wide-form (one column per metric), melt it first in the Python step.
+- **Heatmap**: pick `colorScheme` by the meaning of the values. Use a **sequential** scheme (viridis/blues/reds/oranges/greens) for single-direction magnitudes (counts, rates, prices, scores — higher is simply more). Use a **diverging** scheme (blueorange/redblue) ONLY when the values have a meaningful center to read away from (e.g. profit/loss around 0, change vs. a baseline, temperature around freezing).
+- **Bar Table**: y is the category column to rank; x is the quantitative value driving bar length. Don't sort in Python — the template sorts.
+- **KPI Card**: channels are `metric`, `value`, `goal` (not x/y). One DataFrame row = one tile. The `value` column must already contain the final number to display (aggregate upstream in the Python step).
+- **Candlestick Chart**: requires `open`, `high`, `low`, `close` columns.
+- **World Map / US Map**: channel names are `longitude` / `latitude`, not `x` / `y`.
+- **facet**: available for every chart type in the table above except KPI Card, World Map, and US Map; use a low-cardinality categorical field.
+- All fields in `encodings` must also appear in `output_fields`. Typically use 2–3 channels (x, y, color/size).
+
+### C. Semantic Type Reference
+
+Choose the most specific type that fits. Only annotate fields used in chart encodings.
+
+| Category | Types |
+|---|---|
+| Temporal | DateTime, Date, Time, Timestamp, Year, Quarter, Month, Week, Day, Hour, YearMonth, YearQuarter, YearWeek, Decade, Duration |
+| Monetary measures | Amount, Price |
+| Physical measures | Quantity, Temperature |
+| Proportion | Percentage |
+| Signed/diverging | Profit, PercentageChange, Sentiment, Correlation |
+| Generic measures | Count, Number |
+| Discrete numeric | Rank, Score |
+| Identifier | ID |
+| Geographic | Latitude, Longitude, Country, State, City, Region, Address, ZipCode |
+| Entity names | Category, Name |
+| Coded categorical | Status, Boolean, Direction |
+| Binned ranges | Range |
+| Fallback | Unknown |
+
+Key guidelines:
+- Use **Amount** for summed monetary totals, **Price** for per-unit prices, **Profit** for values that can be negative.
+- Use **Temperature** (not Quantity) for temperature — it has special diverging behavior.
+- Use **Year** (not Number) for columns like "year" with values 2020, 2021.
+
+### D. Statistical Analysis Guide
+
+- **Regression**: use chart_type "Regression" — the trend line is automatic, do NOT compute regression values in Python code. Configure method via `{"regressionMethod": "linear"}` (options: "linear", "log", "exp", "pow", "quad", "poly"; for poly add `{"polyOrder": 3}`).
+- **Forecasting**: compute predicted future values in Python. Use Line Chart with strokeDash to distinguish actual vs forecast, and color for series grouping.
+- **Clustering**: compute cluster assignments in Python. Output [x, y, cluster_id]. Use Scatter Plot with color → cluster_id.
diff --git a/py-src/data_formulator/routes/agents.py b/py-src/data_formulator/routes/agents.py
index b9e546f0..e44e82e6 100644
--- a/py-src/data_formulator/routes/agents.py
+++ b/py-src/data_formulator/routes/agents.py
@@ -18,9 +18,6 @@
 import html
 import pandas as pd
 
-from data_formulator.agents.agent_data_transform import DataTransformationAgent
-from data_formulator.agents.agent_data_rec import DataRecAgent
-
 from data_formulator.agents.agent_sort_data import SortDataAgent
 from data_formulator.agents.agent_simple import SimpleAgents
 from data_formulator.auth.identity import get_identity_id
@@ -32,13 +29,10 @@
 from data_formulator.agents.agent_data_loading_chat import DataLoadingAgent
 from data_formulator.agents.agent_code_explanation import CodeExplanationAgent
 from data_formulator.agents.agent_chart_insight import ChartInsightAgent
-from data_formulator.agents.agent_interactive_explore import InteractiveExploreAgent
-from data_formulator.agents.agent_report_gen import ReportGenAgent
 from data_formulator.agents.client_utils import Client
 from data_formulator.model_registry import model_registry
 from data_formulator.knowledge.store import KnowledgeStore
 
-from data_formulator.agents.data_agent import DataAgent
 from data_formulator.analyst.agent import AnalystAgent
 from data_formulator.agents.agent_language import build_language_instruction
 from data_formulator.security.sanitize import classify_llm_error, sanitize_error_message
@@ -75,29 +69,6 @@ def _get_knowledge_store(identity_id: str) -> KnowledgeStore | None:
 agent_bp = Blueprint('agent', __name__, url_prefix='/api/agent')
 
 
-def _try_parse_explore_line(raw_line: str) -> str | None:
-    """Parse a single line from the exploration agent into an NDJSON line.
-
-    The LLM is prompted to output one JSON object per line.  Older prompts
-    used an SSE-style ``data: `` prefix which we strip for compatibility.
-    Non-JSON lines (thinking text, blank lines) are silently dropped.
-    """
-    line = raw_line.strip()
-    if not line:
-        return None
-    if line.startswith("data:"):
-        line = line[5:].lstrip()
-    if not line.startswith("{"):
-        return None
-    try:
-        obj = json.loads(line)
-        if "type" not in obj:
-            obj = {"type": "question", **obj}
-        return json.dumps(obj, ensure_ascii=False) + "\n"
-    except (json.JSONDecodeError, ValueError):
-        return None
-
-
 def _with_warnings(gen):
     """Wrap an NDJSON generator to flush accumulated stream warnings.
 
@@ -318,229 +289,13 @@ def sort_data_request():
         logger.error("Error in sort-data", exc_info=e)
         raise classify_and_wrap_llm_error(e) from e
 
-@agent_bp.route('/derive-data', methods=['GET', 'POST'])
-def derive_data():
-    if not request.is_json:
-        raise AppError(ErrorCode.INVALID_REQUEST, "Invalid request format")
-
-    logger.info("# derive-data request")
-    content = request.get_json()        
-
-    client = get_client(content['model'])
-
-    input_tables = content["input_tables"]
-
-    instruction = content["extra_prompt"]
-
-    max_repair_attempts = content["max_repair_attempts"] if "max_repair_attempts" in content else 1
-    agent_coding_rules = content.get("agent_coding_rules", "")
-    current_visualization = content.get("current_visualization", None)
-    expected_visualization = content.get("expected_visualization", None)
-
-    if "additional_messages" in content:
-        prev_messages = content["additional_messages"]
-    else:
-        prev_messages = []
-
-    logger.debug("== input tables ===>")
-    for table in input_tables:
-        logger.debug(f"===> Table: {table['name']} (first 5 rows)")
-        logger.debug(table['rows'][:5])
-
-    logger.debug("== user spec ===")
-    logger.debug(instruction)
-
-    mode = "transform" if current_visualization or expected_visualization else "recommendation"
-    primary_tables = content.get("primary_tables", None)
-
-    try:
-        identity_id = get_identity_id()
-        workspace = get_workspace(identity_id)
-        max_display_rows = current_app.config['CLI_ARGS']['max_display_rows']
-
-        language_instruction = get_language_instruction(mode="compact")
-
-        model_info = {
-            "model": content['model'].get("model", ""),
-            "endpoint": content['model'].get("endpoint", ""),
-            "api_base": content['model'].get("api_base", ""),
-        }
-
-        knowledge_store = _get_knowledge_store(identity_id)
-
-        if mode == "recommendation":
-            agent = DataRecAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules, language_instruction=language_instruction, max_display_rows=max_display_rows, model_info=model_info, knowledge_store=knowledge_store)
-            results = agent.run(input_tables, instruction, n=1, prev_messages=prev_messages, primary_tables=primary_tables)
-        else:
-            agent = DataTransformationAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules, language_instruction=language_instruction, max_display_rows=max_display_rows, model_info=model_info, knowledge_store=knowledge_store)
-            results = agent.run(input_tables, instruction, prev_messages,
-                                current_visualization=current_visualization, expected_visualization=expected_visualization)
-
-        repair_attempts = 0
-        while (
-            isinstance(results, list)
-            and len(results) > 0
-            and results[0].get('status') in ('error', 'other error')
-            and repair_attempts < max_repair_attempts
-        ):
-            error_message = results[0].get('content', 'Unknown error')
-            logger.warning(f"[derive-data] Code generation failed (attempt {repair_attempts + 1}/{max_repair_attempts}), mode={mode}. Error: {error_message}")
-            new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur."
-
-            prev_dialog = results[0].get('dialog', [])
-
-            try:
-                if mode == "transform":
-                    results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1)
-                if mode == "recommendation":
-                    results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1)
-            except Exception as followup_exc:
-                logger.exception("derive_data followup failed")
-                results = [{
-                    "status": "error",
-                    "content": classify_llm_error(followup_exc),
-                    "code": "",
-                    "dialog": [],
-                }]
-                break
-
-            repair_attempts += 1
-            logger.warning(f"[derive-data] Repair attempt {repair_attempts}/{max_repair_attempts} result: {results[0].get('status', 'unknown')}")
-
-        if repair_attempts > 0:
-            logger.warning(f"[derive-data] Finished repair loop after {repair_attempts} attempt(s). Final status: {results[0].get('status', 'unknown')}")
-
-        for r in results:
-            if r.get("status") in ("error", "other error") and r.get("content"):
-                r["content"] = sanitize_error_message(r["content"])
-            sign_result(r)
-
-        return json_ok({"results": results})
-    except Exception as e:
-        logger.error("Error in derive-data", exc_info=e)
-        raise classify_and_wrap_llm_error(e) from e
-
-@agent_bp.route('/data-agent-streaming', methods=['GET', 'POST'])
-def data_agent_streaming():
-    """Streaming tool-calling data exploration agent endpoint.
-
-    The agent streams events as newline-delimited JSON:
-        text_delta  – streamed text from the agent (narration)
-        tool_start  – agent is about to call a tool (explore/visualize/clarify)
-        tool_result – tool execution result (visualize results match DataRecAgent format)
-        clarify     – clarification question (loop pauses)
-        done        – turn complete
-        error       – error information
-
-    To resume after a clarification, the client sends:
-        - trajectory: the trajectory list returned in the clarify event
-        - user_question: the user's reply (selections + freeform), already
-          assembled by the frontend (the same string shown in the timeline)
-    """
-    from data_formulator.error_handler import stream_error_event
-
-    if not request.is_json:
-        return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "Invalid request format"))
-
-    content = request.get_json()
-
-    identity_id = get_identity_id()
-    if not identity_id:
-        return stream_preflight_error(AppError(ErrorCode.AUTH_REQUIRED, "Identity ID required"))
-
-    client = get_client(content['model'])
-    workspace = get_workspace(identity_id)
-
-    input_tables = content["input_tables"]
-    user_question = content.get("user_question", "")
-    max_iterations = content.get("max_iterations", 5)
-    max_repair_attempts = content.get("max_repair_attempts", 1)
-    agent_exploration_rules = content.get("agent_exploration_rules", "")
-    agent_coding_rules = content.get("agent_coding_rules", "")
-    focused_thread = content.get("focused_thread", None)
-    other_threads = content.get("other_threads", None)
-    primary_tables = content.get("primary_tables", None)
-    attached_images = content.get("attached_images", None)
-    resume_trajectory = content.get("trajectory", None)
-    completed_step_count = content.get("completed_step_count", 0)
-
-    if resume_trajectory is not None and not str(user_question or "").strip():
-        return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "user_question is required to resume after clarification"))
-
-    logger.setLevel(logging.INFO)
-    logger.info("# data-agent-streaming request")
-    logger.debug("== input tables ===>")
-    for table in input_tables:
-        logger.debug(f"===> Table: {table['name']}")
-    logger.debug(f"== user question ===> {user_question}")
-    if attached_images:
-        logger.info(f"== attached_images ===> {len(attached_images)} image(s), sizes: {[len(img) for img in attached_images]}")
-
-    language_instruction = get_language_instruction(mode="full")
-
-    def generate():
-        try:
-            agent = DataAgent(
-                client=client,
-                workspace=workspace,
-                agent_exploration_rules=agent_exploration_rules,
-                agent_coding_rules=agent_coding_rules,
-                language_instruction=language_instruction,
-                max_iterations=max_iterations,
-                max_repair_attempts=max_repair_attempts,
-                identity_id=identity_id,
-            )
-
-            trajectory = None
-            if resume_trajectory:
-                # Append the user's reply (already assembled by the frontend
-                # from option clicks + any typed instructions) as a normal
-                # user message. The LLM correlates numbered selections back
-                # to the questions in the immediately preceding assistant
-                # message.
-                trajectory = list(resume_trajectory)
-                trajectory.append({
-                    "role": "user",
-                    "content": user_question,
-                })
-                logger.debug("== resuming after clarification ===>")
-
-            for event in agent.run(
-                input_tables=input_tables,
-                user_question=user_question,
-                focused_thread=focused_thread,
-                other_threads=other_threads,
-                trajectory=trajectory,
-                completed_step_count=completed_step_count,
-                primary_tables=primary_tables,
-                attached_images=attached_images,
-            ):
-                yield json.dumps(event, ensure_ascii=False) + '\n'
-
-                if event.get("type") in ("completion", "clarify", "explain"):
-                    break
-
-        except Exception as e:
-            logger.error("Error in data-agent-streaming", exc_info=e)
-            yield stream_error_event(classify_and_wrap_llm_error(e))
-
-        logger.setLevel(logging.WARNING)
-
-    return Response(
-        stream_with_context(_with_warnings(generate())),
-        mimetype='application/x-ndjson',
-    )
-
-
 @agent_bp.route('/analyst-streaming', methods=['GET', 'POST'])
 def analyst_streaming():
     """Unified AnalystAgent streaming endpoint (design-docs/35 + /36).
 
-    Parallel to ``/data-agent-streaming`` while the unified agent is validated
-    end-to-end; the legacy data-agent and report routes stay live and untouched.
     The single ``AnalystAgent`` subsumes both data exploration and report
     writing: it gathers with inspection tools, commits one action per turn
-    (``visualize`` / ``interact`` / ``delegate`` / ``write_report``), and streams
+    (``visualize`` / ``ask_user`` / ``delegate`` / ``write_report``), and streams
     the report live on the ``report`` channel (same ``text_delta`` event the
     frontend already routes).
 
@@ -645,92 +400,6 @@ def generate():
     )
 
 
-@agent_bp.route('/refine-data', methods=['GET', 'POST'])
-def refine_data():
-    if not request.is_json:
-        raise AppError(ErrorCode.INVALID_REQUEST, "Invalid request format")
-
-    logger.info("# refine-data request")
-    content = request.get_json()
-
-    client = get_client(content['model'])
-
-    input_tables = content["input_tables"]
-    dialog = content["dialog"]
-
-    new_instruction = content["new_instruction"]
-    latest_data_sample = content["latest_data_sample"]
-    max_repair_attempts = content.get("max_repair_attempts", 1)
-    agent_coding_rules = content.get("agent_coding_rules", "")
-    current_visualization = content.get("current_visualization", None)
-    expected_visualization = content.get("expected_visualization", None)
-
-    logger.debug("== input tables ===>")
-    for table in input_tables:
-        logger.debug(f"===> Table: {table['name']} (first 5 rows)")
-        logger.debug(table['rows'][:5])
-
-    logger.debug("== user spec ===>")
-    logger.debug(new_instruction)
-
-    try:
-        identity_id = get_identity_id()
-        workspace = get_workspace(identity_id)
-        max_display_rows = current_app.config['CLI_ARGS']['max_display_rows']
-
-        language_instruction = get_language_instruction(mode="compact")
-
-        model_info = {
-            "model": content['model'].get("model", ""),
-            "endpoint": content['model'].get("endpoint", ""),
-            "api_base": content['model'].get("api_base", ""),
-        }
-
-        knowledge_store = _get_knowledge_store(identity_id)
-        agent = DataTransformationAgent(client=client, workspace=workspace, agent_coding_rules=agent_coding_rules, language_instruction=language_instruction, max_display_rows=max_display_rows, model_info=model_info, knowledge_store=knowledge_store)
-        results = agent.followup(input_tables, dialog, latest_data_sample, new_instruction, n=1,
-                                current_visualization=current_visualization, expected_visualization=expected_visualization)
-
-        repair_attempts = 0
-        while (
-            isinstance(results, list)
-            and len(results) > 0
-            and results[0].get('status') in ('error', 'other error')
-            and repair_attempts < max_repair_attempts
-        ):
-            error_message = results[0].get('content', 'Unknown error')
-            logger.info(f"[refine-data] Code generation failed (attempt {repair_attempts + 1}/{max_repair_attempts}). Error: {error_message}")
-            new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur."
-            prev_dialog = results[0].get('dialog', [])
-
-            try:
-                results = agent.followup(input_tables, prev_dialog, [], new_instruction, n=1)
-            except Exception as followup_exc:
-                logger.exception("refine_data followup failed")
-                results = [{
-                    "status": "error",
-                    "content": classify_llm_error(followup_exc),
-                    "code": "",
-                    "dialog": [],
-                }]
-                break
-
-            repair_attempts += 1
-            logger.info(f"[refine-data] Repair attempt {repair_attempts}/{max_repair_attempts} result: {results[0].get('status', 'unknown')}")
-
-        if repair_attempts > 0:
-            logger.info(f"[refine-data] Finished repair loop after {repair_attempts} attempt(s). Final status: {results[0].get('status', 'unknown')}")
-
-        for r in results:
-            if r.get("status") in ("error", "other error") and r.get("content"):
-                r["content"] = sanitize_error_message(r["content"])
-            sign_result(r)
-
-        return json_ok({"results": results})
-    except Exception as e:
-        logger.error("Error in refine-data", exc_info=e)
-        raise classify_and_wrap_llm_error(e) from e
-
 @agent_bp.route('/code-expl', methods=['GET', 'POST'])
 def request_code_expl():
     if not request.is_json:
@@ -821,133 +490,6 @@ def request_chart_insight():
         logger.error("Error in chart-insight", exc_info=e)
         raise classify_and_wrap_llm_error(e) from e
 
-@agent_bp.route('/get-recommendation-questions', methods=['GET', 'POST'])
-def get_recommendation_questions():
-    from data_formulator.error_handler import stream_error_event
-
-    if not request.is_json:
-        return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "Invalid request format"))
-
-    logger.info("# get recommendation questions request")
-    content = request.get_json()
-
-    client = get_client(content['model'])
-    input_tables = content.get("input_tables", [])
-    identity_id = get_identity_id()
-    workspace = get_workspace(identity_id)
-
-    agent_exploration_rules = content.get("agent_exploration_rules", "")
-    start_question = content.get("start_question", None)
-    current_chart = content.get("current_chart", None)
-    focused_thread = content.get("focused_thread", None)
-    other_threads = content.get("other_threads", None)
-    primary_tables = content.get("primary_tables", None)
-    exploration_thread = content.get("exploration_thread", None)
-    current_data_sample = content.get("current_data_sample", None)
-
-    knowledge_store = _get_knowledge_store(identity_id)
-
-    def generate():
-        agent = InteractiveExploreAgent(client=client, workspace=workspace,
-                                        agent_exploration_rules=agent_exploration_rules,
-                                        language_instruction=get_language_instruction(),
-                                        knowledge_store=knowledge_store)
-        try:
-            text_buf = ""
-            for chunk in agent.run(
-                input_tables,
-                start_question=start_question,
-                focused_thread=focused_thread,
-                other_threads=other_threads,
-                primary_tables=primary_tables,
-                current_chart=current_chart,
-                exploration_thread=exploration_thread,
-                current_data_sample=current_data_sample,
-            ):
-                if isinstance(chunk, dict):
-                    # Flush pending text before emitting structured event
-                    while "\n" in text_buf:
-                        line, text_buf = text_buf.split("\n", 1)
-                        ndjson_line = _try_parse_explore_line(line)
-                        if ndjson_line:
-                            yield ndjson_line
-                    if "type" not in chunk:
-                        chunk = {"type": "question", **chunk}
-                    yield json.dumps(chunk, ensure_ascii=False) + "\n"
-                    continue
-                text_buf += chunk
-                while "\n" in text_buf:
-                    line, text_buf = text_buf.split("\n", 1)
-                    ndjson_line = _try_parse_explore_line(line)
-                    if ndjson_line:
-                        yield ndjson_line
-            if text_buf.strip():
-                ndjson_line = _try_parse_explore_line(text_buf)
-                if ndjson_line:
-                    yield ndjson_line
-        except Exception as e:
-            logger.exception("get-recommendation-questions failed")
-            yield stream_error_event(classify_and_wrap_llm_error(e))
-
-    return Response(
-        stream_with_context(_with_warnings(generate())),
-        mimetype='application/x-ndjson',
-    )
-
-
-@agent_bp.route('/generate-report-chat', methods=['POST'])
-def generate_report_chat():
-    """Chat-driven report generation via @report-agent.
-
-    Accepts lightweight context + user prompt.  The agent inspects
-    charts/data on demand via tool calls and streams the report with
-    embed_chart / embed_table events.
-    """
-    from data_formulator.error_handler import stream_error_event
-
-    if not request.is_json:
-        return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "Invalid request format"))
-
-    logger.info("# generate report chat request")
-    content = request.get_json()
-
-    client = get_client(content['model'])
-    identity_id = get_identity_id()
-    workspace = get_workspace(identity_id)
-
-    input_tables = content.get("input_tables", [])
-    charts = content.get("charts", [])
-    user_prompt = content.get("user_prompt", "Create a report summarizing the exploration.")
-    focused_thread = content.get("focused_thread", None)
-    other_threads = content.get("other_threads", None)
-    primary_tables = content.get("primary_tables", None)
-
-    def generate():
-        agent = ReportGenAgent(
-            client=client,
-            workspace=workspace,
-            language_instruction=get_language_instruction(),
-        )
-        try:
-            for event in agent.run(
-                input_tables,
-                charts,
-                user_prompt=user_prompt,
-                focused_thread=focused_thread,
-                other_threads=other_threads,
-                primary_tables=primary_tables,
-            ):
-                yield json.dumps(event, ensure_ascii=False) + '\n'
-        except Exception as e:
-            logger.exception("generate-report-chat failed")
-            yield stream_error_event(classify_and_wrap_llm_error(e))
-
-    return Response(
-        stream_with_context(_with_warnings(generate())),
-        mimetype='application/x-ndjson',
-    )
-
-
 @agent_bp.route('/refresh-derived-data', methods=['POST'])
 def refresh_derived_data():
     """
diff --git a/src/app/App.tsx b/src/app/App.tsx
index b63c0986..7d873877 100644
--- a/src/app/App.tsx
+++ b/src/app/App.tsx
@@ -51,8 +51,6 @@ import {
     ListItemText,
     CircularProgress,
     LinearProgress,
-    Switch,
-    FormControlLabel,
 } from '@mui/material';
 
 
@@ -162,32 +160,6 @@ const TopNavButton: FC<{ to: string; label: string; selected: boolean }> = ({ to
     </Button>
 );
 
-// Dev-only toggle to route the data-agent chat through the unified
-// AnalystAgent (design-docs/35/36). Source of truth is localStorage
-// (`df_useAnalystAgent`), which `exploreFromChat` reads fresh per run, so the
-// switch stays in sync without any shared store wiring.
-const AnalystAgentToggle: FC = () => {
-    const [on, setOn] = useState(() => localStorage.getItem('df_useAnalystAgent') === '1');
-    return (
-        <FormControlLabel
-            sx={{ ml: 0.5, mr: 0.5 }}
-            control={
-                <Switch
-                    size="small"
-                    checked={on}
-                    onChange={(e) => {
-                        const next = e.target.checked;
-                        setOn(next);
-                        if (next) localStorage.setItem('df_useAnalystAgent', '1');
-                        else localStorage.removeItem('df_useAnalystAgent');
-                    }}
-                />
-            }
-            label={<Typography sx={{ fontSize: '0.7rem', color: 'text.secondary' }}>Analyst</Typography>}
-        />
-    );
-};
-
 declare module '@mui/material/styles' {
     interface PaletteColor {
         bgcolor?: string;
@@ -864,7 +836,6 @@ const AppShell: FC = () => {
                         )}
                         {isAppPage && (
                             <Box sx={{ display: 'flex', ml: 'auto', fontSize: 14, alignItems: 'center' }}>
-                                <AnalystAgentToggle />
                                 <LanguageSwitcher />
                                 <ConfigDialog />
                                 <Divider orientation="vertical" variant="middle" flexItem />
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index aca29fe3..87963e7d 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -131,6 +131,12 @@ export interface GeneratedReport {
     contentSnapshotHash?: string;
     prompt?: string;
     status?: 'generating' | 'completed' | 'error';
+    // The run's closing answer (the agent's summary of what the report covers).
+    // Owned by the report — not borrowed onto a table's interaction log — so it
+    // is rendered and deleted together with the report (no cross-collection
+    // tagging). `summaryThought` is the agent's reasoning behind that summary.
+    summary?: string;
+    summaryThought?: string;
     generatingPhase?: 'inspecting' | 'writing';  // transient: which phase the agent is in while generating
     // transient: accumulated inspect steps, flipped to done on completion.
     // `charts` carries lightweight descriptors (chartType for the icon + a
@@ -1853,13 +1859,17 @@ export const dataFormulatorSlice = createSlice({
                 state.viewMode = 'editor';
             }
         },
-        updateGeneratedReportContent: (state, action: PayloadAction<{ id: string; content: string; status?: GeneratedReport['status']; title?: string; triggerTableId?: string }>) => {
-            const { id, content, status, title, triggerTableId } = action.payload;
+        updateGeneratedReportContent: (state, action: PayloadAction<{ id: string; content: string; status?: GeneratedReport['status']; title?: string; triggerTableId?: string; summary?: string; summaryThought?: string }>) => {
+            const { id, content, status, title, triggerTableId, summary, summaryThought } = action.payload;
             const report = state.generatedReports.find(r => r.id === id);
             if (report) {
                 report.content = content;
                 if (title) report.title = title;
                 if (status) report.status = status;
+                // The run's closing answer is owned by the report (rendered and
+                // deleted with it), not appended to a table's interaction log.
+                if (summary !== undefined) report.summary = summary;
+                if (summaryThought !== undefined) report.summaryThought = summaryThought;
                 // Re-anchor the report to the latest table produced during the
                 // run so it renders against the newest thread item (like charts).
                 if (triggerTableId) report.triggerTableId = triggerTableId;
diff --git a/src/app/useFormulateData.ts b/src/app/useFormulateData.ts
deleted file mode 100644
index 160f8a85..00000000
--- a/src/app/useFormulateData.ts
+++ /dev/null
@@ -1,615 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT License.
-
-import { useSelector, useDispatch } from 'react-redux';
-import { useTranslation } from 'react-i18next';
-import { DataFormulatorState, dfActions, dfSelectors, fetchCodeExpl, fetchChartInsight, fetchFieldSemanticType } from './dfSlice';
-import { AppDispatch } from './store';
-import { Chart, FieldItem, Trigger, createDictTable, DictTable } from '../components/ComponentType';
-import { getUrls, getTriggers, translateBackend } from './utils';
-import { apiRequest, streamRequest } from './apiClient';
-import { getErrorMessage } from './errorCodes';
-import { persistEphemeralDerivedTable } from './tableThunks';
-
-export type IdeaItem = {
-    text: string;
-    goal: string;
-    tag: 'deep-dive' | 'pivot' | 'broaden' | 'cross-data' | 'statistical' | string;
-};
-
-export interface StreamIdeasOptions {
-    actionTableIds: string[];
-    currentTable: DictTable;
-    onIdeas: (ideas: IdeaItem[]) => void;
-    onThinkingBuffer: (buffer: string) => void;
-    onLoadingChange: (loading: boolean) => void;
-    /** Backend progress phase updates (e.g. "building_context", "generating") */
-    onProgress?: (phase: string) => void;
-    /** Chart image (PNG data URL) for current visualization context */
-    currentChartImage?: string | null;
-    /** Sample rows from the current table */
-    currentDataSample?: any[];
-    /** Optional start question for idea generation */
-    startQuestion?: string;
-}
-
-export interface FormulateDataOptions {
-    instruction: string;
-    mode: 'formulate' | 'ideate';
-    actionTableIds: string[];
-    currentTable: DictTable;
-    overrideTableId?: string;
-    currentVisualization?: any;
-    expectedVisualization?: any;
-    /** The chart spec to embed in the trigger for the derived table */
-    triggerChart: Chart;
-    /**
-     * Component-specific chart creation callback.
-     * Called with the candidate table, refined goal, and resolved concepts.
-     * Should dispatch chart creation actions and return the focused chart ID (or undefined).
-     */
-    createChart: (params: {
-        candidateTable: DictTable;
-        refinedGoal: any;
-        currentConcepts: FieldItem[];
-    }) => string | undefined;
-    /** Called before the request is made */
-    onStarted?: () => void;
-    /** Called on successful formulation */
-    onSuccess?: (params: { displayInstruction: string; candidateTable: DictTable; focusedChartId?: string }) => void;
-    /** Called on error */
-    onError?: (error: any) => void;
-    /** Called after the request completes (success or error) */
-    onFinally?: () => void;
-}
-
-function generateTableId(tables: DictTable[]): string {
-    let tableSuffix = Number.parseInt((Date.now() - Math.floor(Math.random() * 10000)).toString().slice(-6));
-    let tableId = `table-${tableSuffix}`;
-    while (tables.find(t => t.id === tableId) !== undefined) {
-        tableSuffix += 1;
-        tableId = `table-${tableSuffix}`;
-    }
-    return tableId;
-}
-
-/**
- * Shared hook for data formulation and idea streaming.
- * Used by both EncodingShelfCard (chart-aware formulation) and ChartRecBox (NL-driven formulation).
- */
-export function useFormulateData() {
-    const dispatch = useDispatch<AppDispatch>();
-    const { t } = useTranslation();
-    const tables = useSelector((state: DataFormulatorState) => state.tables);
-    const config = useSelector((state: DataFormulatorState) => state.config);
-    const conceptShelfItems = useSelector((state: DataFormulatorState) => state.conceptShelfItems);
-    const charts = useSelector(dfSelectors.getAllCharts);
-    const activeModel = useSelector(dfSelectors.getActiveModel);
-    const workspaceBackend = useSelector((state: DataFormulatorState) => state.serverConfig.WORKSPACE_BACKEND);
-    const activeWorkspaceId = useSelector((state: DataFormulatorState) => state.activeWorkspace?.id);
-
-    /**
-     * Resolve the actual chart that's rendered for a derived table. The
-     * `trigger.chart` saved on the table is just an "Auto" stub generated
-     * during the agent run — the chart the user actually sees lives in the
-     * Redux `charts` slice. Mirrors the lookup in `SimpleChartRecBox`.
-     */
-    function resolveChartForTable(tableId: string) {
-        return charts.find(c => c.tableRef === tableId && c.source === 'trigger')
-            || charts.find(c => c.tableRef === tableId);
-    }
-
-    /** Map a chart's encodingMap to `{ channel: fieldName }` (skips empties). */
-    function chartEncodingsByName(chart: Chart | undefined): Record<string, string> {
-        if (!chart?.encodingMap) return {};
-        return Object.fromEntries(
-            Object.entries(chart.encodingMap)
-                .filter(([, v]: [string, any]) => v?.fieldID)
-                .map(([k, v]: [string, any]) => {
-                    const field = conceptShelfItems.find(f => f.id === v.fieldID);
-                    return [k, field?.name || v.fieldID];
-                })
-        );
-    }
-
-    /**
-     * Build a rich focused thread from the current table's derivation chain.
-     * Each step includes: user question, display instruction, chart type + encodings,
-     * created table metadata, and agent summary.
-     */
-    function buildFocusedThread(currentTable: DictTable): any[] {
-        if (!currentTable.derive || currentTable.anchored) return [];
-        const triggers = getTriggers(currentTable, tables);
-        return triggers.map(trigger => {
-            const resultTable = tables.find(t2 => t2.id === trigger.resultTableId);
-            const interaction = trigger.interaction || [];
-            const userPrompt = interaction.find(e => e.role === 'prompt')?.content;
-            const instruction = interaction.find(e => e.role === 'instruction');
-            const summary = interaction.find(e => e.role === 'summary');
-            // Resolve the actual rendered chart (not the trigger's "Auto" stub)
-            // so chart_type + encodings reflect what the user is looking at.
-            const resolvedChart = resolveChartForTable(trigger.resultTableId);
-            return {
-                user_question: userPrompt || instruction?.content || '',
-                display_instruction: instruction?.displayContent || instruction?.content || '',
-                agent_thinking: instruction?.plan,
-                agent_summary: summary?.content,
-                table_name: resultTable?.virtual?.tableId || trigger.resultTableId,
-                columns: resultTable?.names || [],
-                row_count: resultTable?.virtual?.rowCount ?? resultTable?.rows?.length ?? 0,
-                chart_type: resolvedChart?.chartType || '',
-                encodings: chartEncodingsByName(resolvedChart),
-            };
-        });
-    }
-
-    /**
-     * Build a legacy exploration thread (flat table list) for backward compatibility.
-     */
-    function buildExplorationThread(currentTable: DictTable): any[] {
-        if (!currentTable.derive || currentTable.anchored) return [];
-        const triggers = getTriggers(currentTable, tables);
-        return triggers.map(trigger => ({
-            name: trigger.resultTableId,
-            rows: tables.find(t2 => t2.id === trigger.resultTableId)?.rows,
-            description: `Derive from ${tables.find(t2 => t2.id === trigger.resultTableId)?.derive?.source}`,
-        }));
-    }
-
-    /**
-     * Build peripheral thread summaries — leaf tables in the workspace that
-     * are NOT part of the focused chain. Mirrors the data agent's Tier 3
-     * context (`SimpleChartRecBox.exploreFromChat`): all leaves except the
-     * focused one, with per-step `display → chart_type (encodings)` lines
-     * using resolved field names.
-     */
-    function buildOtherThreads(currentTable: DictTable): any[] {
-        // Collect all table IDs in the focused thread
-        const focusedIds = new Set<string>();
-        if (currentTable.derive && !currentTable.anchored) {
-            const triggers = getTriggers(currentTable, tables);
-            for (const t of triggers) {
-                focusedIds.add(t.resultTableId);
-            }
-        }
-        focusedIds.add(currentTable.id);
-
-        // Find every leaf table (no children, or all children anchored) that
-        // is derived from somewhere and NOT part of the focused chain.
-        const otherThreads: any[] = [];
-        for (const table of tables) {
-            if (focusedIds.has(table.id)) continue;
-            if (!table.derive) continue;
-            const children = tables.filter(c => c.derive?.trigger?.tableId === table.id);
-            const isLeaf = children.length === 0 || children.every(c => c.anchored);
-            if (!isLeaf) continue;
-
-            const triggers = getTriggers(table, tables);
-            if (triggers.length === 0) continue;
-
-            const STEP_FINDING_CHAR_LIMIT = 200;
-            const steps = triggers.map(trigger => {
-                const instr = trigger.interaction?.find(e => e.role === 'instruction');
-                const label = instr?.displayContent || instr?.content || trigger.resultTableId;
-                // Use the actual rendered chart, not the trigger's "Auto" stub.
-                const chart = resolveChartForTable(trigger.resultTableId);
-                const chartType = chart?.chartType && chart.chartType !== 'Auto' ? chart.chartType : '';
-                const encStr = Object.entries(chartEncodingsByName(chart))
-                    .map(([k, v]) => `${k}: ${v}`)
-                    .join(', ');
-                // Per-step agent commentary: the `summary` entry the visualize
-                // action emits after running this step.
-                let finding = trigger.interaction?.find(
-                    e => e.role === 'summary',
-                )?.content?.trim() || '';
-                if (finding.length > STEP_FINDING_CHAR_LIMIT) {
-                    finding = finding.slice(0, STEP_FINDING_CHAR_LIMIT - 1).trimEnd() + '…';
-                }
-                const head = `${label}${chartType ? ` → ${chartType}` : ''}${encStr ? ` (${encStr})` : ''}`;
-                return finding ? `${head} — finding: ${finding}` : head;
-            });
-
-            const sourceTableId = triggers[0].tableId;
-            const sourceTable = tables.find(t => t.id === sourceTableId);
-            otherThreads.push({
-                source_table: sourceTable?.virtual?.tableId || sourceTableId,
-                leaf_table: table.virtual?.tableId || table.id,
-                step_count: triggers.length,
-                steps,
-            });
-        }
-        return otherThreads;
-    }
-
-    /**
-     * Stream ideas/recommendations from the exploration agent via SSE.
-     */
-    async function streamIdeas(options: StreamIdeasOptions): Promise<void> {
-        const {
-            actionTableIds, currentTable,
-            onIdeas, onThinkingBuffer, onLoadingChange, onProgress,
-            currentChartImage, currentDataSample,
-            startQuestion,
-        } = options;
-
-        onLoadingChange(true);
-        onThinkingBuffer("");
-        onIdeas([]);
-
-        let timeoutId: ReturnType<typeof setTimeout> | undefined;
-        let timedOut = false;
-        try {
-            const focusedThread = buildFocusedThread(currentTable);
-            const otherThreads = buildOtherThreads(currentTable);
-            const actionTables = actionTableIds.map(id => tables.find(t => t.id === id) as DictTable);
-
-            const messageBody = JSON.stringify({
-                model: activeModel,
-                input_tables: actionTables.map(t => ({
-                    name: t.virtual?.tableId || t.id.replace(/\.[^/.]+$/, ""),
-                })),
-                primary_tables: (() => {
-                    if (currentTable.derive && !currentTable.anchored) {
-                        return (currentTable.derive.source as string[]).map(id => {
-                            const t = tables.find(tbl => tbl.id === id);
-                            return t?.virtual?.tableId || id.replace(/\.[^/.]+$/, "");
-                        });
-                    }
-                    return [currentTable.virtual?.tableId || currentTable.id.replace(/\.[^/.]+$/, "")];
-                })(),
-                ...(focusedThread.length > 0 ? { focused_thread: focusedThread } : {}),
-                ...(otherThreads.length > 0 ? { other_threads: otherThreads } : {}),
-                ...(currentChartImage ? { current_chart: currentChartImage } : {}),
-                ...(startQuestion ? { start_question: startQuestion } : {}),
-            });
-
-            const engine = getUrls().GET_RECOMMENDATION_QUESTIONS;
-            const controller = new AbortController();
-            timeoutId = setTimeout(() => { timedOut = true; controller.abort(); }, config.formulateTimeoutSeconds * 1000);
-
-            const questions: IdeaItem[] = [];
-            for await (const event of streamRequest(engine, {
-                method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
-                body: messageBody,
-            }, controller.signal)) {
-                if (event.type === 'error') {
-                    throw new Error(event.error ? getErrorMessage(event.error) : t('messages.error'));
-                }
-                if (event.type === 'warning') {
-                    dispatch(dfActions.addMessages({
-                        timestamp: Date.now(), type: 'warning',
-                        component: 'exploration',
-                        value: (event as any).warning?.message ?? 'Warning from server',
-                    }));
-                    continue;
-                }
-                if (event.type === 'progress') {
-                    onProgress?.((event as any).phase);
-                    continue;
-                }
-                if (event.type === 'question' && (event as any).text) {
-                    questions.push({
-                        text: (event as any).text,
-                        goal: (event as any).goal,
-                        tag: (event as any).tag || 'deep-dive',
-                    });
-                    onIdeas([...questions]);
-                    continue;
-                }
-                if ((event as any).text) {
-                    onThinkingBuffer((event as any).text);
-                }
-            }
-            clearTimeout(timeoutId);
-            timeoutId = undefined;
-
-            if (questions.length === 0) {
-                throw new Error('No valid results returned from agent');
-            }
-        } catch (error) {
-            if (error instanceof DOMException && error.name === 'AbortError') {
-                if (timedOut) {
-                    dispatch(dfActions.addMessages({
-                        timestamp: Date.now(), type: 'warning',
-                        component: 'exploration',
-                        value: t('messages.agent.suggestionsTimedOut', { seconds: config.formulateTimeoutSeconds }),
-                    }));
-                }
-            } else {
-                dispatch(dfActions.addMessages({
-                    timestamp: Date.now(),
-                    type: "error",
-                    component: "chart builder",
-                    value: error instanceof Error ? error.message : t('messages.agent.unexpectedError'),
-                    detail: error instanceof Error ? error.message : 'Unknown error',
-                }));
-            }
-        } finally {
-            if (timeoutId) clearTimeout(timeoutId);
-            onLoadingChange(false);
-        }
-    }
-
-    /**
-     * Formulate data: send instruction to derive/refine endpoint and process the result.
-     * Handles request building, dialog continuation, table/concept creation, and error handling.
-     * Chart creation is delegated to the caller via the createChart callback.
-     */
-    async function formulateData(options: FormulateDataOptions): Promise<void> {
-        const {
-            instruction, mode, actionTableIds, currentTable,
-            overrideTableId, currentVisualization, expectedVisualization,
-            triggerChart, createChart,
-            onStarted, onSuccess, onError, onFinally,
-        } = options;
-
-        if (actionTableIds.length === 0) return;
-
-        onStarted?.();
-
-        const actionTables = actionTableIds.map(id => tables.find(t => t.id === id) as DictTable);
-
-        // Build input_tables payload (shared across all request variants)
-        const inputTablesPayload = actionTables.map(t => ({
-            name: t.virtual?.tableId || t.id.replace(/\.[^/.]+$/, ""),
-            rows: t.rows,
-        }));
-
-        // Determine primary table names for agent context prioritization
-        // For derived tables, all source tables are primary; for source tables, just the current one
-        const primaryTableNames = (() => {
-            if (currentTable.derive && !currentTable.anchored) {
-                return (currentTable.derive.source as string[]).map(id => {
-                    const t = tables.find(tbl => tbl.id === id);
-                    return t?.virtual?.tableId || id.replace(/\.[^/.]+$/, "");
-                });
-            }
-            return [currentTable.virtual?.tableId || currentTable.id.replace(/\.[^/.]+$/, "")];
-        })();
-
-        // Build base request body
-        let messageBody: any = {
-            mode,
-            input_tables: inputTablesPayload,
-            primary_tables: primaryTableNames,
-            extra_prompt: instruction,
-            model: activeModel,
-            ...(currentVisualization ? { current_visualization: currentVisualization } : {}),
-            ...(expectedVisualization ? { expected_visualization: expectedVisualization } : {}),
-        };
-        let engine = getUrls().DERIVE_DATA;
-
-        // Handle dialog continuation / refinement
-        if (currentTable.derive?.dialog && !currentTable.anchored) {
-            const sourceTableIds = currentTable.derive.source;
-            const tableIdsChanged = !sourceTableIds.every((id: string) => actionTableIds.includes(id)) ||
-                !actionTableIds.every(id => sourceTableIds.includes(id));
-
-            if (mode === 'ideate' || tableIdsChanged) {
-                // Start fresh with prior dialog as additional context
-                messageBody.additional_messages = currentTable.derive.dialog;
-                engine = getUrls().DERIVE_DATA;
-            } else {
-                // Refine: continue existing dialog
-                messageBody = {
-                    mode,
-                    input_tables: inputTablesPayload,
-                    dialog: currentTable.derive.dialog,
-                    latest_data_sample: currentTable.rows.slice(0, 10),
-                    new_instruction: instruction,
-                    model: activeModel,
-                    ...(currentVisualization ? { current_visualization: currentVisualization } : {}),
-                    ...(expectedVisualization ? { expected_visualization: expectedVisualization } : {}),
-                };
-                engine = getUrls().REFINE_DATA;
-            }
-        }
-
-        const controller = new AbortController();
-        let timedOut = false;
-        const timeoutId = setTimeout(() => { timedOut = true; controller.abort(); }, config.formulateTimeoutSeconds * 1000);
-
-        apiRequest(engine, {
-            method: 'POST',
-            headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify(messageBody),
-            signal: controller.signal,
-        })
-        .then(async ({ data }) => {
-            if (!data.results || data.results.length === 0) {
-                dispatch(dfActions.addMessages({
-                    "timestamp": Date.now(),
-                    "component": "chart builder",
-                    "type": "error",
-                    "value": "No result is returned from the data formulation agent. Please try again.",
-                }));
-                onError?.(new Error("No results returned"));
-                return;
-            }
-
-            const candidates = data["results"].filter((item: any) => item["status"] === "ok");
-
-            if (candidates.length === 0) {
-                const firstResult = data.results[0];
-                dispatch(dfActions.addMessages({
-                    "timestamp": Date.now(),
-                    "type": "error",
-                    "component": "chart builder",
-                    "value": "Data formulation failed, please try again.",
-                    "code": firstResult.code,
-                    "detail": translateBackend(firstResult.content, firstResult.content_code),
-                    "diagnostics": firstResult.diagnostics,
-                }));
-                onError?.(new Error("All candidates failed"));
-                return;
-            }
-
-            // Process the best candidate
-            const candidate = candidates[0];
-            const code = candidate["code"];
-            const codeSignature = candidate["code_signature"]; // HMAC signature from server
-            const rows = candidate["content"]["rows"];
-            const dialog = candidate["dialog"];
-            const refinedGoal = candidate['refined_goal'];
-            const displayInstruction = refinedGoal["display_instruction"];
-
-            // Determine table ID
-            let candidateTableId: string;
-            if (overrideTableId) {
-                candidateTableId = overrideTableId;
-            } else if (candidate["content"]["virtual"]) {
-                candidateTableId = candidate["content"]["virtual"]["table_name"];
-            } else {
-                candidateTableId = generateTableId(tables);
-            }
-
-            // Create trigger
-            // Resolve input table names from agent's response
-            const agentInputTables: string[] = refinedGoal['input_tables'] || [];
-            const resolvedSourceIds = agentInputTables.length > 0
-                ? actionTableIds.filter(id => {
-                    const t = tables.find(tbl => tbl.id === id);
-                    if (!t) return false;
-                    const name = t.virtual?.tableId || t.id.replace(/\.[^/.]+$/, "");
-                    return agentInputTables.some((n: string) => n.replace(/\.[^/.]+$/, "") === name);
-                })
-                : actionTableIds;
-            const resolvedSourceNames = (resolvedSourceIds.length > 0 ? resolvedSourceIds : actionTableIds).map(id => {
-                const t = tables.find(tbl => tbl.id === id);
-                return t?.displayId || t?.virtual?.tableId || id.replace(/\.[^/.]+$/, "");
-            });
-            const trigger: Trigger = {
-                tableId: currentTable.id,
-                resultTableId: candidateTableId,
-                chart: triggerChart,
-                interaction: [{
-                    from: 'user' as const,
-                    to: 'datarec-agent' as const,
-                    role: 'instruction' as const,
-                    content: instruction,
-                    displayContent: displayInstruction,
-                    inputTableNames: resolvedSourceNames,
-                    timestamp: Date.now(),
-                }],
-            };
-
-            // Create candidate table with derive info
-            const candidateTable = createDictTable(candidateTableId, rows, {
-                code,
-                codeSignature,
-                outputVariable: refinedGoal['output_variable'] || 'result_df',
-                source: resolvedSourceIds.length > 0 ? resolvedSourceIds : actionTableIds,
-                dialog,
-                trigger,
-            });
-
-            if (candidate["content"]["virtual"]) {
-                candidateTable.virtual = {
-                    tableId: candidate["content"]["virtual"]["table_name"],
-                    rowCount: candidate["content"]["virtual"]["row_count"],
-                };
-            }
-
-            // Bootstrap metadata from agent field_metadata (temporary until fetchFieldSemanticType completes)
-            const fieldMetadata = refinedGoal['field_metadata'];
-            if (fieldMetadata && typeof fieldMetadata === 'object') {
-                for (const [fieldName, meta] of Object.entries(fieldMetadata)) {
-                    if (!candidateTable.metadata[fieldName]) continue;
-                    if (typeof meta === 'string') {
-                        // Plain string format: { "field": "SemanticType" }
-                        candidateTable.metadata[fieldName].semanticType = meta;
-                    } else if (typeof meta === 'object' && meta !== null) {
-                        // Dict format: { "field": { "semantic_type": "...", "unit": "...", ... } }
-                        const m = meta as Record<string, any>;
-                        if (m['semantic_type']) {
-                            candidateTable.metadata[fieldName].semanticType = m['semantic_type'];
-                        }
-                        if (m['unit']) {
-                            candidateTable.metadata[fieldName].unit = m['unit'];
-                        }
-                        if (m['intrinsic_domain']) {
-                            candidateTable.metadata[fieldName].intrinsicDomain = m['intrinsic_domain'];
-                        }
-                    }
-                }
-            }
-
-            const fieldDisplayNames = refinedGoal['field_display_names'];
-            if (fieldDisplayNames && typeof fieldDisplayNames === 'object') {
-                for (const [fieldName, displayName] of Object.entries(fieldDisplayNames)) {
-                    if (candidateTable.metadata[fieldName] && typeof displayName === 'string') {
-                        candidateTable.metadata[fieldName].displayName = displayName;
-                    }
-                }
-            }
-
-            // Ephemeral mode: persist full rows to IndexedDB (keeps only a
-            // sample + virtual marker in Redux). Other backends store on the server.
-            const persistedTable = (workspaceBackend === 'ephemeral' && activeWorkspaceId)
-                ? await persistEphemeralDerivedTable(activeWorkspaceId, candidateTable)
-                : candidateTable;
-
-            // Insert or override table
-            if (overrideTableId) {
-                dispatch(dfActions.overrideDerivedTables(persistedTable));
-            } else {
-                dispatch(dfActions.insertDerivedTables(persistedTable));
-            }
-
-            // Add missing concepts
-            const names = persistedTable.names;
-            const missingNames = names.filter((name: string) => !conceptShelfItems.some(field => field.name === name));
-            const conceptsToAdd = missingNames.map((name: string) => ({
-                id: `concept-${name}-${Date.now()}`,
-                name,
-                source: "custom",
-                tableRef: "custom",
-            } as FieldItem));
-
-            dispatch(dfActions.addConceptItems(conceptsToAdd));
-            dispatch(fetchFieldSemanticType(persistedTable));
-            dispatch(fetchCodeExpl(persistedTable));
-
-            // Compute current concepts for chart creation
-            const currentConcepts = [...conceptShelfItems.filter(c => names.includes(c.name)), ...conceptsToAdd];
-
-            // Delegate chart creation to the caller
-            const focusedChartId = createChart({ candidateTable: persistedTable, refinedGoal, currentConcepts });
-
-            if (focusedChartId) {
-                dispatch(fetchChartInsight({ chartId: focusedChartId, tableId: persistedTable.id }) as any);
-            }
-
-            onSuccess?.({ displayInstruction, candidateTable: persistedTable, focusedChartId });
-        })
-        .catch((error) => {
-            if (error.name === 'AbortError') {
-                if (timedOut) {
-                    dispatch(dfActions.addMessages({
-                        timestamp: Date.now(),
-                        component: "chart builder",
-                        type: "warning",
-                        value: t('messages.agent.formulationTimedOut', { seconds: config.formulateTimeoutSeconds }),
-                    }));
-                }
-            } else {
-                console.error(error);
-                dispatch(dfActions.addMessages({
-                    timestamp: Date.now(),
-                    component: "chart builder",
-                    type: "error",
-                    value: t('messages.agent.unexpectedError'),
-                    detail: error.message,
-                }));
-            }
-            onError?.(error);
-        })
-        .finally(() => {
-            clearTimeout(timeoutId);
-            onFinally?.();
-        });
-    }
-
-    return { streamIdeas, formulateData };
-}
diff --git a/src/app/utils.tsx b/src/app/utils.tsx
index cd10b87f..68f558ee 100644
--- a/src/app/utils.tsx
+++ b/src/app/utils.tsx
@@ -31,9 +31,6 @@ export function getUrls() {
         CHART_INSIGHT_URL: `/api/agent/chart-insight`,
         SERVER_PROCESS_DATA_ON_LOAD: `/api/agent/process-data-on-load`,
 
-        DERIVE_DATA: `/api/agent/derive-data`,
-        REFINE_DATA: `/api/agent/refine-data`,
-        DATA_AGENT_STREAMING: `/api/agent/data-agent-streaming`,
         ANALYST_STREAMING: `/api/agent/analyst-streaming`,
 
         // these functions involves database
@@ -52,7 +49,6 @@ export function getUrls() {
         EXPORT_TABLE_CSV: `/api/tables/export-table-csv`,
 
         GET_RECOMMENDATION_QUESTIONS: `/api/agent/get-recommendation-questions`,
-        GENERATE_REPORT_CHAT: `/api/agent/generate-report-chat`,
 
         // Workspace display name (auto-naming)
         WORKSPACE_NAME: `/api/agent/workspace-name`,
diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index 2e6c1de6..72b8f426 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -530,8 +530,6 @@
     "replyPlaceholder": "Reply to agent's question...",
     "explorePlaceholder": "Ask questions or describe what to explore (add context with @)",
     "explorePlaceholderSingleTable": "Ask questions or describe what to explore",
-    "reportPlaceholder": "Describe what report to create (add context with @)",
-    "reportPlaceholderSingleTable": "Describe what report to create",
     "addMoreData": "Add more data to the workspace",
     "mentionTable": "Add a table to the context (@)",
     "searchTables": "Search tables...",
@@ -539,6 +537,9 @@
     "getIdeaSuggestions": "Get idea suggestions",
     "exploreIdeasPrompt": "Help me decide what to explore next — use a `clarify` action to give me 3–5 options, and don't pick one for me yet.\n\nEach option should be a short, clickable direction — for example, drill into a detail, pivot to a different angle, broaden the view, bring in another table, or try a statistical technique. Add a **very brief** one-line rationale for each option (no more than 10 words).",
     "askedForRecommendations": "What should I explore next?",
+    "generateReport": "Generate a report",
+    "reportPrompt": "Write a report summarizing the key findings from this exploration.",
+    "askedForReport": "Write a report summarizing the exploration.",
     "endConversation": "End conversation",
     "sendReply": "Send reply",
     "explore": "Explore",
@@ -579,14 +580,9 @@
     "errorDuringExploration": "Error during exploration",
     "explorationStep": "Exploration step {{step}}: {{question}}",
     "threadExplorePrompt": "Explore interesting patterns and trends in this data",
-    "threadReportPrompt": "Summarize the key findings from this exploration",
     "explorationThreadDeriveDescription": "Derive from {{source}} with instruction: {{instruction}}",
     "explorationStepCodeComment": "# Exploration step {{step}}",
-    "maxIterationsReached": "Reached the maximum number of exploration steps.",
-    "switchToReport": "Switch to Report mode",
-    "switchToExplore": "Switch to Explore mode",
-    "modeExplore": "Explore",
-    "modeReport": "Report"
+    "maxIterationsReached": "Reached the maximum number of exploration steps."
   },
   "dataGrid": {
     "loading": "Loading ...",
diff --git a/src/i18n/locales/zh/common.json b/src/i18n/locales/zh/common.json
index bf8bed1c..60335ce0 100644
--- a/src/i18n/locales/zh/common.json
+++ b/src/i18n/locales/zh/common.json
@@ -581,8 +581,6 @@
     "replyPlaceholder": "回复 Agent 的问题...",
     "explorePlaceholder": "有什么问题，有什么想要探索的？（用 @ 添加上下文）",
     "explorePlaceholderSingleTable": "有什么问题，有什么想要探索的？",
-    "reportPlaceholder": "描述要创建的报告（用 @ 添加上下文）",
-    "reportPlaceholderSingleTable": "描述要创建的报告",
     "addMoreData": "向工作区添加更多数据",
     "mentionTable": "添加表到上下文 (@)",
     "searchTables": "搜索表...",
@@ -590,6 +588,9 @@
     "getIdeaSuggestions": "获取建议",
     "exploreIdeasPrompt": "帮我决定下一步探索什么 —— 请使用 `clarify` 动作给我 3–5 个选项，先不要替我选。\n\n每个选项应该是一个简短、可点击的方向 —— 比如深入某个细节、换个分析角度、放宽视角、引入另一张表，或者试试统计方法。为每个选项增加**非常简短**的一句话推荐理由（不超过 10 个字）。",
     "askedForRecommendations": "接下来应该探索什么呢？",
+    "generateReport": "生成报告",
+    "reportPrompt": "撰写一份报告，总结本次探索的主要发现。",
+    "askedForReport": "撰写一份报告，总结本次探索。",
     "endConversation": "结束对话",
     "sendReply": "发送回复",
     "explore": "探索",
@@ -630,14 +631,9 @@
     "errorDuringExploration": "探索过程中出错",
     "explorationStep": "探索步骤 {{step}}：{{question}}",
     "threadExplorePrompt": "探索这份数据中有趣的模式和趋势",
-    "threadReportPrompt": "总结本次探索的主要发现",
     "explorationThreadDeriveDescription": "从 {{source}} 派生，指令：{{instruction}}",
     "explorationStepCodeComment": "# 探索步骤 {{step}}",
-    "maxIterationsReached": "已达到最大探索步数。",
-    "switchToReport": "切换到报告模式",
-    "switchToExplore": "切换到探索模式",
-    "modeExplore": "探索",
-    "modeReport": "报告"
+    "maxIterationsReached": "已达到最大探索步数。"
   },
   "auth": {
     "loginTitle": "登录 Data Formulator",
diff --git a/src/views/ChartRecBox.tsx b/src/views/ChartRecBox.tsx
deleted file mode 100644
index 9eb26085..00000000
--- a/src/views/ChartRecBox.tsx
+++ /dev/null
@@ -1,863 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT License.
-
-import { FC, useState, useRef } from 'react'
-import { useTranslation } from 'react-i18next';
-import { transition } from '../app/tokens';
-import { useSelector, useDispatch } from 'react-redux'
-import { DataFormulatorState, dfActions, dfSelectors, generateFreshChart } from '../app/dfSlice';
-
-import { AppDispatch } from '../app/store';
-
-import {
-    Box,
-    Tooltip,
-    Typography,
-    SxProps,
-    LinearProgress,
-    alpha,
-    useTheme,
-    Theme,
-} from '@mui/material';
-
-import React from 'react';
-
-import { Chart } from "../components/ComponentType";
-
-import '../scss/EncodingShelf.scss';
-
-import { resolveRecommendedChart } from '../app/utils';
-import { useFormulateData } from '../app/useFormulateData';
-
-import { TableIcon } from '../icons';
-import { renderTextWithEmphasis } from './EncodingShelfCard';
-import { getChartTemplate } from '../components/ChartTemplates';
-import { generateChartSkeleton } from './ChartUtils';
-
-// when this is set to true, the new chart will be focused automatically
-const AUTO_FOCUS_NEW_CHART = false;
-
-export interface ChartRecBoxProps {
-    tableId: string;
-    placeHolderChartId?: string;
-    sx?: SxProps;
-}
-
-export const IdeaChip: FC<{
-    mini?: boolean,
-    idea: {text?: string, goal: string, tag?: string, type?: 'branch' | 'deep_dive'} 
-    theme: Theme, 
-    onClick: () => void, 
-    sx?: SxProps,
-    disabled?: boolean,
-}> = function ({mini, idea, theme, onClick, sx, disabled}) {
-
-    const accentColor = theme.palette.text.primary;
-    const tagLabel = idea.tag ? `(${idea.tag})` : '';
-    const ideaText = idea.goal;
-
-    const ideaTextComponent = renderTextWithEmphasis(ideaText, {
-        borderRadius: '0px',
-        fontSize: '11px',
-        lineHeight: 1.4,
-        backgroundColor: alpha(accentColor, 0.04),
-    });
-
-    return (
-        <Box
-            component="button"
-            type="button"
-            onClick={disabled ? undefined : onClick}
-            disabled={disabled}
-            sx={{
-                position: 'relative',
-                display: 'inline-block',
-                textAlign: 'left',
-                px: '8px',
-                py: '4px',
-                fontSize: 11,
-                lineHeight: 1.4,
-                color: accentColor,
-                fontFamily: theme.typography.fontFamily,
-                borderRadius: '6px',
-                border: `1px solid ${alpha(accentColor, 0.12)}`,
-                backgroundColor: theme.palette.background.paper,
-                cursor: disabled ? 'default' : 'pointer',
-                opacity: disabled ? 0.6 : 1,
-                whiteSpace: 'normal',
-                wordBreak: 'break-word',
-                transition: transition.fast,
-                '&:hover': disabled ? undefined : {
-                    backgroundColor: alpha(accentColor, 0.06),
-                },
-                ...sx
-            }}
-        >
-            {tagLabel && (
-                <Typography
-                    component="span"
-                    sx={{
-                        fontSize: 11,
-                        color: theme.palette.text.secondary,
-                        mr: '4px',
-                    }}
-                >
-                    {tagLabel}
-                </Typography>
-            )}
-            <Typography component="span" sx={{ fontSize: 11, color: accentColor }}>
-                {ideaTextComponent}
-            </Typography>
-        </Box>
-    );
-};
-
-export const ChartRecBox: FC<ChartRecBoxProps> = function ({ tableId, placeHolderChartId, sx }) {
-    const dispatch = useDispatch<AppDispatch>();
-    const { t } = useTranslation();
-    const theme = useTheme();
-
-    // reference to states
-    const tables = useSelector((state: DataFormulatorState) => state.tables);
-    const allCharts = useSelector(dfSelectors.getAllCharts);
-    // Thumbnails live in their own slice; reading the whole map is fine here
-    // because the recommendation strip already re-renders when its chart list
-    // changes. The map identity only changes when some thumbnail is updated.
-    const chartThumbnails = useSelector((state: DataFormulatorState) => state.chartThumbnails) || {};
-    const { formulateData } = useFormulateData();
-
-    const focusNextChartRef = useRef<boolean>(true);
-
-    const modeColor = theme.palette.secondary.main;
-
-    const [isFormulating, setIsFormulating] = useState<boolean>(false);
-
-    // Use the provided tableId and find additional available tables for multi-table operations
-    const currentTable = tables.find(t => t.id === tableId);
-
-    // All root/anchored tables, with current source tables ordered first for context priority
-    const rootTables = tables.filter(t => t.derive === undefined || t.anchored);
-    const priorityIds = (currentTable?.derive && !currentTable.anchored)
-        ? currentTable.derive.source
-        : [tableId];
-    let selectedTableIds = [
-        ...priorityIds.filter(id => rootTables.some(t => t.id === id)),
-        ...rootTables.map(t => t.id).filter(id => !priorityIds.includes(id))
-    ];
-
-    const deriveDataFromNL = (instruction: string) => {
-
-        if (selectedTableIds.length === 0 || instruction.trim() === "") {
-            return;
-        }
-
-        if (placeHolderChartId) {
-            dispatch(dfActions.changeChartRunningStatus({chartId: placeHolderChartId, status: true}));
-        }
-
-        const actionId = `deriveDataFromNL_${String(Date.now())}`;
-
-        // Validate table selection
-        const firstTableId = selectedTableIds[0];
-        if (!firstTableId) {
-            dispatch(dfActions.addMessages({
-                "timestamp": Date.now(),
-                "type": "error",
-                "component": "chart builder",
-                "value": "No table selected for data formulation.",
-            }));
-            return;
-        }
-
-        let refChart = generateFreshChart(tableId, 'Auto') as Chart;
-        refChart.source = 'trigger';
-
-        formulateData({
-            instruction,
-            mode: 'formulate',
-            actionTableIds: selectedTableIds,
-            currentTable: currentTable!,
-            triggerChart: refChart,
-            createChart: ({ candidateTable, refinedGoal, currentConcepts }) => {
-                let newChart = resolveRecommendedChart(refinedGoal, currentConcepts, candidateTable);
-                dispatch(dfActions.addChart(newChart));
-                if (focusNextChartRef.current || AUTO_FOCUS_NEW_CHART) {
-                    focusNextChartRef.current = false;
-                    dispatch(dfActions.setFocused({ type: 'chart', chartId: newChart.id }));
-                }
-                return newChart.id;
-            },
-            onStarted: () => {
-                setIsFormulating(true);
-            },
-            onSuccess: ({ displayInstruction, candidateTable }) => {
-                dispatch(dfActions.addMessages({
-                    "timestamp": Date.now(),
-                    "component": "chart builder",
-                    "type": "success",
-                    "value": `Data formulation: "${displayInstruction}"`
-                }));
-            },
-            onError: () => {
-            },
-            onFinally: () => {
-                setIsFormulating(false);
-                if (placeHolderChartId) {
-                    dispatch(dfActions.changeChartRunningStatus({chartId: placeHolderChartId, status: false}));
-                }
-            },
-        });
-    };
-
-    return (
-        <Box sx={{ maxWidth: "720px", width: '100%', display: 'flex', flexDirection: 'column', position: 'relative', ...sx }}>
-            {isFormulating && (
-                <LinearProgress
-                    sx={{
-                        position: 'absolute', top: -2, left: 0, right: 0,
-                        height: '2px', borderRadius: '2px',
-                        backgroundColor: alpha(modeColor, 0.15),
-                        '& .MuiLinearProgress-bar': { backgroundColor: modeColor },
-                    }}
-                />
-            )}
-            {currentTable && (() => {
-                // Unified provenance ribbon + chart strip:
-                //   row 1 = the trigger chain (… ▸ grandparent ▸ parent ▸ THIS ▸ child1, child2 ▸ …)
-                //   row 2 = a chart-thumbnail cluster directly under each
-                //           table label that owns charts.
-                //
-                // We use a single CSS grid with one column per ribbon item so
-                // the cluster for table X is always horizontally aligned with
-                // X's label. The entire grid is then centered inside the
-                // container, so the ribbon as a whole reads as balanced
-                // regardless of whether the current table sits near one end
-                // of the chain (e.g. a root like "gas-prices").
-                const parent = currentTable.derive?.trigger?.tableId
-                    ? tables.find(t => t.id === currentTable.derive!.trigger.tableId)
-                    : undefined;
-                const grandparent = parent?.derive?.trigger?.tableId
-                    ? tables.find(t => t.id === parent.derive!.trigger.tableId)
-                    : undefined;
-                const hasGreatGrandparent = !!grandparent?.derive?.trigger?.tableId;
-                const children = tables.filter(t => t.derive?.trigger?.tableId === currentTable.id);
-
-                const ancestors = [grandparent, parent].filter(Boolean) as typeof tables;
-
-                // Symmetric reach: when the current node sits at an end of
-                // the lineage, extend further into the available direction
-                // so we always show up to 3 neighbours total.
-                //
-                //  • At the root (no ancestors) with a single child: also
-                //    surface the grandchild(ren) as additional right-chain
-                //    entries.  This turns "Movie Performance → Movie
-                //    Budgets Gross → …" into "Movie Performance → Movie
-                //    Budgets Gross → Genre ROI Summary".
-                //  • At a leaf (no children) we already display two
-                //    ancestors; if there's only a parent, also surface
-                //    the great-grandparent so we still show 3 nodes.
-                let extraDescendants: typeof tables = [];
-                if (ancestors.length === 0 && children.length === 1) {
-                    extraDescendants = tables.filter(t => t.derive?.trigger?.tableId === children[0].id);
-                }
-                const greatGrandparent = hasGreatGrandparent
-                    ? tables.find(t => t.id === grandparent!.derive!.trigger.tableId)
-                    : undefined;
-                if (children.length === 0 && ancestors.length === 1 && greatGrandparent) {
-                    ancestors.unshift(greatGrandparent);
-                }
-                // Is there still an unseen node above our topmost ancestor?
-                const topAncestor = ancestors[0];
-                const hasHiddenAncestor = !!topAncestor?.derive?.trigger?.tableId;
-
-                if (ancestors.length === 0 && children.length === 0) return null;
-
-                // ── chart filtering ────────────────────────────────────────
-                // Drop:
-                //  • the empty-canvas placeholder chart that's rendering us,
-                //  • trigger-source stubs (virtual metadata merged in by
-                //    `selectTriggerCharts` — they have no real thumbnail),
-                //  • placeholder chart types that never render to PNG.
-                const chartsForTable = (tid: string) => allCharts.filter(c =>
-                    c.tableRef === tid
-                    && c.id !== placeHolderChartId
-                    && c.source !== 'trigger'
-                    && !['Auto', '?'].includes(c.chartType)
-                );
-
-                // ── ribbon atoms ───────────────────────────────────────────
-                const TableRef: FC<{ table: typeof currentTable, current?: boolean }> = ({ table, current }) => (
-                    <Box
-                        component={current ? 'span' : 'button'}
-                        type={current ? undefined : 'button'}
-                        onClick={current ? undefined : () => dispatch(dfActions.setFocused({ type: 'table', tableId: table.id }))}
-                        sx={{
-                            display: 'inline-flex', alignItems: 'center', gap: '3px',
-                            border: 'none', background: 'transparent', p: 0,
-                            fontFamily: theme.typography.fontFamily,
-                            fontSize: 11, lineHeight: 1.4,
-                            color: current ? 'primary.main' : 'text.secondary',
-                            fontWeight: current ? 600 : 400,
-                            cursor: current ? 'default' : 'pointer',
-                            whiteSpace: 'nowrap',
-                            transition: transition.fast,
-                            '&:hover': current ? undefined : { color: 'primary.main' },
-                        }}
-                    >
-                        <TableIcon sx={{ fontSize: 12, color: 'inherit' }} />
-                        {table.displayId}
-                    </Box>
-                );
-                const Sep = () => (
-                    // Solid 1px connector line — mirrors the timeline guide
-                    // lines used in DataThread to express "this flows into
-                    // that" rather than a generic "next item" arrow.
-                    <Box sx={{
-                        width: 24, height: '1px',
-                        backgroundColor: 'rgba(0,0,0,0.2)',
-                    }} />
-                );
-                const Ellipsis = () => (
-                    <Typography component="span" sx={{ fontSize: 11, color: 'text.disabled' }}>…</Typography>
-                );
-                const Comma = () => (
-                    <Typography component="span" sx={{ fontSize: 11, color: 'text.disabled', mx: '3px' }}>,</Typography>
-                );
-
-                // ── progressive truncation (same heuristic as before) ──────
-                // Estimates per-item width and sheds entries from the longer
-                // chain until the whole ribbon fits in BUDGET.
-                const charW = 7;
-                const currentCharW = 10;
-                const ITEM_OVERHEAD = 22;
-                const CURRENT_OVERHEAD = 28;
-                const SEP_W = 16;
-                const ELLIPSIS_W = 12;
-                const BUDGET = 680;
-
-                const estW = (table: typeof currentTable) =>
-                    ITEM_OVERHEAD + (table?.displayId.length ?? 0) * charW;
-                const currentW = CURRENT_OVERHEAD + currentTable.displayId.length * currentCharW;
-
-                // When there are 2+ children we abandon the inline
-                // comma-chain and render them as a vertical fan to the
-                // right of the current node — each branch sits on its own
-                // short rail with its label + inline stack chip.
-                const useChildrenFan = children.length >= 2;
-                // The fan is a vertical stack of branches, so its width is
-                // governed by the LONGEST single branch — not the sum of
-                // children.  Estimate: elbow stub + label padding + label
-                // glyphs + optional grandchild ellipsis affordance.  This
-                // replaces the old fixed FAN_W = 280 which overestimated
-                // and caused ancestors to be shed unnecessarily.
-                const fanBranchW = (t: typeof currentTable) => {
-                    const labelW = (t?.displayId.length ?? 0) * charW;
-                    const grandchildAffordance =
-                        tables.some(tt => tt.derive?.trigger?.tableId === t.id) ? 24 : 0;
-                    // 22 elbow + 14 pl + label + ~10 right padding
-                    return 22 + 14 + labelW + grandchildAffordance + 10;
-                };
-                const FAN_W = useChildrenFan
-                    ? Math.max(...children.map(fanBranchW))
-                    : 0;
-
-                let leftChain = [...ancestors];
-                let rightChain = useChildrenFan ? [] as typeof tables : [...children, ...extraDescendants];
-                let leftEllipsis = hasHiddenAncestor;
-                let rightTruncated = false;
-
-                const totalW = () => {
-                    let w = currentW;
-                    if (leftEllipsis) w += ELLIPSIS_W + SEP_W;
-                    for (const a of leftChain) w += estW(a) + SEP_W;
-                    if (useChildrenFan) {
-                        w += SEP_W + FAN_W;
-                    } else {
-                        if (rightChain.length > 0) w += SEP_W;
-                        rightChain.forEach((c, i) => { w += estW(c) + (i > 0 ? 8 : 0); });
-                        if (rightTruncated) w += 8 + ELLIPSIS_W;
-                    }
-                    return w;
-                };
-
-                while (totalW() > BUDGET) {
-                    // In fan mode we never shed children — the fan owns its
-                    // own vertical real estate.  Just shed ancestors.
-                    if (useChildrenFan) {
-                        if (leftChain.length > 0) {
-                            leftChain.shift();
-                            leftEllipsis = true;
-                        } else { break; }
-                    } else if (rightChain.length > leftChain.length && rightChain.length > 0) {
-                        rightChain.pop();
-                        rightTruncated = true;
-                    } else if (leftChain.length > 0) {
-                        leftChain.shift();
-                        leftEllipsis = true;
-                    } else if (rightChain.length > 0) {
-                        rightChain.pop();
-                        rightTruncated = true;
-                    } else {
-                        break;
-                    }
-                }
-
-                // ── build a flat sequence of grid items ────────────────────
-                // Each item is one of:
-                //  • connector     — occupies row 1 of its own column
-                //  • table         — label in row 1, cluster in row 2
-                //  • children-fan  — a single cell spanning both rows that
-                //                    renders the children as a vertical
-                //                    stack of branch rows.
-                type Connector = { kind: 'connector', key: string, node: React.ReactNode };
-                type TableItem = {
-                    kind: 'table',
-                    key: string,
-                    label: React.ReactNode,
-                    charts: Chart[],
-                    current?: boolean,
-                };
-                type FanItem = {
-                    kind: 'children-fan',
-                    key: string,
-                    branches: typeof tables,
-                };
-                const items: (Connector | TableItem | FanItem)[] = [];
-
-                if (leftEllipsis) {
-                    items.push({ kind: 'connector', key: 'lell', node: <Ellipsis /> });
-                    items.push({ kind: 'connector', key: 'lell-sep', node: <Sep /> });
-                }
-                leftChain.forEach((a, i) => {
-                    items.push({
-                        kind: 'table', key: `a-${a.id}`,
-                        label: <TableRef table={a} />,
-                        charts: chartsForTable(a.id),
-                    });
-                    items.push({ kind: 'connector', key: `a-${a.id}-sep`, node: <Sep /> });
-                });
-                items.push({
-                    kind: 'table', key: `c-${currentTable.id}`,
-                    label: <TableRef table={currentTable} current />,
-                    charts: chartsForTable(currentTable.id),
-                    current: true,
-                });
-                if (useChildrenFan) {
-                    // The fan draws its own entry stub at its vertical
-                    // midpoint, so no separate row-1 connector is needed
-                    // (a Sep here would dangle from the label baseline
-                    // and never meet the trunk).
-                    items.push({ kind: 'children-fan', key: 'fan', branches: children });
-                } else {
-                    rightChain.forEach((c, i) => {
-                        const prev = i === 0 ? currentTable : rightChain[i - 1];
-                        const isDescendant = c.derive?.trigger?.tableId === prev.id;
-                        // Sep = chain continuation (parent→child).  Comma =
-                        // sibling enumeration under the same parent.
-                        items.push({
-                            kind: 'connector',
-                            key: `c-${c.id}-sep`,
-                            node: isDescendant ? <Sep /> : <Comma />,
-                        });
-                        items.push({
-                            kind: 'table', key: `r-${c.id}`,
-                            label: <TableRef table={c} />,
-                            charts: chartsForTable(c.id),
-                        });
-                        // "…" affordance: only when c has children AND the
-                        // next ribbon entry isn't one of them (otherwise the
-                        // chain already exposes the descendant).
-                        const cChildren = tables.filter(t => t.derive?.trigger?.tableId === c.id);
-                        const nextInChain = rightChain[i + 1];
-                        const nextIsChild = !!nextInChain && cChildren.some(cc => cc.id === nextInChain.id);
-                        if (cChildren.length > 0 && !nextIsChild) {
-                            items.push({ kind: 'connector', key: `r-${c.id}-sep2`, node: <Sep /> });
-                            items.push({ kind: 'connector', key: `r-${c.id}-ell`, node: <Ellipsis /> });
-                        }
-                    });
-                    if (rightTruncated) {
-                        items.push({ kind: 'connector', key: 'rell-comma', node: <Comma /> });
-                        items.push({ kind: 'connector', key: 'rell', node: <Ellipsis /> });
-                    }
-                }
-
-                // Renders a chart-thumbnail cluster for a single ribbon
-                // column.  Two presentations:
-                //  • strip   (focused / current table) — N thumbnails laid
-                //    out side-by-side, auto-scaled to chart count.
-                //  • stacked (neighbour tables) — a constant-width "paper
-                //    stack" card: the first chart on top, up to 2 faint
-                //    layers peeking out behind, and a ×N badge when there's
-                //    more than one.  Keeps non-focused slots a uniform
-                //    width so the ribbon stays compact.
-                const renderCluster = (
-                    charts: Chart[],
-                    opts: { scale: number, maxVisible: number, dim?: boolean, stacked?: boolean },
-                ) => {
-                    if (charts.length === 0) return null;
-                    const { scale, maxVisible, dim, stacked } = opts;
-                    const imgMaxW = Math.round(140 * scale);
-                    const imgMaxH = Math.round(96 * scale);
-                    const boxMinW = Math.round(88 * scale);
-                    const boxMinH = Math.round(68 * scale);
-                    const skeletonPx = Math.round(44 * scale);
-
-                    const renderThumb = (chart: Chart) => {
-                        const tpl = getChartTemplate(chart.chartType);
-                        const label = chart.chartType;
-                        const thumb = chartThumbnails[chart.id];
-                        const content = thumb ? (
-                            <img
-                                src={thumb}
-                                alt={label}
-                                style={{ maxWidth: imgMaxW, maxHeight: imgMaxH, objectFit: 'contain' }}
-                            />
-                        ) : (
-                            <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'center', width: boxMinW, height: boxMinH }}>
-                                {generateChartSkeleton(tpl?.icon, skeletonPx, skeletonPx, 0.4)}
-                            </Box>
-                        );
-                        return (
-                            <Box
-                                component="button"
-                                type="button"
-                                onClick={() => dispatch(dfActions.setFocused({ type: 'chart', chartId: chart.id }))}
-                                sx={{
-                                    display: 'inline-flex', alignItems: 'center', justifyContent: 'center',
-                                    minWidth: boxMinW, minHeight: boxMinH,
-                                    p: 0.5,
-                                    border: `1px solid ${alpha(theme.palette.text.primary, 0.08)}`,
-                                    borderRadius: '6px', background: theme.palette.background.paper,
-                                    cursor: 'pointer', transition: transition.fast,
-                                    '&:hover': {
-                                        borderColor: 'primary.main',
-                                        boxShadow: '0 0 6px rgba(25, 118, 210, 0.25)',
-                                    },
-                                }}
-                            >
-                                {content}
-                            </Box>
-                        );
-                    };
-
-                    if (stacked) {
-                        const front = charts[0];
-                        const behindCount = Math.min(charts.length - 1, 2);
-                        const offset = 4; // px per buried layer peeks out
-                        // Fixed card dimensions so the front fully covers the
-                        // behind layers — otherwise a wide thumbnail can
-                        // outgrow the paper and the stack falls apart.
-                        const cardW = Math.max(boxMinW, imgMaxW) + 8;
-                        const cardH = Math.max(boxMinH, imgMaxH) + 8;
-                        const totalW = cardW + behindCount * offset;
-                        const totalH = cardH + behindCount * offset;
-                        const cardSx = {
-                            width: cardW, height: cardH,
-                            border: `1px solid ${alpha(theme.palette.text.primary, 0.08)}`,
-                            borderRadius: '6px',
-                            background: theme.palette.background.paper,
-                            boxSizing: 'border-box' as const,
-                        };
-                        return (
-                            <Box sx={{
-                                position: 'relative', width: totalW, height: totalH,
-                                opacity: dim ? 0.55 : 1,
-                                    transition: transition.fast,
-                                    '&:hover': dim ? { opacity: 1 } : undefined,
-                                }}>
-                                    {Array.from({ length: behindCount }).map((_, i) => {
-                                        // Farthest layer drawn first so the
-                                        // front lands on top.  Slight rotation
-                                        // pivoting from the buried corner
-                                        // sells the "pile of paper" feel.
-                                        const reverseIdx = behindCount - i;
-                                        const off = reverseIdx * offset;
-                                        const angle = (reverseIdx % 2 === 0 ? 1 : -1) * (reverseIdx * 1.2);
-                                        return (
-                                            <Box key={`paper-${i}`} sx={{
-                                                ...cardSx,
-                                                position: 'absolute',
-                                                left: off, top: off,
-                                                transform: `rotate(${angle}deg)`,
-                                                transformOrigin: 'top left',
-                                                boxShadow: '0 1px 2px rgba(0,0,0,0.04)',
-                                            }} />
-                                        );
-                                    })}
-                                    {/* Front card: a fixed-size, fully opaque
-                                        slot that buries the layers below.
-                                        The thumbnail/skeleton is clipped to
-                                        fit so nothing overflows. */}
-                                    <Box
-                                        component="button"
-                                        type="button"
-                                        onClick={() => dispatch(dfActions.setFocused({ type: 'chart', chartId: front.id }))}
-                                        sx={{
-                                            ...cardSx,
-                                            position: 'absolute', left: 0, top: 0,
-                                            p: 0.5, m: 0,
-                                            display: 'flex', alignItems: 'center', justifyContent: 'center',
-                                            overflow: 'hidden',
-                                            cursor: 'pointer',
-                                            transition: transition.fast,
-                                            '&:hover': {
-                                                borderColor: 'primary.main',
-                                                boxShadow: '0 0 6px rgba(25, 118, 210, 0.25)',
-                                            },
-                                        }}
-                                    >
-                                        {(() => {
-                                            const frontThumb = chartThumbnails[front.id];
-                                            return frontThumb ? (
-                                                <img
-                                                    src={frontThumb}
-                                                    alt={front.chartType}
-                                                    style={{
-                                                        maxWidth: '100%', maxHeight: '100%',
-                                                        objectFit: 'contain',
-                                                    }}
-                                                />
-                                            ) : (
-                                                generateChartSkeleton(
-                                                    getChartTemplate(front.chartType)?.icon,
-                                                    skeletonPx, skeletonPx, 0.4,
-                                                )
-                                            );
-                                        })()}
-                                    </Box>
-                                    {charts.length > 1 && (
-                                        <Typography sx={{
-                                            position: 'absolute',
-                                            right: -6, bottom: -6,
-                                            fontSize: Math.max(9, Math.round(11 * scale)),
-                                            color: 'text.secondary',
-                                            px: '5px', py: '1px',
-                                            border: `1px solid ${alpha(theme.palette.text.primary, 0.15)}`,
-                                            borderRadius: '10px',
-                                            background: theme.palette.background.paper,
-                                            lineHeight: 1.2,
-                                            pointerEvents: 'none',
-                                        }}>
-                                            {`×${charts.length}`}
-                                        </Typography>
-                                    )}
-                                </Box>
-                        );
-                    }
-
-                    // Strip mode (focused / current table).
-                    const visible = charts.slice(0, maxVisible);
-                    const overflow = charts.length - visible.length;
-
-                    return (
-                        <Box sx={{
-                            display: 'flex', flexWrap: 'wrap',
-                            justifyContent: 'center', alignItems: 'center',
-                            gap: 0.5,
-                            opacity: dim ? 0.45 : 1,
-                            transition: transition.fast,
-                            '&:hover': dim ? { opacity: 1 } : undefined,
-                        }}>
-                            {visible.map(chart => (
-                                <Tooltip key={chart.id} title={chart.chartType} arrow>
-                                    {renderThumb(chart)}
-                                </Tooltip>
-                            ))}
-                            {overflow > 0 && (
-                                <Tooltip title={t('chartRec.moreCharts', `${overflow} more`)} arrow>
-                                    <Typography sx={{
-                                        fontSize: Math.max(10, Math.round(12 * scale)),
-                                        color: 'text.secondary',
-                                        px: 1, py: 0.5,
-                                        border: `1px dashed ${alpha(theme.palette.text.primary, 0.15)}`,
-                                        borderRadius: '6px',
-                                        minHeight: boxMinH,
-                                        display: 'inline-flex', alignItems: 'center',
-                                    }}>
-                                        {`+${overflow}`}
-                                    </Typography>
-                                </Tooltip>
-                            )}
-                        </Box>
-                    );
-                };
-
-                // All clusters render at the same scale; the current
-                // cluster is only distinguished by not being dimmed and by
-                // showing more thumbnails.
-                const centerScale = 0.5;
-                const sideScale = 0.5;
-
-                return (
-                    <Box sx={{
-                        display: 'grid',
-                        // One auto column per item, so each table's cluster
-                        // lines up directly below its label.  The whole grid
-                        // is centered inside the container — that's what
-                        // makes the ribbon read as balanced rather than
-                        // pivoting around the current table.
-                        gridAutoFlow: 'column',
-                        gridAutoColumns: 'auto',
-                        gridTemplateRows: 'auto auto',
-                        justifyContent: 'center',
-                        alignItems: 'center',
-                        columnGap: '14px',
-                        rowGap: '6px',
-                        mb: 1, maxWidth: '100%',
-                    }}>
-                        {items.map(item => {
-                            if (item.kind === 'children-fan') {
-                                // Vertical fan-out: branches read top-down,
-                                // with the FIRST branch aligned with the
-                                // current node's label row.  Trunk is drawn
-                                // as a single absolutely-positioned line so
-                                // the inter-row flex gap doesn't break it.
-                                const MAX_BRANCHES = 4;
-                                const shown = item.branches.slice(0, MAX_BRANCHES);
-                                const hidden = item.branches.length - shown.length;
-                                const totalRows = shown.length + (hidden > 0 ? 1 : 0);
-                                const ELBOW_W = 22;
-                                const LINE_COLOR = 'rgba(0,0,0,0.22)';
-                                const ROW_MIN_H = 22;
-                                const ROW_GAP = 6;
-                                const HALF = ROW_MIN_H / 2; // y-offset of any row's centerline
-                                return (
-                                    <Box key={item.key} sx={{
-                                        gridRow: '1 / span 2',
-                                        alignSelf: 'start',
-                                        justifySelf: 'start',
-                                        display: 'flex', flexDirection: 'column',
-                                        gap: `${ROW_GAP}px`,
-                                        position: 'relative',
-                                    }}>
-                                        {/* Entry stub from the current node:
-                                            sized to the column gap so it
-                                            sits cleanly in the whitespace
-                                            between the focused label and
-                                            the fan trunk, without bleeding
-                                            into the label glyphs. */}
-                                        <Box sx={{
-                                            position: 'absolute',
-                                            right: '100%',
-                                            top: `${HALF}px`,
-                                            width: 14,
-                                            height: '1px',
-                                            backgroundColor: LINE_COLOR,
-                                            transform: 'translateY(-0.5px)',
-                                        }} />
-                                        {/* Continuous trunk from first row's
-                                            centerline down to last row's
-                                            centerline, spanning the row
-                                            gaps so the connector reads as
-                                            one line. */}
-                                        {totalRows >= 2 && (
-                                            <Box sx={{
-                                                position: 'absolute',
-                                                left: 0,
-                                                top: `${HALF}px`,
-                                                bottom: `${HALF}px`,
-                                                width: '1px',
-                                                backgroundColor: LINE_COLOR,
-                                            }} />
-                                        )}
-                                        {shown.map((c) => {
-                                            const hasGrandchildren = tables.some(t => t.derive?.trigger?.tableId === c.id);
-                                            return (
-                                                <Box key={c.id} sx={{
-                                                    display: 'flex', alignItems: 'center',
-                                                    minHeight: ROW_MIN_H,
-                                                    position: 'relative',
-                                                }}>
-                                                    {/* Horizontal elbow stub from
-                                                        trunk to this branch label. */}
-                                                    <Box sx={{
-                                                        width: ELBOW_W, height: '1px',
-                                                        backgroundColor: LINE_COLOR,
-                                                        flexShrink: 0,
-                                                    }} />
-                                                    <Box sx={{
-                                                        display: 'flex', alignItems: 'center', gap: '6px',
-                                                        // 14px breathing room before the
-                                                        // label, mirroring the gap on the
-                                                        // other side of an inline Sep.
-                                                        pl: '14px',
-                                                    }}>
-                                                        <TableRef table={c} />
-                                                        {hasGrandchildren && (
-                                                            <>
-                                                                <Box sx={{ width: 12, height: '1px', backgroundColor: LINE_COLOR }} />
-                                                                <Ellipsis />
-                                                            </>
-                                                        )}
-                                                    </Box>
-                                                </Box>
-                                            );
-                                        })}
-                                        {hidden > 0 && (
-                                            <Box sx={{ display: 'flex', alignItems: 'center', minHeight: ROW_MIN_H }}>
-                                                <Box sx={{
-                                                    width: ELBOW_W, height: '1px',
-                                                    backgroundColor: LINE_COLOR,
-                                                    flexShrink: 0,
-                                                }} />
-                                                <Typography sx={{ fontSize: 11, color: 'text.disabled', pl: '14px' }}>
-                                                    +{hidden} more
-                                                </Typography>
-                                            </Box>
-                                        )}
-                                    </Box>
-                                );
-                            }
-                            return (
-                                <React.Fragment key={item.key}>
-                                    <Box sx={{
-                                        gridRow: 1,
-                                        // When the current node has a fan
-                                        // hanging off it, push its label
-                                        // (and chart cluster below) flush
-                                        // against the right edge of the
-                                        // column so the entry stub doesn't
-                                        // appear stranded in empty space
-                                        // between the centered label and
-                                        // the fan's trunk.
-                                        justifySelf: (item.kind === 'table' && item.current && useChildrenFan) ? 'end' : 'center',
-                                        alignSelf: 'center',
-                                        display: 'inline-flex', alignItems: 'center',
-                                        // Reserve the same 14px breathing
-                                        // room next to the focused label as
-                                        // the inline Sep connectors get on
-                                        // either side, so the connector
-                                        // cadence is consistent.
-                                        ...(item.kind === 'table' && item.current && useChildrenFan
-                                            ? { mr: '14px' } : {}),
-                                    }}>
-                                        {item.kind === 'connector' ? item.node : item.label}
-                                    </Box>
-                                    {item.kind === 'table' && (
-                                        <Box sx={{
-                                            gridRow: 2, alignSelf: 'start',
-                                            justifySelf: (item.current && useChildrenFan) ? 'end' : 'center',
-                                            // Non-current cluster cells take the
-                                            // stack-card's natural (constant)
-                                            // width; the current cell takes the
-                                            // strip's natural width which drives
-                                            // its column wide enough to align
-                                            // with the cluster.
-                                            display: 'flex', justifyContent: 'center',
-                                            px: '4px',
-                                            ...(item.current && useChildrenFan ? { mr: '14px' } : {}),
-                                        }}>
-                                            {renderCluster(item.charts, item.current
-                                                ? { scale: centerScale, maxVisible: 8, stacked: true }
-                                                : { scale: sideScale, maxVisible: 3, dim: true, stacked: true })}
-                                        </Box>
-                                    )}
-                                </React.Fragment>
-                            );
-                        })}
-                    </Box>
-                );
-            })()}
-        </Box>
-    );
-};
\ No newline at end of file
diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx
index 25abdfc5..fff265d2 100644
--- a/src/views/DataThread.tsx
+++ b/src/views/DataThread.tsx
@@ -1465,14 +1465,26 @@ let SingleThreadGroupView: FC<{
             }
             if (runningDraft) renderedDraftIds.add(runningDraft.id);
             const draftInteraction = runningDraft?.derive?.trigger?.interaction;
+            // Once a report is streaming for this table, the generating report
+            // card (with its own spinner + "composing…" text) is the live
+            // indicator — so we drop the thinking banner entirely to avoid a
+            // second running state. We still render the prompt entries.
+            const generatingReports = (reportsByTriggerTable.get(tableId) || [])
+                .filter(r => r.status === 'generating');
+            const hasGeneratingReport = generatingReports.length > 0;
             if (draftInteraction && draftInteraction.length > 0) {
-                renderSplitByClarity(
-                    draftInteraction,
-                    runningDraft?.derive?.runningPlan,
-                    true,
-                    'agent-running-entry',
-                );
-            } else {
+                if (hasGeneratingReport) {
+                    // Just the prompt/clarity entries — no thinking banner.
+                    pushInteractionEntries(draftInteraction, tableId, triggerType, highlighted, 'agent-running-entry');
+                } else {
+                    renderSplitByClarity(
+                        draftInteraction,
+                        runningDraft?.derive?.runningPlan,
+                        true,
+                        'agent-running-entry',
+                    );
+                }
+            } else if (!hasGeneratingReport) {
                 const runningAction = runningAgentTableIds.get(tableId);
                 // `description` is the running plan: steps joined by STEP_SEP
                 // ('\x1E'), which renders invisibly. Split it back into discrete
@@ -1491,6 +1503,13 @@ let SingleThreadGroupView: FC<{
                         : ThinkingBanner(t('dataThread.working'), { px: 1, py: 0.5 }, true, true),
                 });
             }
+            // Live generating report card: rendered here (after the prompt,
+            // inside the running draft block) so it appears below the prompt
+            // while the report streams in — never above it. Completed reports
+            // render in the artifact slot via pushReportItems.
+            for (const report of generatingReports) {
+                timelineItems.push(buildReportTimelineItem(report, highlighted));
+            }
         } else if (clarifyAgentTableIds.has(tableId)) {
             const clarifyDraft = draftNodes.find(d => d.derive?.status === 'clarifying' && d.derive.trigger.tableId === tableId);
             if (clarifyDraft && renderedDraftIds.has(clarifyDraft.id)) {
@@ -1553,65 +1572,92 @@ let SingleThreadGroupView: FC<{
             });
         }
     };
-    // Push report artifacts triggered from the given table. A report is just
-    // another *output card* of the run — treated exactly like a table/chart
-    // card: the run's question (the triggering instruction) and the agent's
-    // closing summary are rendered ONCE by the thread machinery (trigger entry
-    // above, after-table summary below), so the report card never re-renders
-    // them (that would duplicate the run's opening instruction).
-    const pushReportItems = (tableId: string, highlighted: boolean) => {
+    // Build the timeline item (gutter icon + clickable card) for one report.
+    // Shared by pushReportItems (non-generating reports, in the artifact slot)
+    // and pushAgentDraftItems (the live generating card, pushed after the prompt
+    // entries of the running draft; no thinking banner is shown alongside it).
+    const buildReportTimelineItem = (report: GeneratedReport, highlighted: boolean) => {
+        const isFocused = focusedId?.type === 'report' && focusedId.reportId === report.id; // is this report the focused item?
+        const rowHL = highlighted || isFocused; // highlight when the caller asks for it or the report is focused
+        const isGenerating = report.status === 'generating';
+        const gutterIcon = isGenerating // spinner while generating, article icon otherwise
+            ? <CircularProgress size={12} thickness={5} sx={{ color: theme.palette.secondary.main }} />
+            : <ArticleIcon sx={{ width: 14, height: 14, color: rowHL ? theme.palette.secondary.main : 'rgba(0,0,0,0.3)' }} />;
+        const card = (
+            <Card className={`data-thread-card ${isFocused ? 'selected-report-card' : ''}`} elevation={0}
+                sx={{
+                    width: '100%', backgroundColor: theme.palette.secondary.bgcolor,
+                    ...ComponentBorderStyle,
+                    ...(rowHL ? { borderLeft: '2px solid', borderLeftColor: 'secondary.main' } : {}),
+                    borderRadius: '6px', cursor: 'pointer',
+                }}
+                onClick={() => dispatch(dfActions.setFocused({ type: 'report', reportId: report.id }))}
+            >
+                <Box sx={{ margin: '0px', display: 'flex', minWidth: 0, alignItems: 'center',
+                    '& .report-delete-btn': { opacity: 0, transition: 'opacity 0.15s' }, // delete button stays invisible...
+                    '&:hover .report-delete-btn': { opacity: 1 }, // ...until this row is hovered
+                }}>
+                    <Box sx={{ margin: '4px 8px 4px 6px', minWidth: 0, flex: 1 }}>
+                        <Typography sx={{
+                            fontSize: 11, fontWeight: 500, color: 'text.primary',
+                            display: '-webkit-box', WebkitLineClamp: 2, WebkitBoxOrient: 'vertical', // clamp the title to two lines
+                            overflow: 'hidden', wordBreak: 'break-all',
+                        }}>
+                            {report.title || t('report.untitled')}
+                        </Typography>
+                        {isGenerating && (
+                            <Typography sx={{ fontSize: 9, color: 'text.disabled', lineHeight: 1.3, mt: 0.25 }}>
+                                {t('report.composing')}
+                            </Typography>
+                        )}
+                    </Box>
+                    <Tooltip title={t('dataThread.deleteReport')}>
+                        <IconButton className="report-delete-btn" size="small" color="error"
+                            sx={{ p: 0.5, mr: 0.5, '&:hover': { transform: 'scale(1.15)' } }}
+                            onClick={(e) => { e.stopPropagation(); /* keep the click from also triggering the Card's focus onClick */ dispatch(dfActions.deleteGeneratedReport(report.id)); }}
+                        >
+                            <DeleteIcon sx={{ fontSize: 16 }} />
+                        </IconButton>
+                    </Tooltip>
+                </Box>
+            </Card>
+        );
+        return {
+            key: `report-${report.id}`, type: 'artifact' as const, highlighted: rowHL, // 'as const' keeps the literal type 'artifact'
+            reportId: report.id, gutterIcon, element: card,
+        };
+    };
+    // Push report artifacts triggered from the given table. A report is an
+    // *output card* of the run (like a chart) that OWNS its closing summary:
+    // the card renders, then the report's own summary renders right below it
+    // (from `report.summary`, not a table-anchored interaction entry), so the
+    // report and its summary live and die together.
+    //
+    // Only reports whose status is not 'generating' render here. A generating
+    // report is rendered live inside the running draft block (see
+    // pushAgentDraftItems) so it appears below the prompt, not above it.
+    const pushReportItems = (
+        tableId: string,
+        highlighted: boolean,
+        triggerType: 'trigger' | 'leaf-trigger', // forwarded to pushInteractionEntries for the summary entry
+    ) => {
         const reports = reportsByTriggerTable.get(tableId);
         if (!reports) return;
         for (const report of reports) {
-            const isFocused = focusedId?.type === 'report' && focusedId.reportId === report.id;
-            const rowHL = highlighted || isFocused;
-            const isGenerating = report.status === 'generating';
-            const gutterIcon = isGenerating
-                ? <CircularProgress size={12} thickness={5} sx={{ color: theme.palette.secondary.main }} />
-                : <ArticleIcon sx={{ width: 14, height: 14, color: rowHL ? theme.palette.secondary.main : 'rgba(0,0,0,0.3)' }} />;
-            const card = (
-                <Card className={`data-thread-card ${isFocused ? 'selected-report-card' : ''}`} elevation={0}
-                    sx={{
-                        width: '100%', backgroundColor: theme.palette.secondary.bgcolor,
-                        ...ComponentBorderStyle,
-                        ...(rowHL ? { borderLeft: '2px solid', borderLeftColor: 'secondary.main' } : {}),
-                        borderRadius: '6px', cursor: 'pointer',
-                    }}
-                    onClick={() => dispatch(dfActions.setFocused({ type: 'report', reportId: report.id }))}
-                >
-                    <Box sx={{ margin: '0px', display: 'flex', minWidth: 0, alignItems: 'center',
-                        '& .report-delete-btn': { opacity: 0, transition: 'opacity 0.15s' },
-                        '&:hover .report-delete-btn': { opacity: 1 },
-                    }}>
-                        <Box sx={{ margin: '4px 8px 4px 6px', minWidth: 0, flex: 1 }}>
-                            <Typography sx={{
-                                fontSize: 11, fontWeight: 500, color: 'text.primary',
-                                display: '-webkit-box', WebkitLineClamp: 2, WebkitBoxOrient: 'vertical',
-                                overflow: 'hidden', wordBreak: 'break-all',
-                            }}>
-                                {report.title || t('report.untitled')}
-                            </Typography>
-                            {isGenerating && (
-                                <Typography sx={{ fontSize: 9, color: 'text.disabled', lineHeight: 1.3, mt: 0.25 }}>
-                                    {t('report.composing')}
-                                </Typography>
-                            )}
-                        </Box>
-                        <Tooltip title={t('dataThread.deleteReport')}>
-                            <IconButton className="report-delete-btn" size="small" color="error"
-                                sx={{ p: 0.5, mr: 0.5, '&:hover': { transform: 'scale(1.15)' } }}
-                                onClick={(e) => { e.stopPropagation(); dispatch(dfActions.deleteGeneratedReport(report.id)); }}
-                            >
-                                <DeleteIcon sx={{ fontSize: 16 }} />
-                            </IconButton>
-                        </Tooltip>
-                    </Box>
-                </Card>
-            );
-            timelineItems.push({
-                key: `report-${report.id}`, type: 'artifact', highlighted: rowHL,
-                reportId: report.id, gutterIcon, element: card,
-            });
+            if (report.status === 'generating') continue; // the live card is pushed by pushAgentDraftItems instead
+            timelineItems.push(buildReportTimelineItem(report, highlighted));
+            if (report.summary) { // a report without a summary renders the card alone
+                const summaryEntry: InteractionEntry = { // synthesized from the report's own fields
+                    from: 'data-agent', to: 'user', role: 'summary',
+                    plan: report.summaryThought,
+                    content: report.summary,
+                    timestamp: report.updatedAt,
+                };
+                pushInteractionEntries(
+                    [summaryEntry], tableId, triggerType, highlighted,
+                    `report-summary-${report.id}`,
+                );
+            }
         }
     };
 
@@ -1682,7 +1728,7 @@ let SingleThreadGroupView: FC<{
         // Add report cards anchored to this table. Reports are output cards of
         // the run (like charts), so they sit with the other outputs, BEFORE the
         // run's closing summary.
-        pushReportItems(tableId, isHighlighted);
+        pushReportItems(tableId, isHighlighted, 'trigger');
 
         // After-table entries (e.g. summary). The run's closing summary is the
         // final word and must follow the LAST artifact (table, chart, or
@@ -1723,7 +1769,7 @@ let SingleThreadGroupView: FC<{
         // Add report cards anchored to this leaf table. Reports are output cards
         // of the run (like charts), so they sit with the other outputs, BEFORE
         // the run's closing summary.
-        pushReportItems(lt.id, isHL);
+        pushReportItems(lt.id, isHL, 'leaf-trigger');
 
         // After-table entries (e.g. summary). The run's closing summary is the
         // final word and must follow the LAST artifact (table, chart, or
diff --git a/src/views/EncodingShelfCard.tsx b/src/views/EncodingShelfCard.tsx
index 909f4fb6..5a0f9c10 100644
--- a/src/views/EncodingShelfCard.tsx
+++ b/src/views/EncodingShelfCard.tsx
@@ -135,8 +135,6 @@ import CloseIcon from '@mui/icons-material/Close';
 import TipsAndUpdatesIcon from '@mui/icons-material/TipsAndUpdates';
 import ArrowBackIcon from '@mui/icons-material/ArrowBack';
 import PaletteOutlinedIcon from '@mui/icons-material/PaletteOutlined';
-import { IdeaChip } from './ChartRecBox';
-import { useFormulateData } from '../app/useFormulateData';
 
 // Property and state of an encoding shelf
 export interface EncodingShelfCardProps { 
@@ -398,32 +396,10 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
     const [isRestyling, setIsRestyling] = useState<boolean>(false);
     // Per-variant refresh in progress (variantId being refreshed, or null).
     const [refreshingVariantId, setRefreshingVariantId] = useState<string | null>(null);
-    // Intent-classifier round-trip in progress. Distinct from isRestyling so
-    // the UI can show a single "thinking" state on the submit button covering
-    // classify → route → execute. See submitPrompt() and the discussion in
-    // chat about routing on Enter.
-    const [isClassifying, setIsClassifying] = useState<boolean>(false);
-    // Phase shown in the inline status banner below the prompt input. Covers
-    // the whole submit pipeline so the user always knows what's happening:
-    //   classifying → restyling | formulating → idle.
-    // Set explicitly inside submitPrompt() and cleared by the effect below
-    // that watches chartSynthesisInProgress for the data-agent path.
-    const [submitPhase, setSubmitPhase] = useState<
-        'idle' | 'classifying' | 'restyling' | 'formulating'
-    >('idle');
     const chartSynthesisInProgress = useSelector(
         (state: DataFormulatorState) => state.chartSynthesisInProgress,
     );
     const isDataAgentRunning = chartSynthesisInProgress.includes(chartId);
-    // While we're in 'formulating' phase, watch the redux flag and clear the
-    // banner once the data agent finishes (success or error). The data agent
-    // is fire-and-forget from this card's perspective, so we can't rely on
-    // an explicit callback to mark completion.
-    useEffect(() => {
-        if (submitPhase === 'formulating' && !isDataAgentRunning) {
-            setSubmitPhase('idle');
-        }
-    }, [submitPhase, isDataAgentRunning]);
 
     useEffect(() => {
         setPrompt(triggerPrompt);
@@ -432,16 +408,9 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
     let encodingMap = chart?.encodingMap;
 
     const dispatch = useDispatch<AppDispatch>();
-    const { streamIdeas, formulateData } = useFormulateData();
 
     const [chartTypeMenuOpen, setChartTypeMenuOpen] = useState<boolean>(false);
 
-    // Anchor for the bottom-left "style presets" menu in the follow-up
-    // speech bubble. A preset click sends a detailed style instruction
-    // straight to the restyle agent (no intent classification needed —
-    // these are guaranteed style-only changes by construction).
-    const [stylePresetAnchor, setStylePresetAnchor] = useState<HTMLElement | null>(null);
-
     // Encoding channels are always shown (no auto hide/expand on hover/drag).
     const shouldExpandAll = true;
     
@@ -461,62 +430,6 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
     let isChartAvailable = checkChartAvailability(chart, conceptShelfItems, currentTable.rows);
 
 
-    // Consolidated chart state - maps chartId to its ideas, thinkingBuffer, and loading state
-    const [chartState, setChartState] = useState<Record<string, {
-        ideas: {text: string, goal: string, tag: string}[],
-        thinkingBuffer: string,
-        isLoading: boolean,
-        phase: string,
-    }>>({});
-    const [ideaElapsed, setIdeaElapsed] = useState(0);
-
-    // Get current chart's state
-    const currentState = chartState[chartId] || { ideas: [], thinkingBuffer: "", isLoading: false, phase: "" };
-    const currentChartIdeas = currentState.ideas;
-    const thinkingBuffer = currentState.thinkingBuffer;
-    const isLoadingIdeas = currentState.isLoading;
-    const ideaPhase = currentState.phase;
-
-    useEffect(() => {
-        if (!isLoadingIdeas) { setIdeaElapsed(0); return; }
-        // Tick once per second — fast enough to read as live, slow enough to
-        // stay readable; the loading indicator carries the liveness cue.
-        // Anchor to a start timestamp to avoid float drift.
-        const t0 = Date.now();
-        const timer = setInterval(() => setIdeaElapsed(Math.floor((Date.now() - t0) / 1000)), 1000);
-        return () => clearInterval(timer);
-    }, [isLoadingIdeas]);
-    
-    const defaultChartState = { ideas: [] as any[], thinkingBuffer: "", isLoading: false, phase: "" };
-
-    const setIdeas = (ideas: {text: string, goal: string, tag: string}[]) => {
-        setChartState(prev => ({
-            ...prev,
-            [chartId]: { ...defaultChartState, ...prev[chartId], ideas }
-        }));
-    };
-
-    const setThinkingBuffer = (thinkingBuffer: string) => {
-        setChartState(prev => ({
-            ...prev,
-            [chartId]: { ...defaultChartState, ...prev[chartId], thinkingBuffer }
-        }));
-    };
-
-    const setIsLoadingIdeas = (isLoading: boolean) => {
-        setChartState(prev => ({
-            ...prev,
-            [chartId]: { ...defaultChartState, ...prev[chartId], isLoading }
-        }));
-    };
-
-    const setIdeaPhase = (phase: string) => {
-        setChartState(prev => ({
-            ...prev,
-            [chartId]: { ...defaultChartState, ...prev[chartId], phase }
-        }));
-    };
-    
     let encodingBoxGroups = Object.entries(channelGroups)
         .filter(([group, channelList]) => channelList.some(ch => Object.keys(encodingMap).includes(ch)))
         .map(([group, channelList]) => {
@@ -565,177 +478,6 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
         ...rootTables.map(t => t.id).filter(id => !priorityIds.includes(id))
     ];
 
-    let getIdeasForVisualization = async () => {
-        if (!currentTable || isLoadingIdeas) return;
-
-        let chartAvailable = checkChartAvailability(chart, conceptShelfItems, currentTable.rows);
-        let currentChartPng = chartAvailable ? await vegaLiteSpecToPng(assembleVegaChart(
-            chart.chartType, chart.encodingMap, activeFields, currentTable.rows,
-            currentTable.metadata, 100, 80, false, chart.config)) : undefined;
-        if (currentChartPng) {
-            currentChartPng = await downscaleImageForAgent(currentChartPng);
-        }
-
-        await streamIdeas({
-            actionTableIds,
-            currentTable,
-            onIdeas: setIdeas,
-            onThinkingBuffer: setThinkingBuffer,
-            onLoadingChange: setIsLoadingIdeas,
-            onProgress: setIdeaPhase,
-            currentChartImage: currentChartPng,
-            currentDataSample: currentTable.rows.slice(0, 10),
-        });
-    }
-
-    // Function to handle idea chip click
-    const handleIdeaClick = (ideaText: string) => {
-        setPrompt(ideaText);
-        // Automatically start the data formulation process
-        deriveNewData(ideaText, 'ideate');
-    };
-
-
-    let deriveNewData = async (
-        instruction: string, 
-        mode: 'formulate' | 'ideate' = 'formulate', 
-        overrideTableId?: string,
-    ) => {
-
-        if (actionTableIds.length == 0) return;
-
-        // Short-circuit: if all fields exist in source table, just reference it
-        if (currentTable.derive == undefined && instruction == "" && 
-                (activeFields.length > 0 && activeCustomFields.length == 0) && 
-                tables.some(t => t.derive == undefined && 
-                activeFields.every(f => currentTable.names.includes(f.name)))) {
-            let tempTable = getDataTable(chart, tables, allCharts, conceptShelfItems, true);
-            dispatch(dfActions.updateTableRef({chartId: chartId, tableRef: tempTable.id}));
-            dispatch(dfActions.changeChartRunningStatus({chartId, status: true}));
-            setTimeout(function(){
-                dispatch(dfActions.changeChartRunningStatus({chartId, status: false}));
-                dispatch(dfActions.clearUnReferencedTables());
-            }, 400);
-            return;
-        }
-
-        dispatch(dfActions.clearUnReferencedTables());
-
-        let fieldNamesStr = activeFields.map(f => f.name).reduce(
-            (a: string, b: string, i, array) => a + (i == 0 ? "" : (i < array.length - 1 ? ', ' : ' and ')) + b, "");
-
-        const actionId = `deriveNewData_${String(Date.now())}`;
-        const originTableId = focusedTableId || currentTable.id;
-        const actionDescription = instruction || `Derive ${fieldNamesStr}`;
-
-        // Build chart visualization context
-        let chartComplete = checkChartAvailability(chart, conceptShelfItems, currentTable.rows);
-        let chartSpec = (mode == 'formulate' && Object.keys(activeSimpleEncodings).length > 0) ? {
-            chart_type: chart.chartType,
-            encodings: activeSimpleEncodings,
-            ...(chart.config ? { config: chart.config } : {})
-        } : undefined;
-
-        let currentChartImage: string | null | undefined = undefined;
-        if (chartComplete && chartSpec) {
-            currentChartImage = await vegaLiteSpecToPng(assembleVegaChart(
-                chart.chartType, chart.encodingMap, activeFields, currentTable.rows,
-                currentTable.metadata, 100, 80, false, chart.config
-            ));
-            if (currentChartImage) {
-                currentChartImage = await downscaleImageForAgent(currentChartImage);
-            }
-        }
-
-        let currentVisualization = (chartComplete && chartSpec) ? {
-            chart_spec: chartSpec,
-            ...(currentChartImage ? { chart_image: currentChartImage } : {})
-        } : undefined;
-        let expectedVisualization = (!chartComplete && chartSpec) ? { chart_spec: chartSpec } : undefined;
-
-        let triggerChartSpec = duplicateChart(chart);
-        triggerChartSpec.source = "trigger";
-
-        formulateData({
-            instruction,
-            mode,
-            actionTableIds,
-            currentTable,
-            overrideTableId,
-            currentVisualization,
-            expectedVisualization,
-            triggerChart: triggerChartSpec,
-            createChart: ({ candidateTable, refinedGoal, currentConcepts }) => {
-                let needToCreateNewChart = true;
-                let focusedChartId: string | undefined;
-                
-                if (mode != "ideate" && chart.chartType != "Auto" && overrideTableId != undefined && 
-                    allCharts.filter(c => c.source == "user").find(c => c.tableRef == overrideTableId)) {
-                    let chartsFromOverrideTable = allCharts.filter(c => c.source == "user" && c.tableRef == overrideTableId);
-                    let chartsWithSameEncoding = chartsFromOverrideTable.filter(c => {
-                        let getSimpliedChartEnc = (ch: Chart) => {
-                            return ch.chartType + ":" + Object.entries(ch.encodingMap)
-                                .filter(([channel, enc]) => enc.fieldID != undefined)
-                                .map(([channel, enc]) => `${channel}:${enc.fieldID}:${enc.aggregate}:${enc.sortOrder}:${enc.sortBy}:${enc.scheme}`)
-                                .join(";");
-                        }
-                        return getSimpliedChartEnc(c) == getSimpliedChartEnc(triggerChartSpec);
-                    });
-                    if (chartsWithSameEncoding.length > 0) {
-                        focusedChartId = chartsWithSameEncoding[0].id;
-                        dispatch(dfActions.setFocused({ type: 'chart', chartId: focusedChartId }));
-                        needToCreateNewChart = false;
-                    }
-                }
-                
-                if (needToCreateNewChart) {
-                    let newChart: Chart;
-                    if (mode == "ideate" || chart.chartType == "Auto") {
-                        newChart = resolveRecommendedChart(refinedGoal, currentConcepts, candidateTable);
-                    } else if (chart.chartType == "Table") {
-                        newChart = generateFreshChart(candidateTable.id, 'Table');
-                    } else {
-                        newChart = structuredClone(chart) as Chart;
-                        newChart.source = "user";
-                        newChart.id = `chart-${Date.now() - Math.floor(Math.random() * 10000)}`;
-                        newChart.tableRef = candidateTable.id;
-                        // Style variants belong to the chart they were authored
-                        // against — don't carry them over to a follow-up chart.
-                        // (See design-docs/28-chart-style-refinement-agent.md.)
-                        newChart.styleVariants = undefined;
-                        newChart.activeVariantId = undefined;
-                        let chartEncodings = refinedGoal['chart']?.['encodings'] || refinedGoal['chart_encodings'] || {};
-                        newChart = resolveChartFields(newChart, currentConcepts, chartEncodings, candidateTable);
-                    }
-                    focusedChartId = newChart.id;
-                    dispatch(dfActions.addAndFocusChart(newChart));
-                }
-                return focusedChartId;
-            },
-            onStarted: () => {
-                dispatch(dfActions.changeChartRunningStatus({chartId, status: true}));
-            },
-            onSuccess: ({ displayInstruction, candidateTable, focusedChartId }) => {
-                if (chart.chartType == "Table" || chart.chartType == "Auto" || (existsWorkingTable == false)) {
-                    dispatch(dfActions.deleteChartById(chartId));
-                }
-                dispatch(dfActions.clearUnReferencedTables());
-                dispatch(dfActions.clearUnReferencedCustomConcepts());
-                dispatch(dfActions.setFocused({ type: 'chart', chartId: focusedChartId as string }));
-                dispatch(dfActions.addMessages({
-                    "timestamp": Date.now(),
-                    "component": "chart builder",
-                    "type": "success",
-                    "value": t('encoding.formulationSucceeded', { fields: fieldNamesStr })
-                }));
-            },
-            onError: () => {
-            },
-            onFinally: () => {
-                dispatch(dfActions.changeChartRunningStatus({chartId, status: false}));
-            },
-        });
-    }
 
     // --- Style variants (see design-docs/28-chart-style-refinement-agent.md) ---
     // Chip strip for navigating user-authored "skins" of the current chart's
@@ -897,77 +639,6 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
         }
     };
 
-    /**
-     * Single entry point for the input bubble's primary submit (Enter or the
-     * primary button). Routes the prompt to either the chart restyle agent
-     * (visual changes) or the data agent (data shape / chart-type changes)
-     * via a tiny LLM intent classifier.
-     *
-     * Style → data fallback: if the restyle agent comes back with
-     * out_of_scope (i.e. it decided this was actually a data change), we
-     * automatically retry with the data agent so the user doesn't have to
-     * re-press anything. The original out_of_scope toast is suppressed in
-     * that case to avoid the misleading "click formulate instead" hint.
-     *
-     * Heuristics-free: see src/app/intentClassifier.ts for the rationale
-     * behind a tiny LLM call vs. a keyword list (multilingual support).
-     */
-    const submitPrompt = async () => {
-        const text = prompt.trim();
-        if (!text) return;
-        if (isRestyling || isClassifying) return;
-        if (!activeModel) {
-            // Both agents need a model; the data agent path will surface its
-            // own error too, but failing fast here saves a classifier call.
-            dispatch(dfActions.addMessages({
-                timestamp: Date.now(),
-                component: 'chart builder',
-                type: 'error',
-                value: 'No model is configured. Please select a model before submitting.',
-            }));
-            return;
-        }
-
-        // If the chart isn't rendered yet there's nothing for the style
-        // agent to refine; just go straight to the data agent.
-        if (!isChartAvailable) {
-            setSubmitPhase('formulating');
-            deriveNewData(text, 'formulate');
-            return;
-        }
-
-        setIsClassifying(true);
-        setSubmitPhase('classifying');
-        let intent: 'style' | 'data' = 'data';
-        try {
-            intent = await classifyChartIntent(text, activeModel);
-        } finally {
-            setIsClassifying(false);
-        }
-
-        if (intent === 'data') {
-            setSubmitPhase('formulating');
-            deriveNewData(text, 'formulate');
-            return;
-        }
-
-        // intent === 'style' — try restyle first, fall back to data on out_of_scope
-        setSubmitPhase('restyling');
-        const result = await handleRestyleSubmit({ suppressOutOfScopeMessage: true });
-        if (result === 'out_of_scope') {
-            // The restyle agent decided this was actually a data change.
-            // Hand off to the data agent. The banner switches from
-            // "restyling…" to "formulating data…" so the user sees the route
-            // change without an extra click.
-            setSubmitPhase('formulating');
-            deriveNewData(text, 'formulate');
-            // submitPhase will flip to 'idle' once the data agent finishes
-            // (see the chartSynthesisInProgress effect above).
-        } else {
-            // success or error — restyle path is fully done, clear banner.
-            setSubmitPhase('idle');
-        }
-    };
 
     /**
      * Refresh a stale variant: re-run its stored prompt against the
@@ -1172,242 +843,6 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
     ) : null;
 
 
-    // zip multiple components together
-    const w: any = (a: any[], b: any[]) => a.length ? [a[0], ...w(b, a.slice(1))] : b;
-
-    let formulateInputBox = <Card key='text-input-boxes' variant='outlined' sx={{
-        position: 'relative',
-        display: 'flex', flexDirection: 'column',
-        px: 1, pt: 0.5, pb: 0.25,
-        ml: '8px', // leave room for the speech-bubble tail on the left
-        borderWidth: 1,
-        borderColor: alpha(theme.palette.text.primary, 0.2),
-        borderRadius: '8px',
-        overflow: 'visible',
-        flexShrink: 0,
-        transition: transition.fast,
-        // Speech-bubble tail: outer triangle (border)
-        '&::before': {
-            content: '""',
-            position: 'absolute',
-            top: 12,
-            left: -8,
-            width: 0,
-            height: 0,
-            borderTop: '7px solid transparent',
-            borderBottom: '7px solid transparent',
-            borderRight: `8px solid ${alpha(theme.palette.text.primary, 0.2)}`,
-            transition: transition.fast,
-            pointerEvents: 'none',
-        },
-        // Speech-bubble tail: inner triangle (fill, masks the border edge)
-        '&::after': {
-            content: '""',
-            position: 'absolute',
-            top: 13,
-            left: -6,
-            width: 0,
-            height: 0,
-            borderTop: '6px solid transparent',
-            borderBottom: '6px solid transparent',
-            borderRight: `7px solid ${theme.palette.background.paper}`,
-            transition: transition.fast,
-            pointerEvents: 'none',
-        },
-        '&:hover': {
-            borderWidth: 1,
-            borderColor: alpha(theme.palette.primary.main, 0.6),
-        },
-        '&:hover::before': {
-            borderRightColor: alpha(theme.palette.primary.main, 0.6),
-        },
-        '&:focus-within': {
-            borderWidth: 1,
-            borderColor: alpha(theme.palette.primary.main, 0.8),
-        },
-        '&:focus-within::before': {
-            borderRightColor: alpha(theme.palette.primary.main, 0.8),
-        },
-    }}>
-        <TextField
-            variant="standard"
-            sx={{
-                flex: 1,
-                "& .MuiInput-input": { fontSize: '12px', lineHeight: 1.5 },
-                "& .MuiInput-underline:before": { borderBottom: 'none' },
-                "& .MuiInput-underline:hover:not(.Mui-disabled):before": { borderBottom: 'none' },
-                "& .MuiInput-underline:after": { borderBottom: 'none' },
-            }}
-            onChange={(event: any) => {
-                setPrompt(event.target.value);
-            }}
-            onKeyDown={(event: any) => {
-                if (event.key === 'Enter' && !event.shiftKey) {
-                    event.preventDefault();
-                    if (prompt.trim().length > 0) {
-                        // submitPrompt routes via the intent classifier:
-                        // style requests go to the restyle agent; data /
-                        // chart-type requests go to deriveNewData.
-                        submitPrompt();
-                    }
-                }
-            }}
-            slotProps={{
-                inputLabel: { shrink: true },
-            }}
-            value={prompt}
-            placeholder={t('encoding.followUpChartPlaceholder')}
-            fullWidth
-            multiline
-            minRows={2}
-            maxRows={5}
-        />
-        <Box sx={{
-            display: 'flex', flexDirection: 'row', alignItems: 'center',
-            justifyContent: 'space-between',
-        }}>
-            {/* Left group: one-click style presets. Clicking the palette
-                icon opens a menu of curated "style sheets" (NYT, Economist,
-                FiveThirtyEight, minimal, dark mode, presentation, comic).
-                Each preset sends a detailed style instruction straight to the
-                restyle agent — bypassing the intent classifier since these
-                are guaranteed style-only changes. The user can still type
-                freeform instructions in the textbox above; the menu's
-                footer hint reminds them of that. */}
-            <Tooltip title={t('encoding.stylePresetsTooltip')}>
-                <span>
-                    <IconButton
-                        size="small"
-                        disabled={!isChartAvailable || isClassifying || isRestyling}
-                        sx={{
-                            p: 0.5,
-                            color: alpha(theme.palette.text.primary, 0.55),
-                            '&:hover': { color: theme.palette.primary.main, backgroundColor: alpha(theme.palette.primary.main, 0.08) },
-                        }}
-                        onClick={(e) => setStylePresetAnchor(e.currentTarget)}
-                    >
-                        <PaletteOutlinedIcon sx={{ fontSize: 18 }} />
-                    </IconButton>
-                </span>
-            </Tooltip>
-            <Menu
-                anchorEl={stylePresetAnchor}
-                open={Boolean(stylePresetAnchor)}
-                onClose={() => setStylePresetAnchor(null)}
-                anchorOrigin={{ vertical: 'bottom', horizontal: 'left' }}
-                transformOrigin={{ vertical: 'top', horizontal: 'left' }}
-                slotProps={{
-                    paper: {
-                        sx: { minWidth: 220, maxWidth: 260, mt: 0.5 },
-                    },
-                }}
-            >
-                <Box sx={{ px: 1.5, pt: 0.75, pb: 0.25 }}>
-                    <Typography sx={{ fontSize: 10.5, fontWeight: 600, color: 'text.secondary', textTransform: 'uppercase', letterSpacing: 0.5 }}>
-                        {t('encoding.stylePresetsHeader')}
-                    </Typography>
-                </Box>
-                {STYLE_PRESETS.map((preset) => (
-                    <MenuItem
-                        key={preset.key}
-                        dense
-                        onClick={() => {
-                            setStylePresetAnchor(null);
-                            // Style presets are unambiguous style changes —
-                            // skip the intent classifier and send the
-                            // detailed instruction straight to the restyle
-                            // agent. We also drive submitPhase so the inline
-                            // status banner above shows "restyling…".
-                            setSubmitPhase('restyling');
-                            handleRestyleSubmit({ instructionOverride: preset.instruction })
-                                .finally(() => setSubmitPhase('idle'));
-                        }}
-                        sx={{ py: 0.5 }}
-                    >
-                        <Typography sx={{ fontSize: 12, lineHeight: 1.3 }}>
-                            {preset.label}
-                        </Typography>
-                    </MenuItem>
-                ))}
-                <Box sx={{ px: 1.5, py: 0.75, mt: 0.25 }}>
-                    <Typography sx={{ fontSize: 10.5, color: alpha(theme.palette.text.primary, 0.4), fontStyle: 'italic', lineHeight: 1.4, whiteSpace: 'normal' }}>
-                        {t('encoding.stylePresetsHint')}
-                    </Typography>
-                </Box>
-            </Menu>
-
-            {/* Right group: tips/ideas + primary submit. */}
-            <Box sx={{ display: 'flex', flexDirection: 'row', alignItems: 'center' }}>
-            <Tooltip title={currentChartIdeas.length > 0 ? t('encoding.refreshIdeas') : t('encoding.getIdeas')}>
-                <span>
-                    <IconButton size="small"
-                        disabled={isLoadingIdeas}
-                        sx={{ p: 0.5, color: theme.palette.custom.textColor || theme.palette.custom.main,
-                            '&:hover': { backgroundColor: alpha(theme.palette.custom.main, 0.08) } }}
-                        onClick={() => getIdeasForVisualization()}>
-                        {isLoadingIdeas 
-                            ? <CircularProgress size={20} sx={{ color: theme.palette.custom.main }} />
-                            : <TipsAndUpdatesIcon sx={{ fontSize: 20 }} />}
-                    </IconButton>
-                </span>
-            </Tooltip>
-            {/* Primary submit. The Enter key and this button both go through
-                submitPrompt(), which uses an LLM intent classifier to route
-                between the restyle agent and the data agent. The brush /
-                style-only button was removed in favor of this unified entry
-                point — if the classifier (or the user) is wrong, the restyle
-                agent's out_of_scope signal triggers an automatic data-agent
-                fallback. The trigger-override button below is kept because
-                it does something neither path does (re-derive into the same
-                table). See src/app/intentClassifier.ts. */}
-            {trigger ? (() => {
-                const overrideTableId = tables.find(t => t.derive?.trigger === trigger)?.id;
-                return overrideTableId ? (
-                <Tooltip title={<Typography sx={{fontSize: 11}}>{t('encoding.formulateAndOverride')} <TableIcon sx={{width: 10, height: 10, marginBottom: '-1px'}}/>{overrideTableId}</Typography>}>
-                    <span>
-                        <IconButton size="small" color={"warning"} sx={{ p: 0.5 }} onClick={() => { 
-                            deriveNewData(trigger!.interaction?.find(e => e.role === 'instruction')?.content || '', 'formulate', overrideTableId); 
-                        }}>
-                            <ChangeCircleOutlinedIcon sx={{ fontSize: 18 }} />
-                        </IconButton>
-                    </span>
-                </Tooltip>) : null;
-            })()
-                : 
-                <Tooltip title={t('encoding.formulate')}>
-                    <span>
-                        <IconButton size="small" color={"primary"} sx={{ p: 0.5 }}
-                            disabled={(!prompt.trim() && activeCustomFields.length === 0) || isClassifying || isRestyling}
-                            onClick={() => {
-                                if (prompt.trim()) {
-                                    submitPrompt();
-                                } else {
-                                    // No text — only the field shelf has
-                                    // changes. Skip the classifier and run
-                                    // the data agent directly.
-                                    deriveNewData(prompt, 'formulate');
-                                }
-                            }}>
-                            {(isClassifying || isRestyling)
-                                ? <CircularProgress size={18} sx={{ color: theme.palette.primary.main }} />
-                                : <PrecisionManufacturing sx={{
-                                    fontSize: 20,
-                                    ...(isChartAvailable ? {} : {
-                                        animation: 'pulseAttention 3s ease-in-out infinite',
-                                        '@keyframes pulseAttention': {
-                                            '0%, 90%': { scale: 1 },
-                                            '95%': { scale: 1.2 },
-                                            '100%': { scale: 1 },
-                                        },
-                                    }),
-                                }} />}
-                        </IconButton>
-                    </span>
-                </Tooltip>
-            }           
-            </Box>
-        </Box>
-    </Card>
 
 
 
@@ -1664,14 +1099,10 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
             </>)}
         </Box>);
 
-    // Whether any agent work is in flight (intent classify, restyle, or the
-    // data agent) and the matching status line shown in the overlay below.
-    const isAgentWorking = submitPhase !== 'idle' || isDataAgentRunning;
-    const agentStatusText =
-        submitPhase === 'classifying' ? 'thinking…'
-          : submitPhase === 'restyling' ? 'updating the chart…'
-          : (submitPhase === 'formulating' || isDataAgentRunning) ? 'preparing data for the chart…'
-          : 'thinking…';
+    // Whether the data agent is synthesizing this chart; drives the overlay
+    // status line shown below.
+    const isAgentWorking = isDataAgentRunning;
+    const agentStatusText = 'preparing data for the chart…';
 
     const encodingShelfCard = (
         <Box sx={{ 
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index bd8642f8..d59b2678 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -13,7 +13,6 @@ import {
     CircularProgress,
     Card,
     LinearProgress,
-    Button,
     Chip,
     Popper,
     Paper,
@@ -38,6 +37,7 @@ import ArrowUpwardRoundedIcon from '@mui/icons-material/ArrowUpwardRounded';
 import CloseIcon from '@mui/icons-material/Close';
 import AddIcon from '@mui/icons-material/Add';
 import TipsAndUpdatesIcon from '@mui/icons-material/TipsAndUpdates';
+import EditOutlinedIcon from '@mui/icons-material/EditOutlined';
 import StopIcon from '@mui/icons-material/Stop';
 
 import InsertDriveFileOutlinedIcon from '@mui/icons-material/InsertDriveFileOutlined';
@@ -47,6 +47,11 @@ import { useTranslation } from 'react-i18next';
 import { shouldAutoFocusGeneratedChart } from '../app/agentInteractionPolicy';
 import { ClarificationPanel, DelegatePanel, ExplanationPanel } from './AgentPausePanel';
 
+// Seed prompt used when the user invokes "report" mode (or a report hand-off)
+// without typing an explicit instruction. The unified analyst loads its
+// `report` skill and emits `write_report` within a normal explore run.
+const REPORT_SEED_PROMPT = 'Write a report summarizing the exploration.';
+
 const AgentWorkingOverlay: FC<{ message?: string; elapsed?: number; theme: Theme; onCancel?: () => void; color?: 'primary' | 'warning' }> = ({ message, elapsed, theme, onCancel, color = 'primary' }) => {
     const { t } = useTranslation();
     // `message` is the running plan: steps joined by the STEP_SEP control char
@@ -160,7 +165,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
     const [mentionedTableIds, setMentionedTableIds] = useState<string[]>([]);
     const [mentionDropdownOpen, setMentionDropdownOpen] = useState(false);
     const [mentionHighlightIdx, setMentionHighlightIdx] = useState(0);
-    const [selectedAgent, setSelectedAgent] = useState<'explore' | 'report'>('explore');
     const [attachedImages, setAttachedImages] = useState<string[]>([]);
     const [attachedFiles, setAttachedFiles] = useState<{ name: string; content: string }[]>([]);
     const fileInputRef = useRef<HTMLInputElement | null>(null);
@@ -404,10 +408,10 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
     }, [pendingClarification, draftNodes]);
 
     // ── Shared structured thread context builder (Tier 2 + Tier 3) ──
-    // Produces the same focused/peripheral thread context used by both the
-    // data agent (exploreFromChat) and the report agent (reportFromChat), so
-    // the report has the actual exploration narrative — user questions, agent
-    // thinking, findings — instead of just a flat list of charts.
+    // Produces the focused/peripheral thread context used by the analyst
+    // (exploreFromChat), so the report has the actual exploration narrative —
+    // user questions, agent thinking, findings — instead of just a flat list
+    // of charts.
     const buildThreadContext = useCallback((targetTableId: string): {
         focusedThread: any[] | undefined;
         otherThreads: any[] | undefined;
@@ -605,44 +609,40 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             max_iterations: 10,
         };
 
-        // ── Dev toggle: route through the unified AnalystAgent (design-35/36) ──
-        // Set localStorage `df_useAnalystAgent` = '1' to opt in. The unified
-        // agent can also write reports inside the same run, so we ship the
-        // available charts (same shape the report agent gets) for the report
-        // skill's inspect_chart. Additive: the legacy data agent ignores them.
-        const useAnalyst = localStorage.getItem('df_useAnalystAgent') === '1';
-        const streamUrl = useAnalyst ? getUrls().ANALYST_STREAMING : getUrls().DATA_AGENT_STREAMING;
-        const availableCharts = useAnalyst
-            ? charts
-                .filter(c => c.chartType !== 'Table' && c.chartType !== 'Auto')
-                .filter(c => tables.some(t => t.id === c.tableRef))
-                .map(c => {
-                    const tbl = tables.find(t => t.id === c.tableRef);
-                    const encodings: Record<string, string> = {};
-                    if (c.encodingMap) {
-                        for (const [ch, enc] of Object.entries(c.encodingMap)) {
-                            if ((enc as any)?.fieldID) {
-                                const field = conceptShelfItems.find(f => f.id === (enc as any).fieldID);
-                                if (field) encodings[ch] = field.name;
-                            }
+        // ── Route through the unified AnalystAgent (design-35/36) ──
+        // The unified agent can also write reports inside the same run, so we
+        // ship the available charts (same shape the report flow gets) for the
+        // report skill's inspect_chart.
+        const streamUrl = getUrls().ANALYST_STREAMING;
+        const availableCharts = charts
+            .filter(c => c.chartType !== 'Table' && c.chartType !== 'Auto')
+            .filter(c => tables.some(t => t.id === c.tableRef))
+            .map(c => {
+                const tbl = tables.find(t => t.id === c.tableRef);
+                const encodings: Record<string, string> = {};
+                if (c.encodingMap) {
+                    for (const [ch, enc] of Object.entries(c.encodingMap)) {
+                        if ((enc as any)?.fieldID) {
+                            const field = conceptShelfItems.find(f => f.id === (enc as any).fieldID);
+                            if (field) encodings[ch] = field.name;
                         }
                     }
-                    return {
-                        chart_id: c.id,
-                        chart_type: c.chartType,
-                        encodings,
-                        table_ref: tbl?.virtual?.tableId || c.tableRef,
-                        code: tbl?.derive?.code || '',
-                        chart_data: tbl ? { name: tbl.virtual?.tableId || tbl.id, rows: tbl.rows.slice(0, 50) } : undefined,
-                        // Optional rendered image: the agent reads charts from
-                        // data + encodings, but a cached PNG (when available)
-                        // lets it visually confirm a pre-existing chart. Prefer
-                        // the downscaled thumbnail to keep the request lean.
-                        chart_image: chartThumbnails[c.id] || getCachedChart(c.id)?.thumbnailDataUrl || undefined,
-                    };
-                })
-            : [];
-        if (useAnalyst) requestBody.charts = availableCharts;
+                }
+                return {
+                    chart_id: c.id,
+                    chart_type: c.chartType,
+                    encodings,
+                    table_ref: tbl?.virtual?.tableId || c.tableRef,
+                    code: tbl?.derive?.code || '',
+                    chart_data: tbl ? { name: tbl.virtual?.tableId || tbl.id, rows: tbl.rows.slice(0, 50) } : undefined,
+                    // Optional rendered image: the agent reads charts from
+                    // data + encodings, but a cached PNG (when available)
+                    // lets it visually confirm a pre-existing chart. Prefer
+                    // the downscaled thumbnail to keep the request lean.
+                    chart_image: chartThumbnails[c.id] || getCachedChart(c.id)?.thumbnailDataUrl || undefined,
+                };
+            });
+        requestBody.charts = availableCharts;
 
         if (isResume) {
             // Resume: just send the assembled prompt as user_question. The
@@ -737,9 +737,8 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         // The unified agent can write a report inside the same run: it emits an
         // `action`(write_report) commitment followed by `text_delta` events on
         // channel "report". We create a GeneratedReport on first signal, switch
-        // to the report view, and stream the markdown in — mirroring the
-        // standalone reportFromChat coalescing (90ms flush so Tiptap re-parses
-        // ~10×/sec instead of per-token).
+        // to the report view, and stream the markdown in, coalescing updates
+        // (90ms flush so Tiptap re-parses ~10×/sec instead of per-token).
         let reportId: string | null = null;
         let accumulatedReportMarkdown = '';
         let reportLastDispatched = '';
@@ -776,7 +775,13 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 createdAt: Date.now(),
                 status: 'generating',
                 prompt: agentPrompt,
-                triggerTableId: focusedTableId,
+                // Anchor to the run's current table (the draft's table) so the
+                // thread can render the generating card. While streaming, the
+                // card is rendered INSIDE the draft block (after the thinking
+                // steps) — never via pushReportItems — so it sits below the
+                // prompt, not above it. On completion it flips to 'completed'
+                // and pushReportItems renders it in the artifact slot.
+                triggerTableId: lastCreatedTableId || focusedTableId,
             };
             dispatch(dfActions.saveGeneratedReport(inProgressReport));
             dispatch(dfActions.setFocused({ type: 'report', reportId: newId }));
@@ -797,16 +802,16 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             // write_report commitment → create the report + switch view.
             if (result.type === "action" && result.action === "write_report") {
                 ensureReport();
-                // Flush any buffered agent reasoning as its own step first, so
-                // it reads as a discrete prior step rather than running into the
-                // "outputting write_report" line (mirrors the tool_start flush).
+                // Flush any buffered agent reasoning as its own step. We do NOT
+                // add an "outputting write_report" step — the live generating
+                // report card already indicates that the report is being
+                // written, so the explicit step would be redundant.
                 if (pendingThought) {
                     thinkingSteps.push(pendingThought);
                     pendingThought = '';
-                }
-                thinkingSteps.push(t('dataThread.producingAction', { action: 'write_report' }));
-                if (currentDraftId) {
-                    dispatch(dfActions.updateDraftRunningPlan({ draftId: currentDraftId, plan: thinkingSteps.join(STEP_SEP) }));
+                    if (currentDraftId) {
+                        dispatch(dfActions.updateDraftRunningPlan({ draftId: currentDraftId, plan: thinkingSteps.join(STEP_SEP) }));
+                    }
                 }
                 return;
             }
@@ -1165,14 +1170,14 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 const target = (result.target === 'report_gen' ? 'report_gen' : 'data_loading') as 'data_loading' | 'report_gen';
 
                 if (target === 'report_gen') {
-                    // Auto-delegate to the report agent — no user approval gate.
+                    // Auto-delegate to the report flow — no user approval gate.
                     // When the user asks for a report, jumping straight into
                     // report generation is the expected behavior, so we pick the
                     // agent's first seed prompt (falling back to its message) and
                     // hand off directly. The report_gen handoff useEffect picks
-                    // this up and starts reportFromChat. The placeholder draft
-                    // has no role in the report view, so we drop it like a normal
-                    // completion would.
+                    // this up and re-runs the analyst with the seeded prompt. The
+                    // placeholder draft has no role in the report view, so we
+                    // drop it like a normal completion would.
                     if (currentDraftId) {
                         thinkingSteps = [];
                         pendingThought = '';
@@ -1225,7 +1230,12 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 const summary = result.status === "max_iterations"
                     ? translateBackend(rawSummary, result.content?.summary_code) || t('chartRec.maxIterationsReached')
                     : rawSummary;
-                // Finalize any report streamed during this run.
+                // Finalize any report streamed during this run. A report is an
+                // artifact that OWNS its closing summary: it anchors to the
+                // newest table created this run, or falls back to the focused
+                // table when the run only summarized existing exploration (no
+                // new table) — never detached.
+                const reportAnchorTableId = reportId ? (lastCreatedTableId || focusedTableId) : null;
                 if (reportId) {
                     reportFlushNow();
                     const titleMatch = accumulatedReportMarkdown.match(/^#\s+(.+)$/m);
@@ -1234,15 +1244,18 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                         content: accumulatedReportMarkdown,
                         status: 'completed',
                         title: titleMatch ? titleMatch[1].trim() : undefined,
-                        // Anchor the report to the latest table created this run
-                        // so it attaches to the newest thread item, like charts.
-                        triggerTableId: lastCreatedTableId || undefined,
+                        triggerTableId: reportAnchorTableId || undefined,
+                        // The closing answer lives on the report (rendered below
+                        // its card, deleted with it) — not on a table.
+                        summary: summary || undefined,
+                        summaryThought: result.content?.thought || undefined,
                     }));
                 }
-                if (lastCreatedTableId) {
-                    // The run produced an artifact (table / chart / report). Its
-                    // closing answer renders once as that table's after-summary
-                    // entry — exactly like a chart's summary.
+                // For a NON-report run, the closing answer renders once as the
+                // created table's after-summary entry — exactly like a chart's
+                // summary. (Report runs own their summary; see above.)
+                const summaryAnchorTableId = reportId ? null : lastCreatedTableId;
+                if (summaryAnchorTableId) {
                     if (summary) {
                         const entry: InteractionEntry = {
                             from: 'data-agent', to: 'user', role: 'summary',
@@ -1250,9 +1263,9 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                             content: summary,
                             timestamp: Date.now(),
                         };
-                        dispatch(dfActions.appendTriggerInteraction({ tableId: lastCreatedTableId, entries: [entry] }));
+                        dispatch(dfActions.appendTriggerInteraction({ tableId: summaryAnchorTableId, entries: [entry] }));
                     }
-                } else if (summary && currentDraftId) {
+                } else if (!reportId && summary && currentDraftId) {
                     // Pure Q&A run — the agent committed no action and answered in
                     // plain text (e.g. the user just asked a question). There's no
                     // table to anchor to. Treat the closing answer as an `explain`
@@ -1325,6 +1338,20 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                             timestamp: Date.now(), type: 'error',
                             component: 'data-agent', value: errMsg,
                         }));
+                        // Finalize and anchor any report streamed so far so a
+                        // partial report isn't left unanchored (invisible in the
+                        // thread) and stuck in the 'generating' state.
+                        if (reportId) {
+                            reportFlushNow();
+                            const titleMatch = accumulatedReportMarkdown.match(/^#\s+(.+)$/m);
+                            dispatch(dfActions.updateGeneratedReportContent({
+                                id: reportId,
+                                content: accumulatedReportMarkdown,
+                                status: 'completed',
+                                title: titleMatch ? titleMatch[1].trim() : undefined,
+                                triggerTableId: lastCreatedTableId || focusedTableId || undefined,
+                            }));
+                        }
                         if (currentDraftId) {
                             dispatch(dfActions.appendDraftInteraction({ draftId: currentDraftId, entry: {
                                 from: 'data-agent', to: 'user', role: 'error',
@@ -1387,217 +1414,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         })();
     }, [focusedTableId, tables, draftNodes, activeModel, config, conceptShelfItems, charts, dispatch, t, attachedImages, attachedFiles]);
 
-    // ── Report generation via report agent ──────────────────────────
-
-    const reportFromChat = useCallback(async (prompt: string) => {
-        if (!focusedTableId) return;
-
-        const cleanPrompt = prompt.trim() || 'Create a report summarizing the exploration.';
-
-        setChatPrompt('');
-        lastAutoFocusedChartIdRef.current = focusedId?.type === 'chart' ? focusedId.chartId : null;
-        firstFocusedThisRunRef.current = false;
-        userChartFocusLockedRef.current = false;
-        setIsChatFormulating(true);
-
-        // Build available charts list
-        const availableCharts = charts
-            .filter(c => c.chartType !== 'Table' && c.chartType !== 'Auto')
-            .filter(c => tables.some(t => t.id === c.tableRef))
-            .map(c => {
-                const tbl = tables.find(t => t.id === c.tableRef);
-                const encodings: Record<string, string> = {};
-                if (c.encodingMap) {
-                    for (const [ch, enc] of Object.entries(c.encodingMap)) {
-                        if ((enc as any)?.fieldID) {
-                            const field = conceptShelfItems.find(f => f.id === (enc as any).fieldID);
-                            if (field) encodings[ch] = field.name;
-                        }
-                    }
-                }
-                return {
-                    chart_id: c.id,
-                    chart_type: c.chartType,
-                    encodings,
-                    table_ref: tbl?.virtual?.tableId || c.tableRef,
-                    code: tbl?.derive?.code || '',
-                    chart_data: tbl ? { name: tbl.virtual?.tableId || tbl.id, rows: tbl.rows.slice(0, 50) } : undefined,
-                };
-            });
-
-        const selectedChartIds = availableCharts.map(c => c.chart_id);
-
-        // Create a report entry and switch to report view
-        const reportId = `report-${Date.now()}`;
-        const inProgressReport: GeneratedReport = {
-            id: reportId,
-            content: '',
-            selectedChartIds,
-            createdAt: Date.now(),
-            status: 'generating',
-            prompt: cleanPrompt,
-            triggerTableId: focusedTableId,
-        };
-        dispatch(dfActions.saveGeneratedReport(inProgressReport));
-        dispatch(dfActions.setFocused({ type: 'report', reportId }));
-        dispatch(dfActions.setViewMode('report'));
-
-        const actionTables = selectedTableIds.map(id => tables.find(t => t.id === id) as DictTable).filter(Boolean);
-
-        // Send the same structured exploration narrative the data agent gets,
-        // so the report is grounded in the actual thread (user questions, agent
-        // thinking, findings) rather than a flat list of charts.
-        const { focusedThread, otherThreads } = buildThreadContext(focusedTableId);
-
-        const body = JSON.stringify({
-            model: activeModel,
-            input_tables: actionTables.map(t => ({
-                name: t.virtual?.tableId || t.id.replace(/\.[^/.]+$/, ''),
-            })),
-            primary_tables: primaryTableIds.map(id => {
-                const t = tables.find(tbl => tbl.id === id);
-                return t?.virtual?.tableId || id.replace(/\.[^/.]+$/, '');
-            }),
-            charts: availableCharts,
-            user_prompt: cleanPrompt,
-            ...(focusedThread ? { focused_thread: focusedThread } : {}),
-            ...(otherThreads ? { other_threads: otherThreads } : {}),
-        });
-
-        const controller = new AbortController();
-        agentAbortRef.current = controller;
-        let accumulatedMarkdown = '';
-
-        // Coalesce per-token updates: dispatching on every text_delta forces the
-        // Tiptap editor to re-parse the entire document each time, which makes
-        // the stream feel chunky / non-streaming. Batch updates on a short
-        // timer so the editor refreshes ~10×/sec while the wire still streams.
-        const FLUSH_INTERVAL_MS = 90;
-        let lastDispatched = '';
-        let flushTimer: ReturnType<typeof setTimeout> | null = null;
-        const flushNow = () => {
-            if (flushTimer) {
-                clearTimeout(flushTimer);
-                flushTimer = null;
-            }
-            if (accumulatedMarkdown === lastDispatched) return;
-            lastDispatched = accumulatedMarkdown;
-            const titleMatch = accumulatedMarkdown.match(/^#\s+(.+)$/m);
-            dispatch(dfActions.updateGeneratedReportContent({
-                id: reportId,
-                content: accumulatedMarkdown,
-                title: titleMatch ? titleMatch[1].trim() : undefined,
-            }));
-        };
-        const scheduleFlush = () => {
-            if (flushTimer) return;
-            flushTimer = setTimeout(() => {
-                flushTimer = null;
-                flushNow();
-            }, FLUSH_INTERVAL_MS);
-        };
-
-        try {
-            for await (const event of streamRequest(getUrls().GENERATE_REPORT_CHAT, {
-                method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
-                body,
-            }, controller.signal)) {
-                if (event.type === 'text_delta') {
-                    accumulatedMarkdown += (event as any).content;
-                    scheduleFlush();
-                } else if (event.type === 'tool_start') {
-                    // Mirror the data agent: surface what the agent is inspecting.
-                    const ev = event as any;
-                    let label = t('dataThread.thinking');
-                    let doneLabel: string | undefined;
-                    let chartDescs: { chartType: string; name: string }[] | undefined;
-                    if (ev.tool === 'inspect_chart') {
-                        // Resolve chart ids to descriptors: chart type (for the
-                        // icon) plus a display name — the insight title when we
-                        // have one, otherwise the encoded fields ("a × b × c").
-                        const ids: string[] = ev.chart_ids || [];
-                        chartDescs = ids
-                            .map(id => {
-                                const c = charts.find(cc => cc.id === id);
-                                if (!c) return undefined;
-                                let name = c.insight?.title;
-                                if (!name) {
-                                    const fields = Object.values(c.encodingMap)
-                                        .map(enc => enc.fieldID)
-                                        .filter((fid): fid is string => !!fid)
-                                        .map(fid => conceptShelfItems.find(f => f.id === fid)?.name)
-                                        .filter((n): n is string => !!n);
-                                    name = fields.length ? fields.join(' × ') : c.chartType;
-                                }
-                                return { chartType: c.chartType, name };
-                            })
-                            .filter((d): d is { chartType: string; name: string } => !!d);
-                        label = t('report.inspectingCharts');
-                        doneLabel = t('report.inspectedCharts');
-                    } else if (ev.tool === 'inspect_source_data') {
-                        const names = ev.table_names?.join(', ') || '';
-                        label = t('dataThread.inspectingData') + (names ? ` ${names}` : '');
-                        doneLabel = t('dataThread.inspectedData') + (names ? ` ${names}` : '');
-                    }
-                    dispatch(dfActions.updateGeneratedReportProgress({
-                        id: reportId,
-                        kind: 'start',
-                        label,
-                        doneLabel,
-                        charts: chartDescs,
-                    }));
-                } else if (event.type === 'tool_result') {
-                    // Flip the matching pending inspect step to done.
-                    dispatch(dfActions.updateGeneratedReportProgress({
-                        id: reportId,
-                        kind: 'end',
-                    }));
-                } else if (event.type === 'error') {
-                    const errMsg = event.error ? getErrorMessage(event.error) : t('messages.error');
-                    accumulatedMarkdown += `\n\n**Error:** ${errMsg}`;
-                    dispatch(dfActions.addMessages({
-                        timestamp: Date.now(), type: 'error',
-                        component: 'report-agent', value: errMsg,
-                    }));
-                    flushNow();
-                } else if (event.type === 'warning') {
-                    dispatch(dfActions.addMessages({
-                        timestamp: Date.now(), type: 'warning',
-                        component: 'report-agent',
-                        value: (event as any).warning?.message ?? 'Warning from server',
-                    }));
-                }
-            }
-
-            // Final update with completed status — make sure the latest content
-            // is in state before we mark it complete.
-            flushNow();
-            const titleMatch = accumulatedMarkdown.match(/^#\s+(.+)$/m);
-            dispatch(dfActions.updateGeneratedReportContent({
-                id: reportId,
-                content: accumulatedMarkdown,
-                status: 'completed',
-                title: titleMatch ? titleMatch[1].trim() : undefined,
-            }));
-        } catch (error: any) {
-            if (error.name !== 'AbortError') {
-                dispatch(dfActions.updateGeneratedReportContent({
-                    id: reportId,
-                    content: accumulatedMarkdown + `\n\n**Error:** ${error.message}`,
-                    status: 'error',
-                }));
-            }
-        } finally {
-            if (flushTimer) {
-                clearTimeout(flushTimer);
-                flushTimer = null;
-            }
-            agentAbortRef.current = null;
-            setIsChatFormulating(false);
-        }
-    }, [focusedTableId, charts, tables, selectedTableIds, primaryTableIds, conceptShelfItems, activeModel, dispatch, buildThreadContext]);
-
     // Honor cross-component handoff requests targeting the Report Gen
     // agent (e.g. Data Agent's `delegate` card with target='report_gen').
     // Hand-offs targeting other agents (e.g. `data_loading`) are consumed
@@ -1607,19 +1423,16 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         if (agentHandoffRequest && agentHandoffRequest.target === 'report_gen') {
             const promptText = agentHandoffRequest.prompt;
             dispatch(dfActions.clearAgentHandoffRequest());
-            // Fire-and-forget: reportFromChat manages its own streaming
-            // state via Redux dispatches.
-            reportFromChat(promptText);
+            // The unified analyst writes reports in-run via its `report`
+            // skill, so a report hand-off is just an explore run seeded with
+            // a report instruction.
+            exploreFromChat(promptText.trim() || REPORT_SEED_PROMPT);
         }
         // eslint-disable-next-line react-hooks/exhaustive-deps
     }, [agentHandoffRequest]);
 
     // ── Unified submit handler ───────────────────────────────────────
     const submitChat = useCallback((prompt: string, clarificationCtx?: any, displayPrompt?: string) => {
-        if (selectedAgent === 'report') {
-            reportFromChat(prompt);
-            return;
-        }
         if (clarificationCtx) {
             // Build the structured response payload. The backend assembles
             // the final LLM-facing text ("Selected answers: 1. xxx; 2. yyy\n
@@ -1644,7 +1457,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             return;
         }
         exploreFromChat(prompt, undefined, displayPrompt);
-    }, [reportFromChat, exploreFromChat, selectedAgent, clarificationQuestions, clarifyAnswers]);
+    }, [exploreFromChat, clarificationQuestions, clarifyAnswers]);
 
     // Replay a workflow: the KnowledgePanel fires `df-replay-workflow`
     // with a prompt describing the captured workflow; we hand it straight to
@@ -1758,8 +1571,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         }
     }, [pendingClarification, dispatch, t]);
 
-    const isReportMode = selectedAgent === 'report';
-
     // Landing / "no thread yet" highlight: when the user has loaded data
     // but hasn't started an exploration on the focused table (no real
     // charts AND the table isn't part of a derivation chain), gently pulse
@@ -1802,9 +1613,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             transition: transition.fast,
             backgroundColor: isChatFormulating
                 ? alpha(theme.palette.action.disabledBackground, 0.06)
-                : isReportMode
-                    ? alpha(theme.palette.warning.main, 0.04)
-                    : theme.palette.background.paper,
+                : theme.palette.background.paper,
             // Neutral elevation shadow recipe shared with AgentChatInput;
             // hover lifts the card a touch without shifting any colors.
             boxShadow: '0 1px 6px rgba(32, 33, 36, 0.10), 0 1px 2px rgba(32, 33, 36, 0.06)',
@@ -1824,8 +1633,8 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             } : {}),
             '&:focus-within': {
                 animation: 'none',
-                borderColor: isReportMode ? theme.palette.warning.main : theme.palette.primary.main,
-                boxShadow: `0 0 0 2px ${alpha(isReportMode ? theme.palette.warning.main : theme.palette.primary.main, 0.15)}, 0 2px 10px rgba(32, 33, 36, 0.14)`,
+                borderColor: theme.palette.primary.main,
+                boxShadow: `0 0 0 2px ${alpha(theme.palette.primary.main, 0.15)}, 0 2px 10px rgba(32, 33, 36, 0.14)`,
             },
         }}
         >
@@ -2015,7 +1824,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     }
                     if (event.key === 'Tab' && !event.shiftKey && chatPrompt.trim() === '' && !isChatFormulating) {
                         event.preventDefault();
-                        setChatPrompt(isReportMode ? t('chartRec.threadReportPrompt') : t('chartRec.threadExplorePrompt'));
+                        setChatPrompt(t('chartRec.threadExplorePrompt'));
                     }
                     if (event.key === 'Enter' && !event.shiftKey) {
                         event.preventDefault();
@@ -2043,9 +1852,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 placeholder={
                     pendingClarification
                         ? t('chartRec.replyPlaceholder')
-                        : isReportMode
-                            ? t(rootTables.length <= 1 ? 'chartRec.reportPlaceholderSingleTable' : 'chartRec.reportPlaceholder')
-                            : t(rootTables.length <= 1 ? 'chartRec.explorePlaceholderSingleTable' : 'chartRec.explorePlaceholder')
+                        : t(rootTables.length <= 1 ? 'chartRec.explorePlaceholderSingleTable' : 'chartRec.explorePlaceholder')
                 }
                 fullWidth
                 multiline
@@ -2076,36 +1883,24 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                             <AddIcon sx={{ fontSize: 18 }} />
                         </IconButton>
                     </Tooltip>
-                    {/* Agent mode toggle */}
-                    <Tooltip title={selectedAgent === 'explore' ? t('chartRec.switchToReport') : t('chartRec.switchToExplore')}>
-                        <Button
-                            size="small"
-                            onClick={() => setSelectedAgent(prev => prev === 'explore' ? 'report' : 'explore')}
-                            sx={{
-                                textTransform: 'none',
-                                fontSize: 11,
-                                minWidth: 0,
-                                px: 0.875,
-                                py: 0,
-                                height: 26,
-                                color: isReportMode ? theme.palette.warning.main : theme.palette.primary.main,
-                                borderRadius: '4px',
-                                display: 'flex',
-                                alignItems: 'center',
-                                gap: '3px',
-                                '&:hover': { backgroundColor: alpha(isReportMode ? theme.palette.warning.main : theme.palette.primary.main, 0.08) },
-                            }}
-                        >
-                            {selectedAgent === 'explore' ? t('chartRec.modeExplore') : t('chartRec.modeReport')}
-                        </Button>
-                    </Tooltip>
                 </Box>
                 <Box sx={{ display: 'flex', flexDirection: 'row', alignItems: 'center', gap: 0.25, flexShrink: 0 }}>
                 {isChatFormulating ? (
                     <CircularProgress size={18} sx={{ m: 0.5 }} />
                 ) : (
                     <>
-                        {!isReportMode && (
+                        <Tooltip title={t('chartRec.generateReport')}>
+                            <span>
+                                <IconButton
+                                    size="small"
+                                    sx={{ p: 0.5, color: theme.palette.text.secondary }}
+                                    disabled={!focusedTableId || isChatFormulating || !!pendingClarification}
+                                    onClick={() => submitChat(t('chartRec.reportPrompt'), undefined, t('chartRec.askedForReport'))}
+                                >
+                                    <EditOutlinedIcon sx={{ fontSize: 18 }} />
+                                </IconButton>
+                            </span>
+                        </Tooltip>
                         <Tooltip title={t('chartRec.getIdeaSuggestions')}>
                             <span>
                                 <IconButton
@@ -2118,7 +1913,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                                 </IconButton>
                             </span>
                         </Tooltip>
-                        )}
                         <Tooltip title={t('chartRec.explore')}>
                             <span>
                                 <IconButton
@@ -2165,7 +1959,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     message={draftNodes.find(d => d.derive?.status === 'running' && threadTableIds.has(d.derive.trigger.tableId))
                             ?.derive?.runningPlan}
                     theme={theme}
-                    color={isReportMode ? 'warning' : 'primary'}
+                    color={'primary'}
                     onCancel={cancelAgent}
                 />
             )}
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 9673a9ca..8a6a15bb 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -86,7 +86,6 @@ import { formatCellValue } from './ViewUtils';
 
 
 import { dfSelectors } from '../app/dfSlice';
-import { ChartRecBox } from './ChartRecBox';
 import { CodeExplanationCard, ConceptExplCards, extractConceptExplanations } from './ExplComponents';
 import CodeIcon from '@mui/icons-material/Code';
 
@@ -1261,17 +1260,7 @@ export const VisualizationViewFC: FC<VisPanelProps> = function VisualizationView
                                     const hasThread = hasRealCharts || hasDerivation;
 
                                     if (hasThread) {
-                                        return (
-                                            <>
-                                                {focusedTableId ? <ChartRecBox sx={{margin: 'auto'}} tableId={focusedTableId as string} placeHolderChartId={focusedChartId as string} /> : null}
-                                                <Divider sx={{my: 4, width: '100%', maxWidth: 720}} textAlign='left'>
-                                                    <Typography sx={{fontSize: 11, color: "text.secondary"}}>
-                                                        {t('chart.orStartWithChartType')}
-                                                    </Typography>
-                                                </Divider>
-                                                {chartSelectionBox}
-                                            </>
-                                        );
+                                        return chartSelectionBox;
                                     }
                                     return <EmptyStateHero chartSelectionBox={chartSelectionBox} />;
                                 })()}
diff --git a/tests/backend/agents/test_agent_knowledge_integration.py b/tests/backend/agents/test_agent_knowledge_integration.py
deleted file mode 100644
index 4d738635..00000000
--- a/tests/backend/agents/test_agent_knowledge_integration.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-"""Tests for DataAgent knowledge integration (Phase 3).
-
-Covers:
-- Rules from KnowledgeStore injected into system prompt
-- Both file-based rules and text-based rules coexist
-- No rules → no User Rules section
-- Library knowledge search and injection
-- No matches → no injection
-- Max 5 items limit
-- search_knowledge / read_knowledge tool handlers
-- Tool path traversal rejection
-- Graceful degradation when knowledge store is unavailable
-- Reasoning log records knowledge_search and knowledge_injected
-"""
-
-from __future__ import annotations
-
-import os
-from pathlib import Path
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from data_formulator.agents.data_agent import DataAgent
-
-pytestmark = [pytest.mark.backend]
-
-TEST_IDENTITY = "user:test-knowledge@example.com"
-
-
-RULE_MD = """\
----
-title: ROI Standard
-tags: [finance]
-created: 2026-04-26
-updated: 2026-04-26
----
-
-ROI = (revenue - cost) / cost
-"""
-
-SKILL_MD = """\
----
-title: Handle Missing Values
-tags: [cleaning, pandas]
-created: 2026-04-26
-updated: 2026-04-26
-source: agent_summarized
----
-
-When encountering missing values, use fillna with median.
-"""
-
-
-@pytest.fixture()
-def user_home(tmp_path):
-    """Prepare a user_home with knowledge entries."""
-    rules_dir = tmp_path / "knowledge" / "rules"
-    rules_dir.mkdir(parents=True)
-    (rules_dir / "roi.md").write_text(RULE_MD, encoding="utf-8")
-
-    exp_dir = tmp_path / "knowledge" / "workflows" / "cleaning"
-    exp_dir.mkdir(parents=True)
-    (exp_dir / "missing.md").write_text(SKILL_MD, encoding="utf-8")
-
-    return tmp_path
-
-
-@pytest.fixture()
-def mock_client():
-    c = MagicMock()
-    c.model = "test-model"
-    c.endpoint = "openai"
-    c.params = {"api_key": "test-key"}
-    return c
-
-
-@pytest.fixture()
-def mock_workspace():
-    ws = MagicMock()
-    ws.get_fresh_name = MagicMock(return_value="test-table")
-    ws.user_home = None
-    return ws
-
-
-def _make_agent(mock_client, mock_workspace, user_home, **kwargs):
-    mock_workspace.user_home = user_home
-    return DataAgent(
-        client=mock_client,
-        workspace=mock_workspace,
-        **kwargs,
-    )
-
-
-# ── Rules injection ──────────────────────────────────────────────────────
-
-
-class TestRulesInjection:
-    def test_rules_injected_into_system_prompt(self, mock_client, mock_workspace, user_home):
-        agent = _make_agent(mock_client, mock_workspace, user_home)
-        prompt = agent._build_system_prompt()
-        assert "User Rules" in prompt
-        assert "MANDATORY" in prompt
-        assert "ROI Standard" in prompt
-        assert "ROI = (revenue - cost) / cost" in prompt
-        # User rules should appear BEFORE technical reference material
-        rules_pos = prompt.index("User Rules")
-        assert "Chart Creation Guide" in prompt
-        chart_guide_pos = prompt.index("Chart Creation Guide")
-        assert rules_pos < chart_guide_pos, (
-            "User Rules must be injected before chart guide for higher attention"
-        )
-
-    def test_text_rules_and_knowledge_rules_coexist(
-        self, mock_client, mock_workspace, user_home
-    ):
-        agent = _make_agent(
-            mock_client, mock_workspace, user_home,
-            agent_exploration_rules="Always explain your reasoning",
-        )
-        prompt = agent._build_system_prompt()
-        assert "Always explain your reasoning" in prompt
-        assert "ROI Standard" in prompt
-
-    def test_no_rules_no_section(self, mock_client, mock_workspace, tmp_path):
-        (tmp_path / "knowledge" / "rules").mkdir(parents=True)
-        agent = _make_agent(mock_client, mock_workspace, tmp_path)
-        prompt = agent._build_system_prompt()
-        assert "User Rules" not in prompt
-
-    def test_no_knowledge_store_graceful(self, mock_client, mock_workspace):
-        mock_workspace.user_home = None
-        agent = DataAgent(
-            client=mock_client,
-            workspace=mock_workspace,
-        )
-        prompt = agent._build_system_prompt()
-        assert "User Rules" not in prompt
-
-
-# ── Library knowledge injection ───────────────────────────────────────────
-
-
-class TestKnowledgeSearchInjection:
-    def test_relevant_knowledge_injected(self, mock_client, mock_workspace, user_home):
-        agent = _make_agent(mock_client, mock_workspace, user_home)
-        input_tables = [{"name": "sales_data"}]
-        messages = agent._build_initial_messages(
-            input_tables, "How to handle missing values?",
-        )
-        user_msg = messages[1]["content"]
-        if isinstance(user_msg, list):
-            user_msg = "\n".join(p.get("text", "") for p in user_msg if p.get("type") == "text")
-        assert "[RELEVANT KNOWLEDGE]" in user_msg or agent._injected_knowledge == []
-
-    def test_no_match_no_injection(self, mock_client, mock_workspace, user_home):
-        agent = _make_agent(mock_client, mock_workspace, user_home)
-        input_tables = [{"name": "xyz_table"}]
-        messages = agent._build_initial_messages(
-            input_tables, "xyznonexistent query",
-        )
-        user_msg = messages[1]["content"]
-        if isinstance(user_msg, list):
-            user_msg = "\n".join(p.get("text", "") for p in user_msg if p.get("type") == "text")
-        assert agent._injected_knowledge == []
-
-    def test_max_five_items(self, mock_client, mock_workspace, tmp_path):
-        rules_dir = tmp_path / "knowledge" / "rules"
-        rules_dir.mkdir(parents=True)
-        exp_dir = tmp_path / "knowledge" / "workflows" / "common"
-        exp_dir.mkdir(parents=True)
-        for i in range(10):
-            (exp_dir / f"exp-{i}.md").write_text(
-                f"---\ntitle: Common Workflow {i}\ntags: [common]\n"
-                f"created: 2026-04-26\nupdated: 2026-04-26\n---\n"
-                f"Content about common topic {i}.\n",
-                encoding="utf-8",
-            )
-
-        agent = _make_agent(mock_client, mock_workspace, tmp_path)
-        results = agent._search_relevant_knowledge("common topic", [])
-        assert len(results) <= 5
-
-
-# ── Tool handlers ─────────────────────────────────────────────────────────
-
-
-class TestKnowledgeToolHandlers:
-    def test_search_knowledge_returns_results(self, mock_client, mock_workspace, user_home):
-        agent = _make_agent(mock_client, mock_workspace, user_home)
-        result = agent._handle_search_knowledge({"query": "missing values"})
-        assert "Handle Missing Values" in result
-
-    def test_search_knowledge_no_match(self, mock_client, mock_workspace, user_home):
-        agent = _make_agent(mock_client, mock_workspace, user_home)
-        result = agent._handle_search_knowledge({"query": "xyznonexistent"})
-        assert "No matching" in result
-
-    def test_read_knowledge_returns_content(self, mock_client, mock_workspace, user_home):
-        agent = _make_agent(mock_client, mock_workspace, user_home)
-        result = agent._handle_read_knowledge(
-            {"category": "rules", "path": "roi.md"}
-        )
-        assert "ROI = (revenue - cost) / cost" in result
-
-    def test_read_knowledge_not_found(self, mock_client, mock_workspace, user_home):
-        agent = _make_agent(mock_client, mock_workspace, user_home)
-        result = agent._handle_read_knowledge(
-            {"category": "rules", "path": "ghost.md"}
-        )
-        assert "not found" in result
-
-    def test_read_knowledge_traversal_rejected(self, mock_client, mock_workspace, user_home):
-        agent = _make_agent(mock_client, mock_workspace, user_home)
-        result = agent._handle_read_knowledge(
-            {"category": "rules", "path": "../../etc/passwd.md"}
-        )
-        assert "Invalid path" in result or "not found" in result.lower()
-
-    def test_no_knowledge_store_returns_message(self, mock_client, mock_workspace):
-        mock_workspace.user_home = None
-        agent = DataAgent(client=mock_client, workspace=mock_workspace)
-        result = agent._handle_search_knowledge({"query": "anything"})
-        assert "not available" in result
-
-        result = agent._handle_read_knowledge({"category": "rules", "path": "file.md"})
-        assert "not available" in result
-
-
-# ── Graceful degradation ──────────────────────────────────────────────────
-
-
-class TestGracefulDegradation:
-    def test_agent_works_without_knowledge(self, mock_client, mock_workspace):
-        """Agent with no user_home still constructs valid system prompt."""
-        mock_workspace.user_home = None
-        agent = DataAgent(
-            client=mock_client,
-            workspace=mock_workspace,
-        )
-        prompt = agent._build_system_prompt()
-        assert "data exploration agent" in prompt
-
-    def test_empty_knowledge_dir(self, mock_client, mock_workspace, tmp_path):
-        """Agent with empty knowledge dir works normally."""
-        (tmp_path / "knowledge" / "rules").mkdir(parents=True)
-        (tmp_path / "knowledge" / "workflows").mkdir(parents=True)
-        agent = _make_agent(mock_client, mock_workspace, tmp_path)
-        prompt = agent._build_system_prompt()
-        assert "User Rules" not in prompt
-
-
-# ── Reasoning log integration ─────────────────────────────────────────────
-
-
-class TestReasoningLogIntegration:
-    @patch.dict(os.environ, {"DF_AGENT_LOG": "on"})
-    def test_session_start_includes_rules(self, mock_client, mock_workspace, user_home, tmp_path):
-        """session_start log event should be written (file-based check)."""
-        with patch.dict(os.environ, {"DATA_FORMULATOR_HOME": str(tmp_path)}):
-            agent = _make_agent(
-                mock_client, mock_workspace, user_home,
-                identity_id=TEST_IDENTITY,
-            )
-            rlog = agent._reasoning_log
-            rlog.log(
-                "session_start",
-                rules_injected=["ROI Standard"],
-                knowledge_injected=agent._injected_knowledge,
-            )
-            rlog.close()
-        # Logs are now stored system-level under DATA_FORMULATOR_HOME/agent-logs/
-        logs_dir = tmp_path / "agent-logs"
-        jsonl_files = list(logs_dir.rglob("*.jsonl"))
-        assert len(jsonl_files) >= 1
diff --git a/tests/backend/agents/test_data_agent_clarification.py b/tests/backend/agents/test_data_agent_clarification.py
deleted file mode 100644
index 16107e6c..00000000
--- a/tests/backend/agents/test_data_agent_clarification.py
+++ /dev/null
@@ -1,241 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-"""Tests for DataAgent structured clarification events."""
-
-from __future__ import annotations
-
-import pytest
-
-from data_formulator.agents.data_agent import DataAgent
-
-pytestmark = [pytest.mark.backend]
-
-
-class _FakeClient:
-    model = "test-model"
-
-
-def _agent() -> DataAgent:
-    return DataAgent(client=_FakeClient(), workspace=None)
-
-
-class TestDataAgentClarification:
-    def test_clarify_action_outputs_structured_questions(self, monkeypatch) -> None:
-        agent = _agent()
-
-        def fake_get_next_action(trajectory, input_tables, outer_iteration=0):
-            yield {
-                "type": "agent_action",
-                "action_data": {
-                    "action": "clarify",
-                    "questions": [
-                        {
-                            "text": "Which metric should I use?",
-                            "responseType": "single_choice",
-                            "options": ["Revenue", "Orders"],
-                        }
-                    ],
-                },
-                "reason": "ok",
-                "llm_calls": 1,
-            }
-
-        monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action)
-
-        events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}]))
-
-        assert events[-1]["type"] == "clarify"
-        assert events[-1]["questions"] == [
-            {
-                "text": "Which metric should I use?",
-                "responseType": "single_choice",
-                "required": True,
-                "options": [{"label": "Revenue"}, {"label": "Orders"}],
-            }
-        ]
-        assert "message" not in events[-1]
-        assert "options" not in events[-1]
-
-    def test_tool_rounds_exhausted_outputs_clarify_question(self, monkeypatch) -> None:
-        agent = _agent()
-
-        def fake_get_next_action(trajectory, input_tables, outer_iteration=0):
-            yield {
-                "type": "agent_action",
-                "action_data": None,
-                "reason": "tool_rounds_exhausted",
-                "llm_calls": 12,
-            }
-
-        monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action)
-
-        events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}]))
-
-        clarify = events[-1]
-        assert clarify["type"] == "clarify"
-        assert clarify["questions"][0]["text_code"] == "agent.clarifyExhausted"
-        assert clarify["questions"][0]["options"][0]["label_code"] == "agent.clarifyOptionContinue"
-        assert "id" not in clarify["questions"][0]
-        assert "id" not in clarify["questions"][0]["options"][0]
-        # auto_select was removed; the user is expected to pick an option.
-        assert "auto_select" not in clarify
-
-    def test_clarify_action_preserves_multiple_question_option_groups(self, monkeypatch) -> None:
-        agent = _agent()
-
-        def fake_get_next_action(trajectory, input_tables, outer_iteration=0):
-            yield {
-                "type": "agent_action",
-                "action_data": {
-                    "action": "clarify",
-                    "questions": [
-                        {
-                            "text": "Which metric?",
-                            "options": ["Revenue"],
-                        },
-                        {
-                            "text": "Which period?",
-                            "options": [{"label": "Last 12 months"}],
-                        },
-                    ],
-                },
-                "reason": "ok",
-                "llm_calls": 1,
-            }
-
-        monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action)
-
-        events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}]))
-
-        questions = events[-1]["questions"]
-        assert [q["text"] for q in questions] == ["Which metric?", "Which period?"]
-        assert questions[0]["options"] == [{"label": "Revenue"}]
-        assert questions[1]["options"] == [{"label": "Last 12 months"}]
-        # No id fields anywhere
-        for q in questions:
-            assert "id" not in q
-            for opt in q.get("options", []):
-                assert "id" not in opt
-
-
-class TestDataAgentDelegate:
-    """Tests for the delegate action."""
-
-    def test_emits_delegate_event_for_data_loading(self, monkeypatch) -> None:
-        agent = _agent()
-
-        def fake_get_next_action(trajectory, input_tables, outer_iteration=0):
-            yield {
-                "type": "agent_action",
-                "action_data": {
-                    "action": "delegate",
-                    "thought": "User asked about Q4 sales but no sales table is loaded.",
-                    "target": "data_loading",
-                    "message": "I don't see a sales table loaded — want to import one?",
-                    "options": ["quarterly sales 2024"],
-                },
-                "reason": "ok",
-                "llm_calls": 1,
-            }
-
-        monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action)
-
-        events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}]))
-
-        evt = events[-1]
-        assert evt["type"] == "delegate"
-        assert evt["target"] == "data_loading"
-        assert evt["options"] == ["quarterly sales 2024"]
-        assert evt["message"] == "I don't see a sales table loaded — want to import one?"
-        assert evt["thought"] == "User asked about Q4 sales but no sales table is loaded."
-        assert "trajectory" in evt
-        assert evt["completed_step_count"] == 0
-
-    def test_emits_delegate_event_for_report_gen(self, monkeypatch) -> None:
-        agent = _agent()
-
-        def fake_get_next_action(trajectory, input_tables, outer_iteration=0):
-            yield {
-                "type": "agent_action",
-                "action_data": {
-                    "action": "delegate",
-                    "target": "report_gen",
-                    "message": "Pick an angle for the write-up:",
-                    "options": [
-                        "Write a 200-word executive summary of regional trends.",
-                        "Create a detailed analytical report on regional trends with category breakdowns.",
-                    ],
-                },
-                "reason": "ok",
-                "llm_calls": 1,
-            }
-
-        monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action)
-
-        events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}]))
-
-        evt = events[-1]
-        assert evt["type"] == "delegate"
-        assert evt["target"] == "report_gen"
-        assert len(evt["options"]) == 2
-        assert evt["options"][0] == "Write a 200-word executive summary of regional trends."
-        assert evt["options"][1].startswith("Create a detailed")
-
-    def test_missing_prompt_yields_parse_error(self, monkeypatch) -> None:
-        agent = _agent()
-
-        def fake_get_next_action(trajectory, input_tables, outer_iteration=0):
-            yield {
-                "type": "agent_action",
-                "action_data": {
-                    "action": "delegate",
-                    "target": "data_loading",
-                    "message": "missing",
-                    "options": [""],
-                },
-                "reason": "ok",
-                "llm_calls": 1,
-            }
-
-        monkeypatch.setattr(agent, "_get_next_action", fake_get_next_action)
-
-        events = list(agent.run([], "", trajectory=[{"role": "system", "content": "test"}]))
-
-        # Last event should be an error event (not a delegate).
-        assert events[-1]["type"] != "delegate"
-
-    def test_normalizer_validates_fields(self) -> None:
-        with pytest.raises(ValueError):
-            DataAgent._normalize_delegate_action(
-                {"target": "", "options": ["x"]}
-            )
-        with pytest.raises(ValueError):
-            DataAgent._normalize_delegate_action(
-                {"target": "unknown", "options": ["x"]}
-            )
-        with pytest.raises(ValueError):
-            DataAgent._normalize_delegate_action(
-                {"target": "data_loading", "options": []}
-            )
-        with pytest.raises(ValueError):
-            DataAgent._normalize_delegate_action(
-                {"target": "data_loading", "options": ["   "]}
-            )
-        # Normal multi-option report_gen payload.
-        out = DataAgent._normalize_delegate_action({
-            "target": "  report_gen  ",
-            "message": "  pick one  ",
-            "options": ["  Brief recap.  ", "  Full report.  "],
-        })
-        assert out == {
-            "target": "report_gen",
-            "message": "pick one",
-            "options": ["Brief recap.", "Full report."],
-        }
-        # Message is optional; >2 options are truncated to 2.
-        out2 = DataAgent._normalize_delegate_action({
-            "target": "report_gen",
-            "options": ["A", "B", "C"],
-        })
-        assert out2 == {"target": "report_gen", "options": ["A", "B"]}
diff --git a/tests/backend/agents/test_duckdb_notes_prompt.py b/tests/backend/agents/test_duckdb_notes_prompt.py
index 385313f3..43fd4385 100644
--- a/tests/backend/agents/test_duckdb_notes_prompt.py
+++ b/tests/backend/agents/test_duckdb_notes_prompt.py
@@ -1,29 +1,35 @@
-"""Ensure SHARED_DUCKDB_NOTES contains the non-ASCII identifier quoting rule.
+"""Ensure the core skill's DuckDB notes contain the non-ASCII identifier quoting rule.
 
-This is a regression guard: the DuckDB notes prompt must remind the LLM to
-wrap non-ASCII identifiers in double quotes when generating DuckDB SQL.
+This is a regression guard: the live ``AnalystAgent`` loads its chart-creation
+guidance from the core skill body (``analyst/skills/core/SKILL.md``). The DuckDB
+notes there must remind the LLM to wrap non-ASCII identifiers in double quotes
+when generating DuckDB SQL.
 """
 from __future__ import annotations
 
+from pathlib import Path
+
 import pytest
 
-from data_formulator.agents.chart_creation_guide import SHARED_DUCKDB_NOTES
+import data_formulator
 
 pytestmark = [pytest.mark.backend]
 
+_CORE_SKILL_BODY = (
+    Path(data_formulator.__file__).parent
+    / "analyst"
+    / "skills"
+    / "core"
+    / "SKILL.md"
+).read_text(encoding="utf-8")
+
 
 def test_duckdb_notes_mentions_non_ascii_double_quoting() -> None:
-    lower = SHARED_DUCKDB_NOTES.lower()
+    lower = _CORE_SKILL_BODY.lower()
     assert "non-ascii" in lower or "non ascii" in lower
-    assert "double quotes" in lower or '"' in SHARED_DUCKDB_NOTES
+    assert '"' in _CORE_SKILL_BODY
 
 
 def test_duckdb_notes_mentions_identifier_quoting_rule() -> None:
     """The prompt should contain an explicit quoting rule for identifiers."""
-    assert "identifier" in SHARED_DUCKDB_NOTES.lower()
-
-
-def test_duckdb_notes_is_not_excessively_long() -> None:
-    """Overly long DuckDB notes can confuse models about the JSON output format.
-    Keep it under 800 characters to avoid prompt bloat."""
-    assert len(SHARED_DUCKDB_NOTES) < 800
+    assert "identifier" in _CORE_SKILL_BODY.lower()
diff --git a/tests/backend/agents/test_interactive_explore_context.py b/tests/backend/agents/test_interactive_explore_context.py
deleted file mode 100644
index 5680da67..00000000
--- a/tests/backend/agents/test_interactive_explore_context.py
+++ /dev/null
@@ -1,127 +0,0 @@
-"""Tests for recommendation-question context construction and inspect behavior."""
-from __future__ import annotations
-
-from datetime import datetime, timezone
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
-
-import pandas as pd
-import pytest
-
-from data_formulator.agents.agent_interactive_explore import InteractiveExploreAgent
-from data_formulator.agents.agent_utils import format_dataframe_sample_with_budget
-from data_formulator.agents.context import build_lightweight_table_context
-from data_formulator.datalake.workspace_metadata import (
-    ColumnInfo,
-    TableMetadata,
-    WorkspaceMetadata,
-)
-
-pytestmark = [pytest.mark.backend]
-
-
-@pytest.fixture()
-def workspace_with_metadata():
-    workspace = MagicMock()
-    workspace.read_data_as_df.return_value = pd.DataFrame({
-        "category": ["office", "electronics", "office", "furniture", "office"],
-        "profit": [10, 20, 15, 5, 30],
-    })
-    workspace.get_relative_data_file_path.return_value = "data/sales.parquet"
-
-    metadata = WorkspaceMetadata.create_new()
-    metadata.add_table(TableMetadata(
-        name="sales",
-        source_type="data_loader",
-        filename="sales.parquet",
-        file_type="parquet",
-        created_at=datetime.now(timezone.utc),
-        description="Sales performance table",
-        columns=[
-            ColumnInfo("category", "text", description="Business category"),
-            ColumnInfo("profit", "float64", description="Net profit"),
-        ],
-    ))
-    workspace.get_metadata.return_value = metadata
-    return workspace
-
-
-class TestRecommendationContext:
-    def test_lightweight_context_includes_metadata_and_field_values(self, workspace_with_metadata):
-        context = build_lightweight_table_context(
-            [{"name": "sales"}],
-            workspace_with_metadata,
-        )
-
-        assert "Sales performance table" in context
-        assert "Business category" in context
-        assert "Net profit" in context
-        assert "Field value samples" in context
-        assert "office" in context
-        assert "electronics" in context
-        assert "Numeric stats" in context
-
-    def test_sample_rows_floor_down_to_fit_budget(self):
-        df = pd.DataFrame({
-            "name": ["alpha" * 20, "beta" * 20, "gamma" * 20],
-            "value": [1, 2, 3],
-        })
-
-        sample, displayed_rows, truncated = format_dataframe_sample_with_budget(
-            df,
-            max_rows=3,
-            max_chars=150,
-            index=False,
-        )
-
-        assert len(sample) <= 150
-        assert displayed_rows < 3
-        assert truncated is True
-
-
-class TestInteractiveExploreAgent:
-    def test_run_skips_inspect_round_by_default(self, workspace_with_metadata):
-        client = MagicMock()
-        client.get_completion.return_value = [
-            SimpleNamespace(
-                choices=[
-                    SimpleNamespace(
-                        delta=SimpleNamespace(content='{"type":"question","text":"Q","goal":"G","tag":"pivot"}\n')
-                    )
-                ]
-            )
-        ]
-
-        agent = InteractiveExploreAgent(client=client, workspace=workspace_with_metadata)
-
-        with patch.object(agent, "_run_inspect_round", wraps=agent._run_inspect_round) as inspect_round:
-            chunks = list(agent.run([{"name": "sales"}]))
-
-        assert inspect_round.call_count == 0
-        text_chunks = [c for c in chunks if isinstance(c, str)]
-        assert text_chunks == ['{"type":"question","text":"Q","goal":"G","tag":"pivot"}\n']
-
-    def test_run_yields_progress_events_in_order(self, workspace_with_metadata):
-        """Progress events must appear before any LLM text chunks."""
-        client = MagicMock()
-        client.get_completion.return_value = [
-            SimpleNamespace(
-                choices=[
-                    SimpleNamespace(
-                        delta=SimpleNamespace(content='{"type":"question","text":"Q","goal":"G","tag":"pivot"}\n')
-                    )
-                ]
-            )
-        ]
-
-        agent = InteractiveExploreAgent(client=client, workspace=workspace_with_metadata)
-        chunks = list(agent.run([{"name": "sales"}]))
-
-        progress_events = [c for c in chunks if isinstance(c, dict) and c.get("type") == "progress"]
-        assert len(progress_events) == 2
-        assert progress_events[0]["phase"] == "building_context"
-        assert progress_events[1]["phase"] == "generating"
-
-        first_text_idx = next(i for i, c in enumerate(chunks) if isinstance(c, str))
-        last_progress_idx = max(i for i, c in enumerate(chunks) if isinstance(c, dict) and c.get("type") == "progress")
-        assert last_progress_idx < first_text_idx
diff --git a/tests/backend/errors/test_api_error_protocol_contract.py b/tests/backend/errors/test_api_error_protocol_contract.py
index b68a9563..2e377934 100644
--- a/tests/backend/errors/test_api_error_protocol_contract.py
+++ b/tests/backend/errors/test_api_error_protocol_contract.py
@@ -86,7 +86,7 @@ class TestStreamingErrorProtocol:
 
     def test_stream_preflight_error_uses_json_error_envelope(self, agents_client):
         resp = agents_client.post(
-            "/api/agent/data-agent-streaming",
+            "/api/agent/analyst-streaming",
             data="not json",
             content_type="text/plain",
         )
@@ -95,7 +95,7 @@ def test_stream_preflight_error_uses_json_error_envelope(self, agents_client):
         assert body["status"] == "error"
         assert body["error"]["code"] == ErrorCode.INVALID_REQUEST
 
-    def test_data_agent_streaming_emits_top_level_type_events(self, agents_client):
+    def test_analyst_streaming_emits_top_level_type_events(self, agents_client):
         agent_instance = MagicMock()
         agent_instance.run.return_value = [
             {"type": "text_delta", "content": "hello"},
@@ -107,10 +107,10 @@ def test_data_agent_streaming_emits_top_level_type_events(self, agents_client):
             patch("data_formulator.routes.agents.get_client", return_value=object()),
             patch("data_formulator.routes.agents.get_workspace", return_value=object()),
             patch("data_formulator.datalake.workspace.get_user_home", return_value=object()),
-            patch("data_formulator.routes.agents.DataAgent", return_value=agent_instance),
+            patch("data_formulator.routes.agents.AnalystAgent", return_value=agent_instance),
         ):
             resp = agents_client.post(
-                "/api/agent/data-agent-streaming",
+                "/api/agent/analyst-streaming",
                 json={
                     "model": {},
                     "input_tables": [],
diff --git a/tests/backend/routes/test_agent_diagnostics_wiring.py b/tests/backend/routes/test_agent_diagnostics_wiring.py
index 10cf8f24..f3dbcef5 100644
--- a/tests/backend/routes/test_agent_diagnostics_wiring.py
+++ b/tests/backend/routes/test_agent_diagnostics_wiring.py
@@ -69,7 +69,7 @@ def _make_llm_exception(body: str = "connection timeout") -> Exception:
 class TestDataRecAgentWiring:
 
     def _make_agent(self):
-        from data_formulator.agents.agent_data_rec import DataRecAgent
+        from eval_rec_ts.agent_data_rec import DataRecAgent
         client = MagicMock()
         workspace = MagicMock()
         workspace.get_fresh_name.return_value = "d-result_df"
@@ -78,7 +78,7 @@ def _make_agent(self):
             model_info={"provider": "test", "model": "mock"},
         )
 
-    @patch("data_formulator.agents.agent_data_rec.supplement_missing_block")
+    @patch("eval_rec_ts.agent_data_rec.supplement_missing_block")
     @patch("data_formulator.sandbox.create_sandbox")
     def test_normal_response_has_diagnostics(self, mock_sandbox_factory, mock_supplement) -> None:
         mock_supplement.return_value = (
@@ -122,7 +122,7 @@ def test_exception_response_has_error_diagnostics(self) -> None:
         assert diag["agent"] == "DataRecAgent"
         assert diag["error"] == "rate limit"
 
-    @patch("data_formulator.agents.agent_data_rec.supplement_missing_block")
+    @patch("eval_rec_ts.agent_data_rec.supplement_missing_block")
     @patch("data_formulator.sandbox.create_sandbox")
     def test_execution_exception_diagnostics_are_sanitized(self, mock_sandbox_factory, mock_supplement) -> None:
         mock_supplement.return_value = (
@@ -163,7 +163,7 @@ def test_execution_exception_diagnostics_are_sanitized(self, mock_sandbox_factor
 class TestDataTransformAgentWiring:
 
     def _make_agent(self):
-        from data_formulator.agents.agent_data_transform import DataTransformationAgent
+        from eval_rec_ts.agent_data_transform import DataTransformationAgent
         client = MagicMock()
         workspace = MagicMock()
         workspace.get_fresh_name.return_value = "d-result_df"
@@ -172,7 +172,7 @@ def _make_agent(self):
             model_info={"provider": "test", "model": "mock"},
         )
 
-    @patch("data_formulator.agents.agent_data_transform.supplement_missing_block")
+    @patch("eval_rec_ts.agent_data_transform.supplement_missing_block")
     @patch("data_formulator.sandbox.create_sandbox")
     def test_normal_response_has_diagnostics(self, mock_sandbox_factory, mock_supplement) -> None:
         mock_supplement.return_value = (
@@ -215,7 +215,7 @@ def test_exception_response_has_error_diagnostics(self) -> None:
         assert diag["agent"] == "DataTransformationAgent"
         assert diag["error"] == "server error"
 
-    @patch("data_formulator.agents.agent_data_transform.supplement_missing_block")
+    @patch("eval_rec_ts.agent_data_transform.supplement_missing_block")
     @patch("data_formulator.sandbox.create_sandbox")
     def test_execution_exception_diagnostics_are_sanitized(self, mock_sandbox_factory, mock_supplement) -> None:
         mock_supplement.return_value = (
diff --git a/tests/backend/routes/test_derive_data_repair_loop.py b/tests/backend/routes/test_derive_data_repair_loop.py
deleted file mode 100644
index 93fb64cf..00000000
--- a/tests/backend/routes/test_derive_data_repair_loop.py
+++ /dev/null
@@ -1,387 +0,0 @@
-"""Integration tests for the derive-data and refine-data repair loop improvements.
-
-Covers:
-- Repair loop triggers on both 'error' and 'other error' statuses
-- Empty results list does not crash (IndexError guard)
-- Followup exceptions are caught gracefully with safe generic messages
-- get-recommendation-questions never leaks exception details to the client
-"""
-from __future__ import annotations
-
-import json
-import shutil
-from contextlib import contextmanager
-from unittest.mock import MagicMock, patch
-
-import pytest
-from flask import Flask
-
-from data_formulator.routes.agents import agent_bp
-
-pytestmark = [pytest.mark.backend]
-
-MODULE = "data_formulator.routes.agents"
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _make_ok_result(code: str = "x = 1") -> dict:
-    return {
-        "status": "ok",
-        "code": code,
-        "content": {"rows": [], "virtual": {"table_name": "t", "row_count": 0}},
-        "dialog": [{"role": "system", "content": "..."}],
-        "agent": "DataRecAgent",
-        "refined_goal": {},
-    }
-
-
-def _make_error_result(status: str = "error", content: str = "some error") -> dict:
-    return {
-        "status": status,
-        "code": "bad_code()",
-        "content": content,
-        "dialog": [{"role": "system", "content": "..."}],
-        "agent": "DataRecAgent",
-        "refined_goal": {},
-    }
-
-
-@contextmanager
-def _mock_workspace():
-    """Yield a (mock_workspace, tmp_workspace_cm) that stubs out workspace deps."""
-    ws = MagicMock()
-    ws.list_tables.return_value = set()
-
-    @contextmanager
-    def fake_temp_data(ws_inner, temp_data):
-        yield ws_inner
-
-    yield ws, fake_temp_data
-
-
-def _build_app():
-    from data_formulator.error_handler import register_error_handlers
-
-    app = Flask(__name__)
-    app.config["TESTING"] = True
-    app.config["CLI_ARGS"] = {"max_display_rows": 100}
-    app.register_blueprint(agent_bp)
-    register_error_handlers(app)
-    return app
-
-
-def _derive_data_payload(**overrides) -> dict:
-    base = {
-        "token": "test-token",
-        "model": {"endpoint": "openai", "model": "gpt-4", "api_key": "k", "api_base": "http://x"},
-        "input_tables": [{"name": "t1", "rows": [{"a": 1}]}],
-        "extra_prompt": "do something",
-        "max_repair_attempts": 1,
-    }
-    base.update(overrides)
-    return base
-
-
-def _refine_data_payload(**overrides) -> dict:
-    base = {
-        "token": "test-token",
-        "model": {"endpoint": "openai", "model": "gpt-4", "api_key": "k", "api_base": "http://x"},
-        "input_tables": [{"name": "t1", "rows": [{"a": 1}]}],
-        "dialog": [{"role": "system", "content": "..."}],
-        "new_instruction": "fix it",
-        "latest_data_sample": [{"a": 1}],
-        "max_repair_attempts": 1,
-    }
-    base.update(overrides)
-    return base
-
-
-# ---------------------------------------------------------------------------
-# derive-data: repair loop status matching
-# ---------------------------------------------------------------------------
-
-class TestDeriveDataRepairLoop:
-
-    def _post_derive(self, client, payload):
-        return client.post(
-            "/api/agent/derive-data",
-            data=json.dumps(payload),
-            content_type="application/json",
-        )
-
-    def test_repair_loop_triggers_on_other_error(self) -> None:
-        """'other error' status should enter the repair loop (not just 'error')."""
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.run.return_value = [_make_error_result(status="other error")]
-        mock_agent.followup.return_value = [_make_ok_result()]
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.DataRecAgent", return_value=mock_agent),
-                patch(f"{MODULE}.sign_result"),
-            ):
-                with app.test_client() as client:
-                    resp = self._post_derive(client, _derive_data_payload())
-
-        data = resp.get_json()
-        assert data["status"] == "success"
-        assert data["data"]["results"][0]["status"] == "ok"
-        mock_agent.followup.assert_called_once()
-
-    def test_repair_loop_skips_when_status_is_ok(self) -> None:
-        """When initial result is 'ok', repair loop should not execute."""
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.run.return_value = [_make_ok_result()]
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.DataRecAgent", return_value=mock_agent),
-                patch(f"{MODULE}.sign_result"),
-            ):
-                with app.test_client() as client:
-                    resp = self._post_derive(client, _derive_data_payload())
-
-        data = resp.get_json()
-        assert data["data"]["results"][0]["status"] == "ok"
-        mock_agent.followup.assert_not_called()
-
-    def test_empty_results_does_not_crash(self) -> None:
-        """If agent.run() returns an empty list, no IndexError should occur."""
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.run.return_value = []
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.DataRecAgent", return_value=mock_agent),
-                patch(f"{MODULE}.sign_result"),
-            ):
-                with app.test_client() as client:
-                    resp = self._post_derive(client, _derive_data_payload())
-
-        data = resp.get_json()
-        assert data["status"] == "success"
-        assert data["data"]["results"] == []
-
-    def test_followup_exception_is_caught(self) -> None:
-        """If agent.followup() raises, the error should be caught and a safe
-        classified message returned (no raw exception text)."""
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.run.return_value = [_make_error_result(status="error")]
-        mock_agent.followup.side_effect = RuntimeError("LLM connection timeout")
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.DataRecAgent", return_value=mock_agent),
-                patch(f"{MODULE}.sign_result"),
-            ):
-                with app.test_client() as client:
-                    resp = self._post_derive(client, _derive_data_payload())
-
-        data = resp.get_json()
-        assert data["status"] == "success"
-        result = data["data"]["results"][0]
-        assert result["status"] == "error"
-        # classify_llm_error maps "timeout" → safe timeout message
-        assert "timed out" in result["content"].lower() or "timeout" in result["content"].lower()
-        # Raw exception text must not leak
-        assert "LLM connection timeout" not in result["content"]
-
-
-# ---------------------------------------------------------------------------
-# refine-data: same repair loop tests
-# ---------------------------------------------------------------------------
-
-class TestRefineDataRepairLoop:
-
-    def _post_refine(self, client, payload):
-        return client.post(
-            "/api/agent/refine-data",
-            data=json.dumps(payload),
-            content_type="application/json",
-        )
-
-    def test_repair_loop_triggers_on_other_error(self) -> None:
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.followup.side_effect = [
-            [_make_error_result(status="other error")],
-            [_make_ok_result()],
-        ]
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.DataTransformationAgent", return_value=mock_agent),
-                patch(f"{MODULE}.sign_result"),
-            ):
-                with app.test_client() as client:
-                    resp = self._post_refine(client, _refine_data_payload())
-
-        data = resp.get_json()
-        assert data["data"]["results"][0]["status"] == "ok"
-        assert mock_agent.followup.call_count == 2
-
-    def test_empty_results_does_not_crash(self) -> None:
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.followup.return_value = []
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.DataTransformationAgent", return_value=mock_agent),
-                patch(f"{MODULE}.sign_result"),
-            ):
-                with app.test_client() as client:
-                    resp = self._post_refine(client, _refine_data_payload())
-
-        data = resp.get_json()
-        assert data["status"] == "success"
-        assert data["data"]["results"] == []
-
-    def test_followup_exception_in_repair_is_caught(self) -> None:
-        """Followup exception returns a safe classified message, not raw exception text."""
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.followup.side_effect = [
-            [_make_error_result(status="error")],
-            RuntimeError("API key expired"),
-        ]
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.DataTransformationAgent", return_value=mock_agent),
-                patch(f"{MODULE}.sign_result"),
-            ):
-                with app.test_client() as client:
-                    resp = self._post_refine(client, _refine_data_payload())
-
-        data = resp.get_json()
-        result = data["data"]["results"][0]
-        assert result["status"] == "error"
-        # Raw exception text must not appear
-        assert "API key expired" not in result["content"]
-        # Should be classified as a model request failure (generic fallback)
-        assert result["content"] in (
-            "Model request failed",
-            "Authentication failed — please check your API key",
-        )
-
-
-# ---------------------------------------------------------------------------
-# get-recommendation-questions: error message uses classify_llm_error
-# ---------------------------------------------------------------------------
-
-class TestGetRecommendationQuestionsError:
-
-    def test_error_message_is_classified_not_raw(self) -> None:
-        """Error response uses classify_llm_error — safe pre-defined message,
-        not the raw exception text."""
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.run.side_effect = ValueError("column 'x' not found in table")
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.InteractiveExploreAgent", return_value=mock_agent),
-            ):
-                with app.test_client() as client:
-                    resp = client.post(
-                        "/api/agent/get-recommendation-questions",
-                        data=json.dumps({
-                            "model": {"endpoint": "openai", "model": "gpt-4",
-                                       "api_key": "k", "api_base": "http://x"},
-                            "input_tables": [{"name": "t", "rows": []}],
-                        }),
-                        content_type="application/json",
-                    )
-
-        lines = resp.data.decode("utf-8").strip().split("\n")
-        assert len(lines) >= 1
-        error_events = [json.loads(l) for l in lines
-                        if l.strip() and json.loads(l).get("type") == "error"]
-        assert len(error_events) == 1
-
-        err = error_events[0]["error"]
-        assert "column 'x' not found" not in err["message"]
-        assert err["retry"] is False
-
-    def test_error_message_never_leaks_api_keys(self) -> None:
-        """Even when exception contains API keys, classify_and_wrap_llm_error
-        returns a safe pre-defined message without any raw exception text."""
-        app = _build_app()
-
-        mock_agent = MagicMock()
-        mock_agent.run.side_effect = RuntimeError("auth failed api_key=sk-secret123 for model")
-
-        with _mock_workspace() as (ws, fake_ctx):
-            with (
-                patch(f"{MODULE}.get_client", return_value=MagicMock()),
-                patch(f"{MODULE}.get_identity_id", return_value="test-user"),
-                patch(f"{MODULE}.get_workspace", return_value=ws),
-                patch(f"{MODULE}.get_language_instruction", return_value=""),
-                patch(f"{MODULE}.InteractiveExploreAgent", return_value=mock_agent),
-            ):
-                with app.test_client() as client:
-                    resp = client.post(
-                        "/api/agent/get-recommendation-questions",
-                        data=json.dumps({
-                            "model": {"endpoint": "openai", "model": "gpt-4",
-                                       "api_key": "k", "api_base": "http://x"},
-                            "input_tables": [{"name": "t", "rows": []}],
-                        }),
-                        content_type="application/json",
-                    )
-
-        lines = resp.data.decode("utf-8").strip().split("\n")
-        error_events = [json.loads(l) for l in lines
-                        if l.strip() and json.loads(l).get("type") == "error"]
-        assert len(error_events) >= 1
-        err = error_events[0]["error"]
-        assert "sk-secret123" not in err["message"]
-        assert err["code"] == "LLM_AUTH_FAILED"

From 1fa3f1278edef5c94beb9c9c088e9cde2a73f783 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Fri, 12 Jun 2026 16:03:19 -0700
Subject: [PATCH 21/29] cleaning up

---
 py-src/data_formulator/agents/__init__.py     |   2 -
 .../agents/agent_chart_insight.py             | 150 ---------------
 py-src/data_formulator/analyst/agent.py       |  31 ++-
 .../analyst/skills/core/SKILL.md              |   3 +
 .../analyst/skills/core/skill.py              |   5 +-
 .../analyst/skills/core/tools.json            |   4 +-
 .../analyst/skills/report/skill.py            |  30 +--
 py-src/data_formulator/routes/agents.py       |  59 ------
 src/app/dfSlice.tsx                           | 160 +---------------
 src/app/store.ts                              |   2 +-
 src/app/useAutoSave.tsx                       |   2 +-
 src/app/utils.tsx                             |   1 -
 src/app/workspaceService.ts                   |   2 +-
 src/components/ComponentType.tsx              |  11 +-
 src/lib/agents-chart/core/compute-layout.ts   |  40 +++-
 src/views/InteractionEntryCard.tsx            |  15 +-
 src/views/SimpleChartRecBox.tsx               |  38 ++--
 src/views/VisualizationView.tsx               |  12 +-
 .../routes/test_chart_insight_route.py        | 178 ------------------
 tests/frontend/unit/app/chartInsight.test.ts  | 119 ------------
 20 files changed, 100 insertions(+), 764 deletions(-)
 delete mode 100644 py-src/data_formulator/agents/agent_chart_insight.py
 delete mode 100644 tests/backend/routes/test_chart_insight_route.py
 delete mode 100644 tests/frontend/unit/app/chartInsight.test.ts

diff --git a/py-src/data_formulator/agents/__init__.py b/py-src/data_formulator/agents/__init__.py
index d5e439a7..2431c539 100644
--- a/py-src/data_formulator/agents/__init__.py
+++ b/py-src/data_formulator/agents/__init__.py
@@ -4,12 +4,10 @@
 from data_formulator.agents.agent_data_load import DataLoadAgent
 from data_formulator.agents.agent_sort_data import SortDataAgent
 from data_formulator.agents.agent_simple import SimpleAgents
-from data_formulator.agents.agent_chart_insight import ChartInsightAgent
 from data_formulator.agents.agent_chart_restyle import ChartRestyleAgent
 
 __all__ = [
     "DataLoadAgent",
     "SortDataAgent",
-    "ChartInsightAgent",
     "ChartRestyleAgent",
 ]
diff --git a/py-src/data_formulator/agents/agent_chart_insight.py b/py-src/data_formulator/agents/agent_chart_insight.py
deleted file mode 100644
index c280efc2..00000000
--- a/py-src/data_formulator/agents/agent_chart_insight.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-from data_formulator.agent_config import reasoning_effort_for
-from data_formulator.agents.agent_utils import generate_data_summary, extract_json_objects
-from data_formulator.agents.agent_language import inject_language_instruction
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-_AGENT_ID = "chart_insight"
-
-
-SYSTEM_PROMPT = r'''You are a data analyst helping users understand their visualizations.
-You are given a chart image along with metadata about the chart type, data fields used, and a summary of the underlying data (including schema, value ranges, and sample rows).
-
-Use both the chart image and the data summary to produce:
-
-1. **title**: A short, descriptive title for the chart (5-10 words). It should summarize what the chart is about — the subject, the dimensions compared, and the scope. Do not include the chart type in the title. Write it in title case.
-
-2. **takeaways**: A list of 1-3 key findings or insights from the chart. Each takeaway should be one sentence. Highlight notable patterns, trends, outliers, or comparisons visible in the chart. Be specific — reference actual values, categories, or trends from the data when possible.
-
-Respond with a JSON object in exactly this format (no markdown fences):
-
-{"title": "...", "takeaways": ["...", "..."]}
-'''
-
-
-class ChartInsightAgent(object):
-
-    def __init__(self, client, workspace=None, language_instruction="", knowledge_store=None):
-        self.client = client
-        self.workspace = workspace
-        self.language_instruction = language_instruction
-        self._knowledge_store = knowledge_store
-
-    def run(self, chart_image_base64, chart_type, field_names, input_tables=None, n=1):
-        """
-        Generate insight for a chart.
-        
-        Args:
-            chart_image_base64: Base64-encoded PNG data URL of the chart
-            chart_type: The type of chart (e.g., "Bar Chart", "Scatter Plot")
-            field_names: List of field names used in the chart encodings
-            input_tables: Optional list of input table dicts for data context
-            n: Number of candidates to generate
-        """
-
-        # Build context about the chart
-        context_parts = [f"Chart type: {chart_type}"]
-        context_parts.append(f"Fields used: {', '.join(field_names)}")
-
-        if input_tables and self.workspace:
-            data_summary = generate_data_summary(
-                input_tables, workspace=self.workspace,
-                include_data_samples=True, row_sample_size=3,
-            )
-            context_parts.append(f"\nData summary:\n{data_summary}")
-
-        # Search relevant knowledge for analysis context
-        if self._knowledge_store:
-            try:
-                search_query = " ".join([chart_type] + field_names[:5]).strip()
-                if search_query:
-                    relevant = self._knowledge_store.search(
-                        search_query, categories=["workflows"], max_results=3,
-                    )
-                    if relevant:
-                        kb_parts = ["Relevant analysis knowledge:"]
-                        for item in relevant:
-                            kb_parts.append(f"- {item['title']}: {item['snippet'][:200]}")
-                        context_parts.append("\n".join(kb_parts))
-            except Exception:
-                logger.warning("Failed to search knowledge workflows", exc_info=True)
-
-        context = "\n".join(context_parts)
-
-        # Build the message with image
-        user_content = [
-            {
-                "type": "text",
-                "text": f"[CHART METADATA]\n{context}\n\n[CHART IMAGE]\nHere is the chart to analyze:"
-            },
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": f"data:image/png;base64,{chart_image_base64}",
-                    "detail": "high"
-                }
-            }
-        ]
-
-        system_prompt = SYSTEM_PROMPT
-
-        if self._knowledge_store:
-            system_prompt += self._knowledge_store.format_rules_block()
-
-        system_prompt = inject_language_instruction(system_prompt, self.language_instruction)
-
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_content}
-        ]
-
-        logger.debug(f"ChartInsightAgent: analyzing {chart_type} chart with fields {field_names}")
-        logger.info(f"[ChartInsightAgent] run start | chart_type={chart_type}")
-
-        response = self.client.get_completion(messages=messages, reasoning_effort=reasoning_effort_for(_AGENT_ID, self.client.model))
-
-        candidates = []
-        for choice in response.choices:
-            logger.debug("\n=== Chart insight result ===>\n")
-            logger.debug(choice.message.content + "\n")
-
-            response_content = choice.message.content
-            title = ""
-            takeaways = []
-
-            # Parse JSON response
-            json_blocks = extract_json_objects(response_content + "\n")
-            for parsed in json_blocks:
-                title = parsed.get('title', '')
-                takeaways = parsed.get('takeaways', [])
-                if isinstance(takeaways, str):
-                    takeaways = [takeaways]
-                if title or takeaways:
-                    break
-
-            if title or takeaways:
-                result = {
-                    'status': 'ok',
-                    'title': title,
-                    'takeaways': takeaways,
-                }
-            else:
-                logger.error(f"unable to parse insight from response: {response_content}")
-                result = {
-                    'status': 'other error',
-                    'content': 'unable to generate chart insight'
-                }
-
-            result['dialog'] = [*messages, {"role": choice.message.role, "content": choice.message.content}]
-            result['agent'] = 'ChartInsightAgent'
-
-            candidates.append(result)
-
-        status = candidates[0].get('status', '?') if candidates else 'empty'
-        logger.info(f"[ChartInsightAgent] run done | status={status}")
-        return candidates
diff --git a/py-src/data_formulator/analyst/agent.py b/py-src/data_formulator/analyst/agent.py
index 51e8010c..fb4489d1 100644
--- a/py-src/data_formulator/analyst/agent.py
+++ b/py-src/data_formulator/analyst/agent.py
@@ -887,10 +887,9 @@ def register_run_chart(
 
         The entry mirrors the shape the frontend forwards for pre-existing charts
         (``chart_id`` / ``chart_type`` / ``encodings`` / ``table_ref`` / ``code`` /
-        ``chart_data``) **minus** the optional ``chart_image`` — run-created charts
-        are read by the agent from their encodings + sample data (and code), not a
-        rendered image. The mutation lands on ``self._run_payload['charts']`` so the
-        next dispatched skill ctx sees it.
+        ``chart_data``). Charts are read by the agent from their encodings + sample
+        data (and code), not a rendered image. The mutation lands on
+        ``self._run_payload['charts']`` so the next dispatched skill ctx sees it.
         """
         chart_id = transform_result.get("chart_id")
         if not chart_id:
@@ -989,6 +988,7 @@ def _run_visualize_code(
         field_metadata: dict,
         field_display_names: dict,
         display_instruction: str,
+        title: str = "",
         messages: list[dict] | None = None,
     ) -> dict[str, Any]:
         """Run visualize code in sandbox and assemble chart."""
@@ -1062,6 +1062,7 @@ def _run_visualize_code(
 
             refined_goal = {
                 "display_instruction": display_instruction,
+                "title": title,
                 "output_variable": output_variable,
                 "output_fields": list(query_output.columns),
                 "chart": chart_spec,
@@ -1256,12 +1257,6 @@ def _build_initial_messages(
 
         user_content += f"[USER QUESTION]\n\n{user_question}"
 
-        chart_thumbnail = None
-        if focused_thread:
-            for step in focused_thread:
-                if step.get("chart_thumbnail"):
-                    chart_thumbnail = step["chart_thumbnail"]
-
         system_prompt = self._build_system_prompt(
             has_primary_tables=bool(primary_tables),
             has_focused_thread=bool(focused_thread),
@@ -1270,19 +1265,15 @@ def _build_initial_messages(
             has_charts=bool(charts_block),
         )
 
-        has_images = (chart_thumbnail and chart_thumbnail.startswith("data:")) or (attached_images and len(attached_images) > 0)
+        has_images = bool(attached_images) and len(attached_images) > 0
 
         if has_images:
             content_parts: list[dict] = [{"type": "text", "text": user_content}]
-            if chart_thumbnail and chart_thumbnail.startswith("data:"):
-                content_parts.append({"type": "text", "text": "\n[CURRENT CHART] (the chart the user is currently viewing):"})
-                content_parts.append({"type": "image_url", "image_url": {"url": chart_thumbnail, "detail": "low"}})
-            if attached_images:
-                label = "[USER ATTACHMENT]" if len(attached_images) == 1 else "[USER ATTACHMENTS]"
-                content_parts.append({"type": "text", "text": f"\n{label} (image(s) provided by the user):"})
-                for img in attached_images:
-                    if img.startswith("data:"):
-                        content_parts.append({"type": "image_url", "image_url": {"url": img, "detail": "low"}})
+            label = "[USER ATTACHMENT]" if len(attached_images) == 1 else "[USER ATTACHMENTS]"
+            content_parts.append({"type": "text", "text": f"\n{label} (image(s) provided by the user):"})
+            for img in attached_images:
+                if img.startswith("data:"):
+                    content_parts.append({"type": "image_url", "image_url": {"url": img, "detail": "low"}})
             return [
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": content_parts},
diff --git a/py-src/data_formulator/analyst/skills/core/SKILL.md b/py-src/data_formulator/analyst/skills/core/SKILL.md
index 6faafff0..1ae6e882 100644
--- a/py-src/data_formulator/analyst/skills/core/SKILL.md
+++ b/py-src/data_formulator/analyst/skills/core/SKILL.md
@@ -77,6 +77,9 @@ result and decide your next move.
 - `display_instruction` — ≤12 words; the question/hypothesis the chart
   investigates (don't recap x/y/color — those are visible). Wrap a **column** in
   `**…**` if it anchors the question.
+- `title` — short descriptive chart heading (5–10 words, title case): the
+  subject, the dimensions compared, and the scope. Do NOT include the chart
+  type. This is shown as the chart's title.
 - `code` — Python producing a DataFrame assigned to `output_variable`.
 - `output_variable` — snake_case name the code assigns.
 - `chart` — `{chart_type, encodings:{x,y,…}, config:{}}` (chart_type from the
diff --git a/py-src/data_formulator/analyst/skills/core/skill.py b/py-src/data_formulator/analyst/skills/core/skill.py
index c15986c0..cc816936 100644
--- a/py-src/data_formulator/analyst/skills/core/skill.py
+++ b/py-src/data_formulator/analyst/skills/core/skill.py
@@ -126,12 +126,12 @@ def _handle_visualize(
         field_metadata = action.get("field_metadata", {})
         field_display_names = action.get("field_display_names", {})
         display_instruction = action.get("display_instruction", "")
+        title = action.get("title", "")
         step_index = int((ctx.payload or {}).get("completed_step_count", 0)) + 1
 
         yield {
             "type": "action",
             "action": "visualize",
-            "thought": action.get("thought", ""),
             "display_instruction": display_instruction,
             "input_tables": action.get("input_tables", []),
         }
@@ -143,6 +143,7 @@ def _handle_visualize(
             field_metadata=field_metadata,
             field_display_names=field_display_names,
             display_instruction=display_instruction,
+            title=title,
             messages=ctx.trajectory,
         )
 
@@ -179,7 +180,6 @@ def _handle_visualize(
         observation = self._format_observation(
             step_index=step_index,
             display_instruction=display_instruction,
-            thought=action.get("thought", ""),
             code=transform_result.get("code", ""),
             data=transformed_data,
             chart_id=transform_result.get("chart_id"),
@@ -255,7 +255,6 @@ def _handle_delegate(
     def _format_observation(
         step_index: int,
         display_instruction: str,
-        thought: str,
         code: str,
         data: dict[str, Any],
         workspace: Any,
diff --git a/py-src/data_formulator/analyst/skills/core/tools.json b/py-src/data_formulator/analyst/skills/core/tools.json
index e8a60209..bcb3826d 100644
--- a/py-src/data_formulator/analyst/skills/core/tools.json
+++ b/py-src/data_formulator/analyst/skills/core/tools.json
@@ -46,9 +46,9 @@
       "parameters": {
         "type": "object",
         "properties": {
-          "thought": {
+          "title": {
             "type": "string",
-            "description": "Brief rationale for this visualization (not shown to the user)."
+            "description": "A short, descriptive title for the chart (5-10 words, title case). Summarize what the chart shows — the subject, the dimensions compared, and the scope. Do NOT include the chart type. Shown as the chart's heading."
           },
           "display_instruction": {
             "type": "string",
diff --git a/py-src/data_formulator/analyst/skills/report/skill.py b/py-src/data_formulator/analyst/skills/report/skill.py
index d94be32f..0b99c1d2 100644
--- a/py-src/data_formulator/analyst/skills/report/skill.py
+++ b/py-src/data_formulator/analyst/skills/report/skill.py
@@ -98,8 +98,8 @@ def handle_tool(
         if name != "inspect_chart":
             return ToolResult(text=f"report has no tool '{name}'.")
         charts: list[dict[str, Any]] = (ctx.payload or {}).get("charts") or []
-        text, images = self._handle_inspect_chart(args.get("chart_ids", []), charts)
-        return ToolResult(text=text, images=tuple(images))
+        text = self._handle_inspect_chart(args.get("chart_ids", []), charts)
+        return ToolResult(text=text)
 
     # ------------------------------------------------------------------
     # Action handler (buffered fallback — delivers the finished report)
@@ -157,23 +157,19 @@ def _handle_inspect_chart(
         self,
         chart_ids: list[str],
         charts: list[dict[str, Any]],
-    ) -> tuple[str, list[str]]:
+    ) -> str:
         """Inspect charts by *reading their data*, not by rendering them.
 
         The agent "reads" a chart from its encodings + sample rows (+ the code
         that produced it), which it can further interrogate with
         ``execute_python_script``. This avoids fragile server-side rasterization
-        and the multi-modal round-trip. A rendered image is attached **only when
-        one is already supplied** (``chart_image`` — e.g. a pre-existing chart's
-        cached PNG forwarded by the frontend); run-created charts carry none.
-
-        Returns ``(text_summary, image_urls)`` where ``image_urls`` is the list
-        of optional base64 PNG data URLs. Images are returned separately so the
-        caller can attach them as a follow-up vision message — tool-result
-        messages cannot carry image content on most providers.
+        and the multi-modal round-trip — rendered chart images are no longer fed
+        to the agent (experiments showed they don't improve narration over
+        reading the data + spec directly).
+
+        Returns the text summary of the inspected charts.
         """
         results = []
-        image_urls: list[str] = []
         for chart_id in chart_ids:
             chart = next((c for c in charts if c["chart_id"] == chart_id), None)
             if not chart:
@@ -203,17 +199,11 @@ def _handle_inspect_chart(
                         f"against table '{chart_data['name']}'."
                     )
 
-            # Image is strictly optional: only a frontend-supplied render is used.
-            image = chart.get("chart_image")
-            if image:
-                image_urls.append(image)
-                parts.append("  [Chart image attached below for visual confirmation]")
-            else:
-                parts.append("  [No image — read the chart from its encodings + data above]")
+            parts.append("  [Read the chart from its encodings + data above]")
 
             results.append("\n".join(parts))
 
-        return "\n\n".join(results), image_urls
+        return "\n\n".join(results)
 
 
 def get_skill() -> ReportWritingSkill:
diff --git a/py-src/data_formulator/routes/agents.py b/py-src/data_formulator/routes/agents.py
index e44e82e6..509e7f72 100644
--- a/py-src/data_formulator/routes/agents.py
+++ b/py-src/data_formulator/routes/agents.py
@@ -28,7 +28,6 @@
 from data_formulator.agents.agent_data_load import DataLoadAgent
 from data_formulator.agents.agent_data_loading_chat import DataLoadingAgent
 from data_formulator.agents.agent_code_explanation import CodeExplanationAgent
-from data_formulator.agents.agent_chart_insight import ChartInsightAgent
 from data_formulator.agents.client_utils import Client
 from data_formulator.model_registry import model_registry
 from data_formulator.knowledge.store import KnowledgeStore
@@ -432,64 +431,6 @@ def request_code_expl():
         logger.error("Error in code-expl", exc_info=e)
         raise classify_and_wrap_llm_error(e) from e
 
-@agent_bp.route('/chart-insight', methods=['GET', 'POST'])
-def request_chart_insight():
-    from data_formulator.error_handler import classify_and_wrap_llm_error
-    from data_formulator.errors import AppError, ErrorCode
-
-    if not request.is_json:
-        raise AppError(ErrorCode.INVALID_REQUEST, "Invalid request format")
-
-    logger.info("# chart insight request")
-    content = request.get_json()
-
-    chart_image = content.get("chart_image", "")
-    chart_type = content.get("chart_type", "")
-    field_names = content.get("field_names", [])
-    input_tables = content.get("input_tables", [])
-
-    if not chart_image:
-        raise AppError(ErrorCode.VALIDATION_ERROR, "Chart image not available. Please retry.")
-
-    model_config = content.get("model")
-    if not model_config:
-        raise AppError(ErrorCode.INVALID_REQUEST, "Model configuration is required")
-
-    client = get_client(model_config)
-    identity_id = get_identity_id()
-    workspace = get_workspace(identity_id)
-
-    try:
-        knowledge_store = _get_knowledge_store(identity_id)
-        agent = ChartInsightAgent(client=client, workspace=workspace,
-                                  language_instruction=get_language_instruction(),
-                                  knowledge_store=knowledge_store)
-        candidates = agent.run(chart_image, chart_type, field_names, input_tables)
-
-        if not candidates or len(candidates) == 0:
-            logger.warning("[chart-insight] failed request_id=%s reason=no_candidates",
-                           getattr(flask.g, 'request_id', ''))
-            raise AppError(ErrorCode.AGENT_ERROR, "Unable to generate chart insight")
-
-        result = candidates[0]
-        if result.get('status') != 'ok':
-            reason = result.get('content', result.get('status', 'unknown'))
-            logger.warning("[chart-insight] failed request_id=%s reason=candidate_error detail=%s",
-                           getattr(flask.g, 'request_id', ''), reason)
-            raise AppError(ErrorCode.AGENT_ERROR, "Unable to generate chart insight")
-
-        logger.info("[chart-insight] done request_id=%s takeaway_count=%d",
-                    getattr(flask.g, 'request_id', ''),
-                    len(result.get('takeaways', [])))
-        return json_ok({"title": result.get("title", ""),
-                        "takeaways": result.get("takeaways", [])})
-
-    except AppError:
-        raise
-    except Exception as e:
-        logger.error("Error in chart-insight", exc_info=e)
-        raise classify_and_wrap_llm_error(e) from e
-
 @agent_bp.route('/refresh-derived-data', methods=['POST'])
 def refresh_derived_data():
     """
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index 87963e7d..8084da5d 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -2,7 +2,7 @@
 // Licensed under the MIT License.
 
 import { createAsyncThunk, createSlice, PayloadAction, createSelector } from '@reduxjs/toolkit'
-import { Channel, Chart, ChartTemplate, DataCleanBlock, DataSourceConfig, EncodingItem, EncodingMap, FieldItem, Trigger, computeInsightKey, ChartInsight, ChartStyleVariant, DraftNode, InteractionEntry, DeriveStatus, ChatMessage, PendingTableLoad, PendingClarification } from '../components/ComponentType'
+import { Channel, Chart, ChartTemplate, DataCleanBlock, DataSourceConfig, EncodingItem, EncodingMap, FieldItem, Trigger, ChartStyleVariant, DraftNode, InteractionEntry, DeriveStatus, ChatMessage, PendingTableLoad, PendingClarification } from '../components/ComponentType'
 import { enableMapSet } from 'immer';
 import { DictTable } from "../components/ComponentType";
 import { Message } from '../views/MessageSnackbar';
@@ -12,7 +12,6 @@ import { getDataTable } from '../views/ChartUtils';
 import { getTriggers, getUrls, computeContentHash } from './utils';
 import { apiRequest } from './apiClient';
 import { deleteTablesFromWorkspace } from './workspaceService';
-import { getChartPngDataUrl } from './chartCache';
 import i18n from '../i18n';
 import { Type } from '../data/types';
 import { createTableFromFromObjectArray, inferTypeFromValueArray, refineTemporalType } from '../data/utils';
@@ -186,7 +185,6 @@ export interface DataFormulatorState {
     viewMode: 'editor' | 'report';
 
     chartSynthesisInProgress: string[];
-    chartInsightInProgress: string[];
 
     /**
      * Thumbnail PNG data URLs keyed by chart id. Stored in a separate slice
@@ -300,7 +298,6 @@ const initialState: DataFormulatorState = {
     viewMode: 'editor',
 
     chartSynthesisInProgress: [],
-    chartInsightInProgress: [],
     chartThumbnails: {},
     displayRowsTick: 0,
 
@@ -613,97 +610,6 @@ export const fetchCodeExpl = createAsyncThunk(
     }
 );
 
-export const fetchChartInsight = createAsyncThunk(
-    "dataFormulatorSlice/fetchChartInsight",
-    async (args: { chartId: string; tableId: string }, { getState }) => {
-        console.log(">>> call agent to generate chart insight <<<");
-
-        const state = getState() as DataFormulatorState;
-        const chart = collectAllCharts(state).find(c => c.id === args.chartId);
-        if (!chart) throw new Error(`Chart not found: ${args.chartId}`);
-
-        // Wait for chart image to be available in cache (replaces fixed 1.5s delay at call site)
-        const chartImage = await waitForChartImage(args.chartId);
-        if (!chartImage) {
-            throw new DOMException('Chart image not ready after waiting', 'ChartImageNotReady');
-        }
-
-        // Strip the data:image/png;base64, prefix for the backend
-        const base64Prefix = 'data:image/png;base64,';
-        const imagePayload = chartImage.startsWith(base64Prefix)
-            ? chartImage.substring(base64Prefix.length)
-            : chartImage;
-
-        // Collect field names from the encoding map
-        const fieldNames = Object.values(chart.encodingMap)
-            .map(enc => enc.fieldID)
-            .filter((id): id is string => !!id)
-            .map(id => {
-                const field = state.conceptShelfItems.find(f => f.id === id);
-                return field?.name || id;
-            });
-
-        // Collect input table info (include source tables for derived tables)
-        const table = state.tables.find(t => t.id === args.tableId);
-        const tableIds = table?.derive?.source ? [...table.derive.source, table.id] : [table?.id].filter(Boolean);
-        const inputTables = [...new Set(tableIds)]
-            .map(tId => state.tables.find(t => t.id === tId))
-            .filter((t): t is DictTable => !!t)
-            .map(t => ({
-                name: t.id,
-                rows: t.rows,
-            }));
-
-        // Use unified timeout from user config
-        const timeoutSeconds = state.config.formulateTimeoutSeconds;
-        const controller = new AbortController();
-        const timeoutId = setTimeout(() => {
-            controller.abort(new DOMException(
-                `Chart insight timed out after ${timeoutSeconds}s`,
-                'TimeoutError',
-            ));
-        }, timeoutSeconds * 1000);
-
-        try {
-            const { data } = await apiRequest(getUrls().CHART_INSIGHT_URL, {
-                method: 'POST',
-                headers: { 'Content-Type': 'application/json' },
-                body: JSON.stringify({
-                    chart_image: imagePayload,
-                    chart_type: chart.chartType,
-                    field_names: fieldNames,
-                    input_tables: inputTables,
-                    model: dfSelectors.getActiveModel(state),
-                }),
-                signal: controller.signal,
-            });
-
-            return { title: data.title, takeaways: data.takeaways,
-                     chartId: args.chartId, insightKey: computeInsightKey(chart) };
-        } finally {
-            clearTimeout(timeoutId);
-        }
-    }
-);
-
-/**
- * Wait for a chart image to appear in chartCache.
- * Polls at short intervals up to a maximum timeout.
- */
-async function waitForChartImage(
-    chartId: string,
-    timeoutMs: number = 8000,
-    intervalMs: number = 250,
-): Promise<string | undefined> {
-    const start = Date.now();
-    while (Date.now() - start < timeoutMs) {
-        const image = await getChartPngDataUrl(chartId);
-        if (image) return image;
-        await new Promise(r => setTimeout(r, intervalMs));
-    }
-    return undefined;
-}
-
 /** Fast fetch: returns the list of server-configured models instantly (no
  *  connectivity check).  The UI renders them immediately with a "testing"
  *  spinner so the admin can see every configured model right away. */
@@ -767,7 +673,6 @@ export const dataFormulatorSlice = createSlice({
             state.viewMode = 'editor';
 
             state.chartSynthesisInProgress = [];
-            state.chartInsightInProgress = [];
 
             // Preserve serverConfig ??it reflects the actual server state, not user state
 
@@ -907,7 +812,6 @@ export const dataFormulatorSlice = createSlice({
                 displayedMessageIdx: -1,
                 viewMode: saved.viewMode || 'editor',
                 chartSynthesisInProgress: [],
-                chartInsightInProgress: [],
                 cleanInProgress: false,
                 dataLoadingChatInProgress: false,
                 dataLoadingChatResetCounter: 0,
@@ -1309,12 +1213,6 @@ export const dataFormulatorSlice = createSlice({
         bumpDisplayRowsTick: (state) => {
             state.displayRowsTick = (state.displayRowsTick || 0) + 1;
         },
-        updateChartInsight: (state, action: PayloadAction<{chartId: string, insight: ChartInsight}>) => {
-            let chart = collectAllCharts(state).find(c => c.id == action.payload.chartId);
-            if (chart) {
-                chart.insight = action.payload.insight;
-            }
-        },
         // Zoom level applied by the resizer. Stored on the Chart (not in
         // config, which is for template-defined properties) so it persists
         // with the chart across focus changes and session save/load.
@@ -1951,8 +1849,8 @@ export const dataFormulatorSlice = createSlice({
                 });
             }
             // Reset other transient in-progress flags that snuck into the
-            // persisted blob (chartSynthesisInProgress / chartInsightInProgress
-            // are already blacklisted in store.ts).
+            // persisted blob (chartSynthesisInProgress is already blacklisted
+            // in store.ts).
             incoming.cleanInProgress = false;
             incoming.dataLoadingChatInProgress = false;
             incoming.sessionLoading = false;
@@ -2125,58 +2023,6 @@ export const dataFormulatorSlice = createSlice({
                 });
             }
         })
-        .addCase(fetchChartInsight.pending, (state, action) => {
-            let chartId = action.meta.arg.chartId;
-            if (!state.chartInsightInProgress.includes(chartId)) {
-                state.chartInsightInProgress.push(chartId);
-            }
-        })
-        .addCase(fetchChartInsight.fulfilled, (state, action) => {
-            let { chartId, insightKey, title, takeaways } = action.payload;
-            let chart = collectAllCharts(state).find(c => c.id === chartId);
-            if (chart && (title || (takeaways && takeaways.length > 0))) {
-                chart.insight = { title, takeaways: takeaways || [], key: insightKey };
-            }
-            state.chartInsightInProgress = state.chartInsightInProgress.filter(id => id !== chartId);
-            console.log("fetched chart insight", action.payload);
-        })
-        .addCase(fetchChartInsight.rejected, (state, action) => {
-            const chartId = action.meta.arg.chartId;
-            state.chartInsightInProgress = state.chartInsightInProgress.filter(id => id !== chartId);
-
-            const errorName = action.error?.name;
-
-            if (errorName === 'AbortError') {
-                // User cancelled — no feedback needed
-                return;
-            }
-
-            if (errorName === 'TimeoutError') {
-                state.messages.push({
-                    timestamp: Date.now(), type: 'warning',
-                    component: 'chart insight',
-                    value: i18n.t('messages.chartInsightTimedOut', {
-                        seconds: state.config.formulateTimeoutSeconds,
-                    }),
-                });
-                return;
-            }
-
-            if (errorName === 'ChartImageNotReady') {
-                state.messages.push({
-                    timestamp: Date.now(), type: 'warning',
-                    component: 'chart insight',
-                    value: i18n.t('messages.chartInsightImageNotReady'),
-                });
-                return;
-            }
-
-            state.messages.push({
-                timestamp: Date.now(), type: 'warning',
-                component: 'chart insight',
-                value: action.error?.message || i18n.t('messages.chartInsightFailed'),
-            });
-        })
     },
 })
 
diff --git a/src/app/store.ts b/src/app/store.ts
index 891080b6..c230f5a6 100644
--- a/src/app/store.ts
+++ b/src/app/store.ts
@@ -16,7 +16,7 @@ const persistConfig = {
     // globalModels are always fetched fresh from the server on each app start,
     // so there is no need (and it would cause stale-data issues) to persist them.
     // In-progress flags are transient and should not survive page refreshes.
-    blacklist: ['serverConfig', 'globalModels', 'chartSynthesisInProgress', 'chartInsightInProgress'],
+    blacklist: ['serverConfig', 'globalModels', 'chartSynthesisInProgress'],
 }
 
 const persistedReducer = persistReducer(persistConfig, dataFormulatorReducer)
diff --git a/src/app/useAutoSave.tsx b/src/app/useAutoSave.tsx
index 755a4918..3a167f14 100644
--- a/src/app/useAutoSave.tsx
+++ b/src/app/useAutoSave.tsx
@@ -15,7 +15,7 @@ const EXCLUDED_FIELDS = new Set([
     'models', 'selectedModelId', 'testedModels',
     'dataLoaderConnectParams', 'identity', 'serverConfig',
     // Transient fields that shouldn't trigger or be included in saves
-    'chartSynthesisInProgress', 'chartInsightInProgress',
+    'chartSynthesisInProgress',
     'cleanInProgress', 'sessionLoading', 'sessionLoadingLabel',
     // Thumbnails are derived from chart specs + table data; re-rendered
     // from the module cache on reload, so don't waste bandwidth saving them.
diff --git a/src/app/utils.tsx b/src/app/utils.tsx
index 68f558ee..9be61eb3 100644
--- a/src/app/utils.tsx
+++ b/src/app/utils.tsx
@@ -28,7 +28,6 @@ export function getUrls() {
         SCRATCH_BASE_URL: `/api/agent/workspace/scratch`,
         
         CODE_EXPL_URL: `/api/agent/code-expl`,
-        CHART_INSIGHT_URL: `/api/agent/chart-insight`,
         SERVER_PROCESS_DATA_ON_LOAD: `/api/agent/process-data-on-load`,
 
         ANALYST_STREAMING: `/api/agent/analyst-streaming`,
diff --git a/src/app/workspaceService.ts b/src/app/workspaceService.ts
index 01eaf00d..68088157 100644
--- a/src/app/workspaceService.ts
+++ b/src/app/workspaceService.ts
@@ -165,7 +165,7 @@ export async function exportWorkspace(id: string): Promise<Blob> {
             const EXCLUDED = new Set([
                 'models', 'selectedModelId', 'testedModels',
                 'dataLoaderConnectParams', 'identity', 'serverConfig',
-                'chartSynthesisInProgress', 'chartInsightInProgress',
+                'chartSynthesisInProgress',
                 'cleanInProgress', 'sessionLoading', 'sessionLoadingLabel',
             ]);
             const serializable: Record<string, unknown> = {};
diff --git a/src/components/ComponentType.tsx b/src/components/ComponentType.tsx
index 5a64b420..330befb4 100644
--- a/src/components/ComponentType.tsx
+++ b/src/components/ComponentType.tsx
@@ -335,12 +335,6 @@ export function createDictTable(
     }
 }
 
-export interface ChartInsight {
-    title: string;
-    takeaways: string[];
-    key: string;  // "chartType|sortedFieldIds" — used to detect staleness
-}
-
 /**
  * A user-authored "skin" of a chart: a Vega-Lite spec edited via the
  * style/restyle agent. Variants share the chart's encoding and data — they
@@ -405,14 +399,15 @@ export type Chart = {
     tableRef: string, 
     source: "user" | "trigger",
     config?: Record<string, any>,  // additional chart properties defined by the chart template
-    insight?: ChartInsight,  // AI-generated insight about the visualization
+    title?: string,  // AI-generated chart title (from the analyst's visualize action)
+    titleKey?: string,  // "chartType|sortedFieldIds" snapshot when title was set; used to detect staleness
     styleVariants?: ChartStyleVariant[],  // user-authored style refinements (see ChartStyleVariant)
     activeVariantId?: string,  // id of the variant currently rendered in the focused canvas; undefined = default
     scaleFactor?: number,  // zoom level applied by the resizer; undefined = 1 (no zoom)
     unread?: boolean,  // true for agent-generated charts the user hasn't focused yet; cleared on focus
 }
 
-/** Compute a string key for insight invalidation: chartType|sortedFieldIds */
+/** Compute a string key for title-staleness invalidation: chartType|sortedFieldIds */
 export function computeInsightKey(chart: Chart): string {
     const fieldIds = Object.values(chart.encodingMap)
         .map(enc => enc.fieldID)
diff --git a/src/lib/agents-chart/core/compute-layout.ts b/src/lib/agents-chart/core/compute-layout.ts
index 9121cb41..923b3ae5 100644
--- a/src/lib/agents-chart/core/compute-layout.ts
+++ b/src/lib/agents-chart/core/compute-layout.ts
@@ -825,13 +825,39 @@ export function computeLayout(
                 && stats.maxLen <= VL_SHORT_DISCRETE_LABEL_MAX_LEN;
 
             if (fewShortStrings || (numericLike && fitsHorizontally)) {
-                // Must be explicit: omitting labelAngle leaves VL defaults (e.g. -45° on ordinal).
-                xLabel = {
-                    ...xLabel,
-                    labelAngle: 0,
-                    labelAlign: 'center',
-                    labelBaseline: 'top',
-                };
+                // Horizontal labels need a band at least as wide as the widest
+                // label, or they overlap (e.g. "midgrade"/"premium" in a
+                // few-category box plot whose default band step is narrow). For
+                // string categories that don't yet fit, widen the step to fit —
+                // capped at each category's share of maxSubplotW. If even that cap
+                // can't fit them, fall back to angled labels instead of overlapping.
+                let keepHorizontal = true;
+                if (!numericLike && !fitsHorizontally) {
+                    const desiredStep = Math.ceil(labelPx) + 6;  // +6px inter-label gap
+                    const cap = Math.max(minStepVal, Math.floor(maxSubplotW / xTotalNominalCount));
+                    const widenedStep = Math.min(desiredStep, cap);
+                    if (widenedStep >= desiredStep) {
+                        if (widenedStep > xStepSize) xStepSize = widenedStep;
+                    } else {
+                        keepHorizontal = false;
+                    }
+                }
+                if (keepHorizontal) {
+                    // Must be explicit: omitting labelAngle leaves VL defaults (e.g. -45° on ordinal).
+                    xLabel = {
+                        ...xLabel,
+                        labelAngle: 0,
+                        labelAlign: 'center',
+                        labelBaseline: 'top',
+                    };
+                } else {
+                    xLabel = {
+                        ...xLabel,
+                        labelAngle: -45,
+                        labelAlign: 'right',
+                        labelBaseline: 'top',
+                    };
+                }
             } else if (numericLike && !fitsHorizontally && xLabel.labelAngle === undefined) {
                 // Numeric labels that don't fit horizontally and weren't already
                 // rotated by step-based sizing (which only rotates at narrow
diff --git a/src/views/InteractionEntryCard.tsx b/src/views/InteractionEntryCard.tsx
index 6e40346e..0f7d3ae2 100644
--- a/src/views/InteractionEntryCard.tsx
+++ b/src/views/InteractionEntryCard.tsx
@@ -41,6 +41,7 @@ const PlanStepItem: React.FC<{
     showShimmer: boolean;
     trailing?: React.ReactNode;
 }> = ({ step, showShimmer, trailing }) => {
+    const theme = useTheme();
     const [expanded, setExpanded] = useState(false);
     const isChecked = step.startsWith('✓');
     const isFailed = step.startsWith('✗');
@@ -49,8 +50,14 @@ const PlanStepItem: React.FC<{
     const displayLine = (isChecked || isFailed) ? step.slice(2) : (isWarning || isInfo) ? step.slice(2).trimStart() : step;
     const IconComp = getStepIconComponent(step);
 
-    const stepColor = isFailed ? 'error.main' : isWarning ? 'warning.main' : isInfo ? 'info.main'
-        : showShimmer ? 'text.secondary' : 'text.disabled';
+    // Text stays in the normal muted color even for failed/warning steps — the
+    // icon already signals the state, so loud red/orange body text is overkill.
+    const textColor = showShimmer ? 'text.secondary' : 'text.disabled';
+    // The icon carries the state hint, lightly tinted (not full-strength).
+    const iconColor = isFailed ? alpha(theme.palette.error.main, 0.7)
+        : isWarning ? alpha(theme.palette.warning.main, 0.7)
+        : isInfo ? alpha(theme.palette.info.main, 0.7)
+        : textColor;
 
     return (
         <Box sx={{
@@ -74,10 +81,10 @@ const PlanStepItem: React.FC<{
         }}
         onClick={() => setExpanded(prev => !prev)}
         >
-            <IconComp sx={{ width: 10, height: 10, color: stepColor, flexShrink: 0, mt: '2px' }} />
+            <IconComp sx={{ width: 10, height: 10, color: iconColor, flexShrink: 0, mt: '2px' }} />
             <Typography component="span" sx={{
                 fontSize: '10px',
-                color: stepColor,
+                color: textColor,
                 fontStyle: 'italic',
                 lineHeight: 1.4,
                 ...(!expanded ? {
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index d59b2678..a1072d91 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -21,14 +21,13 @@ import {
 } from '@mui/material';
 
 import { useDispatch, useSelector } from 'react-redux';
-import { DataFormulatorState, dfActions, dfSelectors, fetchCodeExpl, fetchFieldSemanticType, fetchChartInsight, generateFreshChart, GeneratedReport } from '../app/dfSlice';
+import { DataFormulatorState, dfActions, dfSelectors, fetchCodeExpl, fetchFieldSemanticType, generateFreshChart, GeneratedReport } from '../app/dfSlice';
 import { AppDispatch } from '../app/store';
 import { resolveRecommendedChart, getUrls, getTriggers, translateBackend } from '../app/utils';
 import { streamRequest } from '../app/apiClient';
 import { getErrorMessage } from '../app/errorCodes';
 import { persistEphemeralDerivedTable } from '../app/tableThunks';
-import { getCachedChart } from '../app/chartCache';
-import { Chart, ClarificationResponse, DictTable, FieldItem, createDictTable, InteractionEntry } from "../components/ComponentType";
+import { Chart, ClarificationResponse, DictTable, FieldItem, createDictTable, InteractionEntry, computeInsightKey } from "../components/ComponentType";
 import { normalizeClarifyEvent, formatClarificationResponses } from '../app/clarification';
 
 import { alpha } from '@mui/material/styles';
@@ -141,7 +140,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
     const workspaceBackend = useSelector((state: DataFormulatorState) => state.serverConfig.WORKSPACE_BACKEND);
     const activeWorkspaceId = useSelector((state: DataFormulatorState) => state.activeWorkspace?.id);
     const draftNodes = useSelector((state: DataFormulatorState) => state.draftNodes);
-    const chartThumbnails = useSelector((state: DataFormulatorState) => state.chartThumbnails) || {};
 
     const theme = useTheme();
     const { t } = useTranslation();
@@ -459,11 +457,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                 agent_summary: summary?.content || '',
             };
 
-            // Include chart thumbnail for the focused leaf table (the one the user is looking at)
-            if (walkTable.id === targetTableId && resolvedChart && chartThumbnails[resolvedChart.id]) {
-                step.chart_thumbnail = chartThumbnails[resolvedChart.id];
-            }
-
             focusedSteps.unshift(step);
 
             walkTable = tables.find(t => t.id === trigger.tableId);
@@ -531,7 +524,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
         const otherThreads = peripheralThreads.length > 0 ? peripheralThreads : undefined;
 
         return { focusedThread, otherThreads };
-    }, [tables, charts, conceptShelfItems, chartThumbnails]);
+    }, [tables, charts, conceptShelfItems]);
 
     const exploreFromChat = useCallback((prompt: string, clarificationContext?: {
         trajectory: any[];
@@ -635,11 +628,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     table_ref: tbl?.virtual?.tableId || c.tableRef,
                     code: tbl?.derive?.code || '',
                     chart_data: tbl ? { name: tbl.virtual?.tableId || tbl.id, rows: tbl.rows.slice(0, 50) } : undefined,
-                    // Optional rendered image: the agent reads charts from
-                    // data + encodings, but a cached PNG (when available)
-                    // lets it visually confirm a pre-existing chart. Prefer
-                    // the downscaled thumbnail to keep the request lean.
-                    chart_image: chartThumbnails[c.id] || getCachedChart(c.id)?.thumbnailDataUrl || undefined,
                 };
             });
         requestBody.charts = availableCharts;
@@ -712,8 +700,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             createNextDraft(lastCreatedTableId || focusedTableId!, initialEntries);
         }
 
-        // Track the last agent thought and display_instruction (from "action" events)
-        let lastAgentThought: string | null = null;
+        // Track the last agent display_instruction (from "action" events)
         let lastAgentDisplayInstruction: string | null = null;
         let lastAgentInputTables: string[] = [];
 
@@ -911,7 +898,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
 
             // ── action: agent chose what to do ──
             if (result.type === "action") {
-                lastAgentThought = result.thought || null;
                 lastAgentInputTables = result.input_tables || [];
                 if (result.action === "visualize") {
                     lastAgentDisplayInstruction = result.display_instruction || null;
@@ -974,7 +960,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                             ...currentDraftInteraction,
                             {
                                 from: 'data-agent' as const, to: 'datarec-agent' as const, role: 'instruction' as const,
-                                plan: [lastAgentThought, pendingThought, ...thinkingSteps.filter(s => s.trim())].filter(Boolean).join('\x1E') || undefined,
+                                plan: [pendingThought, ...thinkingSteps.filter(s => s.trim())].filter(Boolean).join('\x1E') || undefined,
                                 content: question || displayInstruction,
                                 displayContent: displayInstruction,
                                 inputTableNames: resolvedSourceNames,
@@ -983,7 +969,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                         ],
                     }
                 };
-                lastAgentThought = null;
                 lastAgentDisplayInstruction = null;
                 lastAgentInputTables = [];
                 thinkingSteps = []; // reset for next chart
@@ -1059,6 +1044,14 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     && !createdCharts.some(c => c.id === forwardedChartId)) {
                     newChart.id = forwardedChartId;
                 }
+                // Title comes from the analyst's visualize action (read from the
+                // chart data + spec). Stored on the chart so the canvas renders it
+                // as the chart heading; titleKey records the key used to detect staleness.
+                const insightTitle = refinedGoal?.title;
+                if (typeof insightTitle === 'string' && insightTitle.trim()) {
+                    newChart.title = insightTitle.trim();
+                    newChart.titleKey = computeInsightKey(newChart);
+                }
                 runCreatedChartIds.push(newChart.id);
                 // Mark as unread by default; cleared below if we auto-focus it
                 // (i.e. it's the first artifact this run) or by setFocused when
@@ -1084,11 +1077,6 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                     currentDraftId = null;
                 }
                 createNextDraft(candidateTableId, []);
-
-                if (createdCharts.length > 0) {
-                    const lastChart = createdCharts[createdCharts.length - 1];
-                    dispatch(fetchChartInsight({ chartId: lastChart.id, tableId: candidateTable.id }) as any);
-                }
             }
 
             // ── clarify / explain: pause and let user respond ──
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 8a6a15bb..8ec0f443 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -720,11 +720,11 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
 
     let triggerTable = tables.find(t => t.derive?.trigger?.chart?.id == focusedChart?.id);
 
-    // Chart insight: the generation UI was removed, but a chart that already
-    // carries an insight still surfaces its title on the rendered chart, so we
-    // keep the freshness check used by `insightTitle` below.
-    const currentInsightKey = computeInsightKey(focusedChart);
-    const insightFresh = focusedChart.insight?.key === currentInsightKey;
+    // Chart title: surfaced as the rendered chart heading. The title is kept
+    // only while its key matches the chart's current encoded fields (chartType
+    // + field ids), so it stays through property edits (e.g. sort order) but is
+    // dropped once the encoded fields change.
+    const titleFresh = !!focusedChart.title && focusedChart.titleKey === computeInsightKey(focusedChart);
     
     const actionBtnSx = {
         padding: '4px',
@@ -841,7 +841,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                                         scaleFactor={localScaleFactor}
                                         maxStretchFactor={config.maxStretchFactor}
                                         chartUnavailable={chartUnavailable}
-                                        insightTitle={insightFresh && focusedChart.insight?.title ? focusedChart.insight.title : undefined}
+                                        insightTitle={titleFresh ? focusedChart.title : undefined}
                                         onSpecReady={handleSpecReady}
                                     />
                                 </Box>
diff --git a/tests/backend/routes/test_chart_insight_route.py b/tests/backend/routes/test_chart_insight_route.py
deleted file mode 100644
index f6ba3c41..00000000
--- a/tests/backend/routes/test_chart_insight_route.py
+++ /dev/null
@@ -1,178 +0,0 @@
-"""Tests for /api/agent/chart-insight route.
-
-Validates input validation (missing image, missing model, non-vision model),
-success path, and agent error handling via AppError.
-"""
-from __future__ import annotations
-
-import json
-from unittest.mock import MagicMock, patch
-
-import flask
-import pytest
-
-from data_formulator.errors import AppError, ErrorCode
-
-pytestmark = [pytest.mark.backend]
-
-
-# ---------------------------------------------------------------------------
-# Fixtures
-# ---------------------------------------------------------------------------
-
-@pytest.fixture()
-def app():
-    """Minimal Flask app with agent_bp and error handlers registered."""
-    test_app = flask.Flask(__name__)
-    test_app.config["TESTING"] = True
-
-    from data_formulator.error_handler import register_error_handlers
-    from data_formulator.routes.agents import agent_bp
-    test_app.register_blueprint(agent_bp)
-    register_error_handlers(test_app)
-
-    return test_app
-
-
-@pytest.fixture()
-def client(app):
-    return app.test_client()
-
-
-def _valid_body(**overrides):
-    body = {
-        "chart_image": "iVBORw0KGgoAAAA==",
-        "chart_type": "Bar Chart",
-        "field_names": ["x", "y"],
-        "input_tables": [{"name": "t1", "rows": [{"x": 1}]}],
-        "model": {"provider": "openai", "model": "gpt-4o", "name": "gpt-4o"},
-    }
-    body.update(overrides)
-    return body
-
-
-# ---------------------------------------------------------------------------
-# Input validation
-# ---------------------------------------------------------------------------
-
-class TestChartInsightValidation:
-
-    def test_non_json_request_returns_error(self, client) -> None:
-        resp = client.post(
-            "/api/agent/chart-insight",
-            data="not json",
-            content_type="text/plain",
-        )
-        data = resp.get_json()
-        assert data["status"] == "error"
-        assert data["error"]["code"] == ErrorCode.INVALID_REQUEST
-
-    def test_missing_chart_image_returns_error(self, client) -> None:
-        resp = client.post(
-            "/api/agent/chart-insight",
-            json=_valid_body(chart_image=""),
-        )
-        data = resp.get_json()
-        assert data["status"] == "error"
-        assert data["error"]["code"] == ErrorCode.VALIDATION_ERROR
-
-    def test_missing_model_returns_error(self, client) -> None:
-        resp = client.post(
-            "/api/agent/chart-insight",
-            json=_valid_body(model=None),
-        )
-        data = resp.get_json()
-        assert data["status"] == "error"
-        assert data["error"]["code"] == ErrorCode.INVALID_REQUEST
-
-
-# ---------------------------------------------------------------------------
-# Success path
-# ---------------------------------------------------------------------------
-
-class TestChartInsightSuccess:
-
-    @patch("data_formulator.routes.agents._get_knowledge_store")
-    @patch("data_formulator.routes.agents.get_workspace")
-    @patch("data_formulator.routes.agents.get_identity_id", return_value="test-user")
-    @patch("data_formulator.routes.agents.get_client")
-    @patch("data_formulator.routes.agents.ChartInsightAgent")
-    def test_success_returns_title_and_takeaways(
-        self,
-        MockAgent,
-        mock_get_client,
-        mock_get_identity,
-        mock_get_workspace,
-        mock_get_ks,
-        client,
-    ) -> None:
-        agent_instance = MagicMock()
-        agent_instance.run.return_value = [{
-            "status": "ok",
-            "title": "Key Insights",
-            "takeaways": ["Point A", "Point B"],
-        }]
-        MockAgent.return_value = agent_instance
-
-        resp = client.post("/api/agent/chart-insight", json=_valid_body())
-        assert resp.status_code == 200
-        data = resp.get_json()
-        assert data["status"] == "success"
-        assert data["data"]["title"] == "Key Insights"
-        assert data["data"]["takeaways"] == ["Point A", "Point B"]
-
-
-# ---------------------------------------------------------------------------
-# Agent failure paths
-# ---------------------------------------------------------------------------
-
-class TestChartInsightAgentErrors:
-
-    @patch("data_formulator.routes.agents._get_knowledge_store")
-    @patch("data_formulator.routes.agents.get_workspace")
-    @patch("data_formulator.routes.agents.get_identity_id", return_value="test-user")
-    @patch("data_formulator.routes.agents.get_client")
-    @patch("data_formulator.routes.agents.ChartInsightAgent")
-    def test_empty_candidates_returns_agent_error(
-        self, MockAgent, mock_client, mock_id, mock_ws, mock_ks, client,
-    ) -> None:
-        MockAgent.return_value.run.return_value = []
-
-        resp = client.post("/api/agent/chart-insight", json=_valid_body())
-        data = resp.get_json()
-        assert data["status"] == "error"
-        assert data["error"]["code"] == ErrorCode.AGENT_ERROR
-
-    @patch("data_formulator.routes.agents._get_knowledge_store")
-    @patch("data_formulator.routes.agents.get_workspace")
-    @patch("data_formulator.routes.agents.get_identity_id", return_value="test-user")
-    @patch("data_formulator.routes.agents.get_client")
-    @patch("data_formulator.routes.agents.ChartInsightAgent")
-    def test_candidate_status_not_ok_returns_agent_error(
-        self, MockAgent, mock_client, mock_id, mock_ws, mock_ks, client,
-    ) -> None:
-        MockAgent.return_value.run.return_value = [{"status": "error", "content": "parse fail"}]
-
-        resp = client.post("/api/agent/chart-insight", json=_valid_body())
-        data = resp.get_json()
-        assert data["status"] == "error"
-        assert data["error"]["code"] == ErrorCode.AGENT_ERROR
-
-    @patch("data_formulator.routes.agents._get_knowledge_store")
-    @patch("data_formulator.routes.agents.get_workspace")
-    @patch("data_formulator.routes.agents.get_identity_id", return_value="test-user")
-    @patch("data_formulator.routes.agents.get_client")
-    @patch("data_formulator.routes.agents.ChartInsightAgent")
-    def test_llm_exception_returns_classified_error(
-        self, MockAgent, mock_client, mock_id, mock_ws, mock_ks, client,
-    ) -> None:
-        exc = Exception("Error code: 401 - Unauthorized, invalid api key")
-        MockAgent.return_value.run.side_effect = exc
-
-        resp = client.post("/api/agent/chart-insight", json=_valid_body())
-        data = resp.get_json()
-        assert data["status"] == "error"
-        assert data["error"]["code"] in (
-            ErrorCode.LLM_AUTH_FAILED,
-            ErrorCode.LLM_UNKNOWN_ERROR,
-        )
diff --git a/tests/frontend/unit/app/chartInsight.test.ts b/tests/frontend/unit/app/chartInsight.test.ts
deleted file mode 100644
index 106fdb89..00000000
--- a/tests/frontend/unit/app/chartInsight.test.ts
+++ /dev/null
@@ -1,119 +0,0 @@
-/**
- * Tests for Chart Insight fetchChartInsight thunk behavior.
- *
- * Covers:
- * - rejected reducer: message type varies by error name
- *   (AbortError = silent, TimeoutError = timeout msg, ChartImageNotReady = image msg, other = generic)
- */
-import { describe, it, expect, vi, beforeEach } from 'vitest';
-
-// We test the reducer logic by building a minimal extraReducers matcher.
-// Since the reducer is tightly coupled to createSlice, we test via the
-// slice's reducer function directly.
-
-// Mock all heavy deps before importing the slice
-vi.mock('../../../../src/app/utils', () => ({
-    fetchWithIdentity: vi.fn(),
-    getTriggers: vi.fn(() => []),
-    getUrls: vi.fn(() => ({
-        CHART_INSIGHT_URL: '/api/agent/chart-insight',
-    })),
-    computeContentHash: vi.fn(() => 'hash'),
-}));
-vi.mock('../../../../src/app/chartCache', () => ({
-    getChartPngDataUrl: vi.fn(),
-}));
-vi.mock('../../../../src/app/workspaceService', () => ({
-    deleteTablesFromWorkspace: vi.fn(),
-}));
-vi.mock('../../../../src/app/identity', () => ({
-    Identity: {},
-    IdentityType: { BROWSER: 'browser' },
-    getBrowserId: vi.fn(() => 'browser-id'),
-}));
-vi.mock('../../../../src/app/store', () => ({
-    store: {
-        getState: vi.fn(() => ({})),
-        dispatch: vi.fn(),
-    },
-}));
-vi.mock('../../../../src/i18n', () => ({
-    default: {
-        t: (key: string, params?: Record<string, any>) => {
-            if (key === 'messages.chartInsightTimedOut') {
-                return `Chart insight timed out after ${params?.seconds}s`;
-            }
-            if (key === 'messages.chartInsightImageNotReady') {
-                return 'Chart image was not ready';
-            }
-            if (key === 'messages.chartInsightFailed') {
-                return 'Failed to generate chart insight';
-            }
-            return key;
-        },
-    },
-}));
-
-// ---------------------------------------------------------------------------
-// Tests — rejected reducer message discrimination
-// ---------------------------------------------------------------------------
-
-describe('fetchChartInsight rejected reducer', () => {
-    // We import the reducer and simulate action.error shapes
-    // The reducer reads: action.error.name, action.error.message, action.meta.arg.chartId
-    let reducer: any;
-    let fetchChartInsight: any;
-    let initialState: any;
-
-    beforeEach(async () => {
-        const mod = await import('../../../../src/app/dfSlice');
-        reducer = mod.dataFormulatorSlice.reducer;
-        fetchChartInsight = mod.fetchChartInsight;
-
-        initialState = {
-            ...mod.dataFormulatorSlice.getInitialState(),
-            chartInsightInProgress: ['chart-1'],
-        };
-    });
-
-    function makeRejectedAction(errorName: string, errorMessage: string = 'test') {
-        return {
-            type: fetchChartInsight.rejected.type,
-            meta: { arg: { chartId: 'chart-1' } },
-            error: { name: errorName, message: errorMessage },
-        };
-    }
-
-    it('AbortError produces no message', () => {
-        const state = reducer(initialState, makeRejectedAction('AbortError'));
-        expect(state.messages).toHaveLength(0);
-        expect(state.chartInsightInProgress).not.toContain('chart-1');
-    });
-
-    it('TimeoutError produces a timeout warning with seconds', () => {
-        const state = reducer(initialState, makeRejectedAction('TimeoutError'));
-        expect(state.messages).toHaveLength(1);
-        expect(state.messages[0].type).toBe('warning');
-        expect(state.messages[0].value).toContain('timed out');
-        expect(state.messages[0].value).toContain(String(initialState.config.formulateTimeoutSeconds));
-    });
-
-    it('ChartImageNotReady produces an image-not-ready warning', () => {
-        const state = reducer(initialState, makeRejectedAction('ChartImageNotReady'));
-        expect(state.messages).toHaveLength(1);
-        expect(state.messages[0].type).toBe('warning');
-        expect(state.messages[0].value).toContain('not ready');
-    });
-
-    it('generic error produces a warning with the error message', () => {
-        const state = reducer(initialState, makeRejectedAction('Error', 'Model returned nonsense'));
-        expect(state.messages).toHaveLength(1);
-        expect(state.messages[0].type).toBe('warning');
-        expect(state.messages[0].value).toBe('Model returned nonsense');
-    });
-
-    it('removes chartId from chartInsightInProgress', () => {
-        const state = reducer(initialState, makeRejectedAction('Error'));
-        expect(state.chartInsightInProgress).not.toContain('chart-1');
-    });
-});

From 01ba1fe293b2ed8abed852014f9be87a017245f8 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Tue, 16 Jun 2026 17:59:38 -0700
Subject: [PATCH 22/29] analyst: differentiate open-ended vs concrete
 exploration budget

---
 py-src/data_formulator/analyst/agent.py            |  6 ++++--
 .../data_formulator/analyst/skills/core/SKILL.md   | 14 +++++++++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/py-src/data_formulator/analyst/agent.py b/py-src/data_formulator/analyst/agent.py
index fb4489d1..8ee4c452 100644
--- a/py-src/data_formulator/analyst/agent.py
+++ b/py-src/data_formulator/analyst/agent.py
@@ -254,8 +254,10 @@ def _decode(self, args: str) -> str | None:
   ceiling, not a target**. Use as few as the goal requires.
 - **Stop as soon as the user's goal is met.** End the run by giving your final
   answer in plain text rather than taking more actions just because you can.
-- Take a follow-up action only when it addresses a gap the previous step
-  actually raised — not merely another interesting angle.
+- For concrete/progressive questions, take a follow-up action only when it
+  addresses a gap the previous step actually raised. For open-ended
+  exploration, the opposite applies: deliberately spend your budget covering
+  distinct analytical angles (see the core skill's "Choosing what to do").
 - If the request is genuinely ambiguous, ask the user in plain text (no action)
   rather than guessing.
 
diff --git a/py-src/data_formulator/analyst/skills/core/SKILL.md b/py-src/data_formulator/analyst/skills/core/SKILL.md
index 1ae6e882..0be7d911 100644
--- a/py-src/data_formulator/analyst/skills/core/SKILL.md
+++ b/py-src/data_formulator/analyst/skills/core/SKILL.md
@@ -137,8 +137,9 @@ effort:
   answer in plain text.
 - *Progressive* (a small sequence, e.g. "why did revenue drop?"): **2–3
   visualizations**, then a closing plain-text answer tying them together.
-- *Open-ended* (explicit exploration): **3–5 visualizations** forming a
-  narrative, then a closing plain-text answer.
+- *Open-ended* (explicit exploration): **3–5 visualizations**, each a distinct
+  analytical angle (not variations on one axis), forming a narrative, then a
+  closing plain-text answer.
 - *Missing data* (needs tables not in the workspace):
   `delegate(target="data_loading")`.
 - *Report / write-up request* (e.g. "write a report on X", "summarize the findings
@@ -149,9 +150,12 @@ effort:
   charts by id. Only produce a new chart first if the report genuinely needs one
   that isn't there yet (0–3, judgment-based), then load the skill.
 
-When chaining visualizations, add the next chart only if it answers a gap *raised*
-by the previous one — not just another interesting angle. **Never** repeat a
-visualization already in the trajectory or in another thread.
+For concrete/progressive questions, add the next chart only if it answers a gap
+*raised* by the previous one. For open-ended exploration, do the reverse: each
+chart should open a **new** analytical angle (temporal, spatial, distributional,
+relational, comparative) rather than refine the last one — aim to use your full
+budget on distinct perspectives. **Never** repeat a visualization already in the
+trajectory or in another thread.
 
 ## Chart Creation Guide
 

From 6a839da91418d159c7ffa995c2f07211511fd73c Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Tue, 16 Jun 2026 18:04:19 -0700
Subject: [PATCH 23/29] add loops

---
 loops/model-evaluation/README.md | 463 +++++++++++++++++++++++++++++++
 1 file changed, 463 insertions(+)
 create mode 100644 loops/model-evaluation/README.md

diff --git a/loops/model-evaluation/README.md b/loops/model-evaluation/README.md
new file mode 100644
index 00000000..e6ec16f7
--- /dev/null
+++ b/loops/model-evaluation/README.md
@@ -0,0 +1,463 @@
+# Loop — Open-Source (Ollama) Model Review for the Data Formulator Analyst Agent
+
+**This document is an autonomous runbook.** You are a coding agent. Read it fully,
+then *spin the experiments yourself*: pick models, load data, generate questions,
+drive the `AnalystAgent`, grade the outputs, and write the report. Every step below
+is concrete enough to execute without further input. When a choice is genuinely
+ambiguous, pick the reasonable default named here and record the decision in the
+report rather than stopping to ask.
+
+---
+
+## 0. Purpose
+
+Data Formulator can be pointed at any LLM, including open-source models served locally
+through Ollama. But we don't currently have a principled, evidence-based view of **which
+open-source models are actually good enough to drive the analyst agent**, nor of the
+hardware and settings each one needs. This loop produces that review: a reproducible
+benchmark of open-source models against the agent, with clear recommendations.
+
+**The questions this review must answer (these become the report's headline sections):**
+1. **Which Ollama models actually work** with the Data Formulator analyst agent? (a
+   clear pass / partial / fail list).
+2. **Minimum specs** — especially VRAM — to run each working model at a usable quality.
+3. **Required settings** — quantization, context length (`num_ctx`), and other Ollama
+   `options` that make the difference between working and failing.
+4. **The low-VRAM story** — how small can you go? Identify the *smallest model that still
+   works*, the quant / `num_ctx` it needs, and call out the practical VRAM floor for usable
+   quality.
+
+Keep these four questions pinned. Everything measured should ladder up to answering them.
+
+---
+
+## 1. Mission
+
+Benchmark a broad set of open-source Ollama models against the unified **`AnalystAgent`**
+(the agent that powers Data Formulator's "Explore" / Data Thread). For each model, on
+several datasets, an LLM *asker* poses direct and open-ended analysis questions; the
+analyst agent answers by exploring data and producing charts / data-thread content; an
+LLM *grader* scores the answers. Reference runs with hosted models calibrate the scale.
+
+**Deliverable:** `loops/model-evaluation/report.md` — a results table + a model
+recommendation guide that directly answers the four questions in §0, including the
+practical low-VRAM floor.
+
+All scratch work, scripts, raw transcripts, and aggregates live under
+`loops/model-evaluation/work/` (see §9 for the layout).
+
+---
+
+## 2. Environment & setup
+
+- **Repo:** this working tree. Python source is under `py-src/`; the project is
+  installed editable. **Use `uv` only**, never pip: `uv pip install -e .`,
+  `uv run python ...`, `uv run pytest`. The venv is `.venv/` — `source .venv/bin/activate`.
+- **GPU box:** run on the multi-GPU eval box (≈4×A100). Exploit the hardware: serve
+  several models concurrently and/or run datasets in parallel (see §7.4). Smaller models
+  can share a GPU; large models get a dedicated GPU.
+- **Ollama:** install/serve locally (`ollama serve`). Pull models with `ollama pull <model>`.
+  The agent talks to Ollama through LiteLLM via the `Client` wrapper (next section).
+- **Reference (hosted) models:** use the Azure OpenAI resource already wired for evals
+  (managed identity, no API key — `DefaultAzureCredential` resolves it). Deployments
+  available include `gpt-5.5` and `gpt-5-mini`. Confirm the endpoint the same way
+  the chart-reading handoff does: run `az login` and use the Azure settings from `.env`
+  ([agent_eval_plans/00-AGENT-HANDOFF.md](../../agent_eval_plans/00-AGENT-HANDOFF.md) §3).
+
+### 2.1 Talking to a model — the `Client` wrapper
+
+`data_formulator.agents.client_utils.Client` is the single LLM entry point for every
+agent. It already supports Ollama and Azure:
+
+```python
+from data_formulator.agents.client_utils import Client
+
+# Local model under test (Ollama)
+agent_client = Client(
+    endpoint="ollama",
+    model="qwen2.5-coder:7b",            # ollama/ prefix added automatically
+    api_base="http://localhost:11434",   # default; LiteLLM strips a trailing /api
+)
+
+# Reference / asker / grader (Azure managed identity — no api_key)
+ref_client = Client(
+    endpoint="azure",
+    model="gpt-5.5",                      # deployment name
+    api_base="https://<resource>.openai.azure.com/",
+)
+```
+
+> **Ollama `options` (quantization, context length, etc.):** LiteLLM forwards extra
+> kwargs to Ollama's `options` (e.g. `num_ctx`, `temperature`, `num_gpu`). The agent
+> calls `client.get_completion_with_tools(...)`. Decide early how you will pass
+> `num_ctx` (see §6) — either bake it into the Modelfile (`ollama create` with
+> `PARAMETER num_ctx ...`) or pass it through the client call. **Record exactly what you
+> used** — the context-length setting is one of the four questions in §0.
+
+---
+
+## 3. Step 1 — Choose the Ollama models to test
+
+**Search first, then decide.** Use web search / the Ollama library to pick a current,
+representative set. Bias toward models with strong **coding** and **instruction-following**
+ability, since the analyst agent's core action (`visualize`) emits Python + a chart spec
+through native tool-calls (see §7.1). Cover a range of sizes so the VRAM story is complete.
+
+Selection guidance:
+- **Tool-calling matters.** The analyst commits actions as **native tool calls**, not
+  JSON-in-text. Prefer models Ollama lists as supporting tools/function-calling. Flag any
+  model that lacks tool support — it will likely fail the agent loop, which is itself a
+  finding.
+- **Size buckets to cover** (span the spectrum so the report can speak to different VRAM budgets):
+  - **~3–4B** (small, low-VRAM): e.g. `llama3.2:3b`, `qwen2.5-coder:3b`, `phi`-class.
+  - **~7–9B** (mainstream): e.g. `qwen2.5-coder:7b`, `llama3.1:8b`,
+    `mistral`/`ministral`, `granite` coder, `deepseek-coder` variants.
+  - **~14–34B** (mid/large): e.g. `qwen2.5-coder:14b/32b`, `devstral`, `gpt-oss`-class.
+  - **Large reference-tier OSS** if the box allows: pick one or two flagships.
+- Treat the list as a hypothesis; the actual Ollama catalog at run time is the authority.
+  **List the final set in the report** with: model name, parameter count, quantization
+  pulled, on-disk size, and Ollama tool-calling support (yes/no).
+
+**Hardware-requirement table (build this and put it in the report):**
+
+| Model | Params | Quant | Disk | Est. VRAM @ ctx | Tool-calling |
+|-------|--------|-------|------|-----------------|--------------|
+
+Estimate VRAM at the context length you actually run (KV cache grows with `num_ctx` — a
+3B model that fits at 4k may not at 32k). Verify estimates against `ollama ps` / `nvidia-smi`
+during a real run and record the *observed* peak.
+
+---
+
+## 4. Step 2 — Seed datasets
+
+Assemble a **corpus of ~30 real tables** as the exploration starting points — a broad,
+varied set so each model is tested across many domains and data shapes, not a handful of
+tables. Source them from the families Data Formulator already ships / references:
+
+- **vega_datasets** — installed as a package (`from vega_datasets import data`) and also
+  configured in [py-src/data_formulator/example_datasets_config.py](../../py-src/data_formulator/example_datasets_config.py)
+  (Gapminder, Movies, US Income, Unemployment, Disasters...). Good clean multi-type tables.
+- **TidyTuesday** — real, messier tables. Several are wired in `example_datasets_config.py`
+  (College Majors, weekly gas prices, movies/shows) and more exist under
+  [experiment_data/tidytuesday/](../../experiment_data/tidytuesday/). Use these for the
+  "real-world schema" condition.
+- **spider** (optional / secondary) — text-to-SQL databases. Only include if you also want
+  a relational/multi-table condition; otherwise skip and note it as future work. (Most of
+  the analyst's value is single-table-to-chart, so vega + tidytuesday are the priority.)
+
+**Target ~30 tables total**, balanced across the sources and spanning domains and data
+types (temporal, categorical, quantitative, geographic; some clean, some with
+nulls/messiness; a range of row/column counts). For each, download/materialize it and
+record a one-line description + schema in `work/datasets/`. Keep each small enough to fit a
+local model's context (downsample very large tables to a few thousand rows, noting it).
+**Carve out a dev subset** of these for pipeline calibration (§6.1) — the remaining tables
+are the held-out test corpus.
+
+### 4.1 Loading a seed table into a workspace (the agent reads tables from disk)
+
+The agent's context builder reads tables via `workspace.read_data_as_df(name)`
+([py-src/data_formulator/agents/context.py](../../py-src/data_formulator/agents/context.py)),
+so a seed table must be **registered in a `Workspace`** before the agent can use it. Recipe:
+
+```python
+import pyarrow as pa, pandas as pd
+from data_formulator.datalake.workspace import Workspace
+
+ws = Workspace(identity_id="eval", root_dir="loops/model-evaluation/work/ws")
+df = pd.read_csv(...)                      # your seed table
+name = ws.get_fresh_name("gapminder")
+meta = ws.write_parquet_from_arrow(pa.Table.from_pandas(df), name)
+ws.add_table_metadata(meta)                # now read_data_as_df(name) works
+```
+
+Then pass `input_tables=[{"name": name}]` to `agent.run(...)`. **Smoke-test this once**
+(load one table, confirm `ws.read_data_as_df(name)` returns the frame) before scaling.
+
+---
+
+## 5. Step 3 — Generate questions (the *asker*)
+
+For **each dataset**, use a hosted asker model (**`gpt-5.5`**) to generate **exactly 5
+analysis questions**. Generate once per dataset and **reuse the identical question set
+across all models under test** — the questions must stay constant so the model is the only
+variable. With ~30 datasets × 5 questions, **each model is tested on ~150 questions** (the
+test goal per model).
+
+- Per dataset, generate **5 questions** with a fixed mix — default **3 direct + 2
+  open-ended** (keep the split constant across datasets):
+  - **Direct** ("Show the trend of life expectancy over time for each cluster.") — has a
+    fairly determinate good answer; easier to grade for correctness.
+  - **Open-ended** ("What's the most interesting story in this data? Explore it.") — tests
+    initiative, multi-step exploration, and judgment.
+- Give the asker the table name, schema, and a few sample rows (use the same lightweight
+  table summary the agent sees). Ask it to return strict JSON: a list of
+  `{id, dataset, kind: "direct"|"open", question}`.
+- Persist the generated questions to `work/questions/<dataset>.json` and **freeze them**
+  (do not regenerate per model). Sanity-check that each file has exactly 5 well-formed,
+  on-topic questions before the sweep — a bad question contaminates every model equally.
+
+---
+
+## 6. Settings to hold fixed (and to vary deliberately)
+
+To isolate the model variable, **freeze**: the asker model + prompt, the grader model +
+rubric, the question sets, the agent's `max_iterations` (start at **5**), and temperature
+(**0** for agent, asker, and grader unless a model rejects it).
+
+**Deliberately vary (these are findings, not nuisance):**
+- **Quantization** — record what each `ollama pull` actually fetched (usually q4_K_M). If a
+  model fails, optionally retry at a higher quant and note whether it helped.
+- **Context length (`num_ctx`)** — the agent's system prompt + table context + tool schemas
+  are sizable. A too-small `num_ctx` will truncate the system frame and the model will fail
+  to follow the action protocol. Establish a **working default** (try 8192; bump to 16384
+  if outputs look truncated) and note the minimum that works per model. This is central to
+  the low-VRAM story (bigger `num_ctx` ⇒ more VRAM).
+- **Tool-calling support** — if a model can't emit native tool calls, capture that as the
+  failure mode.
+
+### 6.1 Calibrate on a dev set before the mass run
+
+**Do not launch the full ~30-dataset sweep blind.** First carve out a small **dev set** and
+use it to shake out the pipeline end-to-end:
+
+- **Dev set:** ~3 datasets (pick varied ones — one clean vega table, one messy TidyTuesday
+  table, one with nulls/odd types) × their 5 frozen questions = ~15 dev items. Pick **2–3
+  models** spanning the size spectrum (one small, one mid, plus one **hosted reference**
+  `gpt-5.5`/`gpt-5-mini`). Keep the dev datasets clearly tagged so they're reportable
+  separately and not silently mixed into the headline test numbers.
+- **What the dev run must validate** before you trust the mass run:
+  - the harness drives `AnalystAgent.run` and records every event type (§7.1) without crashing;
+  - the **answer bundle + per-action execution outcomes** (§7.2) are captured correctly,
+    including a deliberately code-broken case (confirm the `code-broken` level is detected);
+  - the **outcome-level classifier** (§7.3) assigns sane levels — eyeball all ~15;
+  - the **grader** returns valid strict JSON on every item and its scores look reasonable
+    against your own read (this is the judge-calibration sample, §8);
+  - resume/idempotency works (re-running skips completed items);
+  - the reference model lands near the top of the scale (a sanity check for the rubric).
+- **Tune here, freeze after.** Adjust prompts (asker/grader), `num_ctx` default, timeouts,
+  and the outcome taxonomy on the dev set. Once the dev run looks right, **freeze the
+  pipeline** and only then launch the full corpus. Record the dev-set findings (especially
+  any rubric/classifier adjustments) in the report's Method section.
+
+---
+
+## 7. Step 4 — Run the analyst agent (the harness)
+
+Build a small harness under `work/` that, for each `(model, dataset, question)`, drives the
+real `AnalystAgent` and captures everything it emits.
+
+### 7.1 Driving the agent
+
+`AnalystAgent` lives at [py-src/data_formulator/analyst/agent.py](../../py-src/data_formulator/analyst/agent.py).
+`run(...)` is a **generator that yields event dicts**; consume them to exhaustion (or until a
+terminal event) and record them. Minimal shape:
+
+```python
+from data_formulator.analyst.agent import AnalystAgent
+
+agent = AnalystAgent(
+    client=agent_client,            # the Ollama Client from §2.1
+    workspace=ws,                   # the Workspace from §4.1
+    max_iterations=5,
+    identity_id="eval",             # enables reasoning log; pass None to skip
+)
+
+events = []
+for ev in agent.run(input_tables=[{"name": name}], user_question=question["question"]):
+    events.append(ev)
+    if ev.get("type") in ("completion", "interact", "error"):
+        break
+```
+
+**Event types to capture** (from the run loop / skills):
+- `agent_action` — the committed action (`visualize` / `interact` / `delegate` / `write_report`)
+  and its `action_data` (code, chart spec, etc.).
+- `result` — a visualization result: the transformed table (`rows`) + chart spec + `chart_id`.
+  This is the **data-thread content** — the primary artifact to grade.
+- `tool_start` / `tool_result` — inspection-tool activity (`execute_python_script`,
+  `inspect_source_data`, `load_skill`).
+- `text_delta` with `channel="report"` — streamed report markdown (if `write_report` runs).
+- `completion` — final answer / status (`success`, `tool_rounds_exhausted`, etc.).
+- `error` — capture the message + code; **classify the failure** (see §7.3).
+
+### 7.2 What "produces an answer" means here — capture the whole spectrum
+
+The agent answers by **acting on data**, not by prose alone. A good run typically commits
+one or more `visualize` actions (each yields a `result` with a derived table + chart), and
+ends with a concise closing answer. But the interesting signal is the *spectrum between*
+"did nothing" and "perfect": a model may emit code that **doesn't run**, code that **runs
+but produces the wrong/empty table**, a chart with the **wrong encodings**, or a technically
+correct answer that **misses the point of the question**. Capture enough to tell these apart.
+
+For every committed `visualize` (and every `execute_python_script` tool call), record the
+**execution outcome** explicitly — don't just keep the final answer:
+- **did the code run?** (the sandbox raised vs. returned) — from the `result` / `tool_result`
+  event and the observation the loop fed back. Note the exception type/message if it threw.
+- **how many repair attempts** the agent took before the code ran (or gave up).
+- **did it yield a non-empty, sensible output table?** (row/col counts; all-null or 0-row
+  outputs are a distinct "ran but empty" outcome).
+- **the chart spec actually produced** (type + encodings) vs. what the question called for.
+
+Persist, per `(model, dataset, question)`:
+- the full ordered event list (`work/runs/<model>/<dataset>/<qid>.jsonl`),
+- a distilled "answer bundle": the closing text, each chart's spec + a sample of its output
+  rows, the Python the agent ran, **and the per-action execution outcomes above**. This
+  bundle is what you hand the grader (§8).
+- run metadata: terminal status, action count, **code-error count + repair-loop count**,
+  wall-clock time, tokens if available, and observed peak VRAM (`nvidia-smi` snapshot).
+
+### 7.3 Outcome taxonomy — a graded spectrum, not just pass/fail (this is half the report)
+
+Classify **every run** (not only failures) into exactly one outcome level, so the report can
+show the full distribution per model rather than a binary. The levels, worst → best:
+
+1. **no-action** — model never emits a native action at all (often: no tool-calling support,
+   or `num_ctx` truncated the protocol). The "doesn't work at all" floor.
+2. **malformed-action** — emits actions but with broken/invalid args (bad JSON, missing
+   required field) and never recovers.
+3. **code-broken** — commits `visualize` but the code **never runs successfully** (throws
+   every attempt; agent exhausts the repair budget). Record the dominant exception.
+4. **ran-but-empty/wrong** — code runs, but the output table is **empty, all-null, or clearly
+   wrong** (bad aggregation/join/filter), so the chart is meaningless.
+5. **ran-but-suboptimal** — produces a valid chart, but it's a **weak answer**: wrong chart
+   type for the question, missing an obvious encoding/breakdown, answers a narrower question
+   than asked, or stops short on an open-ended prompt.
+6. **good** — runs cleanly and answers the question well; chart + transform are faithful and
+   appropriate; concise close.
+
+Also flag, orthogonally (a run can be `good` and still carry a flag): **protocol-drift**
+(narrates instead of acting, or re-explores the same thing without closing), **slow**
+(usable but far slower than the reference), and **timeout/OOM** (too big for VRAM or hangs —
+maps to level 1 for scoring but tag the cause).
+
+The per-model **distribution across these levels** (e.g. "40% good, 30% ran-but-suboptimal,
+20% code-broken, 10% no-action") plus the dominant failure level is exactly what turns the
+report from "works / doesn't" into an honest, graded review.
+
+### 7.4 Parallelism (use the box)
+
+Iterate `models × datasets × questions` (~30 datasets × 5 questions = ~150 per model). To
+exploit ≥4 GPUs: run several models concurrently (separate Ollama model loads / pinned GPUs),
+and/or fan out datasets per model. Keep the **asker and grader calls serialized enough** to
+respect Azure rate limits. Make the harness **resumable** (skip `(model,dataset,qid)` whose
+`.jsonl` already exists) so a crash mid-sweep doesn't restart everything. Always exercise the
+**dev-set calibration run (§6.1) first** and freeze the pipeline before launching the full
+corpus.
+
+---
+
+## 8. Step 5 — Grade the answers (the *grader*)
+
+Use **`gpt-5.5`** as the grader, separate from the agent under test: the agent is the Ollama
+model, so there's no self-grading (except the `gpt-5.5` reference run — note that). Temperature 0.
+
+For each `(model, dataset, question)` answer bundle, the grader sees: the question, the table
+schema + samples, and the agent's answer bundle (closing text + chart specs + output-row
+samples + code + **the per-action execution outcomes from §7.2**). The grader is told the
+run's mechanical outcome level (§7.3) so it scores *quality given that the code ran* rather
+than re-deriving whether it ran. It returns **strict JSON** scores on a fixed rubric, e.g.
+(1–5 each):
+- **task_completion** — did it actually answer the question that was asked (not a narrower one)?
+- **code_executed** — did the agent's code run cleanly (no errors / few repairs)?
+- **result_correctness** — are the transforms/aggregations and the output table faithful to
+  the data (right filter/group/join; non-empty, sensible)?
+- **chart_appropriateness** — sensible chart type + encodings for the question.
+- **insightfulness** (esp. for open-ended) — did it surface something meaningful / explore,
+  or stop at the shallow first answer?
+- **protocol_adherence** — clean agent behavior (acted decisively, no flailing/repetition).
+- plus a one-line `rationale`, the **outcome level** from §7.3, and an overall
+  `verdict ∈ {pass, partial, fail}` (partial = ran-but-suboptimal / ran-but-empty: the
+  "works but not ideal" middle the review must surface).
+
+Also compute a **reference delta**: grade the hosted reference runs (`gpt-5.5`, `gpt-5-mini`)
+on the identical questions so each local model can be reported *relative to* a known-good
+ceiling, not just on an absolute scale. Persist all scores to `work/grades/`.
+
+**Calibrate the judge:** spot-check ~5–10 graded items by hand and confirm the grader's
+scores are sane; note any systematic judge bias in the report.
+
+---
+
+## 9. Step 6 — Summarize & deliverables
+
+### 9.1 `work/` layout
+
+```
+loops/model-evaluation/work/
+  ws/                      # the eval Workspace (seed tables registered here)
+  models.json              # the chosen model set + hardware table (§3)
+  datasets/                # ~30 materialized seed tables + descriptions (§4); dev subset tagged
+  dataset_splits.json      # which datasets are dev (§6.1) vs held-out test
+  questions/<dataset>.json # frozen question sets — 5 per dataset (§5)
+  runs/<model>/<dataset>/<qid>.jsonl   # full event streams (§7)
+  bundles/<model>/<dataset>/<qid>.json # distilled answer bundles (§7.2)
+  grades/<model>/...                   # grader JSON (§8)
+  aggregates.{json,csv}    # per-model means, win-rates vs reference, outcome-level distribution
+  scripts/                 # all harness/asker/grader/aggregation scripts
+```
+
+Put **every script** under `work/scripts/`. Keep them runnable with `uv run python`.
+
+### 9.2 `report.md` (the headline deliverable)
+
+Write `loops/model-evaluation/report.md`. It must, up front, answer the four §0 questions,
+then back them with data. Required sections:
+
+1. **TL;DR verdict table** — every tested model with: size, quant, `num_ctx` used, observed
+   peak VRAM, overall score, score relative to the `gpt-5-mini` / `gpt-5.5` reference, and
+   the **outcome-level distribution** (§7.3) — e.g. `% good / suboptimal / empty-wrong /
+   code-broken / no-action` — plus the dominant level. Don't collapse to a single pass/fail.
+2. **"These models work, these don't" — and how they fall short** — group models into clear
+   tiers (reliable / usable-with-caveats / unusable), and for the middle tier name the
+   *specific* shortfall (e.g. "code runs but charts are often the wrong type", "fine on direct
+   questions, gives up on open-ended"). The graded middle is the point of the review.
+3. **Minimum specs & the low-VRAM floor** — name the *smallest model that actually works*,
+   the quant + `num_ctx` it needs, and the realistic quality at that size. State the
+   practical VRAM floor for usable quality and the recommended step-ups across the spectrum.
+4. **Recommended settings** — quantization, `num_ctx`, `max_iterations`, and any Ollama
+   `options` that materially helped. Include a copy-pasteable Ollama setup for the top pick.
+5. **Method** — datasets, question counts, asker/grader models, rubric, and reproduction
+   command(s). Note judge-calibration findings and limitations.
+6. **Per-model notes** — short paragraph each: outcome-level distribution, what it did well,
+   *how* it fell short (which level dominated and why), and example transcript pointers for
+   a representative good run and a representative failure.
+
+Keep it evidence-led and honest: the graded middle ("runs but not ideal") and a credible
+"fail" list are as valuable as the "works" list.
+
+---
+
+## 10. Conventions & guardrails
+
+- **`uv` only.** `uv run python ...` / `uv pip install ...`. Source is in `py-src/`.
+- **No secrets** in the repo, scripts, or transcripts. Azure auth is managed-identity only;
+  do not write API keys anywhere.
+- **Freeze the controls** (asker, grader, questions, `max_iterations`, temperature) so the
+  model is the only variable; **record** every deliberately-varied setting (quant, `num_ctx`).
+- **Make the sweep resumable** and idempotent; never delete prior runs to "retry" — write to
+  a fresh path and keep the originals.
+- **Don't commit/push or run destructive git ops** on this working tree.
+- Keep all artifacts under `loops/model-evaluation/work/`; the only top-level deliverable is
+  `loops/model-evaluation/report.md`.
+
+---
+
+## 11. Suggested order of work
+
+1. **Smoke the stack**: activate venv; confirm one Ollama chat completion through `Client`
+   and one Azure (`gpt-5.5`) completion; load one seed table into a `Workspace` and confirm
+   `read_data_as_df` works; drive `AnalystAgent.run` once on a tiny model + one question and
+   capture the event stream end-to-end. Fix wiring before scaling.
+2. **Pick models** (§3) and write `work/models.json` + the hardware table.
+3. **Materialize ~30 datasets** (§4) and **freeze the 5-question sets per dataset** (§5) with
+   `gpt-5.5`; record the dev vs. test split (§6.1) in `work/dataset_splits.json`.
+4. **Build the harness** (§7): runner → answer bundles + per-action execution outcomes,
+   resumable, with outcome-level tagging.
+5. **Calibrate on the dev set** (§6.1): run 2–3 models (incl. a hosted reference) over the
+   ~15 dev items end-to-end; validate capture, classifier, grader JSON, and resume; tune
+   prompts/`num_ctx`/timeouts; then **freeze the pipeline**.
+6. **Mass run**: sweep all `models × test-datasets × questions` (~150 questions/model).
+7. **Grade** (§8) all runs incl. the `gpt-5.5` / `gpt-5-mini` reference; calibrate the judge.
+8. **Aggregate** into `aggregates.{json,csv}` and **write `report.md`** (§9.2), leading with
+   the four §0 answers and the low-VRAM floor.
\ No newline at end of file

From 8bd4973f9c3cca1c86ea26e0f26aa9163ecc7165 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <93549116+Chenglong-MS@users.noreply.github.com>
Date: Thu, 18 Jun 2026 18:47:02 +0000
Subject: [PATCH 24/29] Add mini analyst agent with frontend mini-mode toggle
 and open-model support

Introduce MiniAnalystAgent, a single-decision, low-cost variant of the analyst
agent that emits the same frontend streaming event contract (visualize / explain)
as the standard agent, so it is a drop-in replacement selectable from the UI.

- analyst/mini_agent.py: MiniAnalystAgent (subclass of AnalystAgent). One turn:
  decide -> either "visualize" (data-transform code + chart spec, committed via the
  shared core-skill dispatch so action/result events are identical to the standard
  agent) or "explain" (free-form text surfaced via the frontend Pure-Q&A path).
  Optional one-shot data inspection; JSON action contract for small/local models.
- routes/agents.py: agent_mode switch to swap AnalystAgent <-> MiniAnalystAgent.
- src/app/App.tsx, dfSlice.tsx, SimpleChartRecBox.tsx: frontend mini-mode toggle,
  miniMode redux state, and agent_mode in the analyst streaming request body.
- agents/client_utils.py: open-model (Ollama) enablement -- replay buffered
  responses as streaming chunks, salvage JSON tool calls emitted as plain content,
  and retry without reasoning_effort for models that lack "think" support.
- analyst/agent.py: tolerate non-string chart encodings from weak models
  (repairable "field not found" instead of an unhashable-type crash).
- analyst/__init__.py: export MiniAnalystAgent.
- tests: test_mini_agent.py (23) and test_client_utils.py additions; all green.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 py-src/data_formulator/agents/client_utils.py | 277 ++++++-
 py-src/data_formulator/analyst/__init__.py    |   2 +
 py-src/data_formulator/analyst/agent.py       |  18 +-
 py-src/data_formulator/analyst/mini_agent.py  | 725 ++++++++++++++++++
 py-src/data_formulator/routes/agents.py       |  39 +-
 src/app/App.tsx                               |  17 +-
 src/app/dfSlice.tsx                           |   2 +
 src/views/SimpleChartRecBox.tsx               |   1 +
 tests/backend/agents/test_client_utils.py     | 166 ++++
 tests/backend/agents/test_mini_agent.py       | 433 +++++++++++
 10 files changed, 1641 insertions(+), 39 deletions(-)
 create mode 100644 py-src/data_formulator/analyst/mini_agent.py
 create mode 100644 tests/backend/agents/test_mini_agent.py

diff --git a/py-src/data_formulator/agents/client_utils.py b/py-src/data_formulator/agents/client_utils.py
index 0069c4b4..efd99d51 100644
--- a/py-src/data_formulator/agents/client_utils.py
+++ b/py-src/data_formulator/agents/client_utils.py
@@ -1,7 +1,220 @@
+import json
 import litellm
+from types import SimpleNamespace
+
 from azure.identity import DefaultAzureCredential, get_bearer_token_provider
 
 
+def _synthesize_stream(response):
+    """Yield LiteLLM-style streaming chunks reconstructed from a *buffered*
+    response, so a caller that consumes a stream sees the same data.
+
+    Used for Ollama: LiteLLM's Ollama streaming path does not parse native
+    tool calls (it leaks the call as raw JSON ``content`` with
+    ``finish_reason='stop'``), whereas the buffered path parses them correctly.
+    We therefore call Ollama non-streaming and replay the result as a stream.
+    """
+    try:
+        choice0 = response.choices[0]
+        message = choice0.message
+        finish_reason = getattr(choice0, "finish_reason", "stop") or "stop"
+    except (AttributeError, IndexError):
+        return
+
+    reasoning = getattr(message, "reasoning_content", None)
+    if reasoning:
+        yield SimpleNamespace(choices=[SimpleNamespace(
+            delta=SimpleNamespace(content=None, tool_calls=None,
+                                  reasoning_content=reasoning),
+            finish_reason=None)])
+
+    content = getattr(message, "content", None)
+    if content:
+        yield SimpleNamespace(choices=[SimpleNamespace(
+            delta=SimpleNamespace(content=content, tool_calls=None,
+                                  reasoning_content=None),
+            finish_reason=None)])
+
+    for idx, tc in enumerate(getattr(message, "tool_calls", None) or []):
+        fn = getattr(tc, "function", None)
+        yield SimpleNamespace(choices=[SimpleNamespace(
+            delta=SimpleNamespace(
+                content=None, reasoning_content=None,
+                tool_calls=[SimpleNamespace(
+                    index=idx, id=getattr(tc, "id", None) or f"call_{idx}",
+                    function=SimpleNamespace(
+                        name=getattr(fn, "name", None),
+                        arguments=getattr(fn, "arguments", "") or ""))]),
+            finish_reason=None)])
+
+    yield SimpleNamespace(choices=[SimpleNamespace(
+        delta=SimpleNamespace(content=None, tool_calls=None,
+                              reasoning_content=None),
+        finish_reason=finish_reason)])
+
+
+def _extract_json_objects(text):
+    """Return top-level brace-balanced JSON object substrings found in ``text``.
+
+    String-aware (ignores braces inside quoted strings) so it survives code
+    payloads that contain ``{`` / ``}``. Used to recover an action that a weak
+    model emitted as plain content instead of a native tool call.
+    """
+    objs = []
+    depth = 0
+    start = -1
+    in_str = False
+    esc = False
+    for i, ch in enumerate(text):
+        if in_str:
+            if esc:
+                esc = False
+            elif ch == "\\":
+                esc = True
+            elif ch == '"':
+                in_str = False
+            continue
+        if ch == '"':
+            in_str = True
+        elif ch == "{":
+            if depth == 0:
+                start = i
+            depth += 1
+        elif ch == "}":
+            if depth > 0:
+                depth -= 1
+                if depth == 0 and start >= 0:
+                    objs.append(text[start:i + 1])
+                    start = -1
+    return objs
+
+
+def _match_tool_from_obj(obj, tools, _depth=0):
+    """Map a parsed JSON object to ``(tool_name, arguments_dict)`` if it matches
+    one of ``tools``' schemas, else ``None``.
+
+    Handles four shapes weak models emit instead of a native tool call:
+      * nested wrapper — ``{"thought": ..., "action": {"name": "visualize",
+        "arguments": {...}}}`` (a key points to an object describing the call);
+      * echoed wire format — ``{"tool_calls": [{...}]}`` (first entry only);
+      * flat explicit wrapper — ``{"name"/"tool"/"action": "visualize",
+        "arguments": {...}}`` (the object names the tool directly);
+      * bare arguments — ``{"code": ..., "output_variable": ..., "chart": ...}``
+        (no tool named; keys must cover a tool's ``required``; most specific wins).
+    """
+    if not isinstance(obj, dict) or _depth > 4:
+        return None
+
+    tool_by_name = {}
+    for t in tools or []:
+        fn = (t or {}).get("function") or {}
+        name = fn.get("name")
+        if name:
+            tool_by_name[name] = fn
+
+    # Nested wrapper: a key points to an object that itself describes the call
+    # (e.g. {"action": {"name": "visualize", "arguments": {...}}}). Recurse.
+    for wrap_key in ("action", "tool", "function", "tool_call", "call",
+                     "function_call"):
+        inner = obj.get(wrap_key)
+        if isinstance(inner, dict):
+            got = _match_tool_from_obj(inner, tools, _depth + 1)
+            if got is not None:
+                return got
+
+    # OpenAI tool-call wire format echoed as content: {"tool_calls": [{...}]}.
+    tc_list = obj.get("tool_calls")
+    if isinstance(tc_list, list) and tc_list:
+        got = _match_tool_from_obj(tc_list[0], tools, _depth + 1)
+        if got is not None:
+            return got
+
+    # Flat explicit wrapper: the object names the tool as a string.
+    for name_key in ("name", "tool", "action", "function", "tool_name"):
+        cand = obj.get(name_key)
+        if isinstance(cand, str) and cand in tool_by_name:
+            args = obj.get("arguments")
+            if isinstance(args, str):
+                try:
+                    args = json.loads(args)
+                except (ValueError, TypeError):
+                    args = None
+            if not isinstance(args, dict):
+                args = obj.get("parameters") if isinstance(obj.get("parameters"), dict) else None
+            if not isinstance(args, dict):
+                args = obj.get("args") if isinstance(obj.get("args"), dict) else None
+            if not isinstance(args, dict):
+                args = {k: v for k, v in obj.items()
+                        if k not in (name_key, "arguments", "parameters", "args")}
+            return cand, args
+
+    # Bare arguments: match by required-key coverage, most specific tool wins.
+    keys = set(obj.keys())
+    best = None
+    best_score = None
+    for name, fn in tool_by_name.items():
+        params = fn.get("parameters") or {}
+        required = set(params.get("required") or [])
+        props = set((params.get("properties") or {}).keys())
+        if not required or not required.issubset(keys):
+            continue
+        score = (len(required), len(keys & props), -len(keys - props))
+        if best_score is None or score > best_score:
+            best_score, best = score, name
+    if best is not None:
+        return best, dict(obj)
+    return None
+
+
+def _salvage_tool_calls_from_content(response, tools):
+    """If ``response`` carries an action as JSON *content* but no native
+    ``tool_calls``, rewrite it into a proper tool call in place.
+
+    Weak / open models under a long system prompt frequently emit the action
+    (e.g. ``visualize``/``ask_user``) as a JSON object in the assistant content
+    channel rather than as a native function call. This recovers that action so
+    the agent — which only consumes native ``tool_calls`` — can proceed."""
+    if not tools:
+        return response
+    try:
+        choice0 = response.choices[0]
+        message = choice0.message
+    except (AttributeError, IndexError):
+        return response
+    if getattr(message, "tool_calls", None):
+        return response
+    content = getattr(message, "content", None)
+    if not isinstance(content, str) or "{" not in content:
+        return response
+
+    for blob in _extract_json_objects(content):
+        try:
+            obj = json.loads(blob)
+        except (ValueError, TypeError):
+            continue
+        matched = _match_tool_from_obj(obj, tools)
+        if matched is None:
+            continue
+        name, args = matched
+        try:
+            from litellm.types.utils import ChatCompletionMessageToolCall, Function
+            tc = ChatCompletionMessageToolCall(
+                function=Function(name=name, arguments=json.dumps(args)),
+                id="call_salvage_0", type="function")
+        except Exception:
+            tc = SimpleNamespace(
+                id="call_salvage_0", type="function",
+                function=SimpleNamespace(name=name, arguments=json.dumps(args)))
+        message.tool_calls = [tc]
+        message.content = None
+        try:
+            choice0.finish_reason = "tool_calls"
+        except (AttributeError, TypeError):
+            pass
+        break
+    return response
+
+
 class Client(object):
     """
     Returns a LiteLLM client configured for the specified endpoint and model.
@@ -91,8 +304,14 @@ def _is_reasoning_effort_error(self, error_text: str) -> bool:
         """Detect provider errors caused by an unsupported ``reasoning_effort``
         value (e.g. ``"minimal"`` on a model that only accepts
         ``none/low/medium/high/xhigh``). The provider message reliably
-        mentions the parameter name."""
-        return "reasoning_effort" in error_text.lower()
+        mentions the parameter name.
+
+        Also covers Ollama models that lack reasoning support: LiteLLM maps
+        ``reasoning_effort`` to Ollama's ``think`` flag, and such models reject
+        it with ``"<model> does not support thinking"``. Retrying without
+        ``reasoning_effort`` (which drops ``think``) lets these models run."""
+        lowered = error_text.lower()
+        return "reasoning_effort" in lowered or "does not support thinking" in lowered
 
     @classmethod
     def from_config(cls, model_config: dict[str, str]):
@@ -129,6 +348,27 @@ def ping(self, timeout: int = 10):
             max_tokens=3, drop_params=True, **params,
         )
 
+    def _dispatch(self, *, messages, stream, params, tools=None, extra=None):
+        """Issue the LiteLLM call, transparently handling Ollama streaming.
+
+        Ollama's streaming path in LiteLLM fails to parse native tool calls, so
+        for Ollama we always call non-streaming and, when the caller asked for a
+        stream, replay the buffered response as streaming chunks via
+        ``_synthesize_stream``. All other providers stream natively."""
+        is_ollama = self.endpoint == "ollama"
+        effective_stream = stream and not is_ollama
+        call_kwargs = dict(model=self.model, messages=messages,
+                           drop_params=True, stream=effective_stream,
+                           **params, **(extra or {}))
+        if tools is not None:
+            call_kwargs["tools"] = tools
+        resp = litellm.completion(**call_kwargs)
+        if is_ollama and tools:
+            resp = _salvage_tool_calls_from_content(resp, tools)
+        if is_ollama and stream:
+            return _synthesize_stream(resp)
+        return resp
+
     def get_completion(self, messages, stream=False, reasoning_effort="low",
                        **kwargs):
         """Send a chat completion request via LiteLLM.
@@ -142,24 +382,15 @@ def get_completion(self, messages, stream=False, reasoning_effort="low",
         params["reasoning_effort"] = reasoning_effort
         params.update(kwargs)
         try:
-            return litellm.completion(
-                model=self.model, messages=messages,
-                drop_params=True, stream=stream, **params,
-            )
+            return self._dispatch(messages=messages, stream=stream, params=params)
         except Exception as e:
             err = str(e)
             if self._is_reasoning_effort_error(err):
                 params.pop("reasoning_effort", None)
-                return litellm.completion(
-                    model=self.model, messages=messages,
-                    drop_params=True, stream=stream, **params,
-                )
+                return self._dispatch(messages=messages, stream=stream, params=params)
             if self._is_image_deserialize_error(err):
                 sanitized = self._strip_images_from_messages(messages)
-                return litellm.completion(
-                    model=self.model, messages=sanitized,
-                    drop_params=True, stream=stream, **params,
-                )
+                return self._dispatch(messages=sanitized, stream=stream, params=params)
             raise
 
     def get_completion_with_tools(self, messages, tools, stream=False,
@@ -172,22 +403,16 @@ def get_completion_with_tools(self, messages, tools, stream=False,
         params = self.params.copy()
         params["reasoning_effort"] = reasoning_effort
         try:
-            return litellm.completion(
-                model=self.model, messages=messages, tools=tools,
-                drop_params=True, stream=stream, **params, **kwargs,
-            )
+            return self._dispatch(messages=messages, stream=stream,
+                                  params=params, tools=tools, extra=kwargs)
         except Exception as e:
             err = str(e)
             if self._is_reasoning_effort_error(err):
                 params.pop("reasoning_effort", None)
-                return litellm.completion(
-                    model=self.model, messages=messages, tools=tools,
-                    drop_params=True, stream=stream, **params, **kwargs,
-                )
+                return self._dispatch(messages=messages, stream=stream,
+                                      params=params, tools=tools, extra=kwargs)
             if self._is_image_deserialize_error(err):
                 sanitized = self._strip_images_from_messages(messages)
-                return litellm.completion(
-                    model=self.model, messages=sanitized, tools=tools,
-                    drop_params=True, stream=stream, **params, **kwargs,
-                )
+                return self._dispatch(messages=sanitized, stream=stream,
+                                      params=params, tools=tools, extra=kwargs)
             raise
\ No newline at end of file
diff --git a/py-src/data_formulator/analyst/__init__.py b/py-src/data_formulator/analyst/__init__.py
index 0e56267b..13bac642 100644
--- a/py-src/data_formulator/analyst/__init__.py
+++ b/py-src/data_formulator/analyst/__init__.py
@@ -36,9 +36,11 @@
     build_registry,
 )
 from data_formulator.analyst.agent import AnalystAgent
+from data_formulator.analyst.mini_agent import MiniAnalystAgent
 
 __all__ = [
     "AnalystAgent",
+    "MiniAnalystAgent",
     "Event",
     "Skill",
     "SkillContext",
diff --git a/py-src/data_formulator/analyst/agent.py b/py-src/data_formulator/analyst/agent.py
index 8ee4c452..078fb610 100644
--- a/py-src/data_formulator/analyst/agent.py
+++ b/py-src/data_formulator/analyst/agent.py
@@ -1025,10 +1025,26 @@ def _run_visualize_code(
             row_count = len(full_df)
 
             chart_encodings = chart_spec.get("encodings", {})
+
+            def _missing_encoding(field: Any) -> bool:
+                # field is normally a column-name string. Weak models sometimes
+                # emit a dict ({"field": "col"}), a list, or other non-string;
+                # turn those into a clean, repairable "not found" instead of an
+                # unhashable-type crash on the membership test below.
+                if not field:
+                    return False  # empty / None -> optional channel, skip
+                if isinstance(field, dict):
+                    field = field.get("field")
+                    if not field:
+                        return False
+                if not isinstance(field, str):
+                    return True  # list / number / etc. -> invalid single column
+                return field not in full_df.columns
+
             missing_fields = [
                 f"{channel}: '{field}'"
                 for channel, field in chart_encodings.items()
-                if field and field not in full_df.columns
+                if _missing_encoding(field)
             ]
             if missing_fields:
                 available = list(full_df.columns)
diff --git a/py-src/data_formulator/analyst/mini_agent.py b/py-src/data_formulator/analyst/mini_agent.py
new file mode 100644
index 00000000..33332b53
--- /dev/null
+++ b/py-src/data_formulator/analyst/mini_agent.py
@@ -0,0 +1,725 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""A single-decision, no-loop variant of :class:`AnalystAgent` for small models.
+
+:class:`MiniAnalystAgent` is the most stripped-down member of the analyst family.
+Where :class:`~data_formulator.analyst.agent.AnalystAgent` runs a
+multi-step *inspect -> act -> observe* loop that can commit several actions, the
+mini agent makes exactly **one analytic decision per run** and stops. Given the
+data context it returns ONE of two things:
+
+* **visualize** — a small data-transformation script plus a chart spec (this is
+  the default; almost every question should produce a chart), or
+* **explain** — a short free-text answer (only when the user is clearly *not*
+  asking for a chart, e.g. a yes/no or factual question).
+
+By default the agent may look at the data once before deciding
+(``allow_inspection=True``), and this is the configuration the project uses: it ties
+the no-tool path on the strongest models and is more reliable on mid-tier models that
+need to check a join before writing it (see ``loops/model-evaluation`` Section 9).
+
+* ``allow_inspection=True`` (default, recommended) — the model MAY run a single
+  ``execute_python_script`` inspection to look at the data, then must produce its
+  visualize/explain. The inspection budget is one call, so it never becomes a loop.
+* ``allow_inspection=False`` — an evaluation-only ablation: no tools at all, the model
+  must produce its visualize/explain directly in one shot.
+
+The chart-type set is deliberately **reduced** to a handful of common types, and
+the prompt is tightly scoped, so small open-weight models reliably emit a
+well-formed ``visualize`` action. To keep small models usable without drifting
+into multi-chart territory, a *committed* visualize whose code/encodings fail is
+**repaired in place** (the model is shown the error and asked to fix the SAME
+chart) up to ``max_repair_attempts`` times — this completes the single
+visualization, it does not start a new analysis turn.
+
+Reuse: the visualize execution + ``result`` event are produced by the **same**
+core-skill dispatch the base agent uses (:meth:`_dispatch_skill_action`), so a
+mini result is byte-for-byte the shape every consumer already understands. The
+plain-text transport (``_call_model`` / ``_parse_action`` /
+``_run_inspection_tool``) carries actions as content JSON so models with weak or
+absent function-calling still work; only the prompt and the single-decision
+control flow are new.
+"""
+
+from __future__ import annotations
+
+import json
+import time
+from typing import Any, Generator
+
+from data_formulator.agent_config import reasoning_effort_for
+from data_formulator.agents.client_utils import (
+    _extract_json_objects,
+    _match_tool_from_obj,
+)
+from data_formulator.analyst.agent import (
+    AnalystAgent,
+    _AGENT_ID,
+    _CORE_SKILL,
+    _rescue_unpack_json_strings,
+    handle_inspect_source_data,
+    logger,
+)
+from data_formulator.analyst.skills import SkillContext
+
+# Keys a model may use to carry the explanation text in an ``explain`` action.
+_EXPLAIN_TEXT_KEYS = ("text", "explanation", "answer", "summary", "content", "message")
+
+# Keys a model may use to carry its private reasoning alongside the action JSON;
+# surfaced as a thinking_text event (mirrors how the native loop surfaces the
+# assistant content that accompanies a tool call).
+_THOUGHT_KEYS = ("thought", "thoughts", "reasoning", "thinking", "rationale")
+
+
+# The reduced chart-type set. Every name here is a valid Data Formulator
+# ``chart_type`` that the eval renderer and the visualize skill both understand;
+# the list is kept short on purpose so a small model picks a sensible type
+# instead of guessing among twenty.
+_MINI_CHART_TYPES = (
+    "Bar Chart",
+    "Grouped Bar Chart",
+    "Line Chart",
+    "Scatter Plot",
+    "Histogram",
+    "Pie Chart",
+    "Heatmap",
+)
+
+_MINI_CHART_REFERENCE = """\
+- Bar Chart (x, y, color) - compare ONE number across categories. Category on x, number on y. Set color to colour/stack by a second category.
+- Grouped Bar Chart (x, y, group) - side-by-side bars split by a second category; put that second category on `group`.
+- Line Chart (x, y, color) - a trend over an ordered or time x-axis; color draws one line per series.
+- Scatter Plot (x, y, color, size) - relationship between two numeric fields.
+- Histogram (x) - distribution of ONE numeric field; put the raw field on x, do NOT pre-bin it.
+- Pie Chart (color, size) - parts of a whole with <=7 slices; slice category on `color`, its value on `size`.
+- Heatmap (x, y, color) - a 2D grid; x and y are the two categories, color is the numeric cell value."""
+
+
+# A pseudo-tool advertised so the JSON matcher recognises an ``explain`` action.
+# ``explain`` is not a registered skill action (it never reaches the skill
+# dispatch); the mini loop intercepts it and ends the run with its text.
+_EXPLAIN_TOOL = {
+    "type": "function",
+    "function": {
+        "name": "explain",
+        "description": "Answer the user in plain text when no chart is needed.",
+        "parameters": {
+            "type": "object",
+            "properties": {"text": {"type": "string"}},
+            "required": ["text"],
+        },
+    },
+}
+
+
+# The complete, self-contained system prompt for the mini agent. Slots
+# ({chart_types}, {inspect_note}) are filled by str.replace (NOT str.format) so
+# the literal JSON braces below stay intact.
+_MINI_PROMPT_TEMPLATE = """\
+You are a data visualization agent. The user asks a question about their data and
+you answer it by producing ONE chart in a single step.
+
+## Your data
+The tables are already loaded. The user message lists them under [AVAILABLE TABLES]
+(or [PRIMARY TABLE(S)]) with their columns and a few sample rows, and ends with
+[USER QUESTION]. In your Python, read a table by its EXACT file name shown there,
+e.g. pd.read_csv('orders.csv') or pd.read_parquet('sales.parquet'). Never invent
+files or columns that are not listed.
+
+## What you output: exactly ONE JSON object
+Your ENTIRE reply is ONE JSON object and nothing else - no prose, no markdown
+fences. It is one of two kinds:
+
+1. VISUALIZE - use this for almost every question:
+{"thought": "<one short sentence>", "tool": "visualize", "arguments": {"code": "<python that builds the result table>", "output_variable": "<the variable your code assigns the final DataFrame to>", "chart": {"chart_type": "<one name from the list below>", "encodings": {"x": "<col>", "y": "<col>"}, "config": {}}, "title": "<short Title Case title>", "input_tables": ["<source table name>"]}}
+
+2. EXPLAIN - only when the user is NOT asking for a chart (a yes/no or factual question):
+{"thought": "<one short sentence>", "tool": "explain", "arguments": {"text": "<your answer in 1-3 sentences>"}}
+
+When in doubt, VISUALIZE.
+
+## Writing the visualize code
+- A standalone Python script: imports at the top, NO function wrapper.
+- Read the source tables by their exact file names, then aggregate / filter / sort /
+  reshape so the DataFrame is exactly what the chart needs, and assign it to your
+  output_variable.
+- output_variable MUST be a pandas DataFrame (a table with named columns), NEVER a
+  Series or a single number. Two common mistakes and their fixes:
+    * groupby -> pass as_index=False, e.g.
+      df.groupby('city', as_index=False)['sales'].sum()
+    * value_counts() returns a Series -> call .reset_index(), e.g.
+      df['city'].value_counts().reset_index(name='count')   # columns: city, count
+- Every column named in `encodings` MUST be an actual column of your output
+  DataFrame (check the names match exactly, including the ones you create).
+- Allowed libraries: pandas, numpy, duckdb, math, datetime, statistics, collections,
+  re, sklearn, scipy. NOT allowed: matplotlib, plotly, seaborn, os, sys, requests.
+- Strings must be valid JSON: write newlines in the code as \\n and quotes as \\".
+
+### Chart types (chart_type must be one of these EXACT names)
+{chart_types}
+{inspect_note}
+## Worked example
+[USER QUESTION] Top 5 products by revenue.
+Your entire reply (one object, nothing else):
+{"thought": "sum revenue per product, take the top 5, bar chart", "tool": "visualize", "arguments": {"code": "import pandas as pd\\norders = pd.read_csv('orders.csv')\\nagg = orders.groupby('product', as_index=False)['revenue'].sum()\\ntop_products = agg.sort_values('revenue', ascending=False).head(5)", "output_variable": "top_products", "chart": {"chart_type": "Bar Chart", "encodings": {"x": "product", "y": "revenue"}, "config": {}}, "title": "Top 5 Products By Revenue", "input_tables": ["orders"]}}
+
+## Worked example (counting rows -> a DataFrame, not a Series)
+[USER QUESTION] How many orders are in each status?
+Your entire reply (one object, nothing else):
+{"thought": "count rows per status with value_counts, reset_index to a real table", "tool": "visualize", "arguments": {"code": "import pandas as pd\\norders = pd.read_csv('orders.csv')\\ncounts = orders['status'].value_counts().reset_index(name='count')", "output_variable": "counts", "chart": {"chart_type": "Bar Chart", "encodings": {"x": "status", "y": "count"}, "config": {}}, "title": "Orders By Status", "input_tables": ["orders"]}}
+
+## Rules
+- Reply with EXACTLY ONE JSON object. Do not wrap it in markdown, do not add text
+  before or after it.
+- Always assign the final DataFrame to the exact output_variable name you chose.
+- Use only file names and columns that appear in the user message.
+"""
+
+_INSPECT_NOTE = """\
+
+## (Optional) look at the data first
+If the sample rows do not tell you enough (e.g. you need the exact category values,
+a column's range, or how two tables join), you MAY first run ONE inspection:
+{"thought": "<why you need to look>", "tool": "execute_python_script", "arguments": {"code": "<python that prints what you need>"}}
+It returns its stdout to you only. After it runs you MUST reply with your visualize
+(or explain) object. Use this at most once; if the samples already tell you enough,
+skip it and go straight to visualize.
+"""
+
+
+class MiniAnalystAgent(AnalystAgent):
+    """A single-decision analyst: one ``visualize`` (or ``explain``) per run.
+
+    Unlike :class:`AnalystAgent` it does **not** loop: :meth:`run` makes one
+    analytic decision and stops. It carries its own plain-text transport seams
+    (``_call_model`` / ``_parse_action`` / ``_run_inspection_tool``) so models
+    with weak or absent function-calling still work, and dispatches the committed
+    ``visualize`` through the base core skill, so the emitted ``result`` /
+    ``completion`` events are identical to the loop-based agent.
+
+    Parameters
+    ----------
+    allow_inspection:
+        Defaults to ``True``, the recommended project-default *mini* behaviour: the
+        model may run a single ``execute_python_script`` inspection before producing
+        its answer. ``False`` is an evaluation-only ablation in which the model must
+        answer in one shot with no tools at all.
+    """
+
+    def __init__(self, *args: Any, allow_inspection: bool = True, **kwargs: Any) -> None:
+        # One committing action per run; the base machinery is never asked to
+        # take a second analytic step.
+        kwargs.setdefault("max_iterations", 1)
+        super().__init__(*args, **kwargs)
+        self.allow_inspection = allow_inspection
+
+    # ------------------------------------------------------------------
+    # Prompt: a tightly scoped, single-decision system prompt
+    # ------------------------------------------------------------------
+
+    def _build_system_prompt(
+        self,
+        has_primary_tables: bool = False,
+        has_focused_thread: bool = False,
+        has_other_threads: bool = False,
+        has_attached_images: bool = False,
+        has_charts: bool = False,
+        **kwargs: Any,
+    ) -> str:
+        """Assemble the mini prompt: one visualize/explain decision, a reduced
+        chart-type reference, and (only when ``allow_inspection`` is set) a short note
+        describing the single optional ``execute_python_script`` call."""
+        prompt = _MINI_PROMPT_TEMPLATE
+        prompt = prompt.replace("{chart_types}", _MINI_CHART_REFERENCE)
+        prompt = prompt.replace(
+            "{inspect_note}", _INSPECT_NOTE if self.allow_inspection else "")
+        if self.language_instruction:
+            prompt = prompt + "\n\n" + self.language_instruction
+        return prompt
+
+    # ------------------------------------------------------------------
+    # Tool set: visualize + explain (+ execute_python_script when inspection is allowed)
+    # ------------------------------------------------------------------
+
+    def _mini_tools(self, allow_inspect: bool) -> list[dict[str, Any]]:
+        """The minimal tool set the mini agent recognises this turn: the
+        ``visualize`` action, the ``explain`` pseudo-tool, and — only when
+        ``allow_inspect`` — the ``execute_python_script`` inspection tool."""
+        base = AnalystAgent._current_tools(self)
+        keep = {"visualize"}
+        if allow_inspect:
+            keep.add("execute_python_script")
+        tools = [t for t in base
+                 if ((t.get("function") or {}).get("name") in keep)]
+        tools.append(_EXPLAIN_TOOL)
+        return tools
+
+    # ------------------------------------------------------------------
+    # Plain-text transport: a no-native-tools model call, a JSON-action parser,
+    # and the single inspection executor. Actions travel as content JSON so
+    # models with weak or absent function-calling still work; every tool is run
+    # by the SAME base handlers as the looping agent.
+    # ------------------------------------------------------------------
+
+    def _catalog_reminder(self, tools: list[dict[str, Any]]) -> str:
+        """A short per-turn reminder of the protocol + the names currently
+        available (reflects skills loaded so far, e.g. ``write_report`` after the
+        report skill loads). Keeps weak models on-protocol without re-deriving
+        the full schemas, which already live in the prompt/skill bodies."""
+        action_names = self.registry.action_names()
+        names = [(t.get("function") or {}).get("name") for t in tools]
+        inspect = [n for n in names if n and n not in action_names]
+        act = [n for n in names if n and n in action_names]
+        return (
+            "[ACTION PROTOCOL] Reply with ONE JSON object "
+            '{"thought":..,"tool":<name>,"arguments":{..}} to call a tool/action, '
+            "or plain text (no JSON) to finish. "
+            f"Inspection tools: {', '.join(inspect) or '(none)'}. "
+            f"Actions: {', '.join(act) or '(none)'}."
+        )
+
+    def _call_model(self, messages: list[dict]):
+        """Run one non-streaming, tool-free completion over ``messages``.
+
+        Errors accepted by ``_is_transient_error`` are retried after a
+        ``2 ** attempt`` second pause, up to ``_MAX_LLM_RETRIES`` attempts;
+        any other error, and the final failed attempt, propagate unchanged."""
+        last_exc: Exception | None = None
+        for attempt in range(self._MAX_LLM_RETRIES):
+            try:
+                effort = reasoning_effort_for(_AGENT_ID, self.client.model)
+                return self.client.get_completion(
+                    messages, stream=False, reasoning_effort=effort)
+            except Exception as e:  # noqa: BLE001 - retried or re-raised below
+                last_exc = e
+                retries_remain = attempt < self._MAX_LLM_RETRIES - 1
+                if not (self._is_transient_error(e) and retries_remain):
+                    raise
+                wait = 2 ** attempt
+                logger.warning(
+                    "[MiniAnalystAgent] Transient LLM error (attempt "
+                    "%d/%d), retrying in %ds: %s",
+                    attempt + 1, self._MAX_LLM_RETRIES, wait, e)
+                time.sleep(wait)
+        raise last_exc  # pragma: no cover - loop always returns or raises
+
+    @staticmethod
+    def _parse_action(
+        content: str | None, tools: list[dict[str, Any]],
+    ) -> tuple[str, str, dict[str, Any]] | None:
+        """Find the first JSON object in ``content`` that resolves to a tool.
+
+        Each candidate from ``_extract_json_objects`` is decoded and handed to
+        ``_match_tool_from_obj``; blobs that fail to decode, are not JSON
+        objects, or match no tool are skipped.
+
+        Returns ``(thought, tool_name, arguments)`` for the first match, where
+        ``thought`` is the first non-blank string under ``_THOUGHT_KEYS`` (else
+        ``""``) and non-dict arguments become ``{}``. Returns ``None`` when
+        ``content`` is not a string, has no ``{``, or nothing matches.
+        """
+        if not (isinstance(content, str) and "{" in content):
+            return None
+        for raw in _extract_json_objects(content):
+            try:
+                candidate = json.loads(raw)
+            except (ValueError, TypeError):
+                continue
+            hit = _match_tool_from_obj(candidate, tools) if isinstance(candidate, dict) else None
+            if hit is None:
+                continue
+            tool_name, tool_args = hit
+            texts = (candidate.get(key) for key in _THOUGHT_KEYS)
+            thought = next(
+                (t.strip() for t in texts if isinstance(t, str) and t.strip()), "")
+            if not isinstance(tool_args, dict):
+                tool_args = {}
+            return thought, tool_name, tool_args
+        return None
+
+    def _run_inspection_tool(
+        self,
+        tool_name: str,
+        tool_args: dict[str, Any],
+        input_tables: list[dict[str, Any]] | None,
+        outer_iteration: int,
+        rlog,
+    ) -> Generator[dict, None, tuple[str, dict | None]]:
+        """Execute one inspection tool with the SAME handlers as the base loop,
+        yielding the same ``tool_start`` / ``tool_result`` / ``skill_loaded``
+        events. Returns ``(observation_text, skill_body_msg_or_None)``."""
+        yield {
+            "type": "tool_start",
+            "tool": tool_name,
+            "purpose": tool_args.get("purpose") if tool_name == "execute_python_script" else None,
+            "code": tool_args.get("code") if tool_name == "execute_python_script" else None,
+            "table_names": tool_args.get("table_names") if tool_name == "inspect_source_data" else None,
+            "skill": tool_args.get("name") if tool_name == "load_skill" else None,
+        }
+        # Bookkeeping for the tool_execution entry logged once the tool has run.
+        tool_t0 = time.time()
+        tool_status = "ok"
+        body_msg: dict | None = None  # only the load_skill branch can replace this
+        # Dispatch on the tool name; every branch sets tool_content and yields a tool_result.
+        if tool_name == "execute_python_script":
+            result = self._run_explore_code(
+                tool_args.get("code", ""), input_tables or [])
+            tool_content = result.get("stdout", "")
+            tool_status = result.get("status", "ok")
+            if result.get("error"):
+                tool_content += f"\n\nError: {result['error']}"
+            yield {"type": "tool_result", "tool": tool_name, "status": tool_status,
+                   "stdout": result.get("stdout", ""), "error": result.get("error")}
+        elif tool_name == "inspect_source_data":
+            tool_content = handle_inspect_source_data(
+                tool_args.get("table_names", []), input_tables or [], self.workspace)
+            yield {"type": "tool_result", "tool": tool_name, "status": "ok",
+                   "stdout": tool_content}
+        elif tool_name == "load_skill":
+            skill_name = tool_args.get("name", "")
+            ok, message, body_msg = self._build_skill_body_message(skill_name)
+            tool_status = "ok" if ok else "error"
+            tool_content = message
+            if ok:
+                yield {"type": "skill_loaded", "skill": skill_name,
+                       "unlocks": list(self.registry.metas[skill_name].action_names)
+                       if self.registry.has(skill_name) else []}
+            yield {"type": "tool_result", "tool": tool_name, "status": tool_status,
+                   "stdout": message, "error": None if ok else message}
+        elif tool_name in self._loaded_skill_tool_map():
+            skill = self._loaded_skill_tool_map()[tool_name]
+            skill_ctx = SkillContext(
+                client=self.client, workspace=self.workspace,
+                language_instruction=self.language_instruction,
+                trajectory=[], payload=dict(self._run_payload))  # shallow copy of the run payload
+            try:
+                result = skill.handle_tool(tool_name, tool_args, skill_ctx)
+                tool_content = result.text
+            except Exception as exc:  # noqa: BLE001
+                logger.warning("[MiniAnalystAgent] Skill tool %r failed", tool_name, exc_info=exc)
+                tool_content = f"Tool '{tool_name}' failed: {exc}"
+                tool_status = "error"
+            yield {"type": "tool_result", "tool": tool_name, "status": tool_status,
+                   "stdout": tool_content}
+        else:
+            tool_content = (
+                f"Unknown tool: {tool_name}. Use only the tools/actions listed in "
+                "the protocol, or reply in plain text to finish."
+            )
+            tool_status = "error"
+            yield {"type": "tool_result", "tool": tool_name, "status": tool_status,
+                   "stdout": tool_content}
+        # Log the call; both summaries are cut at 200 characters (output gains a "...").
+        rlog.log("tool_execution", iteration=outer_iteration, tool=tool_name,
+                 input_summary=(tool_args.get("purpose", "") or "")[:200],
+                 output_summary=(tool_content[:200] + "...") if len(tool_content) > 200 else tool_content,
+                 latency_ms=int((time.time() - tool_t0) * 1000), status=tool_status)
+        return tool_content, body_msg
+
+    # ------------------------------------------------------------------
+    # The run: one decision, no loop
+    # ------------------------------------------------------------------
+
+    def run(
+        self,
+        input_tables: list[dict[str, Any]],
+        user_question: str,
+        focused_thread: list[dict[str, Any]] | None = None,
+        other_threads: list[dict[str, Any]] | None = None,
+        trajectory: list[dict] | None = None,
+        completed_step_count: int = 0,
+        primary_tables: list[str] | None = None,
+        attached_images: list[str] | None = None,
+        charts: list[dict[str, Any]] | None = None,
+    ) -> Generator[dict[str, Any], None, None]:
+        """Make a single analytic decision and stop.
+
+        With ``trajectory=None`` the explore namespace directory is wiped and fresh
+        messages are built; otherwise ``trajectory`` is reused and extended in place.
+
+        Yields the same event types as :meth:`AnalystAgent.run` (``thinking_text``,
+        ``tool_start`` / ``tool_result`` for the optional inspection, ``action`` /
+        ``result`` for the chart, ``error``, and a terminal ``completion``), but
+        commits at most one ``visualize`` (repaired in place on failure) or ends
+        with one ``explain``.
+        """
+        rlog = self._reasoning_log
+        session_start = time.time()
+
+        self._loaded_skills = {_CORE_SKILL}
+        self._run_payload = {
+            "input_tables": input_tables,
+            "charts": charts or [],
+            "focused_thread": focused_thread,
+            "other_threads": other_threads,
+            "primary_tables": primary_tables,
+        }
+        completed_steps: list[dict[str, Any]] = []
+        iteration = completed_step_count + 1
+
+        try:
+            rlog.log(
+                "session_start", agent="MiniAnalystAgent", session_id=self._session_id,
+                user_question=user_question,
+                input_tables=[t.get("name", "") for t in input_tables],
+                model=self.client.model, rules_injected=[], knowledge_injected=[],
+            )
+
+            if trajectory is None:
+                ns_dir = self._explore_ns_dir()
+                if ns_dir.exists():
+                    import shutil
+                    shutil.rmtree(ns_dir, ignore_errors=True)
+                messages = self._build_initial_messages(
+                    input_tables, user_question, focused_thread, other_threads,
+                    primary_tables=primary_tables,
+                    attached_images=attached_images, charts=charts,
+                )
+            else:
+                messages = trajectory
+
+            # A live sandbox session backs the optional inspection. The attribute is
+            # cleared in ``finally`` so no exited session lingers if the run aborts.
+            from data_formulator.sandbox.local_sandbox import SandboxSession
+            with SandboxSession() as explore_session:
+                self._explore_session = explore_session
+                try:
+                    kind, payload = yield from self._decide(
+                        messages, input_tables, iteration, allow_inspect=self.allow_inspection)
+                finally:
+                    self._explore_session = None
+
+            if kind == "explain":
+                yield {
+                    "type": "completion",
+                    "iteration": iteration,
+                    "status": "success",
+                    "content": {"summary": payload, "total_steps": 0},
+                }
+                self._log_session_end(rlog, "success", iteration, 0, session_start)
+                return
+
+            if kind == "visualize":
+                produced = yield from self._visualize_with_repair(
+                    payload, messages, input_tables, iteration, completed_steps)
+                status = "success" if produced else "completed_no_viz"
+                yield {
+                    "type": "completion",
+                    "iteration": iteration,
+                    "status": status,
+                    "content": {"summary": "", "total_steps": len(completed_steps)},
+                }
+                # NOTE(review): literal 0 here vs len(completed_steps) in the event above - confirm.
+                self._log_session_end(rlog, status, iteration, 0, session_start)
+                return
+
+            # kind == "none": an LLM error or an exhausted protocol; payload is
+            # the status string.
+            if payload == "llm_error":
+                yield self._error_event(
+                    iteration, "LLM API error", message_code="agent.llmApiError")
+            yield {
+                "type": "completion",
+                "iteration": iteration,
+                "status": payload,
+                "content": {"summary": "", "total_steps": 0},
+            }
+            self._log_session_end(rlog, payload, iteration, 0, session_start)
+            return
+        finally:
+            rlog.close()
+
+    # ------------------------------------------------------------------
+    # Decision: (optional inspection ->) one visualize/explain
+    # ------------------------------------------------------------------
+
+    def _decide(
+        self,
+        messages: list[dict],
+        input_tables: list[dict[str, Any]] | None,
+        iteration: int,
+        *,
+        allow_inspect: bool,
+    ) -> Generator[dict, None, tuple[str, Any]]:
+        """Run the single decision. Returns ``("visualize", args)``,
+        ``("explain", text)`` or ``("none", reason)``. Model replies and the
+        follow-up observations are appended to ``messages`` in place.
+
+        At most one inspection (``execute_python_script`` or ``inspect_source_data``)
+        and one corrective re-prompt are allowed, so the decision never loops."""
+        rlog = self._reasoning_log
+        inspections_left = 1 if allow_inspect else 0
+        corrections_left = 1  # one budget shared by every corrective nudge below
+
+        for _round in range(4):  # hard safety ceiling on model calls
+            can_inspect = inspections_left > 0
+            # Advertise inspection only when it's actually allowed this round, but
+            # always RECOGNISE an inspection call so a model that asks for one when
+            # it can't have it is nudged back on track (not misread as a final
+            # plain-text answer).
+            advertised = self._mini_tools(can_inspect)
+            recognize = self._mini_tools(allow_inspect=True)
+            rlog.log("llm_request", iteration=iteration,
+                     messages_count=len(messages),
+                     tools_available=[t["function"]["name"] for t in advertised],
+                     transport="json_protocol_mini")
+            call_messages = list(messages) + [
+                {"role": "system", "content": self._catalog_reminder(advertised)},
+            ]  # the reminder goes out with this call only; it is never stored in messages
+            t0 = time.time()
+            try:
+                response = self._call_model(call_messages)
+            except Exception as exc:  # noqa: BLE001
+                rlog.log("llm_response", iteration=iteration,
+                         latency_ms=int((time.time() - t0) * 1000),
+                         finish_reason="error", error=type(exc).__name__)
+                logger.error("[MiniAnalystAgent] LLM call failed", exc_info=exc)
+                return ("none", "llm_error")
+
+            latency = int((time.time() - t0) * 1000)
+            if not getattr(response, "choices", None):
+                rlog.log("llm_response", iteration=iteration,
+                         latency_ms=latency, finish_reason="empty")
+                return ("none", "llm_error")
+
+            content = (response.choices[0].message.content or "")
+            parsed = self._parse_action(content, recognize)
+
+            # --- plain text -> the explain answer ---------------------------
+            if parsed is None:
+                rlog.log("llm_response", iteration=iteration,
+                         latency_ms=latency, finish_reason="final_text")
+                messages.append({"role": "assistant", "content": content or None})
+                return ("explain", content.strip())
+
+            thought, name, args = parsed
+            messages.append({"role": "assistant", "content": content})
+            if thought:
+                yield {"type": "thinking_text", "content": thought}
+
+            # --- explain action ---------------------------------------------
+            if name == "explain":
+                rlog.log("llm_response", iteration=iteration,
+                         latency_ms=latency, finish_reason="explain")
+                text = ""
+                for k in _EXPLAIN_TEXT_KEYS:
+                    v = args.get(k)
+                    if isinstance(v, str) and v.strip():
+                        text = v.strip()
+                        break
+                return ("explain", text or thought or content.strip())
+
+            # --- visualize action -------------------------------------------
+            if name == "visualize":
+                _rescue_unpack_json_strings(args)
+                missing = [f for f in ("code", "output_variable", "chart")
+                           if not args.get(f)]
+                if missing and corrections_left > 0:
+                    corrections_left -= 1
+                    messages.append({"role": "user", "content": (
+                        "[OBSERVATION] ERROR: your visualize is missing required "
+                        f"field(s): {', '.join(missing)}. Emit the visualize JSON "
+                        "again with those filled in.")})
+                    rlog.log("llm_response", iteration=iteration,
+                             latency_ms=latency, finish_reason="missing_fields")
+                    continue
+                rlog.log("llm_response", iteration=iteration,
+                         latency_ms=latency, finish_reason="visualize")
+                return ("visualize", args)  # may still lack fields if the nudge was already spent
+
+            # --- the one optional inspection --------------------------------
+            if name in ("execute_python_script", "inspect_source_data"):
+                if can_inspect:
+                    inspections_left -= 1
+                    rlog.log("llm_response", iteration=iteration,
+                             latency_ms=latency, finish_reason="inspect", tool=name)
+                    tool_content, body_msg = yield from self._run_inspection_tool(
+                        name, args, input_tables, iteration, rlog)
+                    messages.append({"role": "user", "content": (
+                        f"[OBSERVATION] {tool_content}\n\nNow emit your visualize "
+                        "JSON object (or an explain object).")})
+                    if body_msg is not None:
+                        messages.append(body_msg)
+                    continue
+                # Inspection asked for but not available (budget spent, or the
+                # no-tool variation): nudge straight to the answer.
+                if corrections_left > 0:
+                    corrections_left -= 1
+                    messages.append({"role": "user", "content": (
+                        "[OBSERVATION] Inspection is not available now; emit your "
+                        "visualize JSON object directly (or an explain object).")})
+                    rlog.log("llm_response", iteration=iteration,
+                             latency_ms=latency, finish_reason="inspect_denied")
+                    continue
+                return ("none", "tool_rounds_exhausted")
+
+            # --- anything else -> one corrective nudge ----------------------
+            if corrections_left > 0:
+                corrections_left -= 1
+                messages.append({"role": "user", "content": (
+                    f"[OBSERVATION] ERROR: '{name}' is not available. Reply with a "
+                    "single visualize JSON object (or an explain object).")})
+                rlog.log("llm_response", iteration=iteration,
+                         latency_ms=latency, finish_reason="unknown_tool")
+                continue
+
+            return ("none", "tool_rounds_exhausted")
+
+        return ("none", "tool_rounds_exhausted")
+
+    # ------------------------------------------------------------------
+    # Visualize: dispatch through the core skill, repair the SAME chart on failure
+    # ------------------------------------------------------------------
+
+    def _visualize_with_repair(
+        self,
+        args: dict[str, Any],
+        messages: list[dict],
+        input_tables: list[dict[str, Any]] | None,
+        iteration: int,
+        completed_steps: list[dict[str, Any]],
+    ) -> Generator[dict, None, bool]:
+        """Dispatch ``args`` as a core-skill ``visualize`` and forward its events.
+
+        A ``result`` event marks success (returns ``True``). Otherwise the model
+        is asked to correct the same chart, at most ``max_repair_attempts``
+        times; running out, or any non-``visualize`` reply, returns ``False``."""
+        budget = max(0, int(self.max_repair_attempts))
+
+        while True:
+            action = dict(args, action="visualize")
+            dispatch = self._dispatch_skill_action(
+                _CORE_SKILL, "visualize", action, messages, iteration, completed_steps)
+
+            # Drain by hand: the generator's return value (the observation) is
+            # needed, and each event is checked for ``result`` before forwarding.
+            succeeded = False
+            while True:
+                try:
+                    event = next(dispatch)
+                except StopIteration as finished:
+                    observation: str | None = finished.value
+                    break
+                succeeded |= event.get("type") == "result"
+                yield event
+
+            # Keep history coherent (pure-text transport) so a repair turn reads
+            # the failure exactly like an inspection result.
+            self._set_action_observation(messages, None, observation)
+
+            if succeeded:
+                return True
+            if budget <= 0:
+                return False
+            budget -= 1
+
+            messages.append({"role": "user", "content": (
+                "[SYSTEM] The visualize above FAILED. Fix the SAME chart: read the "
+                "error in the observation, correct your code and/or encodings, and "
+                "emit ONE corrected visualize JSON object (no other text).")})
+            kind, replacement = yield from self._decide(
+                messages, input_tables, iteration, allow_inspect=False)
+            if kind != "visualize":
+                return False
+            args = replacement
diff --git a/py-src/data_formulator/routes/agents.py b/py-src/data_formulator/routes/agents.py
index 509e7f72..c3bdffb2 100644
--- a/py-src/data_formulator/routes/agents.py
+++ b/py-src/data_formulator/routes/agents.py
@@ -33,6 +33,7 @@
 from data_formulator.knowledge.store import KnowledgeStore
 
 from data_formulator.analyst.agent import AnalystAgent
+from data_formulator.analyst.mini_agent import MiniAnalystAgent
 from data_formulator.agents.agent_language import build_language_instruction
 from data_formulator.security.sanitize import classify_llm_error, sanitize_error_message
 from data_formulator.error_handler import json_ok, stream_preflight_error, classify_and_wrap_llm_error
@@ -321,6 +322,9 @@ def analyst_streaming():
     user_question = content.get("user_question", "")
     max_iterations = content.get("max_iterations", 5)
     max_repair_attempts = content.get("max_repair_attempts", 1)
+    # "mini" swaps in the single-decision MiniAnalystAgent (one visualize/explain
+    # per run) for small/local models; anything else uses the standard agent.
+    agent_mode = content.get("agent_mode", "standard")
     agent_exploration_rules = content.get("agent_exploration_rules", "")
     agent_coding_rules = content.get("agent_coding_rules", "")
     focused_thread = content.get("focused_thread", None)
@@ -335,7 +339,7 @@ def analyst_streaming():
         return stream_preflight_error(AppError(ErrorCode.INVALID_REQUEST, "user_question is required to resume after interaction"))
 
     logger.setLevel(logging.INFO)
-    logger.info("# analyst-streaming request")
+    logger.info(f"# analyst-streaming request (agent_mode={agent_mode})")
     logger.debug("== input tables ===>")
     for table in input_tables:
         logger.debug(f"===> Table: {table['name']}")
@@ -347,16 +351,29 @@ def analyst_streaming():
 
     def generate():
         try:
-            agent = AnalystAgent(
-                client=client,
-                workspace=workspace,
-                agent_exploration_rules=agent_exploration_rules,
-                agent_coding_rules=agent_coding_rules,
-                language_instruction=language_instruction,
-                max_iterations=max_iterations,
-                max_repair_attempts=max_repair_attempts,
-                identity_id=identity_id,
-            )
+            if agent_mode == "mini":
+                # Single-decision agent; it forces max_iterations=1 internally and
+                # may run one optional inspection (allow_inspection defaults True).
+                agent = MiniAnalystAgent(
+                    client=client,
+                    workspace=workspace,
+                    agent_exploration_rules=agent_exploration_rules,
+                    agent_coding_rules=agent_coding_rules,
+                    language_instruction=language_instruction,
+                    max_repair_attempts=max_repair_attempts,
+                    identity_id=identity_id,
+                )
+            else:
+                agent = AnalystAgent(
+                    client=client,
+                    workspace=workspace,
+                    agent_exploration_rules=agent_exploration_rules,
+                    agent_coding_rules=agent_coding_rules,
+                    language_instruction=language_instruction,
+                    max_iterations=max_iterations,
+                    max_repair_attempts=max_repair_attempts,
+                    identity_id=identity_id,
+                )
 
             trajectory = None
             if resume_trajectory:
diff --git a/src/app/App.tsx b/src/app/App.tsx
index 7d873877..3d6545e5 100644
--- a/src/app/App.tsx
+++ b/src/app/App.tsx
@@ -51,6 +51,8 @@ import {
     ListItemText,
     CircularProgress,
     LinearProgress,
+    Switch,
+    FormControlLabel,
 } from '@mui/material';
 
 
@@ -493,12 +495,14 @@ const ConfigDialog: React.FC = () => {
     const [paletteKey, setPaletteKey] = useState(
         (config.paletteKey && palettes[config.paletteKey]) ? config.paletteKey : defaultPaletteKey
     );
+    const [miniMode, setMiniMode] = useState(config.miniMode ?? false);
 
     const hasChanges = formulateTimeoutSeconds !== config.formulateTimeoutSeconds || 
                       defaultChartWidth !== config.defaultChartWidth ||
                       defaultChartHeight !== config.defaultChartHeight ||
                       maxStretchFactor !== config.maxStretchFactor ||
                       frontendRowLimit !== config.frontendRowLimit ||
+                      miniMode !== (config.miniMode ?? false) ||
                       paletteKey !== ((config.paletteKey && palettes[config.paletteKey]) ? config.paletteKey : defaultPaletteKey);
 
     return (
@@ -687,6 +691,16 @@ const ConfigDialog: React.FC = () => {
                                 </Typography>
                             </Box>
                         </Box>
+                        <Divider><Typography variant="caption">{t('config.agent', { defaultValue: 'Agent' })}</Typography></Divider>
+                        <Box>
+                            <FormControlLabel
+                                control={<Switch checked={miniMode} onChange={(e) => setMiniMode(e.target.checked)} size="small" />}
+                                label={t('config.miniMode', { defaultValue: 'Mini mode' })}
+                            />
+                            <Typography variant="caption" color="text.secondary" sx={{ display: 'block' }}>
+                                {t('config.miniModeHint', { defaultValue: 'Run the single-turn mini analyst: one visualize or explain per request, with one optional data inspection. Recommended for smaller or local models.' })}
+                            </Typography>
+                        </Box>
                     </Box>
                 </DialogContent>
                 <DialogActions sx={{'.MuiButton-root': {textTransform: 'none'}}}>
@@ -697,6 +711,7 @@ const ConfigDialog: React.FC = () => {
                         setMaxStretchFactor(2.0);
                         setFrontendRowLimit(rowLimitDefault);
                         setPaletteKey(defaultPaletteKey);
+                        setMiniMode(false);
                     }}>{t('session.resetToDefault')}</Button>
                     <Button onClick={() => setOpen(false)}>{t('app.cancel')}</Button>
                     <Button 
@@ -707,7 +722,7 @@ const ConfigDialog: React.FC = () => {
                             || isNaN(maxStretchFactor) || maxStretchFactor < 1 || maxStretchFactor > 5
                             || isNaN(frontendRowLimit) || frontendRowLimit < 100 || frontendRowLimit > rowLimitMax}
                         onClick={() => {
-                            dispatch(dfActions.setConfig({formulateTimeoutSeconds, defaultChartWidth, defaultChartHeight, maxStretchFactor, frontendRowLimit, paletteKey}));
+                            dispatch(dfActions.setConfig({formulateTimeoutSeconds, defaultChartWidth, defaultChartHeight, maxStretchFactor, frontendRowLimit, paletteKey, miniMode}));
                             setOpen(false);
                         }}
                     >
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
index 8084da5d..fdc24d72 100644
--- a/src/app/dfSlice.tsx
+++ b/src/app/dfSlice.tsx
@@ -117,6 +117,7 @@ export interface ClientConfig {
     maxStretchFactor: number; // max per-axis stretch multiplier for chart sizing (default 2.0)
     frontendRowLimit: number; // max rows to keep in browser when loading locally (non-virtual)
     paletteKey: string; // active color palette key from tokens.ts
+    miniMode: boolean; // when true, run the single-turn MiniAnalystAgent (for small/local models)
 }
 
 export interface GeneratedReport {
@@ -319,6 +320,7 @@ const initialState: DataFormulatorState = {
         maxStretchFactor: 2.0,
         frontendRowLimit: DEFAULT_ROW_LIMIT,
         paletteKey: 'fluent',
+        miniMode: false,
     },
 
     dataLoaderConnectParams: {},
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index a1072d91..6d137b7b 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -600,6 +600,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
             ...(attachedImages.length > 0 ? { attached_images: attachedImages } : {}),
             model: activeModel,
             max_iterations: 10,
+            agent_mode: config.miniMode ? 'mini' : 'standard',
         };
 
         // ── Route through the unified AnalystAgent (design-35/36) ──
diff --git a/tests/backend/agents/test_client_utils.py b/tests/backend/agents/test_client_utils.py
index 02872a56..6fb81567 100644
--- a/tests/backend/agents/test_client_utils.py
+++ b/tests/backend/agents/test_client_utils.py
@@ -233,3 +233,169 @@ def test_gemini_prefix_applied_via_from_config(self):
         cfg = {"endpoint": "gemini", "model": "gemini-pro", "api_key": "k"}
         c = Client.from_config(cfg)
         assert c.model.startswith("gemini/")
+
+
+# ---------------------------------------------------------------------------
+# Ollama content-JSON -> tool_call salvage
+# ---------------------------------------------------------------------------
+
+import json as _json
+from types import SimpleNamespace
+
+from data_formulator.agents.client_utils import (
+    _extract_json_objects,
+    _match_tool_from_obj,
+    _salvage_tool_calls_from_content,
+)
+
+
+def _core_action_tools():
+    """The visualize / ask_user / delegate / execute_python_script schemas the
+    matcher disambiguates between."""
+    return [
+        {"type": "function", "function": {
+            "name": "execute_python_script",
+            "parameters": {"type": "object",
+                           "properties": {"purpose": {"type": "string"},
+                                          "code": {"type": "string"}},
+                           "required": ["purpose", "code"]}}},
+        {"type": "function", "function": {
+            "name": "visualize",
+            "parameters": {"type": "object",
+                           "properties": {"code": {"type": "string"},
+                                          "output_variable": {"type": "string"},
+                                          "chart": {"type": "object"},
+                                          "title": {"type": "string"}},
+                           "required": ["code", "output_variable", "chart"]}}},
+        {"type": "function", "function": {
+            "name": "ask_user",
+            "parameters": {"type": "object",
+                           "properties": {"thought": {"type": "string"},
+                                          "questions": {"type": "array"}},
+                           "required": ["questions"]}}},
+        {"type": "function", "function": {
+            "name": "delegate",
+            "parameters": {"type": "object",
+                           "properties": {"target": {"type": "string"},
+                                          "options": {"type": "array"}},
+                           "required": ["target", "options"]}}},
+    ]
+
+
+class TestExtractJsonObjects:
+    def test_extracts_single_object(self):
+        assert _extract_json_objects('{"a": 1}') == ['{"a": 1}']
+
+    def test_ignores_braces_inside_strings(self):
+        text = '{"code": "x = {1: 2}; y = \\"}\\""}'
+        objs = _extract_json_objects(text)
+        assert len(objs) == 1
+        assert _json.loads(objs[0])["code"] == 'x = {1: 2}; y = "}"'
+
+    def test_extracts_object_from_markdown_fence(self):
+        text = 'Sure:\n```json\n{"tool": "visualize"}\n```\n'
+        objs = _extract_json_objects(text)
+        assert objs == ['{"tool": "visualize"}']
+
+    def test_no_object_returns_empty(self):
+        assert _extract_json_objects("just prose, no json") == []
+
+
+class TestMatchToolFromObj:
+    def test_explicit_wrapper_name_and_arguments(self):
+        obj = {"tool": "visualize",
+               "arguments": {"code": "df=1", "output_variable": "df",
+                             "chart": {}}}
+        name, args = _match_tool_from_obj(obj, _core_action_tools())
+        assert name == "visualize"
+        assert args["output_variable"] == "df"
+
+    def test_bare_visualize_args_match_visualize_not_execute(self):
+        obj = {"output_variable": "t", "code": "df=1", "chart": {}}
+        name, _ = _match_tool_from_obj(obj, _core_action_tools())
+        assert name == "visualize"
+
+    def test_bare_execute_args_match_execute(self):
+        obj = {"purpose": "peek", "code": "print(1)"}
+        name, _ = _match_tool_from_obj(obj, _core_action_tools())
+        assert name == "execute_python_script"
+
+    def test_ask_user_shape(self):
+        obj = {"thought": "clarify", "questions": [{"text": "which?"}]}
+        name, _ = _match_tool_from_obj(obj, _core_action_tools())
+        assert name == "ask_user"
+
+    def test_nested_action_wrapper_shape(self):
+        # qwen2.5-coder emits this under the long agent prompt.
+        obj = {"thought": "show it",
+               "action": {"name": "visualize",
+                          "arguments": {"code": "df=1", "output_variable": "df",
+                                        "chart": {"chart_type": "Bar Chart"}}}}
+        name, args = _match_tool_from_obj(obj, _core_action_tools())
+        assert name == "visualize"
+        assert args["output_variable"] == "df"
+
+    def test_nested_tool_wrapper_shape(self):
+        obj = {"tool": {"name": "ask_user",
+                        "arguments": {"questions": [{"text": "?"}]}}}
+        name, _ = _match_tool_from_obj(obj, _core_action_tools())
+        assert name == "ask_user"
+
+    def test_non_matching_object_returns_none(self):
+        assert _match_tool_from_obj({"answer": "42"}, _core_action_tools()) is None
+
+
+class TestSalvageToolCallsFromContent:
+    def _resp(self, content, tool_calls=None):
+        msg = SimpleNamespace(content=content, tool_calls=tool_calls)
+        return SimpleNamespace(choices=[SimpleNamespace(message=msg,
+                                                        finish_reason="stop")])
+
+    def test_salvages_visualize_action_from_content(self):
+        content = _json.dumps({"output_variable": "t", "code": "df=1",
+                               "chart": {"chart_type": "Bar Chart"}})
+        resp = self._resp(content)
+        out = _salvage_tool_calls_from_content(resp, _core_action_tools())
+        msg = out.choices[0].message
+        assert msg.tool_calls and msg.tool_calls[0].function.name == "visualize"
+        assert msg.content is None
+        assert out.choices[0].finish_reason == "tool_calls"
+        assert _json.loads(msg.tool_calls[0].function.arguments)["output_variable"] == "t"
+
+    def test_does_not_touch_response_with_native_tool_calls(self):
+        existing = [SimpleNamespace(function=SimpleNamespace(name="visualize",
+                                                             arguments="{}"))]
+        resp = self._resp(None, tool_calls=existing)
+        out = _salvage_tool_calls_from_content(resp, _core_action_tools())
+        assert out.choices[0].message.tool_calls is existing
+
+    def test_plain_text_answer_left_untouched(self):
+        resp = self._resp("The dataset has 14 languages.")
+        out = _salvage_tool_calls_from_content(resp, _core_action_tools())
+        assert not getattr(out.choices[0].message, "tool_calls", None)
+        assert out.choices[0].message.content == "The dataset has 14 languages."
+
+    def test_no_tools_is_noop(self):
+        content = _json.dumps({"output_variable": "t", "code": "df=1", "chart": {}})
+        resp = self._resp(content)
+        out = _salvage_tool_calls_from_content(resp, [])
+        assert not getattr(out.choices[0].message, "tool_calls", None)
+
+
+class TestMatchToolWireFormats:
+    def test_openai_tool_calls_array_in_content(self):
+        obj = {"tool_calls": [{"id": "x", "type": "function",
+                               "function": {"name": "visualize",
+                                            "arguments": {"code": "df=1",
+                                                          "output_variable": "df",
+                                                          "chart": {}}}}]}
+        name, args = _match_tool_from_obj(obj, _core_action_tools())
+        assert name == "visualize"
+        assert args["output_variable"] == "df"
+
+    def test_stringified_arguments_are_parsed(self):
+        obj = {"name": "execute_python_script",
+               "arguments": '{"purpose": "peek", "code": "print(1)"}'}
+        name, args = _match_tool_from_obj(obj, _core_action_tools())
+        assert name == "execute_python_script"
+        assert args["code"] == "print(1)"
diff --git a/tests/backend/agents/test_mini_agent.py b/tests/backend/agents/test_mini_agent.py
new file mode 100644
index 00000000..d2af354c
--- /dev/null
+++ b/tests/backend/agents/test_mini_agent.py
@@ -0,0 +1,433 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Unit tests for data_formulator.analyst.mini_agent.MiniAnalystAgent.
+
+The mini agent makes a SINGLE analytic decision per run (one ``visualize`` or one
+``explain``) with no multi-step loop. These tests cover the pure-logic seams (the
+prompt, the reduced tool set, the JSON decision) plus end-to-end drives of
+:meth:`run` with a scripted fake client and a stubbed core-skill dispatch,
+asserting the key contracts: one decision, pure-text history, the two output
+kinds, the two tool variations, and in-place repair of a failed chart.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from data_formulator.analyst import mini_agent as ma
+from data_formulator.analyst.mini_agent import MiniAnalystAgent
+
+pytestmark = [pytest.mark.backend]
+
+
+_ACTION_NAMES = {"visualize", "ask_user", "delegate"}
+
+
+def _mini_tools_list(allow_inspect: bool):
+    tools = [
+        {"type": "function", "function": {
+            "name": "visualize",
+            "description": "Commit a data transform + chart.",
+            "parameters": {"type": "object",
+                           "properties": {"code": {"type": "string"},
+                                          "output_variable": {"type": "string"},
+                                          "chart": {"type": "object"}},
+                           "required": ["code", "output_variable", "chart"]}}},
+    ]
+    if allow_inspect:
+        tools.append({"type": "function", "function": {
+            "name": "execute_python_script",
+            "description": "Run a script.",
+            "parameters": {"type": "object",
+                           "properties": {"code": {"type": "string"}},
+                           "required": ["code"]}}})
+    tools.append(ma._EXPLAIN_TOOL)
+    return tools
+
+
+def _resp(content):
+    msg = SimpleNamespace(content=content, tool_calls=None)
+    return SimpleNamespace(choices=[SimpleNamespace(message=msg, finish_reason="stop")])
+
+
+class _FakeRegistry:
+    def action_names(self):
+        return set(_ACTION_NAMES)
+
+
+class _FakeRlog:
+    def log(self, *a, **k):
+        pass
+
+    def close(self):
+        pass
+
+
+def _bare_mini(allow_inspection=True):
+    """A MiniAnalystAgent with just the seams its decision logic touches stubbed
+    — no real LLM / sandbox / registry tool building."""
+    agent = MiniAnalystAgent.__new__(MiniAnalystAgent)
+    agent.allow_inspection = allow_inspection
+    agent.language_instruction = ""
+    agent.max_repair_attempts = 2
+    agent.registry = _FakeRegistry()
+    agent._reasoning_log = _FakeRlog()
+    agent._session_id = "test-session"
+    agent.client = SimpleNamespace(model="test-model")
+    agent._run_explore_code = lambda code, tables: {"status": "ok", "stdout": "ROWS=3"}
+    agent._loaded_skill_tool_map = lambda: {}
+    agent._mini_tools = lambda allow_inspect: _mini_tools_list(allow_inspect)
+    return agent
+
+
+def _decide(agent, scripted_contents, input_tables=None, allow_inspect=None):
+    """Run _decide with the client scripted to return ``scripted_contents`` in
+    order. Returns (events, decision_tuple, messages)."""
+    script = iter(scripted_contents)
+    agent._call_model = lambda messages: _resp(next(script))
+    messages: list[dict] = [{"role": "system", "content": "sys"},
+                            {"role": "user", "content": "q"}]
+    gen = agent._decide(
+        messages, input_tables or [], 1,
+        allow_inspect=agent.allow_inspection if allow_inspect is None else allow_inspect)
+    events = []
+    decision = None
+    try:
+        while True:
+            events.append(next(gen))
+    except StopIteration as stop:
+        decision = stop.value
+    return events, decision, messages
+
+
+# --------------------------------------------------------------------------
+# Prompt seams
+# --------------------------------------------------------------------------
+class TestSystemPrompt:
+    def test_template_describes_both_output_kinds(self):
+        assert '"tool": "visualize"' in ma._MINI_PROMPT_TEMPLATE
+        assert '"tool": "explain"' in ma._MINI_PROMPT_TEMPLATE
+        assert "ONE JSON object" in ma._MINI_PROMPT_TEMPLATE
+
+    def test_chart_reference_is_reduced_set(self):
+        # Exactly the seven reduced types, nothing more exotic.
+        for t in ma._MINI_CHART_TYPES:
+            assert t in ma._MINI_CHART_REFERENCE
+        assert "Boxplot" not in ma._MINI_CHART_REFERENCE
+        assert "Waterfall" not in ma._MINI_CHART_REFERENCE
+        assert len(ma._MINI_CHART_TYPES) == 7
+
+    def test_inspection_note_present_only_for_tool_variant(self):
+        agent = _bare_mini(allow_inspection=True)
+        out = agent._build_system_prompt()
+        assert "execute_python_script" in out
+        assert all(c in out for c in ma._MINI_CHART_TYPES)
+
+    def test_no_tool_variant_omits_inspection(self):
+        agent = _bare_mini(allow_inspection=False)
+        out = agent._build_system_prompt()
+        assert "execute_python_script" not in out
+        assert '"tool": "visualize"' in out
+
+
+class TestMiniTools:
+    def test_tool_variant_offers_visualize_explain_and_inspection(self):
+        from data_formulator.analyst.skills import build_registry
+        agent = MiniAnalystAgent.__new__(MiniAnalystAgent)
+        agent.registry = build_registry()
+        agent._loaded_skills = {"core"}
+        names = {(t.get("function") or {}).get("name")
+                 for t in agent._mini_tools(allow_inspect=True)}
+        assert "visualize" in names
+        assert "explain" in names
+        assert "execute_python_script" in names
+
+    def test_no_tool_variant_drops_inspection(self):
+        from data_formulator.analyst.skills import build_registry
+        agent = MiniAnalystAgent.__new__(MiniAnalystAgent)
+        agent.registry = build_registry()
+        agent._loaded_skills = {"core"}
+        names = {(t.get("function") or {}).get("name")
+                 for t in agent._mini_tools(allow_inspect=False)}
+        assert names == {"visualize", "explain"}
+
+
+# --------------------------------------------------------------------------
+# The single decision
+# --------------------------------------------------------------------------
+class TestDecide:
+    def test_visualize_in_one_shot(self):
+        agent = _bare_mini(allow_inspection=False)
+        viz = json.dumps({"thought": "bar it", "tool": "visualize", "arguments": {
+            "code": "out=df", "output_variable": "out",
+            "chart": {"chart_type": "Bar Chart"}}})
+        events, decision, messages = _decide(agent, [viz])
+        assert decision[0] == "visualize"
+        assert decision[1]["output_variable"] == "out"
+        # thought surfaced
+        assert any(e["type"] == "thinking_text" and e["content"] == "bar it"
+                   for e in events)
+        # pure-text history: the assistant turn is the verbatim JSON
+        assert messages[-1]["role"] == "assistant"
+        assert all("tool_calls" not in m for m in messages)
+        assert all(m["role"] != "tool" for m in messages)
+
+    def test_explain_action_ends_with_text(self):
+        agent = _bare_mini(allow_inspection=False)
+        exp = json.dumps({"tool": "explain",
+                          "arguments": {"text": "There are 42 rows."}})
+        _, decision, _ = _decide(agent, [exp])
+        assert decision == ("explain", "There are 42 rows.")
+
+    def test_plain_text_is_explain(self):
+        agent = _bare_mini(allow_inspection=False)
+        _, decision, messages = _decide(agent, ["The data covers 2019-2023."])
+        assert decision[0] == "explain"
+        assert "2019" in decision[1]
+        assert messages[-1]["role"] == "assistant"
+
+    def test_inspection_then_visualize_keeps_history_pure_text(self):
+        agent = _bare_mini(allow_inspection=True)
+        inspect = json.dumps({"tool": "execute_python_script",
+                              "arguments": {"code": "print(1)"}})
+        viz = json.dumps({"tool": "visualize", "arguments": {
+            "code": "out=df", "output_variable": "out",
+            "chart": {"chart_type": "Line Chart"}}})
+        events, decision, messages = _decide(agent, [inspect, viz])
+        etypes = [e["type"] for e in events]
+        assert "tool_start" in etypes and "tool_result" in etypes
+        assert decision[0] == "visualize"
+        # the inspection observation came back as a [OBSERVATION] user turn
+        assert any(m["role"] == "user" and "[OBSERVATION]" in (m["content"] or "")
+                   and "ROWS=3" in (m["content"] or "") for m in messages)
+        assert all("tool_calls" not in m for m in messages)
+
+    def test_inspection_budget_is_one(self):
+        # Two inspection attempts: the second must be refused (no tool offered),
+        # nudging the model; the final visualize still commits.
+        agent = _bare_mini(allow_inspection=True)
+        inspect = json.dumps({"tool": "execute_python_script",
+                              "arguments": {"code": "print(1)"}})
+        viz = json.dumps({"tool": "visualize", "arguments": {
+            "code": "out=df", "output_variable": "out",
+            "chart": {"chart_type": "Bar Chart"}}})
+        # script: inspect, inspect(again -> refused as unknown), visualize
+        _, decision, messages = _decide(agent, [inspect, inspect, viz])
+        assert decision[0] == "visualize"
+        # a correction nudge was issued for the second (now unavailable) inspect
+        assert any("not available" in (m["content"] or "")
+                   for m in messages if m["role"] == "user")
+
+    def test_missing_required_field_triggers_one_correction(self):
+        agent = _bare_mini(allow_inspection=False)
+        bad = json.dumps({"tool": "visualize",
+                          "arguments": {"code": "out=df", "output_variable": "out"}})
+        good = json.dumps({"tool": "visualize", "arguments": {
+            "code": "out=df", "output_variable": "out",
+            "chart": {"chart_type": "Bar Chart"}}})
+        _, decision, messages = _decide(agent, [bad, good])
+        assert decision[0] == "visualize"
+        assert decision[1].get("chart")
+        assert any("[OBSERVATION] ERROR" in (m["content"] or "")
+                   for m in messages if m["role"] == "user")
+
+
+# --------------------------------------------------------------------------
+# End-to-end run(): result/completion events + repair
+# --------------------------------------------------------------------------
+class _DummySandbox:
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *a):
+        return False
+
+
+def _prep_run(agent, scripted_contents, monkeypatch):
+    """Wire run()'s collaborators: scripted client, stub initial messages, no-op
+    sandbox + reasoning-log + explore ns."""
+    script = iter(scripted_contents)
+    agent._call_model = lambda messages: _resp(next(script))
+    agent._build_initial_messages = lambda *a, **k: [
+        {"role": "system", "content": "sys"}, {"role": "user", "content": "q"}]
+    agent._explore_ns_dir = lambda: Path("/nonexistent/mini-test-ns")
+    monkeypatch.setattr(
+        "data_formulator.sandbox.local_sandbox.SandboxSession",
+        lambda *a, **k: _DummySandbox())
+
+
+def _viz_result_event():
+    return {"type": "result", "status": "success", "content": {
+        "question": "",
+        "result": {"code": "out=df",
+                   "refined_goal": {"chart": {"chart_type": "Bar Chart"},
+                                    "title": "T"},
+                   "content": {"rows": [{"x": 1, "y": 2}]}}}}
+
+
+class TestRun:
+    def test_explain_run_emits_completion_summary(self, monkeypatch):
+        agent = _bare_mini(allow_inspection=False)
+        _prep_run(agent, [json.dumps(
+            {"tool": "explain", "arguments": {"text": "Sales are flat."}})], monkeypatch)
+        events = list(agent.run([{"name": "t"}], "is it growing?"))
+        comp = [e for e in events if e["type"] == "completion"]
+        assert comp and comp[0]["status"] == "success"
+        assert comp[0]["content"]["summary"] == "Sales are flat."
+        # an explain produces no result/chart
+        assert not any(e["type"] == "result" for e in events)
+
+    def test_visualize_run_emits_result_then_completion(self, monkeypatch):
+        agent = _bare_mini(allow_inspection=False)
+        viz = json.dumps({"tool": "visualize", "arguments": {
+            "code": "out=df", "output_variable": "out",
+            "chart": {"chart_type": "Bar Chart"}}})
+        _prep_run(agent, [viz], monkeypatch)
+
+        def _viz_ok(*a, **k):
+            yield {"type": "action", "action": "visualize"}
+            yield _viz_result_event()
+            return "[OBSERVATION] Chart created."
+        agent._dispatch_skill_action = _viz_ok
+
+        events = list(agent.run([{"name": "t"}], "show sales"))
+        etypes = [e["type"] for e in events]
+        assert "result" in etypes
+        comp = [e for e in events if e["type"] == "completion"]
+        assert comp and comp[0]["status"] == "success"
+        assert comp[0]["content"]["total_steps"] >= 0
+
+    def test_failed_visualize_is_repaired_in_place(self, monkeypatch):
+        agent = _bare_mini(allow_inspection=False)
+        viz1 = json.dumps({"tool": "visualize", "arguments": {
+            "code": "out=df.bad", "output_variable": "out",
+            "chart": {"chart_type": "Bar Chart"}}})
+        viz2 = json.dumps({"tool": "visualize", "arguments": {
+            "code": "out=df", "output_variable": "out",
+            "chart": {"chart_type": "Bar Chart"}}})
+        _prep_run(agent, [viz1, viz2], monkeypatch)
+
+        calls = {"n": 0}
+
+        def _viz_dispatch(*a, **k):
+            calls["n"] += 1
+            if calls["n"] == 1:
+                yield {"type": "error", "message": "boom"}
+                return "[OBSERVATION – Step 1 FAILED]\n\nError: boom"
+            yield _viz_result_event()
+            return "[OBSERVATION] Chart created."
+        agent._dispatch_skill_action = _viz_dispatch
+
+        events = list(agent.run([{"name": "t"}], "show sales"))
+        assert calls["n"] == 2  # one failure, one repair
+        assert any(e["type"] == "result" for e in events)
+        comp = [e for e in events if e["type"] == "completion"]
+        assert comp and comp[0]["status"] == "success"
+
+    def test_unrepairable_visualize_completes_without_chart(self, monkeypatch):
+        agent = _bare_mini(allow_inspection=False)
+        agent.max_repair_attempts = 0  # no repair budget
+        viz = json.dumps({"tool": "visualize", "arguments": {
+            "code": "out=df.bad", "output_variable": "out",
+            "chart": {"chart_type": "Bar Chart"}}})
+        _prep_run(agent, [viz], monkeypatch)
+
+        def _viz_fail(*a, **k):
+            yield {"type": "error", "message": "boom"}
+            return "[OBSERVATION – Step 1 FAILED]\n\nError: boom"
+        agent._dispatch_skill_action = _viz_fail
+
+        events = list(agent.run([{"name": "t"}], "show sales"))
+        assert not any(e["type"] == "result" for e in events)
+        comp = [e for e in events if e["type"] == "completion"]
+        assert comp and comp[0]["status"] == "completed_no_viz"
+
+
+# --------------------------------------------------------------------------
+# Plain-text transport seams (migrated from the removed simple_agent tests;
+# MiniAnalystAgent now owns _catalog_reminder / _parse_action).
+# --------------------------------------------------------------------------
+
+def _proto_tools():
+    """A representative tool list — an inspection tool with a unique required key
+    plus visualize — for exercising the generic protocol seams."""
+    return [
+        {"type": "function", "function": {
+            "name": "execute_python_script",
+            "description": "Run a script.",
+            "parameters": {"type": "object",
+                           "properties": {"purpose": {"type": "string"},
+                                          "code": {"type": "string"}},
+                           "required": ["purpose", "code"]}}},
+        {"type": "function", "function": {
+            "name": "inspect_source_data",
+            "description": "Summarise source tables.",
+            "parameters": {"type": "object",
+                           "properties": {"table_names": {"type": "array",
+                                                          "items": {"type": "string"}}},
+                           "required": ["table_names"]}}},
+        {"type": "function", "function": {
+            "name": "visualize",
+            "description": "Commit a data transform + chart.",
+            "parameters": {"type": "object",
+                           "properties": {"code": {"type": "string"},
+                                          "output_variable": {"type": "string"},
+                                          "chart": {"type": "object"}},
+                           "required": ["code", "output_variable", "chart"]}}},
+    ]
+
+
+class TestCatalogReminder:
+    def test_splits_inspection_and_action_names(self):
+        agent = _bare_mini()
+        text = agent._catalog_reminder(_proto_tools())
+        assert "execute_python_script" in text and "inspect_source_data" in text
+        assert "visualize" in text
+        # visualize is listed under Actions, not Inspection tools
+        inspect_part, action_part = text.split("Actions:")
+        assert "visualize" not in inspect_part
+        assert "visualize" in action_part
+
+
+class TestParseAction:
+    def test_wrapped_tool_envelope(self):
+        content = json.dumps({"thought": "let's chart it", "tool": "visualize",
+                              "arguments": {"code": "df=1", "output_variable": "df",
+                                            "chart": {}}})
+        thought, name, args = MiniAnalystAgent._parse_action(content, _proto_tools())
+        assert name == "visualize"
+        assert thought == "let's chart it"
+        assert args["output_variable"] == "df"
+
+    def test_bare_args_matched_by_required_keys(self):
+        content = json.dumps({"table_names": ["t1", "t2"]})
+        thought, name, args = MiniAnalystAgent._parse_action(content, _proto_tools())
+        assert name == "inspect_source_data"
+        assert args["table_names"] == ["t1", "t2"]
+
+    def test_nested_action_wrapper(self):
+        content = json.dumps({"thought": "go", "action": {
+            "name": "visualize",
+            "arguments": {"code": "df=1", "output_variable": "df", "chart": {}}}})
+        thought, name, args = MiniAnalystAgent._parse_action(content, _proto_tools())
+        assert name == "visualize"
+        assert thought == "go"
+
+    def test_json_embedded_in_prose(self):
+        content = ('I will inspect first.\n'
+                   '{"tool": "inspect_source_data", "arguments": {"table_names": ["t"]}}')
+        parsed = MiniAnalystAgent._parse_action(content, _proto_tools())
+        assert parsed is not None
+        assert parsed[1] == "inspect_source_data"
+
+    def test_plain_text_is_final_answer(self):
+        assert MiniAnalystAgent._parse_action(
+            "Here is the final summary.", _proto_tools()) is None
+
+    def test_none_content(self):
+        assert MiniAnalystAgent._parse_action(None, _proto_tools()) is None

From fe36b741ac7d5c9da554a4c602cb20734387046a Mon Sep 17 00:00:00 2001
From: Chenglong Wang <93549116+Chenglong-MS@users.noreply.github.com>
Date: Thu, 18 Jun 2026 21:27:19 +0000
Subject: [PATCH 25/29] Remove mini_notool ablation from MiniAnalystAgent

The mini_notool variant (MiniAnalystAgent with allow_inspection=False: a no-tools, one-shot ablation used only in evaluation) is removed. MiniAnalystAgent now always offers the single optional inspection before committing its answer; the per-turn inspection-budget logic in _decide/_mini_tools is unchanged. Tests updated to drop the no-tool-variant cases.
---
 py-src/data_formulator/analyst/mini_agent.py | 40 +++++++-------------
 py-src/data_formulator/routes/agents.py      |  2 +-
 tests/backend/agents/test_mini_agent.py      | 18 ++++-----
 3 files changed, 21 insertions(+), 39 deletions(-)

diff --git a/py-src/data_formulator/analyst/mini_agent.py b/py-src/data_formulator/analyst/mini_agent.py
index 33332b53..24979f5e 100644
--- a/py-src/data_formulator/analyst/mini_agent.py
+++ b/py-src/data_formulator/analyst/mini_agent.py
@@ -14,16 +14,10 @@
 * **explain** — a short free-text answer (only when the user is clearly *not*
   asking for a chart, e.g. a yes/no or factual question).
 
-By default the agent may look at the data once before deciding
-(``allow_inspection=True``), and this is the configuration the project uses: it ties
-the no-tool path on the strongest models and is more reliable on mid-tier models that
-need to check a join before writing it (see ``loops/model-evaluation`` Section 9).
-
-* ``allow_inspection=True`` (default, recommended) — the model MAY run a single
-  ``execute_python_script`` inspection to look at the data, then must produce its
-  visualize/explain. The inspection budget is one call, so it never becomes a loop.
-* ``allow_inspection=False`` — an evaluation-only ablation: no tools at all, the model
-  must produce its visualize/explain directly in one shot.
+Before deciding, the agent may look at the data once: the model MAY run a single
+``execute_python_script`` inspection (e.g. to check a join or a column's exact
+values), then must produce its visualize/explain. The inspection budget is one
+call, so it never becomes a loop (see ``loops/model-evaluation`` Section 9).
 
 The chart-type set is deliberately **reduced** to a handful of common types, and
 the prompt is tightly scoped, so small open-weight models reliably emit a
@@ -196,23 +190,16 @@ class MiniAnalystAgent(AnalystAgent):
     (``_call_model`` / ``_parse_action`` / ``_run_inspection_tool``) so models
     with weak or absent function-calling still work, and dispatches the committed
     ``visualize`` through the base core skill, so the emitted ``result`` /
-    ``completion`` events are identical to the loop-based agent.
-
-    Parameters
-    ----------
-    allow_inspection:
-        Defaults to ``True``, the recommended project-default *mini* behaviour: the
-        model may run a single ``execute_python_script`` inspection before producing
-        its answer. ``False`` is an evaluation-only ablation in which the model must
-        answer in one shot with no tools at all.
+    ``completion`` events are identical to the loop-based agent. Before committing,
+    the model may run a single ``execute_python_script`` inspection (a budget of
+    one, so it never loops).
     """
 
-    def __init__(self, *args: Any, allow_inspection: bool = True, **kwargs: Any) -> None:
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
         # One committing action per run; the base machinery is never asked to
         # take a second analytic step.
         kwargs.setdefault("max_iterations", 1)
         super().__init__(*args, **kwargs)
-        self.allow_inspection = allow_inspection
 
     # ------------------------------------------------------------------
     # Prompt: a tightly scoped, single-decision system prompt
@@ -228,18 +215,17 @@ def _build_system_prompt(
         **kwargs: Any,
     ) -> str:
         """Assemble the mini prompt: one visualize/explain decision, a reduced
-        chart-type reference, and (only for the inspection variation) a short note
-        describing the single optional ``execute_python_script`` call."""
+        chart-type reference, and a short note describing the single optional
+        ``execute_python_script`` inspection call."""
         prompt = _MINI_PROMPT_TEMPLATE
         prompt = prompt.replace("{chart_types}", _MINI_CHART_REFERENCE)
-        prompt = prompt.replace(
-            "{inspect_note}", _INSPECT_NOTE if self.allow_inspection else "")
+        prompt = prompt.replace("{inspect_note}", _INSPECT_NOTE)
         if self.language_instruction:
             prompt = prompt + "\n\n" + self.language_instruction
         return prompt
 
     # ------------------------------------------------------------------
-    # Tool set: only visualize + explain (+ one inspection in the tool variant)
+    # Tool set: only visualize + explain (+ the one inspection, until spent)
     # ------------------------------------------------------------------
 
     def _mini_tools(self, allow_inspect: bool) -> list[dict[str, Any]]:
@@ -487,7 +473,7 @@ def run(
                 self._explore_session = explore_session
                 kind, payload = yield from self._decide(
                     messages, input_tables, iteration,
-                    allow_inspect=self.allow_inspection,
+                    allow_inspect=True,
                 )
                 self._explore_session = None
 
diff --git a/py-src/data_formulator/routes/agents.py b/py-src/data_formulator/routes/agents.py
index c3bdffb2..ab77f6e5 100644
--- a/py-src/data_formulator/routes/agents.py
+++ b/py-src/data_formulator/routes/agents.py
@@ -353,7 +353,7 @@ def generate():
         try:
             if agent_mode == "mini":
                 # Single-decision agent; it forces max_iterations=1 internally and
-                # may run one optional inspection (allow_inspection defaults True).
+                # may run one optional data inspection before answering.
                 agent = MiniAnalystAgent(
                     client=client,
                     workspace=workspace,
diff --git a/tests/backend/agents/test_mini_agent.py b/tests/backend/agents/test_mini_agent.py
index d2af354c..8f4f0ec8 100644
--- a/tests/backend/agents/test_mini_agent.py
+++ b/tests/backend/agents/test_mini_agent.py
@@ -70,7 +70,9 @@ def close(self):
 
 def _bare_mini(allow_inspection=True):
     """A MiniAnalystAgent with just the seams its decision logic touches stubbed
-    — no real LLM / sandbox / registry tool building."""
+    — no real LLM / sandbox / registry tool building. ``allow_inspection`` only
+    seeds the per-turn ``_decide(allow_inspect=...)`` input used by the ``_decide``
+    helper below."""
     agent = MiniAnalystAgent.__new__(MiniAnalystAgent)
     agent.allow_inspection = allow_inspection
     agent.language_instruction = ""
@@ -122,21 +124,15 @@ def test_chart_reference_is_reduced_set(self):
         assert "Waterfall" not in ma._MINI_CHART_REFERENCE
         assert len(ma._MINI_CHART_TYPES) == 7
 
-    def test_inspection_note_present_only_for_tool_variant(self):
-        agent = _bare_mini(allow_inspection=True)
+    def test_inspection_note_present(self):
+        agent = _bare_mini()
         out = agent._build_system_prompt()
         assert "execute_python_script" in out
         assert all(c in out for c in ma._MINI_CHART_TYPES)
 
-    def test_no_tool_variant_omits_inspection(self):
-        agent = _bare_mini(allow_inspection=False)
-        out = agent._build_system_prompt()
-        assert "execute_python_script" not in out
-        assert '"tool": "visualize"' in out
-
 
 class TestMiniTools:
-    def test_tool_variant_offers_visualize_explain_and_inspection(self):
+    def test_mini_tools_offer_visualize_explain_and_inspection(self):
         from data_formulator.analyst.skills import build_registry
         agent = MiniAnalystAgent.__new__(MiniAnalystAgent)
         agent.registry = build_registry()
@@ -147,7 +143,7 @@ def test_tool_variant_offers_visualize_explain_and_inspection(self):
         assert "explain" in names
         assert "execute_python_script" in names
 
-    def test_no_tool_variant_drops_inspection(self):
+    def test_mini_tools_drop_inspection_when_unavailable(self):
         from data_formulator.analyst.skills import build_registry
         agent = MiniAnalystAgent.__new__(MiniAnalystAgent)
         agent.registry = build_registry()

From 38e5667fdc2e0919fb1c6f271689037c90e05407 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <93549116+Chenglong-MS@users.noreply.github.com>
Date: Thu, 18 Jun 2026 21:29:20 +0000
Subject: [PATCH 26/29] Migrate model-evaluation README to high-level plan.md

Replace the long-form README with a condensed plan.md (high-level goal and steps). The detailed report (report.md) and working artifacts (work/) remain local-only and are intentionally untracked.
---
 loops/model-evaluation/README.md | 463 -------------------------------
 loops/model-evaluation/plan.md   |  66 +++++
 2 files changed, 66 insertions(+), 463 deletions(-)
 delete mode 100644 loops/model-evaluation/README.md
 create mode 100644 loops/model-evaluation/plan.md

diff --git a/loops/model-evaluation/README.md b/loops/model-evaluation/README.md
deleted file mode 100644
index e6ec16f7..00000000
--- a/loops/model-evaluation/README.md
+++ /dev/null
@@ -1,463 +0,0 @@
-# Loop — Open-Source (Ollama) Model Review for the Data Formulator Analyst Agent
-
-**This document is an autonomous runbook.** You are a coding agent. Read it fully,
-then *spin the experiments yourself*: pick models, load data, generate questions,
-drive the `AnalystAgent`, grade the outputs, and write the report. Every step below
-is concrete enough to execute without further input. When a choice is genuinely
-ambiguous, pick the reasonable default named here and record the decision in the
-report rather than stopping to ask.
-
----
-
-## 0. Purpose
-
-Data Formulator can be pointed at any LLM, including open-source models served locally
-through Ollama. But we don't currently have a principled, evidence-based view of **which
-open-source models are actually good enough to drive the analyst agent**, nor of the
-hardware and settings each one needs. This loop produces that review: a reproducible
-benchmark of open-source models against the agent, with clear recommendations.
-
-**The questions this review must answer (these become the report's headline sections):**
-1. **Which Ollama models actually work** with the Data Formulator analyst agent? (a
-   clear pass / partial / fail list).
-2. **Minimum specs** — especially VRAM — to run each working model at a usable quality.
-3. **Required settings** — quantization, context length (`num_ctx`), and other Ollama
-   `options` that make the difference between working and failing.
-4. **The low-VRAM story** — how small can you go? Identify the *smallest model that still
-   works*, the quant / `num_ctx` it needs, and call out the practical VRAM floor for usable
-   quality.
-
-Keep these four questions pinned. Everything measured should ladder up to answering them.
-
----
-
-## 1. Mission
-
-Benchmark a broad set of open-source Ollama models against the unified **`AnalystAgent`**
-(the agent that powers Data Formulator's "Explore" / Data Thread). For each model, on
-several datasets, an LLM *asker* poses direct and open-ended analysis questions; the
-analyst agent answers by exploring data and producing charts / data-thread content; an
-LLM *grader* scores the answers. Reference runs with hosted models calibrate the scale.
-
-**Deliverable:** `loops/model-evaluation/report.md` — a results table + a model
-recommendation guide that directly answers the four questions in §0, including the
-practical low-VRAM floor.
-
-All scratch work, scripts, raw transcripts, and aggregates live under
-`loops/model-evaluation/work/` (see §9 for the layout).
-
----
-
-## 2. Environment & setup
-
-- **Repo:** this working tree. Python source is under `py-src/`; the project is
-  installed editable. **Use `uv` only**, never pip: `uv pip install -e .`,
-  `uv run python ...`, `uv run pytest`. The venv is `.venv/` — `source .venv/bin/activate`.
-- **GPU box:** run on the multi-GPU eval box (≈4×A100). Exploit the hardware: serve
-  several models concurrently and/or run datasets in parallel (see §7.4). Smaller models
-  can share a GPU; large models get a dedicated GPU.
-- **Ollama:** install/serve locally (`ollama serve`). Pull models with `ollama pull <model>`.
-  The agent talks to Ollama through LiteLLM via the `Client` wrapper (next section).
-- **Reference (hosted) models:** use the Azure OpenAI resource already wired for evals
-  (managed identity, no API key — `DefaultAzureCredential` resolves it). Deployments
-  available include `gpt-5.5`, `gpt-5-mini`. Confirm the endpoint
-  the same way the chart-reading handoff does, use az login with .env azure stuff.
-  ([agent_eval_plans/00-AGENT-HANDOFF.md](../../agent_eval_plans/00-AGENT-HANDOFF.md) §3).
-
-### 2.1 Talking to a model — the `Client` wrapper
-
-`data_formulator.agents.client_utils.Client` is the single LLM entry point for every
-agent. It already supports Ollama and Azure:
-
-```python
-from data_formulator.agents.client_utils import Client
-
-# Local model under test (Ollama)
-agent_client = Client(
-    endpoint="ollama",
-    model="qwen2.5-coder:7b",            # ollama/ prefix added automatically
-    api_base="http://localhost:11434",   # default; LiteLLM strips a trailing /api
-)
-
-# Reference / asker / grader (Azure managed identity — no api_key)
-ref_client = Client(
-    endpoint="azure",
-    model="gpt-5.5",                      # deployment name
-    api_base="https://<resource>.openai.azure.com/",
-)
-```
-
-> **Ollama `options` (quantization, context length, etc.):** LiteLLM forwards extra
-> kwargs to Ollama's `options` (e.g. `num_ctx`, `temperature`, `num_gpu`). The agent
-> calls `client.get_completion_with_tools(...)`. Decide early how you will pass
-> `num_ctx` (see §6) — either bake it into the Modelfile (`ollama create` with
-> `PARAMETER num_ctx ...`) or pass it through the client call. **Record exactly what you
-> used** — the context-length setting is one of the four questions in §0.
-
----
-
-## 3. Step 1 — Choose the Ollama models to test
-
-**Search first, then decide.** Use web search / the Ollama library to pick a current,
-representative set. Bias toward models with strong **coding** and **instruction-following**
-ability, since the analyst agent's core action (`visualize`) emits Python + a chart spec
-through native tool-calls (see §7.1). Cover a range of sizes so the VRAM story is complete.
-
-Selection guidance:
-- **Tool-calling matters.** The analyst commits actions as **native tool calls**, not
-  JSON-in-text. Prefer models Ollama lists as supporting tools/function-calling. Flag any
-  model that lacks tool support — it will likely fail the agent loop, which is itself a
-  finding.
-- **Size buckets to cover** (span the spectrum so the report can speak to different VRAM budgets):
-  - **~3–4B** (small, low-VRAM): e.g. `llama3.2:3b`, `qwen2.5-coder:3b`, `phi-class`.
-  - **~7–9B** (mainstream): e.g. `qwen2.5-coder:7b`, `llama3.1:8b`,
-    `mistral`/`ministral`, `granite` coder, `deepseek-coder` variants.
-  - **~14–34B** (mid/large): e.g. `qwen2.5-coder:14b/32b`, `devstral`, `gpt-oss`-class.
-  - **Large reference-tier OSS** if the box allows: pick one or two flagships.
-- Treat the list as a hypothesis; the actual Ollama catalog at run time is the authority.
-  **List the final set in the report** with: model name, parameter count, quantization
-  pulled, on-disk size, and Ollama tool-calling support (yes/no).
-
-**Hardware-requirement table (build this and put it in the report):**
-
-| Model | Params | Quant | Disk | Est. VRAM @ ctx | Tool-calling |
-|-------|--------|-------|------|-----------------|--------------|
-
-Estimate VRAM at the context length you actually run (KV cache grows with `num_ctx` — a
-3B model that fits at 4k may not at 32k). Verify estimates against `ollama ps` / `nvidia-smi`
-during a real run and record the *observed* peak.
-
----
-
-## 4. Step 2 — Seed datasets
-
-Assemble a **corpus of ~30 real tables** as the exploration starting points — a broad,
-varied set so each model is tested across many domains and data shapes, not a handful of
-tables. Source them from the families Data Formulator already ships / references:
-
-- **vega_datasets** — installed as a package (`from vega_datasets import data`) and also
-  configured in [py-src/data_formulator/example_datasets_config.py](../../py-src/data_formulator/example_datasets_config.py)
-  (Gapminder, Movies, US Income, Unemployment, Disasters...). Good clean multi-type tables.
-- **TidyTuesday** — real, messier tables. Several are wired in `example_datasets_config.py`
-  (College Majors, weekly gas prices, movies/shows) and more exist under
-  [experiment_data/tidytuesday/](../../experiment_data/tidytuesday/). Use these for the
-  "real-world schema" condition.
-- **spider** (optional / secondary) — text-to-SQL databases. Only include if you also want
-  a relational/multi-table condition; otherwise skip and note it as future work. (Most of
-  the analyst's value is single-table-to-chart, so vega + tidytuesday are the priority.)
-
-**Target ~30 tables total**, balanced across the sources and spanning domains and data
-types (temporal, categorical, quantitative, geographic; some clean, some with
-nulls/messiness; a range of row/column counts). For each, download/materialize it and
-record a one-line description + schema in `work/datasets/`. Keep each small enough to fit a
-local model's context (downsample very large tables to a few thousand rows, noting it).
-**Carve out a dev subset** of these for pipeline calibration (§6.1) — the remaining tables
-are the held-out test corpus.
-
-### 4.1 Loading a seed table into a workspace (the agent reads tables from disk)
-
-The agent's context builder reads tables via `workspace.read_data_as_df(name)`
-([py-src/data_formulator/agents/context.py](../../py-src/data_formulator/agents/context.py)),
-so a seed table must be **registered in a `Workspace`** before the agent can use it. Recipe:
-
-```python
-import pyarrow as pa, pandas as pd
-from data_formulator.datalake.workspace import Workspace
-
-ws = Workspace(identity_id="eval", root_dir="loops/model-evaluation/work/ws")
-df = pd.read_csv(...)                      # your seed table
-name = ws.get_fresh_name("gapminder")
-meta = ws.write_parquet_from_arrow(pa.Table.from_pandas(df), name)
-ws.add_table_metadata(meta)                # now read_data_as_df(name) works
-```
-
-Then pass `input_tables=[{"name": name}]` to `agent.run(...)`. **Smoke-test this once**
-(load one table, confirm `ws.read_data_as_df(name)` returns the frame) before scaling.
-
----
-
-## 5. Step 3 — Generate questions (the *asker*)
-
-For **each dataset**, use a hosted asker model (**`gpt-5.5`**) to generate **exactly 5
-analysis questions**. Generate once per dataset and **reuse the identical question set
-across all models under test** — the question must be a constant so model is the only
-variable. With ~30 datasets × 5 questions, **each model is tested on ~150 questions** (the
-test goal per model).
-
-- Per dataset, generate **5 questions** with a fixed mix — default **3 direct + 2
-  open-ended** (keep the split constant across datasets):
-  - **Direct** ("Show the trend of life expectancy over time for each cluster.") — has a
-    fairly determinate good answer; easier to grade for correctness.
-  - **Open-ended** ("What's the most interesting story in this data? Explore it.") — tests
-    initiative, multi-step exploration, and judgment.
-- Give the asker the table name, schema, and a few sample rows (use the same lightweight
-  table summary the agent sees). Ask it to return strict JSON: a list of
-  `{id, dataset, kind: "direct"|"open", question}`.
-- Persist the generated questions to `work/questions/<dataset>.json` and **freeze them**
-  (do not regenerate per model). Sanity-check that each file has exactly 5 well-formed,
-  on-topic questions before the sweep — a bad question contaminates every model equally.
-
----
-
-## 6. Settings to hold fixed (and to vary deliberately)
-
-To isolate the model variable, **freeze**: the asker model + prompt, the grader model +
-rubric, the question sets, the agent's `max_iterations` (start at **5**), and temperature
-(**0** for agent, asker, and grader unless a model rejects it).
-
-**Deliberately vary (these are findings, not nuisance):**
-- **Quantization** — record what each `ollama pull` actually fetched (usually q4_K_M). If a
-  model fails, optionally retry at a higher quant and note whether it helped.
-- **Context length (`num_ctx`)** — the agent's system prompt + table context + tool schemas
-  are sizable. A too-small `num_ctx` will truncate the system frame and the model will fail
-  to follow the action protocol. Establish a **working default** (try 8192; bump to 16384
-  if outputs look truncated) and note the minimum that works per model. This is central to
-  the low-VRAM story (bigger `num_ctx` ⇒ more VRAM).
-- **Tool-calling support** — if a model can't emit native tool calls, capture that as the
-  failure mode.
-
-### 6.1 Calibrate on a dev set before the mass run
-
-**Do not launch the full ~30-dataset sweep blind.** First carve out a small **dev set** and
-use it to shake out the pipeline end-to-end:
-
-- **Dev set:** ~3 datasets (pick varied ones — one clean vega table, one messy TidyTuesday
-  table, one with nulls/odd types) × their 5 frozen questions = ~15 dev items. Pick **2–3
-  models** spanning the size spectrum (one small, one mid, plus one **hosted reference**
-  `gpt-5.5`/`gpt-5-mini`). Keep the dev datasets clearly tagged so they're reportable
-  separately and not silently mixed into the headline test numbers.
-- **What the dev run must validate** before you trust the mass run:
-  - the harness drives `AnalystAgent.run` and records every event type (§7.1) without crashing;
-  - the **answer bundle + per-action execution outcomes** (§7.2) are captured correctly,
-    including a deliberately code-broken case (confirm the `code-broken` level is detected);
-  - the **outcome-level classifier** (§7.3) assigns sane levels — eyeball all ~15;
-  - the **grader** returns valid strict JSON on every item and its scores look reasonable
-    against your own read (this is the judge-calibration sample, §8);
-  - resume/idempotency works (re-running skips completed items);
-  - the reference model lands near the top of the scale (a sanity floor for the rubric).
-- **Tune here, freeze after.** Adjust prompts (asker/grader), `num_ctx` default, timeouts,
-  and the outcome taxonomy on the dev set. Once the dev run looks right, **freeze the
-  pipeline** and only then launch the full corpus. Record the dev-set findings (especially
-  any rubric/classifier adjustments) in the report's Method section.
-
----
-
-## 7. Step 4 — Run the analyst agent (the harness)
-
-Build a small harness under `work/` that, for each `(model, dataset, question)`, drives the
-real `AnalystAgent` and captures everything it emits.
-
-### 7.1 Driving the agent
-
-`AnalystAgent` lives at [py-src/data_formulator/analyst/agent.py](../../py-src/data_formulator/analyst/agent.py).
-`run(...)` is a **generator that yields event dicts**; consume them to exhaustion (or until a
-terminal event) and record them. Minimal shape:
-
-```python
-from data_formulator.analyst.agent import AnalystAgent
-
-agent = AnalystAgent(
-    client=agent_client,            # the Ollama Client from §2.1
-    workspace=ws,                   # the Workspace from §4.1
-    max_iterations=5,
-    identity_id="eval",             # enables reasoning log; pass None to skip
-)
-
-events = []
-for ev in agent.run(input_tables=[{"name": name}], user_question=question["question"]):
-    events.append(ev)
-    if ev.get("type") in ("completion", "interact", "error"):
-        break
-```
-
-**Event types to capture** (from the run loop / skills):
-- `agent_action` — the committed action (`visualize` / `interact` / `delegate` / `write_report`)
-  and its `action_data` (code, chart spec, etc.).
-- `result` — a visualization result: the transformed table (`rows`) + chart spec + `chart_id`.
-  This is the **data-thread content** — the primary artifact to grade.
-- `tool_start` / `tool_result` — inspection-tool activity (`execute_python_script`,
-  `inspect_source_data`, `load_skill`).
-- `text_delta` with `channel="report"` — streamed report markdown (if `write_report` runs).
-- `completion` — final answer / status (`success`, `tool_rounds_exhausted`, etc.).
-- `error` — capture the message + code; **classify the failure** (see §7.3).
-
-### 7.2 What "produces an answer" means here — capture the whole spectrum
-
-The agent answers by **acting on data**, not by prose alone. A good run typically commits
-one or more `visualize` actions (each yields a `result` with a derived table + chart), and
-ends with a concise closing answer. But the interesting signal is the *spectrum between*
-"did nothing" and "perfect": a model may emit code that **doesn't run**, code that **runs
-but produces the wrong/empty table**, a chart with the **wrong encodings**, or a technically
-correct answer that **misses the point of the question**. Capture enough to tell these apart.
-
-For every committed `visualize` (and every `execute_python_script` tool call), record the
-**execution outcome** explicitly — don't just keep the final answer:
-- **did the code run?** (the sandbox raised vs. returned) — from the `result` / `tool_result`
-  event and the observation the loop fed back. Note the exception type/message if it threw.
-- **how many repair attempts** the agent took before the code ran (or gave up).
-- **did it yield a non-empty, sensible output table?** (row/col counts; all-null or 0-row
-  outputs are a distinct "ran but empty" outcome).
-- **the chart spec actually produced** (type + encodings) vs. what the question called for.
-
-Persist, per `(model, dataset, question)`:
-- the full ordered event list (`work/runs/<model>/<dataset>/<qid>.jsonl`),
-- a distilled "answer bundle": the closing text, each chart's spec + a sample of its output
-  rows, the Python the agent ran, **and the per-action execution outcomes above**. This
-  bundle is what you hand the grader (§8).
-- run metadata: terminal status, action count, **code-error count + repair-loop count**,
-  wall-clock time, tokens if available, and observed peak VRAM (`nvidia-smi` snapshot).
-
-### 7.3 Outcome taxonomy — a graded spectrum, not just pass/fail (this is half the report)
-
-Classify **every run** (not only failures) into exactly one outcome level, so the report can
-show the full distribution per model rather than a binary. The levels, worst → best:
-
-1. **no-action** — model never emits a native action at all (often: no tool-calling support,
-   or `num_ctx` truncated the protocol). The "doesn't work at all" floor.
-2. **malformed-action** — emits actions but with broken/invalid args (bad JSON, missing
-   required field) and never recovers.
-3. **code-broken** — commits `visualize` but the code **never runs successfully** (throws
-   every attempt; agent exhausts the repair budget). Record the dominant exception.
-4. **ran-but-empty/wrong** — code runs, but the output table is **empty, all-null, or clearly
-   wrong** (bad aggregation/join/filter), so the chart is meaningless.
-5. **ran-but-suboptimal** — produces a valid chart, but it's a **weak answer**: wrong chart
-   type for the question, missing an obvious encoding/breakdown, answers a narrower question
-   than asked, or stops short on an open-ended prompt.
-6. **good** — runs cleanly and answers the question well; chart + transform are faithful and
-   appropriate; concise close.
-
-Also flag, orthogonally (a run can be `good` and still carry a flag): **protocol-drift**
-(narrates instead of acting, or re-explores the same thing without closing), **slow**
-(usable but far slower than the reference), and **timeout/OOM** (too big for VRAM or hangs —
-maps to level 1 for scoring but tag the cause).
-
-The per-model **distribution across these levels** (e.g. "40% good, 30% ran-but-suboptimal,
-20% code-broken, 10% no-action") plus the dominant failure level is exactly what turns the
-report from "works / doesn't" into an honest, graded review.
-
-### 7.4 Parallelism (use the box)
-
-Iterate `models × datasets × questions` (~30 datasets × 5 questions = ~150 per model). To
-exploit ≥4 GPUs: run several models concurrently (separate Ollama model loads / pinned GPUs),
-and/or fan out datasets per model. Keep the **asker and grader calls serialized enough** to
-respect Azure rate limits. Make the harness **resumable** (skip `(model,dataset,qid)` whose
-`.jsonl` already exists) so a crash mid-sweep doesn't restart everything. Always exercise the
-**dev-set calibration run (§6.1) first** and freeze the pipeline before launching the full
-corpus.
-
----
-
-## 8. Step 5 — Grade the answers (the *grader*)
-
-Use **`gpt-5.5`** as the grader (a different concern from the agent under test; the agent is
-the Ollama model, so there's no self-grading). Temperature 0.
-
-For each `(model, dataset, question)` answer bundle, the grader sees: the question, the table
-schema + samples, and the agent's answer bundle (closing text + chart specs + output-row
-samples + code + **the per-action execution outcomes from §7.2**). The grader is told the
-run's mechanical outcome level (§7.3) so it scores *quality given that the code ran* rather
-than re-deriving whether it ran. It returns **strict JSON** scores on a fixed rubric, e.g.
-(1–5 each):
-- **task_completion** — did it actually answer the question that was asked (not a narrower one)?
-- **code_executed** — did the agent's code run cleanly (no errors / few repairs)?
-- **result_correctness** — are the transforms/aggregations and the output table faithful to
-  the data (right filter/group/join; non-empty, sensible)?
-- **chart_appropriateness** — sensible chart type + encodings for the question.
-- **insightfulness** (esp. for open-ended) — did it surface something meaningful / explore,
-  or stop at the shallow first answer?
-- **protocol_adherence** — clean agent behavior (acted decisively, no flailing/repetition).
-- plus a one-line `rationale`, the **outcome level** from §7.3, and an overall
-  `verdict ∈ {pass, partial, fail}` (partial = ran-but-suboptimal / ran-but-empty: the
-  "works but not ideal" middle the review must surface).
-
-Also compute a **reference delta**: grade the hosted reference runs (`gpt-5.5`, `gpt-5-mini`)
-on the identical questions so each local model can be reported *relative to* a known-good
-ceiling, not just on an absolute scale. Persist all scores to `work/grades/`.
-
-**Calibrate the judge:** spot-check ~5–10 graded items by hand and confirm the grader's
-scores are sane; note any systematic judge bias in the report.
-
----
-
-## 9. Step 6 — Summarize & deliverables
-
-### 9.1 `work/` layout
-
-```
-loops/model-evaluation/work/
-  ws/                      # the eval Workspace (seed tables registered here)
-  models.json              # the chosen model set + hardware table (§3)
-  datasets/                # ~30 materialized seed tables + descriptions (§4); dev subset tagged
-  dataset_splits.json      # which datasets are dev (§6.1) vs held-out test
-  questions/<dataset>.json # frozen question sets — 5 per dataset (§5)
-  runs/<model>/<dataset>/<qid>.jsonl   # full event streams (§7)
-  bundles/<model>/<dataset>/<qid>.json # distilled answer bundles (§7.2)
-  grades/<model>/...                   # grader JSON (§8)
-  aggregates.{json,csv}    # per-model means, win-rates vs reference, outcome-level distribution
-  scripts/                 # all harness/asker/grader/aggregation scripts
-```
-
-Put **every script** under `work/scripts/`. Keep them runnable with `uv run python`.
-
-### 9.2 `report.md` (the headline deliverable)
-
-Write `loops/model-evaluation/report.md`. It must, up front, answer the four §0 questions,
-then back them with data. Required sections:
-
-1. **TL;DR verdict table** — every tested model with: size, quant, `num_ctx` used, observed
-   peak VRAM, overall score, score relative to the `gpt-5-mini` / `gpt-5.5` reference, and
-   the **outcome-level distribution** (§7.3) — e.g. `% good / suboptimal / empty-wrong /
-   code-broken / no-action` — plus the dominant level. Don't collapse to a single pass/fail.
-2. **"These models work, these don't" — and how they fall short** — group models into clear
-   tiers (reliable / usable-with-caveats / unusable), and for the middle tier name the
-   *specific* shortfall (e.g. "code runs but charts are often the wrong type", "fine on direct
-   questions, gives up on open-ended"). The graded middle is the point of the review.
-3. **Minimum specs & the low-VRAM floor** — name the *smallest model that actually works*,
-   the quant + `num_ctx` it needs, and the realistic quality at that size. State the
-   practical VRAM floor for usable quality and the recommended step-ups across the spectrum.
-4. **Recommended settings** — quantization, `num_ctx`, `max_iterations`, and any Ollama
-   `options` that materially helped. Include a copy-pasteable Ollama setup for the top pick.
-5. **Method** — datasets, question counts, asker/grader models, rubric, and reproduction
-   command(s). Note judge-calibration findings and limitations.
-6. **Per-model notes** — short paragraph each: outcome-level distribution, what it did well,
-   *how* it fell short (which level dominated and why), and example transcript pointers for
-   a representative good run and a representative failure.
-
-Keep it evidence-led and honest: the graded middle ("runs but not ideal") and a credible
-"fail" list are as valuable as the "works" list.
-
----
-
-## 10. Conventions & guardrails
-
-- **`uv` only.** `uv run python ...` / `uv pip install ...`. Source is in `py-src/`.
-- **No secrets** in the repo, scripts, or transcripts. Azure auth is managed-identity only;
-  do not write API keys anywhere.
-- **Freeze the controls** (asker, grader, questions, `max_iterations`, temperature) so the
-  model is the only variable; **record** every deliberately-varied setting (quant, `num_ctx`).
-- **Make the sweep resumable** and idempotent; never delete prior runs to "retry" — write to
-  a fresh path and keep the originals.
-- **Don't commit/push or run destructive git ops** on this working tree.
-- Keep all artifacts under `loops/model-evaluation/work/`; the only top-level deliverable is
-  `loops/model-evaluation/report.md`.
-
----
-
-## 11. Suggested order of work
-
-1. **Smoke the stack**: activate venv; confirm one Ollama chat completion through `Client`
-   and one Azure (`gpt-5.5`) completion; load one seed table into a `Workspace` and confirm
-   `read_data_as_df` works; drive `AnalystAgent.run` once on a tiny model + one question and
-   capture the event stream end-to-end. Fix wiring before scaling.
-2. **Pick models** (§3) and write `work/models.json` + the hardware table.
-3. **Materialize ~30 datasets** (§4) and **freeze the 5-question sets per dataset** (§5) with
-   `gpt-5.5`; record the dev vs. test split (§6.1) in `work/dataset_splits.json`.
-4. **Build the harness** (§7): runner → answer bundles + per-action execution outcomes,
-   resumable, with outcome-level tagging.
-5. **Calibrate on the dev set** (§6.1): run 2–3 models (incl. a hosted reference) over the
-   ~15 dev items end-to-end; validate capture, classifier, grader JSON, and resume; tune
-   prompts/`num_ctx`/timeouts; then **freeze the pipeline**.
-6. **Mass run**: sweep all `models × test-datasets × questions` (~150 questions/model).
-7. **Grade** (§8) all runs incl. the `gpt-5.5` / `gpt-5-mini` reference; calibrate the judge.
-8. **Aggregate** into `aggregates.{json,csv}` and **write `report.md`** (§9.2), leading with
-   the four §0 answers and the low-VRAM floor.
\ No newline at end of file
diff --git a/loops/model-evaluation/plan.md b/loops/model-evaluation/plan.md
new file mode 100644
index 00000000..1fd19bf3
--- /dev/null
+++ b/loops/model-evaluation/plan.md
@@ -0,0 +1,66 @@
+# Loop — Open-Source (Ollama) Model Evaluation
+
+**High-level plan.** Execute end-to-end, making reasonable decisions when details are
+ambiguous, and record them in the final report (`report.md`; all working artifacts go
+under `work/`).
+
+## Goal
+
+Benchmark open-source (Ollama) models that drive Data Formulator's analyst agents —
+inspect tabular data, write transformation code, and commit a visualization — and report
+**two independent axes**:
+
+1. **Success rate** — does the agent actually produce a rendered chart? (reliability)
+2. **Quality when produced** — how good is the chart when it finishes, scored 0-100 by a
+   code + vision grader? (competence)
+
+Keep them separate: a model can write good code yet fail to deliver it through the
+protocol. The dominant open-model failure mode is **driving the tool/transport, not
+analyzing the data**, so each model runs through more than one agent transport:
+
+- `analyst` — native function/tool calls (with a content-JSON salvage fallback).
+- `mini` — single-decision, pure-prompt JSON contract; the production low-cost agent.
+
+Always include the Azure references `gpt-5.5` and `gpt-5-mini` as the baseline.
+
+## Data
+
+A frozen **45-question** set across **15 datasets** from the `../visbench` benchmark, fed
+as the **raw / grouped source tables** (not VisBench's derived single-table `data.csv`) so
+the agent must do its own joins:
+
+- **vega_datasets** single tables — 9 single-table questions.
+- **TidyTuesday** multi-CSV weeks — 18 multi-table questions.
+- **Spider** databases grouped by DB — 18 multi-table questions.
+
+Reuse VisBench's quality-filtered question and reference chart for each item. The single-
+vs multi-table split (9 / 36) is the axis along which models diverge most.
+
+## Steps
+
+1. **Select & pull models** — the open roster across size tiers (1B → 120B) plus the
+   Azure references named under Goal.
+2. **Prepare the benchmark** — materialize the 45 questions as raw/grouped tables and
+   freeze the VisBench questions + reference charts, reused identically across every model
+   and agent.
+3. **Run agents** — every `(agent, model, question)` cell with `--agent` in `analyst`
+   and `mini`; capture the event stream and render each chart to PNG. Frozen controls:
+   `max_iterations = 5`, 240 s timeout, resumable.
+4. **Score (two phases, GPT-5.5 grader):**
+   - **Phase 1 — reliability:** five sequential gates (responded → emitted action → code
+     ran → output → **produced chart**). The chart gate is decisive and defines the
+     success rate; only those runs proceed.
+   - **Phase 2 — quality (0-100, produced charts only):** code review vs the question
+     (0-50) + vision review of the rendered PNG vs the reference chart (0-50).
+5. **Aggregate & report** — report the two axes separately (never collapse them); for
+   ranking only, derive success-weighted quality (Phase 2 over all 45, no-chart = 0) and
+   combined = `0.3 × (success_rate × 100) + 0.7 × success-weighted quality`. Always show
+   the single- vs multi-table split, the per-gate drop-off, comparison to the references,
+   and recommendations per size tier (with which `--agent`).
+
+## Principles
+
+- **Two axes stay separate** — `combined` is for ranking only.
+- **Freeze controls** — same questions, grader, `max_iterations`, and timeout across every cell.
+- **`mini` is the production low-cost agent** — `simple` was removed; don't run `--agent simple`.
+- **`uv` only**, no secrets (Azure auth via Entra ID), resumable, all artifacts under `work/`.

From 70d473070cc2e9b7935984de893f431c9495114b Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Tue, 23 Jun 2026 22:23:33 -0700
Subject: [PATCH 27/29] fixes

---
 py-src/data_formulator/analyst/mini_agent.py  | 126 +++++++++++++++---
 .../data_loader/local_folder_data_loader.py   |   8 +-
 src/app/App.tsx                               |  21 +--
 src/i18n/locales/en/common.json               |   3 +
 src/i18n/locales/en/messages.json             |   1 +
 src/i18n/locales/en/model.json                |   5 +-
 src/i18n/locales/zh/common.json               |   3 +
 src/i18n/locales/zh/messages.json             |   1 +
 src/i18n/locales/zh/model.json                |   5 +-
 src/views/DataSourceSidebar.tsx               |   4 +-
 src/views/ModelSelectionDialog.tsx            |  39 +++++-
 tests/backend/agents/test_mini_agent.py       |  52 ++++++++
 .../backend/data/test_local_folder_loader.py  |   5 +
 13 files changed, 230 insertions(+), 43 deletions(-)

diff --git a/py-src/data_formulator/analyst/mini_agent.py b/py-src/data_formulator/analyst/mini_agent.py
index 24979f5e..99088c3c 100644
--- a/py-src/data_formulator/analyst/mini_agent.py
+++ b/py-src/data_formulator/analyst/mini_agent.py
@@ -16,8 +16,11 @@
 
 Before deciding, the agent may look at the data once: the model MAY run a single
 ``execute_python_script`` inspection (e.g. to check a join or a column's exact
-values), then must produce its visualize/explain. The inspection budget is one
-call, so it never becomes a loop (see ``loops/model-evaluation`` Section 9).
+values), then must produce its visualize/explain. If the committed chart then
+fails, each in-place repair attempt may likewise run one inspection to diagnose
+the failure before re-emitting the chart. Inspection is one call per decision and
+the repair budget is bounded, so the run stays finite and never becomes an open loop
+(see ``loops/model-evaluation`` Section 9).
 
 The chart-type set is deliberately **reduced** to a handful of common types, and
 the prompt is tightly scoped, so small open-weight models reliably emit a
@@ -66,6 +69,17 @@
 _THOUGHT_KEYS = ("thought", "thoughts", "reasoning", "thinking", "rationale")
 
 
+# Shown when a run finishes without anything user-visible: the model returned an
+# empty reply, or it burned its protocol budget (e.g. a small model that kept
+# asking to inspect) without ever committing a chart. A mini run must never end
+# silently — the frontend drops an empty summary, so we surface this instead so
+# the user can retry or switch to a more capable model rather than seeing nothing.
+_NO_OUTPUT_FALLBACK = (
+    "I couldn't produce a chart or a clear answer for this request. Try "
+    "rephrasing it, or switch to a more capable model for mini mode."
+)
+
+
 # The reduced chart-type set. Every name here is a valid Data Formulator
 # ``chart_type`` that the eval renderer and the visualize skill both understand;
 # the list is kept short on purpose so a small model picks a sensible type
@@ -191,15 +205,25 @@ class MiniAnalystAgent(AnalystAgent):
     with weak or absent function-calling still work, and dispatches the committed
     ``visualize`` through the base core skill, so the emitted ``result`` /
     ``completion`` events are identical to the loop-based agent. Before committing,
-    the model may run a single ``execute_python_script`` inspection (a budget of
-    one, so it never loops).
+    the model may run a single ``execute_python_script`` inspection; if the chart
+    then fails, a bounded auto-revision loop lets it inspect again and fix the
+    SAME chart (capped by ``max_repair_attempts``), so the run stays finite.
     """
 
+    # Auto-revision floor: small/local models often need a few tries — inspect
+    # the data, read the error, fix the code — before a chart succeeds, so a
+    # single blind retry isn't enough. A higher caller-provided value is kept.
+    _AUTO_REVISION_ATTEMPTS = 3
+
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         # One committing action per run; the base machinery is never asked to
         # take a second analytic step.
         kwargs.setdefault("max_iterations", 1)
         super().__init__(*args, **kwargs)
+        # Give the in-place repair loop room to revise (inspect -> fix -> retry)
+        # instead of giving up after one attempt.
+        self.max_repair_attempts = max(
+            int(self.max_repair_attempts), self._AUTO_REVISION_ATTEMPTS)
 
     # ------------------------------------------------------------------
     # Prompt: a tightly scoped, single-decision system prompt
@@ -478,19 +502,35 @@ def run(
                 self._explore_session = None
 
             if kind == "explain":
+                summary = payload.strip() if isinstance(payload, str) else ""
                 yield {
                     "type": "completion",
                     "iteration": iteration,
                     "status": "success",
-                    "content": {"summary": payload, "total_steps": 0},
+                    "content": {"summary": summary or _NO_OUTPUT_FALLBACK,
+                                "total_steps": 0},
                 }
                 self._log_session_end(rlog, "success", iteration, 0, session_start)
                 return
 
             if kind == "visualize":
-                produced = yield from self._visualize_with_repair(
+                produced, viz_error = yield from self._visualize_with_repair(
                     payload, messages, input_tables, iteration, completed_steps)
                 status = "success" if produced else "completed_no_viz"
+                if not produced:
+                    # A failed chart would otherwise end the run silently: the
+                    # skill's error events are internal retry signals the shell
+                    # router drops. Surface the failure (with the reason, when we
+                    # have it) so the user sees why nothing rendered.
+                    detail = f" ({viz_error})" if viz_error else ""
+                    yield self._error_event(
+                        iteration,
+                        "I couldn't build a working chart for this request"
+                        f"{detail}. Try rephrasing it, or switch to a more capable "
+                        "model for mini mode.",
+                        message_code="agent.miniNoChart",
+                        message_params={"error": detail},
+                    )
                 yield {
                     "type": "completion",
                     "iteration": iteration,
@@ -503,13 +543,20 @@ def run(
             # kind == "none": an LLM error or an exhausted protocol; payload is
             # the status string.
             if payload == "llm_error":
+                # The error event is this path's user-visible feedback.
                 yield self._error_event(
                     iteration, "LLM API error", message_code="agent.llmApiError")
+                summary = ""
+            else:
+                # Exhausted the protocol without committing (e.g. a small model
+                # that kept asking to inspect): surface a message so the run is
+                # never silent.
+                summary = _NO_OUTPUT_FALLBACK
             yield {
                 "type": "completion",
                 "iteration": iteration,
                 "status": payload,
-                "content": {"summary": "", "total_steps": 0},
+                "content": {"summary": summary, "total_steps": 0},
             }
             self._log_session_end(rlog, payload, iteration, 0, session_start)
             return
@@ -574,10 +621,22 @@ def _decide(
 
             # --- plain text -> the explain answer ---------------------------
             if parsed is None:
+                stripped = content.strip()
+                # An empty reply is a failure, not a deliberate answer: nudge
+                # once for a real answer rather than ending the run with nothing.
+                if not stripped and corrections_left > 0:
+                    corrections_left -= 1
+                    messages.append({"role": "assistant", "content": content or None})
+                    messages.append({"role": "user", "content": (
+                        "[OBSERVATION] Your reply was empty. Emit your visualize "
+                        "JSON object now, or an explain object with your answer.")})
+                    rlog.log("llm_response", iteration=iteration,
+                             latency_ms=latency, finish_reason="empty_reply")
+                    continue
                 rlog.log("llm_response", iteration=iteration,
                          latency_ms=latency, finish_reason="final_text")
                 messages.append({"role": "assistant", "content": content or None})
-                return ("explain", content.strip())
+                return ("explain", stripped)
 
             thought, name, args = parsed
             messages.append({"role": "assistant", "content": content})
@@ -658,6 +717,22 @@ def _decide(
     # Visualize: dispatch through the core skill, repair the SAME chart on failure
     # ------------------------------------------------------------------
 
+    @staticmethod
+    def _extract_viz_error(observation: str | None) -> str | None:
+        """Pull a one-line error summary out of a failed-visualize observation.
+
+        The visualize skill reports a failure as
+        ``"[OBSERVATION – Step N FAILED]\\n\\nError: <msg>"`` and the shell router
+        drops the matching ``error`` event (an internal retry signal), so this
+        observation string is the only place the reason survives. Returns the
+        first non-empty line of ``<msg>`` (truncated) or ``None`` when there's
+        nothing useful to show."""
+        if not observation:
+            return None
+        text = observation.split("Error:", 1)[1] if "Error:" in observation else observation
+        first_line = next((ln.strip() for ln in text.splitlines() if ln.strip()), "")
+        return first_line[:200] or None
+
     def _visualize_with_repair(
         self,
         args: dict[str, Any],
@@ -665,13 +740,17 @@ def _visualize_with_repair(
         input_tables: list[dict[str, Any]] | None,
         iteration: int,
         completed_steps: list[dict[str, Any]],
-    ) -> Generator[dict, None, bool]:
+    ) -> Generator[dict, None, tuple[bool, str | None]]:
         """Execute the committed ``visualize`` via the base core-skill dispatch,
-        re-yielding its ``action`` / ``result`` / ``error`` events. If the code or
-        encodings fail, show the model the error and let it fix the SAME chart, up
-        to ``max_repair_attempts`` times. Returns ``True`` once a chart is
-        produced, ``False`` if every attempt failed."""
+        re-yielding its ``action`` / ``result`` events. If the code or encodings
+        fail, show the model the error and let it fix the SAME chart, up to
+        ``max_repair_attempts`` times; each retry may run one inspection first to
+        diagnose the failure. Returns ``(True, None)`` once a chart is produced,
+        or ``(False, last_error)`` if every attempt failed — the skill's ``error``
+        events are dropped by the shell router, so ``last_error`` carries the
+        reason out for the run to surface."""
         repairs_left = max(0, int(self.max_repair_attempts))
+        last_error: str | None = None
 
         while True:
             action = dict(args)
@@ -695,17 +774,24 @@ def _visualize_with_repair(
             self._set_action_observation(messages, None, observation)
 
             if produced:
-                return True
+                return True, None
+            # The skill's error EVENT was dropped by the router; the observation
+            # string is the only carrier of why the chart failed.
+            last_error = self._extract_viz_error(observation) or last_error
             if repairs_left <= 0:
-                return False
+                return False, last_error
 
             repairs_left -= 1
             messages.append({"role": "user", "content": (
-                "[SYSTEM] The visualize above FAILED. Fix the SAME chart: read the "
-                "error in the observation, correct your code and/or encodings, and "
-                "emit ONE corrected visualize JSON object (no other text).")})
+                "[SYSTEM] The visualize above FAILED. Read the error in the "
+                "observation and fix the SAME chart. If the error looks like the "
+                "data isn't what you assumed (a missing column, a wrong dtype, or "
+                "values that need parsing/splitting), FIRST run ONE "
+                "execute_python_script inspection to print the real columns and a "
+                "few values, then emit ONE corrected visualize JSON object. If the "
+                "fix is obvious, emit the corrected visualize directly.")})
             kind, new_args = yield from self._decide(
-                messages, input_tables, iteration, allow_inspect=False)
+                messages, input_tables, iteration, allow_inspect=True)
             if kind != "visualize":
-                return False
+                return False, last_error
             args = new_args
diff --git a/py-src/data_formulator/data_loader/local_folder_data_loader.py b/py-src/data_formulator/data_loader/local_folder_data_loader.py
index 240771b0..2c41bc4f 100644
--- a/py-src/data_formulator/data_loader/local_folder_data_loader.py
+++ b/py-src/data_formulator/data_loader/local_folder_data_loader.py
@@ -250,7 +250,13 @@ def fetch_data_as_arrow(
         if ext == ".parquet":
             table = pq.read_table(str(resolved))
         elif ext in (".csv", ".tsv"):
-            table = pa_csv.read_csv(str(resolved))
+            # ``.tsv`` is tab-separated; pyarrow's read_csv defaults to a comma
+            # delimiter, so without this a TSV collapses into a single column
+            # (e.g. "id\trate" stays one field). Keep comma for ``.csv``.
+            parse_options = (
+                pa_csv.ParseOptions(delimiter="\t") if ext == ".tsv" else None
+            )
+            table = pa_csv.read_csv(str(resolved), parse_options=parse_options)
         elif ext in (".json", ".jsonl"):
             import pyarrow.json as pa_json
             table = pa_json.read_json(str(resolved))
diff --git a/src/app/App.tsx b/src/app/App.tsx
index 3d6545e5..5a8007b4 100644
--- a/src/app/App.tsx
+++ b/src/app/App.tsx
@@ -51,8 +51,6 @@ import {
     ListItemText,
     CircularProgress,
     LinearProgress,
-    Switch,
-    FormControlLabel,
 } from '@mui/material';
 
 
@@ -326,7 +324,7 @@ const WorkspacePickerDialog: React.FC<{open: boolean, onClose: () => void}> = ({
             <DialogTitle sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
                 {t('workspace.sessions')}
                 <Tooltip title={t('workspace.refreshList')}>
-                    <IconButton size="small" onClick={fetchWsList} disabled={listLoading} sx={{ color: 'text.secondary' }}>
+                    <IconButton size="small" onClick={fetchWsList} disabled={listLoading} aria-label={t('workspace.refreshList')} sx={{ color: 'text.secondary' }}>
                         {listLoading ? <CircularProgress size={18} /> : <RefreshIcon fontSize="small" />}
                     </IconButton>
                 </Tooltip>
@@ -495,14 +493,12 @@ const ConfigDialog: React.FC = () => {
     const [paletteKey, setPaletteKey] = useState(
         (config.paletteKey && palettes[config.paletteKey]) ? config.paletteKey : defaultPaletteKey
     );
-    const [miniMode, setMiniMode] = useState(config.miniMode ?? false);
 
     const hasChanges = formulateTimeoutSeconds !== config.formulateTimeoutSeconds || 
                       defaultChartWidth !== config.defaultChartWidth ||
                       defaultChartHeight !== config.defaultChartHeight ||
                       maxStretchFactor !== config.maxStretchFactor ||
                       frontendRowLimit !== config.frontendRowLimit ||
-                      miniMode !== (config.miniMode ?? false) ||
                       paletteKey !== ((config.paletteKey && palettes[config.paletteKey]) ? config.paletteKey : defaultPaletteKey);
 
     return (
@@ -691,16 +687,6 @@ const ConfigDialog: React.FC = () => {
                                 </Typography>
                             </Box>
                         </Box>
-                        <Divider><Typography variant="caption">{t('config.agent', { defaultValue: 'Agent' })}</Typography></Divider>
-                        <Box>
-                            <FormControlLabel
-                                control={<Switch checked={miniMode} onChange={(e) => setMiniMode(e.target.checked)} size="small" />}
-                                label={t('config.miniMode', { defaultValue: 'Mini mode' })}
-                            />
-                            <Typography variant="caption" color="text.secondary" sx={{ display: 'block' }}>
-                                {t('config.miniModeHint', { defaultValue: 'Run the single-turn mini analyst: one visualize or explain per request, with one optional data inspection. Recommended for smaller or local models.' })}
-                            </Typography>
-                        </Box>
                     </Box>
                 </DialogContent>
                 <DialogActions sx={{'.MuiButton-root': {textTransform: 'none'}}}>
@@ -711,7 +697,6 @@ const ConfigDialog: React.FC = () => {
                         setMaxStretchFactor(2.0);
                         setFrontendRowLimit(rowLimitDefault);
                         setPaletteKey(defaultPaletteKey);
-                        setMiniMode(false);
                     }}>{t('session.resetToDefault')}</Button>
                     <Button onClick={() => setOpen(false)}>{t('app.cancel')}</Button>
                     <Button 
@@ -722,7 +707,7 @@ const ConfigDialog: React.FC = () => {
                             || isNaN(maxStretchFactor) || maxStretchFactor < 1 || maxStretchFactor > 5
                             || isNaN(frontendRowLimit) || frontendRowLimit < 100 || frontendRowLimit > rowLimitMax}
                         onClick={() => {
-                            dispatch(dfActions.setConfig({formulateTimeoutSeconds, defaultChartWidth, defaultChartHeight, maxStretchFactor, frontendRowLimit, paletteKey, miniMode}));
+                            dispatch(dfActions.setConfig({formulateTimeoutSeconds, defaultChartWidth, defaultChartHeight, maxStretchFactor, frontendRowLimit, paletteKey, miniMode: config.miniMode ?? false}));
                             setOpen(false);
                         }}
                     >
@@ -839,7 +824,7 @@ const AppShell: FC = () => {
                             <TopNavButton to="/gallery" label={t('appBar.gallery')} selected={isGalleryPage} />
                         </Box>
                         {tables.length === 0 && !activeWorkspace && (
-                            <Typography noWrap sx={{ position: 'absolute', left: '50%', transform: 'translateX(-50%)', fontWeight: 500, fontSize: '0.65rem', color: 'text.disabled', letterSpacing: '0.15em', textTransform: 'uppercase' }}>
+                            <Typography noWrap sx={{ position: 'absolute', left: '50%', transform: 'translateX(-50%)', fontWeight: 500, fontSize: '0.65rem', color: 'text.secondary', letterSpacing: '0.15em', textTransform: 'uppercase' }}>
                                 {t('appBar.microsoftResearch')}
                             </Typography>
                         )}
diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index 72b8f426..d80746f6 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -875,6 +875,9 @@
     "sortOldestFirst": "oldest first",
     "sortRecentlyModifiedFirst": "recently modified",
     "sortNameAsc": "name (a–z)",
+    "sortSessions": "Sort sessions",
+    "runCatalogSearch": "Search",
+    "clearCatalogSearch": "Clear search",
     "timeJustNow": "just now",
     "timeMinutes": "{{count}}m",
     "timeHours": "{{count}}h",
diff --git a/src/i18n/locales/en/messages.json b/src/i18n/locales/en/messages.json
index d5f89405..ac6a38f6 100644
--- a/src/i18n/locales/en/messages.json
+++ b/src/i18n/locales/en/messages.json
@@ -68,6 +68,7 @@
       "emptyDataframe": "Output DataFrame is empty (0 rows). Check filters or data loading.",
       "fieldsNotFound": "Chart encoding fields not found in output DataFrame: {{missing}}. Available columns: {{available}}",
       "llmApiError": "LLM API error",
+      "miniNoChart": "I couldn't build a working chart for this request{{error}}. Try rephrasing it, or switch to a more capable model for mini mode.",
       "llmEmptyResponse": "LLM returned empty response",
       "parseActionFailed": "Failed to parse agent action from LLM response",
       "unknownAction": "Unknown action: {{actionType}}",
diff --git a/src/i18n/locales/en/model.json b/src/i18n/locales/en/model.json
index 7771faf5..a1b62cc1 100644
--- a/src/i18n/locales/en/model.json
+++ b/src/i18n/locales/en/model.json
@@ -54,6 +54,9 @@
     "userManagedSection": "My models",
     "testing": "Testing…",
     "configured": "Configured",
-    "configuredMessage": "Server configured, click to verify connectivity"
+    "configuredMessage": "Server configured, click to verify connectivity",
+    "miniMode": "Mini agent mode",
+    "miniModeBadge": "mini mode",
+    "miniModeHint": "If your model is smaller or less capable (e.g. local or lightweight models), enable mini agent mode for a more reliable experience. It runs a single-turn analyst — one visualize or explain per request, with one optional data inspection."
   }
 }
diff --git a/src/i18n/locales/zh/common.json b/src/i18n/locales/zh/common.json
index 60335ce0..c516ca95 100644
--- a/src/i18n/locales/zh/common.json
+++ b/src/i18n/locales/zh/common.json
@@ -875,6 +875,9 @@
     "sortOldestFirst": "最早优先",
     "sortRecentlyModifiedFirst": "最近修改优先",
     "sortNameAsc": "名称 (a–z)",
+    "sortSessions": "排序会话",
+    "runCatalogSearch": "搜索",
+    "clearCatalogSearch": "清除搜索",
     "timeJustNow": "刚刚",
     "timeMinutes": "{{count}}分钟",
     "timeHours": "{{count}}小时",
diff --git a/src/i18n/locales/zh/messages.json b/src/i18n/locales/zh/messages.json
index 59185a44..47b76cf6 100644
--- a/src/i18n/locales/zh/messages.json
+++ b/src/i18n/locales/zh/messages.json
@@ -68,6 +68,7 @@
       "emptyDataframe": "输出数据为空（0 行），请检查筛选条件或数据加载。",
       "fieldsNotFound": "输出数据中未找到图表编码字段：{{missing}}。可用列：{{available}}",
       "llmApiError": "LLM API 错误",
+      "miniNoChart": "我无法为此请求生成可用的图表{{error}}。请尝试重新表述，或切换到能力更强的模型以使用迷你模式。",
       "llmEmptyResponse": "LLM 返回了空响应",
       "parseActionFailed": "无法从 LLM 响应中解析操作",
       "unknownAction": "未知操作：{{actionType}}",
diff --git a/src/i18n/locales/zh/model.json b/src/i18n/locales/zh/model.json
index 330891fd..f5fad583 100644
--- a/src/i18n/locales/zh/model.json
+++ b/src/i18n/locales/zh/model.json
@@ -54,6 +54,9 @@
     "userManagedSection": "我添加的模型",
     "testing": "测试中…",
     "configured": "已配置",
-    "configuredMessage": "服务端已配置，点击可验证连通性"
+    "configuredMessage": "服务端已配置，点击可验证连通性",
+    "miniMode": "迷你智能体模式",
+    "miniModeBadge": "迷你模式",
+    "miniModeHint": "如果所选模型能力较弱（例如本地或轻量级模型），请启用迷你智能体模式以获得更可靠的体验。该模式运行单轮分析：每次请求仅执行一次可视化或解释，并可选地进行一次数据检查。"
   }
 }
diff --git a/src/views/DataSourceSidebar.tsx b/src/views/DataSourceSidebar.tsx
index e9464806..f8d18759 100644
--- a/src/views/DataSourceSidebar.tsx
+++ b/src/views/DataSourceSidebar.tsx
@@ -1375,13 +1375,14 @@ const DataSourceSidebarPanel: React.FC<{
                                             size="small"
                                             onClick={runCatalogSearch}
                                             disabled={anyCatalogSearchLoading}
+                                            aria-label={t('sidebar.runCatalogSearch')}
                                             sx={{ p: 0.25 }}
                                         >
                                             {anyCatalogSearchLoading
                                                 ? <CircularProgress size={12} />
                                                 : <SearchIcon sx={{ fontSize: 14, color: 'text.disabled' }} />}
                                         </IconButton>
-                                        <IconButton size="small" onClick={clearCatalogSearch} sx={{ p: 0.25 }}>
+                                        <IconButton size="small" onClick={clearCatalogSearch} aria-label={t('sidebar.clearCatalogSearch')} sx={{ p: 0.25 }}>
                                             <ClearIcon sx={{ fontSize: 14, color: 'text.disabled' }} />
                                         </IconButton>
                                     </InputAdornment>
@@ -1698,6 +1699,7 @@ const DataSourceSidebarPanel: React.FC<{
                         value={sessionSort}
                         onChange={(e) => setSessionSort(e.target.value as SessionSortKey)}
                         disableUnderline
+                        inputProps={{ 'aria-label': t('sidebar.sortSessions') }}
                         IconComponent={(props) => (
                             <ExpandMoreIcon {...props} sx={{ fontSize: 14, color: 'text.disabled', right: 0 }} />
                         )}
diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx
index ccc23238..4f45f141 100644
--- a/src/views/ModelSelectionDialog.tsx
+++ b/src/views/ModelSelectionDialog.tsx
@@ -90,6 +90,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => {
     const models = useSelector((state: DataFormulatorState) => state.models);
     const selectedModelId = useSelector((state: DataFormulatorState) => state.selectedModelId);
     const testedModels = useSelector((state: DataFormulatorState) => state.testedModels);
+    const config = useSelector((state: DataFormulatorState) => state.config);
 
     const [modelDialogOpen, setModelDialogOpen] = useState<boolean>(false);
     const [showKeys, setShowKeys] = useState<boolean>(false);
@@ -552,6 +553,16 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => {
         <Tooltip title={t('model.selectModel')}>
             <Button sx={{fontSize: "inherit", textTransform: "none"}} variant="text" color={selectedReady ? "primary" : 'warning'} onClick={()=>{setModelDialogOpen(true)}}>
                 {selectedReady ? selectedModelName : t('model.selectModels')}
+                {selectedReady && (config.miniMode ?? false) && (
+                    <Tooltip title={t('model.miniModeHint')}>
+                        <Box
+                            component="span"
+                            sx={{ ml: 0.5, fontSize: '0.8em', fontWeight: 400, color: 'text.disabled', textTransform: 'none' }}
+                        >
+                            ({t('model.miniModeBadge')})
+                        </Box>
+                    </Tooltip>
+                )}
             </Button>
         </Tooltip>
         <Dialog 
@@ -585,7 +596,33 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => {
                     </Box>
                 </Box>
                 {modelTable}
-                
+
+                <Box sx={{
+                    mt: 2,
+                    pt: 2,
+                    borderTop: '1px solid',
+                    borderColor: 'divider',
+                }}>
+                    <FormControlLabel
+                        sx={{ ml: 0 }}
+                        control={
+                            <Switch
+                                size="small"
+                                checked={config.miniMode ?? false}
+                                onChange={(e) => dispatch(dfActions.setConfig({ ...config, miniMode: e.target.checked }))}
+                            />
+                        }
+                        label={
+                            <Typography variant="body2" sx={{ fontSize: '0.8rem', fontWeight: 600 }}>
+                                {t('model.miniMode')}
+                            </Typography>
+                        }
+                    />
+                    <Typography variant="caption" color="text.secondary" sx={{ display: 'block', mt: 0.5 }}>
+                        {t('model.miniModeHint')}
+                    </Typography>
+                </Box>
+
             </DialogContent>
             <DialogActions>
                 {!serverConfig.DISABLE_DISPLAY_KEYS && (
diff --git a/tests/backend/agents/test_mini_agent.py b/tests/backend/agents/test_mini_agent.py
index 8f4f0ec8..dd7d8924 100644
--- a/tests/backend/agents/test_mini_agent.py
+++ b/tests/backend/agents/test_mini_agent.py
@@ -325,6 +325,41 @@ def _viz_dispatch(*a, **k):
         comp = [e for e in events if e["type"] == "completion"]
         assert comp and comp[0]["status"] == "success"
 
+    def test_repair_can_inspect_before_refixing(self, monkeypatch):
+        # A repair turn is allowed to run an inspection script first (e.g. to
+        # discover the real columns) and only then emit the corrected chart.
+        def _viz_call(code):
+            return json.dumps({"tool": "visualize", "arguments": {
+                "code": code, "output_variable": "out",
+                "chart": {"chart_type": "Bar Chart"}}})
+
+        inspect_call = json.dumps({"tool": "execute_python_script",
+                                   "arguments": {"code": "print(df.columns)"}})
+        replies = [_viz_call("out=df['rate']"), inspect_call, _viz_call("out=df")]
+        agent = _bare_mini(allow_inspection=True)
+        agent.max_repair_attempts = 1
+        _prep_run(agent, replies, monkeypatch)
+
+        dispatched = []
+
+        def _viz_dispatch(*a, **k):
+            # Only the first visualize dispatch fails; later ones yield a chart.
+            dispatched.append(None)
+            if len(dispatched) == 1:
+                yield {"type": "error", "message": "KeyError rate"}
+                return "[OBSERVATION – Step 1 FAILED]\n\nError: KeyError - 'rate'"
+            yield _viz_result_event()
+            return "[OBSERVATION] Chart created."
+        agent._dispatch_skill_action = _viz_dispatch
+
+        events = list(agent.run([{"name": "t"}], "show rate"))
+        # The inspection tool must have been started at some point in the run.
+        assert any(e["type"] == "tool_start"
+                   and e.get("tool") == "execute_python_script" for e in events)
+        assert any(e["type"] == "result" for e in events)
+        completions = [e for e in events if e["type"] == "completion"]
+        assert completions and completions[0]["status"] == "success"
+
     def test_unrepairable_visualize_completes_without_chart(self, monkeypatch):
         agent = _bare_mini(allow_inspection=False)
         agent.max_repair_attempts = 0  # no repair budget
@@ -342,6 +377,23 @@ def _viz_fail(*a, **k):
         assert not any(e["type"] == "result" for e in events)
         comp = [e for e in events if e["type"] == "completion"]
         assert comp and comp[0]["status"] == "completed_no_viz"
+        # The run must not end silently: a failed chart surfaces an error event
+        # carrying the reason. In production the skill's own error event is
+        # dropped by the shell router, so run() re-surfaces it from the
+        # observation; here the message must reach the user with the cause.
+        errs = [e for e in events if e["type"] == "error"
+                and e.get("message_code") == "agent.miniNoChart"]
+        assert errs and "boom" in errs[0]["message"]
+
+    def test_empty_reply_is_not_a_silent_explain(self, monkeypatch):
+        # Two consecutive empty model replies (as a small model may produce) must
+        # still end in a completion whose summary is non-blank, user-visible text.
+        agent = _bare_mini(allow_inspection=False)
+        _prep_run(agent, [""] * 2, monkeypatch)  # empty reply, then empty again
+        completions = [e for e in agent.run([{"name": "t"}], "is it growing?")
+                       if e["type"] == "completion"]
+        assert completions and completions[0]["content"]["summary"].strip()
+
 
 
 # --------------------------------------------------------------------------
diff --git a/tests/backend/data/test_local_folder_loader.py b/tests/backend/data/test_local_folder_loader.py
index c6d50804..5758d408 100644
--- a/tests/backend/data/test_local_folder_loader.py
+++ b/tests/backend/data/test_local_folder_loader.py
@@ -215,6 +215,11 @@ def test_fetch_tsv(self, data_dir: Path) -> None:
         loader.test_connection()
         table = loader.fetch_data_as_arrow("data.tsv")
         assert table.num_rows == 2
+        # A TSV must split on tabs into separate columns, not collapse into one
+        # field like "id\tvalue" (regression: read_csv defaulted to a comma
+        # delimiter, so a tab-separated file became a single string column).
+        assert table.column_names == ["id", "value"]
+        assert table.column("value").to_pylist() == ["foo", "bar"]
 
     def test_fetch_parquet(self, data_dir: Path) -> None:
         loader = LocalFolderDataLoader({"root_dir": str(data_dir)})

From 19c511a576a3bcfd72ca2e0fc9acde7a88221796 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Tue, 23 Jun 2026 22:31:25 -0700
Subject: [PATCH 28/29] some accessibility fixes

---
 src/app/App.tsx                 | 2 +-
 src/i18n/locales/en/common.json | 3 +++
 src/i18n/locales/zh/common.json | 3 +++
 src/views/KnowledgePanel.tsx    | 1 +
 src/views/SessionDistill.tsx    | 2 +-
 src/views/VisualizationView.tsx | 2 +-
 6 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/app/App.tsx b/src/app/App.tsx
index 5a8007b4..aa30ba5e 100644
--- a/src/app/App.tsx
+++ b/src/app/App.tsx
@@ -324,7 +324,7 @@ const WorkspacePickerDialog: React.FC<{open: boolean, onClose: () => void}> = ({
             <DialogTitle sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
                 {t('workspace.sessions')}
                 <Tooltip title={t('workspace.refreshList')}>
-                    <IconButton size="small" onClick={fetchWsList} disabled={listLoading} aria-label={t('workspace.refreshList')} sx={{ color: 'text.secondary' }}>
+                    <IconButton size="small" onClick={fetchWsList} disabled={listLoading} aria-label={t('workspace.refreshList')} sx={{ color: 'text.secondary' }}>
                         {listLoading ? <CircularProgress size={18} /> : <RefreshIcon fontSize="small" />}
                     </IconButton>
                 </Tooltip>
diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index d80746f6..cfb5bdf0 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -964,6 +964,9 @@
     "replayBusy": "The agent is busy — wait for it to finish before replaying.",
     "replayNoData": "Load a dataset before replaying a workflow.",
     "replayStarted": "Replaying workflow on the current data…",
+    "deleteItem": "Delete",
+    "threadExpand": "Expand thread",
+    "threadCollapse": "Collapse thread",
     "replayPrompt": "Reproduce the following analysis workflow on the currently loaded data. Follow the steps in order, adapting any column references to the columns available in the current dataset. It's fine if the result isn't identical — reproduce the same overall analysis.\n\nBefore making large assumptions, check whether the current data can actually support the workflow. If there is a major discrepancy — e.g. a required field or measure is missing, the granularity or shape is very different, or a step has no sensible equivalent on this data — pause and ask me to confirm how to proceed (or briefly explain the mismatch and your proposed adaptation) instead of guessing. Minor differences (renamed columns, extra columns) can be adapted silently.\n\n{{content}}"
   }
 }
diff --git a/src/i18n/locales/zh/common.json b/src/i18n/locales/zh/common.json
index c516ca95..d382d2f3 100644
--- a/src/i18n/locales/zh/common.json
+++ b/src/i18n/locales/zh/common.json
@@ -964,6 +964,9 @@
     "replayBusy": "Agent 正忙——请等待其完成后再重放。",
     "replayNoData": "请先加载一个数据集，再重放工作流。",
     "replayStarted": "正在当前数据上重放工作流…",
+    "deleteItem": "删除",
+    "threadExpand": "展开线程",
+    "threadCollapse": "收起线程",
     "replayPrompt": "在当前已加载的数据上复现以下分析流程。按顺序执行各步骤，并将其中的列引用调整为当前数据集中可用的列。结果不必完全一致——复现同样的整体分析即可。\n\n在做出较大假设之前，请先确认当前数据是否真的能支撑该流程。如果存在重大差异——例如缺少必需的字段或度量、数据粒度或结构差异很大、或某个步骤在当前数据上没有合理的对应方式——请暂停并向我确认如何继续（或简要说明不匹配之处及你建议的调整方案），而不要凭空猜测。对于细微差异（列被重命名、存在额外的列）可以直接静默调整。\n\n{{content}}"
   }
 }
diff --git a/src/views/KnowledgePanel.tsx b/src/views/KnowledgePanel.tsx
index 9fb5c974..9920d2cc 100644
--- a/src/views/KnowledgePanel.tsx
+++ b/src/views/KnowledgePanel.tsx
@@ -313,6 +313,7 @@ export const KnowledgePanel: React.FC = () => {
                     <IconButton
                         className="item-actions"
                         size="small"
+                        aria-label={t('knowledge.deleteItem')}
                         onClick={(e) => { e.stopPropagation(); setDeleteTarget({ category, path: item.path, title: item.title }); }}
                         sx={{ p: 0.25, mt: 'auto', display: 'none', color: 'text.secondary', '&:hover': { color: 'error.main' } }}
                     >
diff --git a/src/views/SessionDistill.tsx b/src/views/SessionDistill.tsx
index fd9efeae..4d26c436 100644
--- a/src/views/SessionDistill.tsx
+++ b/src/views/SessionDistill.tsx
@@ -536,7 +536,7 @@ const SessionDistillFromPanel: React.FC<{
                                     '&:hover': { bgcolor: 'action.hover' },
                                 }}
                             >
-                                <IconButton size="small" sx={{ p: 0.125 }} tabIndex={-1}>
+                                <IconButton size="small" sx={{ p: 0.125 }} tabIndex={-1} aria-label={isOpen ? t('knowledge.threadCollapse') : t('knowledge.threadExpand')}>
                                     {isOpen
                                         ? <ExpandLessIcon sx={{ fontSize: 14 }} />
                                         : <ExpandMoreIcon sx={{ fontSize: 14 }} />}
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index 8ec0f443..fe3cfb46 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -940,7 +940,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                 <DialogContent sx={{ overflowY: 'auto', overflowX: 'hidden' }} dividers>
                     {hasConcepts && (
                         <Box sx={{ pb: 1.5, mb: 1.5, borderBottom: '1px solid', borderColor: 'divider' }}>
-                            <Typography sx={{ fontSize: 10, fontWeight: 700, letterSpacing: '0.08em', textTransform: 'uppercase', color: 'text.disabled', mb: 0.75 }}>
+                            <Typography sx={{ fontSize: 10, fontWeight: 700, letterSpacing: '0.08em', textTransform: 'uppercase', color: 'text.secondary', mb: 0.75 }}>
                                 {t('chart.derivedConcepts')}
                             </Typography>
                             <ConceptExplCards

From d40da0eeb5dfb1a4516c49bae412c08a92494e62 Mon Sep 17 00:00:00 2001
From: Chenglong Wang <chenglong.wang@microsoft.com>
Date: Wed, 24 Jun 2026 08:54:16 -0700
Subject: [PATCH 29/29] accessibility fix

---
 src/i18n/locales/en/common.json    | 3 ++-
 src/i18n/locales/zh/common.json    | 3 ++-
 src/views/AgentChatInput.tsx       | 1 +
 src/views/AgentPausePanel.tsx      | 2 ++
 src/views/ChartQuickConfig.tsx     | 1 +
 src/views/ChartVariantStrip.tsx    | 1 +
 src/views/DataFormulator.tsx       | 1 +
 src/views/KnowledgePanel.tsx       | 1 +
 src/views/ModelSelectionDialog.tsx | 1 +
 src/views/SelectableDataGrid.tsx   | 1 +
 src/views/SimpleChartRecBox.tsx    | 3 +++
 src/views/VisualizationView.tsx    | 5 +++--
 12 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/src/i18n/locales/en/common.json b/src/i18n/locales/en/common.json
index cfb5bdf0..46e310a2 100644
--- a/src/i18n/locales/en/common.json
+++ b/src/i18n/locales/en/common.json
@@ -728,7 +728,8 @@
     "sortNewestFirst": "newest first",
     "sortOldestFirst": "oldest first",
     "sortRecentlyModifiedFirst": "recently modified",
-    "sortNameAsc": "name (a–z)"
+    "sortNameAsc": "name (a–z)",
+    "sortSessions": "Sort sessions"
   },
   "supersetCatalog": {
     "title": "Superset Datasets",
diff --git a/src/i18n/locales/zh/common.json b/src/i18n/locales/zh/common.json
index d382d2f3..e08983fb 100644
--- a/src/i18n/locales/zh/common.json
+++ b/src/i18n/locales/zh/common.json
@@ -728,7 +728,8 @@
     "sortNewestFirst": "最新优先",
     "sortOldestFirst": "最早优先",
     "sortRecentlyModifiedFirst": "最近修改优先",
-    "sortNameAsc": "名称 (a–z)"
+    "sortNameAsc": "名称 (a–z)",
+    "sortSessions": "排序会话"
   },
   "supersetCatalog": {
     "title": "Superset 数据集",
diff --git a/src/views/AgentChatInput.tsx b/src/views/AgentChatInput.tsx
index 44ce111a..3fea763b 100644
--- a/src/views/AgentChatInput.tsx
+++ b/src/views/AgentChatInput.tsx
@@ -236,6 +236,7 @@ export const AgentChatInput: React.FC<AgentChatInputProps> = ({
         <Tooltip title={sendTooltip ?? t('dataLoading.sendTooltip')} placement="top">
             <span>
                 <IconButton size="small" onClick={onSend} disabled={!canSend}
+                    aria-label={sendTooltip ?? t('dataLoading.sendTooltip')}
                     sx={{
                         width: 28, height: 28,
                         bgcolor: canSend ? 'primary.main' : 'transparent',
diff --git a/src/views/AgentPausePanel.tsx b/src/views/AgentPausePanel.tsx
index 4ea481c6..3f8b85e6 100644
--- a/src/views/AgentPausePanel.tsx
+++ b/src/views/AgentPausePanel.tsx
@@ -423,6 +423,7 @@ export const ClarificationPanel: FC<ClarificationPanelProps> = ({
             <span>
                 <IconButton
                     size="small"
+                    aria-label={t('chartRec.submitClarification')}
                     disabled={!canSubmit}
                     onClick={handlePanelSubmit}
                     sx={{
@@ -618,6 +619,7 @@ export const ClarificationPanel: FC<ClarificationPanelProps> = ({
                         <span>
                             <IconButton
                                 size="small"
+                                aria-label={t('chartRec.submitClarification')}
                                 disabled={!canSubmit}
                                 onClick={handlePanelSubmit}
                                 sx={{
diff --git a/src/views/ChartQuickConfig.tsx b/src/views/ChartQuickConfig.tsx
index 37508811..892c1bad 100644
--- a/src/views/ChartQuickConfig.tsx
+++ b/src/views/ChartQuickConfig.tsx
@@ -332,6 +332,7 @@ export const ChartQuickConfig: FC<ChartQuickConfigProps> = function ({ chartId,
                 <span>
                     <IconButton
                         size="small"
+                        aria-label={t('deleteChart')}
                         disabled={deleteDisabled}
                         onClick={() => dispatch(dfActions.deleteChartById(chartId))}
                         sx={{ color: 'text.disabled','&:hover': { color: 'error.main', backgroundColor: 'rgba(211, 47, 47, 0.08)' } }}
diff --git a/src/views/ChartVariantStrip.tsx b/src/views/ChartVariantStrip.tsx
index 1cf4ab80..60b5891b 100644
--- a/src/views/ChartVariantStrip.tsx
+++ b/src/views/ChartVariantStrip.tsx
@@ -508,6 +508,7 @@ export const ChartVariantStrip: FC<ChartVariantStripProps> = function ({ chartId
                                     size="small"
                                     color="primary"
                                     sx={{ p: 0.5 }}
+                                    aria-label="Restyle"
                                     disabled={isRestyling || !restylePrompt.trim()}
                                     onClick={() => handleRestyleSubmit(restylePrompt)}
                                 >
diff --git a/src/views/DataFormulator.tsx b/src/views/DataFormulator.tsx
index 90547525..7b4ed2d9 100644
--- a/src/views/DataFormulator.tsx
+++ b/src/views/DataFormulator.tsx
@@ -791,6 +791,7 @@ export const DataFormulatorFC = ({ }) => {
                         value={wsSort}
                         onChange={(e) => setWsSort(e.target.value as typeof wsSort)}
                         disableUnderline
+                        inputProps={{ 'aria-label': t('workspace.sortSessions') }}
                         IconComponent={(props) => (
                             <ExpandMoreIcon {...props} sx={{ fontSize: 16, color: 'text.disabled', right: 0 }} />
                         )}
diff --git a/src/views/KnowledgePanel.tsx b/src/views/KnowledgePanel.tsx
index 9920d2cc..beb74dc4 100644
--- a/src/views/KnowledgePanel.tsx
+++ b/src/views/KnowledgePanel.tsx
@@ -297,6 +297,7 @@ export const KnowledgePanel: React.FC = () => {
                             <span>
                                 <IconButton
                                     size="small"
+                                    aria-label={hasTables ? t('knowledge.replayTooltip') : t('knowledge.replayNoData')}
                                     disabled={!hasTables}
                                     onClick={(e) => { e.stopPropagation(); handleReplay(item); }}
                                     sx={{
diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx
index 4f45f141..d067ac16 100644
--- a/src/views/ModelSelectionDialog.tsx
+++ b/src/views/ModelSelectionDialog.tsx
@@ -269,6 +269,7 @@ export const ModelSelectionButton: React.FC<{}> = ({ }) => {
             <Tooltip title={modelExists ? t('model.providerModelExists') : t('model.addAndTestModel')}>
                 <span>  
                     <IconButton color={modelExists ? 'error' : 'primary'}
+                        aria-label={modelExists ? t('model.providerModelExists') : t('model.addAndTestModel')}
                         disabled={!readyToTest}
                         size="small"
                         sx={{ cursor: modelExists ? 'help' : 'pointer', p: 0.25 }}
diff --git a/src/views/SelectableDataGrid.tsx b/src/views/SelectableDataGrid.tsx
index a400d4ab..4869ef77 100644
--- a/src/views/SelectableDataGrid.tsx
+++ b/src/views/SelectableDataGrid.tsx
@@ -696,6 +696,7 @@ export const SelectableDataGrid: React.FC<SelectableDataGridProps> = React.memo(
                             <IconButton 
                                 size="small" 
                                 color="primary" 
+                                aria-label={t('dataGrid.downloadAsCsv')}
                                 disabled={isDownloading}
                                 onClick={() => handleDownload('csv')}
                             >
diff --git a/src/views/SimpleChartRecBox.tsx b/src/views/SimpleChartRecBox.tsx
index 6d137b7b..97135c11 100644
--- a/src/views/SimpleChartRecBox.tsx
+++ b/src/views/SimpleChartRecBox.tsx
@@ -1883,6 +1883,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                                 <IconButton
                                     size="small"
                                     sx={{ p: 0.5, color: theme.palette.text.secondary }}
+                                    aria-label={t('chartRec.generateReport')}
                                     disabled={!focusedTableId || isChatFormulating || !!pendingClarification}
                                     onClick={() => submitChat(t('chartRec.reportPrompt'), undefined, t('chartRec.askedForReport'))}
                                 >
@@ -1895,6 +1896,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                                 <IconButton
                                     size="small"
                                     sx={{ p: 0.5, color: theme.palette.primary.main }}
+                                    aria-label={t('chartRec.getIdeaSuggestions')}
                                     disabled={!focusedTableId || isChatFormulating || !!pendingClarification}
                                     onClick={() => submitChat(t('chartRec.exploreIdeasPrompt'), undefined, t('chartRec.askedForRecommendations'))}
                                 >
@@ -1906,6 +1908,7 @@ export const SimpleChartRecBox: FC<{ onInputFocus?: () => void }> = function ({
                             <span>
                                 <IconButton
                                     size="small"
+                                    aria-label={t('chartRec.explore')}
                                     disabled={!canSend}
                                     onClick={() => {
                                         if (pendingClarification) {
diff --git a/src/views/VisualizationView.tsx b/src/views/VisualizationView.tsx
index fe3cfb46..698b5a72 100644
--- a/src/views/VisualizationView.tsx
+++ b/src/views/VisualizationView.tsx
@@ -754,6 +754,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
         <Tooltip key="vega-editor-tooltip" title={t('chart.openInVegaEditor')}>
             <span>
                 <IconButton key="vega-editor-btn" size="small" sx={actionBtnSx}
+                    aria-label={t('chart.openInVegaEditor')}
                     disabled={!renderedSpec || focusedChart.chartType === "Table" || focusedChart.chartType === "Auto"}
                     onClick={handleOpenInVegaEditor}>
                     <OpenInNewIcon sx={{ fontSize: 18 }} />
@@ -1026,7 +1027,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
     }} alignItems="center">
         <Tooltip key="zoom-out-tooltip" title={t('chart.zoomOut')}>
             <span>
-                <IconButton color="primary" size='small' disabled={localScaleFactor <= scaleMin} onClick={() => {
+                <IconButton color="primary" size='small' aria-label={t('chart.zoomOut')} disabled={localScaleFactor <= scaleMin} onClick={() => {
                     const next = Math.max(scaleMin, Math.round((localScaleFactor - 0.1) * 10) / 10);
                     setLocalScaleFactor(next);
                     persistScaleFactor(next);
@@ -1045,7 +1046,7 @@ export const ChartEditorFC: FC<{}> = function ChartEditorFC({}) {
                 }} />
         <Tooltip key="zoom-in-tooltip" title={t('chart.zoomIn')}>
             <span>
-                <IconButton color="primary" size='small' disabled={localScaleFactor >= scaleMax} onClick={() => {
+                <IconButton color="primary" size='small' aria-label={t('chart.zoomIn')} disabled={localScaleFactor >= scaleMax} onClick={() => {
                     const next = Math.min(scaleMax, Math.round((localScaleFactor + 0.1) * 10) / 10);
                     setLocalScaleFactor(next);
                     persistScaleFactor(next);