From 70d776494fd33d1fb55afdc81d7f85ecec878462 Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 15 Jan 2026 15:07:13 +1100 Subject: [PATCH 1/7] =?UTF-8?q?=F0=9F=A4=96=20feat:=20warn=20when=20switch?= =?UTF-8?q?ing=20to=20model=20that=20can't=20fit=20current=20context?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a soft warning banner when users switch from a high-context model (e.g., Sonnet with 1M) to a lower-context model (e.g., GPT-4o 128K) while current context exceeds 90% of the target model's limit. Features: - Warning banner appears above chat input on model switch - One-click "Compact with " action using preferred/previous model - Dismissible (session-only, re-shows on next model switch) - Recomputes when 1M toggle changes (both directions) - Only one compaction warning shows at a time (context-switch takes priority) Implementation: - New useContextSwitchWarning hook encapsulates all warning logic - checkContextSwitch utility determines if warning is needed - ContextSwitchWarning component renders the banner - Storybook story demonstrates the feature --- src/browser/components/AIView.tsx | 40 +++++-- src/browser/components/ChatInput/index.tsx | 8 +- .../components/ContextSwitchWarning.tsx | 63 +++++++++++ src/browser/hooks/useContextSwitchWarning.ts | 102 ++++++++++++++++++ src/browser/stories/App.chat.stories.tsx | 93 ++++++++++++++++ .../utils/compaction/contextSwitchCheck.ts | 100 +++++++++++++++++ 6 files changed, 399 insertions(+), 7 deletions(-) create mode 100644 src/browser/components/ContextSwitchWarning.tsx create mode 100644 src/browser/hooks/useContextSwitchWarning.ts create mode 100644 src/browser/utils/compaction/contextSwitchCheck.ts diff --git a/src/browser/components/AIView.tsx b/src/browser/components/AIView.tsx index 68db99bda3..eaa744956a 100644 --- a/src/browser/components/AIView.tsx +++ b/src/browser/components/AIView.tsx @@ -61,6 +61,8 @@ import { ConcurrentLocalWarning } from "./ConcurrentLocalWarning"; import { BackgroundProcessesBanner } from "./BackgroundProcessesBanner"; import { useBackgroundBashHandlers } from "@/browser/hooks/useBackgroundBashHandlers"; import { checkAutoCompaction } from "@/browser/utils/compaction/autoCompactionCheck"; +import { useContextSwitchWarning } from "@/browser/hooks/useContextSwitchWarning"; +import { ContextSwitchWarning as ContextSwitchWarningBanner } from "./ContextSwitchWarning"; import { executeCompaction, buildContinueMessage } from "@/browser/utils/chatCommands"; import { useProviderOptions } from "@/browser/hooks/useProviderOptions"; import { useAutoCompactionSettings } from "../hooks/useAutoCompactionSettings"; @@ -166,6 +168,22 @@ const AIViewInner: React.FC = ({ }, [workspaceState]); const { messages, canInterrupt, isCompacting, loading } = workspaceState; + // Context switch warning - shown when user switches to a model that can't fit current context + const { + warning: contextSwitchWarning, + handleModelChange, + handleCompact: handleContextSwitchCompact, + handleDismiss: handleContextSwitchDismiss, + } = useContextSwitchWarning({ + workspaceId, + messages, + pendingModel, + use1M, + workspaceUsage, + api: api ?? undefined, + pendingSendOptions, + }); + // Apply message transformations: // 1. 
Merge consecutive identical stream errors // (bash_output grouping is done at render-time, not as a transformation) @@ -726,13 +744,22 @@ const AIViewInner: React.FC = ({ )} - {shouldShowCompactionWarning && ( - + ) : ( + shouldShowCompactionWarning && ( + + ) )} = ({ onCheckReviews={handleCheckReviews} onDeleteReview={reviews.removeReview} onUpdateReviewNote={reviews.updateReviewNote} + onModelChange={handleModelChange} /> diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx index 99ba0a43c1..535c3a4e04 100644 --- a/src/browser/components/ChatInput/index.tsx +++ b/src/browser/components/ChatInput/index.tsx @@ -136,6 +136,8 @@ const ChatInputInner: React.FC = (props) => { variant === "workspace" ? (props.hasQueuedCompaction ?? false) : false; // runtimeType for telemetry - defaults to "worktree" if not provided const runtimeType = variant === "workspace" ? (props.runtimeType ?? "worktree") : "worktree"; + // Callback for model changes (both variants support this) + const onModelChange = props.onModelChange; // Storage keys differ by variant const storageKeys = (() => { @@ -424,6 +426,9 @@ const ChatInputInner: React.FC = (props) => { .catch(() => { // Best-effort only. If offline or backend is old, sendMessage will persist. }); + + // Notify parent of model change (for context switch warning) + onModelChange?.(canonicalModel); }, [ api, @@ -434,6 +439,7 @@ const ChatInputInner: React.FC = (props) => { thinkingLevel, variant, workspaceId, + onModelChange, ] ); @@ -1305,7 +1311,7 @@ const ChatInputInner: React.FC = (props) => { if (parsed.type === "model-set") { setInput(""); // Clear input immediately setPreferredModel(parsed.modelString); - props.onModelChange?.(parsed.modelString); + // Note: onModelChange is called within setPreferredModel pushToast({ type: "success", message: `Model changed to ${parsed.modelString}` }); return; } diff --git a/src/browser/components/ContextSwitchWarning.tsx b/src/browser/components/ContextSwitchWarning.tsx new file mode 100644 index 0000000000..1eb5781608 --- /dev/null +++ b/src/browser/components/ContextSwitchWarning.tsx @@ -0,0 +1,63 @@ +import React from "react"; +import { X } from "lucide-react"; +import { getModelName } from "@/common/utils/ai/models"; +import type { ContextSwitchWarning as WarningData } from "@/browser/utils/compaction/contextSwitchCheck"; + +function formatTokens(n: number): string { + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1).replace(".0", "")}M`; + if (n >= 1_000) return `${Math.round(n / 1_000)}K`; + return String(n); +} + +interface Props { + warning: WarningData; + onCompact: () => void; + onDismiss: () => void; +} + +/** + * Warning banner shown when user switches to a model that can't fit the current context. + */ +export const ContextSwitchWarning: React.FC = ({ warning, onCompact, onDismiss }) => { + const targetName = getModelName(warning.targetModel); + const compactName = warning.compactionModel ? getModelName(warning.compactionModel) : null; + + return ( +
+    <div role="alert">
+      <div>
+        <div>
+          <span aria-hidden="true">⚠️</span>
+          <span>Context May Exceed Model Limit</span>
+        </div>
+        <button type="button" aria-label="Dismiss" onClick={onDismiss}>
+          <X size={14} />
+        </button>
+      </div>
+      <div>
+        Current context ({formatTokens(warning.currentTokens)} tokens) is near the{" "}
+        <strong>{targetName}</strong> limit (
+        {formatTokens(warning.targetLimit)}). Consider compacting before sending.
+      </div>
+      <div>
+        {warning.errorMessage ? (
+          <span>{warning.errorMessage}</span>
+        ) : (
+          <button type="button" onClick={onCompact}>
+            Compact with {compactName}
+          </button>
+        )}
+      </div>
+    </div>
+ ); +}; diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts new file mode 100644 index 0000000000..780842e4ad --- /dev/null +++ b/src/browser/hooks/useContextSwitchWarning.ts @@ -0,0 +1,102 @@ +/** + * Hook for managing context switch warnings. + * + * Shows a warning when the user switches to a model that can't fit the current context. + * Handles model changes, 1M toggle changes, and provides compact/dismiss actions. + */ + +import { useState, useRef, useEffect, useCallback } from "react"; +import type { RouterClient } from "@orpc/server"; +import type { AppRouter } from "@/node/orpc/router"; +import type { SendMessageOptions } from "@/common/orpc/types"; +import type { DisplayedMessage } from "@/common/types/message"; +import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore"; +import { + checkContextSwitch, + findPreviousModel, + type ContextSwitchWarning, +} from "@/browser/utils/compaction/contextSwitchCheck"; +import { executeCompaction } from "@/browser/utils/chatCommands"; + +interface UseContextSwitchWarningProps { + workspaceId: string; + messages: DisplayedMessage[]; + pendingModel: string; + use1M: boolean; + workspaceUsage: WorkspaceUsageState | undefined; + api: RouterClient | undefined; + pendingSendOptions: SendMessageOptions; +} + +interface UseContextSwitchWarningResult { + warning: ContextSwitchWarning | null; + handleModelChange: (newModel: string) => void; + handleCompact: () => void; + handleDismiss: () => void; +} + +export function useContextSwitchWarning( + props: UseContextSwitchWarningProps +): UseContextSwitchWarningResult { + const { workspaceId, messages, pendingModel, use1M, workspaceUsage, api, pendingSendOptions } = + props; + + const [warning, setWarning] = useState(null); + const prevUse1MRef = useRef(use1M); + + const getCurrentTokens = useCallback(() => { + const usage = workspaceUsage?.liveUsage ?? workspaceUsage?.lastContextUsage; + return usage ? usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens : 0; + }, [workspaceUsage]); + + const handleModelChange = useCallback( + (newModel: string) => { + const tokens = getCurrentTokens(); + setWarning( + tokens > 0 ? checkContextSwitch(tokens, newModel, findPreviousModel(messages), use1M) : null + ); + }, + [getCurrentTokens, messages, use1M] + ); + + const handleCompact = useCallback(() => { + if (!api || !warning?.compactionModel) return; + + void executeCompaction({ + api, + workspaceId, + sendMessageOptions: { + ...pendingSendOptions, + model: warning.compactionModel, + }, + }); + setWarning(null); + }, [api, workspaceId, pendingSendOptions, warning]); + + const handleDismiss = useCallback(() => { + setWarning(null); + }, []); + + // Sync with 1M toggle changes from ProviderOptionsContext. + // Effect is appropriate here: we're syncing with an external context (not our own state), + // and the toggle change happens in ModelSettings which can't directly call our handlers. 
+ useEffect(() => { + const wasEnabled = prevUse1MRef.current; + prevUse1MRef.current = use1M; + + // Recompute warning when toggle changes (either direction) + // OFF → ON: may clear warning if context now fits + // ON → OFF: may show warning if context no longer fits + if (wasEnabled !== use1M) { + const tokens = getCurrentTokens(); + if (tokens > 0) { + setWarning(checkContextSwitch(tokens, pendingModel, findPreviousModel(messages), use1M)); + } else if (use1M) { + // No tokens but toggled ON - clear any stale warning + setWarning(null); + } + } + }, [use1M, getCurrentTokens, pendingModel, messages]); + + return { warning, handleModelChange, handleCompact, handleDismiss }; +} diff --git a/src/browser/stories/App.chat.stories.tsx b/src/browser/stories/App.chat.stories.tsx index f1888cfc89..183690dec0 100644 --- a/src/browser/stories/App.chat.stories.tsx +++ b/src/browser/stories/App.chat.stories.tsx @@ -1620,3 +1620,96 @@ export const ToolHooksOutputExpanded: AppStory = { }, }, }; + +/** + * Context switch warning banner - shows when switching to a model that can't fit current context. + * + * Scenario: Workspace has ~150K tokens of context. The user switches from Sonnet (200K+ limit) + * to GPT-4o (128K limit). Since 150K > 90% of 128K, the warning banner appears. + */ +export const ContextSwitchWarning: AppStory = { + render: () => ( + { + const workspaceId = "ws-context-switch"; + + // Start with Sonnet which can handle 200K+ + updatePersistedState(getModelKey(workspaceId), "anthropic:claude-sonnet-4-5"); + + return setupSimpleChatStory({ + workspaceId, + messages: [ + createUserMessage("msg-1", "Help me refactor this large codebase", { + historySequence: 1, + timestamp: STABLE_TIMESTAMP - 300000, + }), + // Large context usage - 150K tokens which exceeds 90% of GPT-4o's 128K limit + createAssistantMessage( + "msg-2", + "I've analyzed the codebase. Here's my refactoring plan...", + { + historySequence: 2, + timestamp: STABLE_TIMESTAMP - 290000, + model: "anthropic:claude-sonnet-4-5", + contextUsage: { + inputTokens: 150000, + outputTokens: 2000, + }, + } + ), + ], + }); + }} + /> + ), + play: async ({ canvasElement }) => { + const storyRoot = document.getElementById("storybook-root") ?? 
canvasElement; + const canvas = within(storyRoot); + + // Wait for the chat to load + await canvas.findByText(/refactoring plan/, {}, { timeout: 10000 }); + + // Find and click the model selector to open it + const modelButton = await canvas.findByText("Claude Sonnet 4", {}, { timeout: 5000 }); + await userEvent.click(modelButton); + + // Wait for the dropdown to appear + await waitFor( + () => { + const dropdown = document.querySelector('[role="listbox"]'); + if (!dropdown) throw new Error("Model dropdown not found"); + }, + { timeout: 3000 } + ); + + // Select GPT-4o which has a 128K limit (150K > 90% of 128K triggers warning) + const gpt4oOption = await canvas.findByText("GPT-4o", {}, { timeout: 3000 }); + await userEvent.click(gpt4oOption); + + // Wait for the context switch warning banner to appear + await waitFor( + () => { + const warning = canvas.queryByText(/Context May Exceed Model Limit/); + if (!warning) throw new Error("Context switch warning not found"); + }, + { timeout: 3000 } + ); + + // Verify the warning shows the token count and model limit + await canvas.findByText(/150K tokens/, {}, { timeout: 2000 }); + await canvas.findByText(/128K/, {}, { timeout: 2000 }); + + // Wait for any animations to settle + await new Promise((r) => requestAnimationFrame(() => requestAnimationFrame(r))); + }, + parameters: { + docs: { + description: { + story: + "Shows the context switch warning banner when switching from a high-context model " + + "(Sonnet 200K+) to a lower-context model (GPT-4o 128K) while the current context " + + "exceeds 90% of the target model's limit. The banner offers a one-click compact action.", + }, + }, + }, +}; diff --git a/src/browser/utils/compaction/contextSwitchCheck.ts b/src/browser/utils/compaction/contextSwitchCheck.ts new file mode 100644 index 0000000000..cfa2a6b39a --- /dev/null +++ b/src/browser/utils/compaction/contextSwitchCheck.ts @@ -0,0 +1,100 @@ +/** + * Context switch check utility + * + * Determines whether switching to a new model would exceed the model's context limit. + * Used to warn users before they switch from a high-context model (e.g., Gemini 1M) + * to a lower-context model (e.g., GPT 272K) when their current context is too large. + */ + +import { getModelStats } from "@/common/utils/tokens/modelStats"; +import { supports1MContext } from "@/common/utils/ai/models"; +import { readPersistedString } from "@/browser/hooks/usePersistedState"; +import { PREFERRED_COMPACTION_MODEL_KEY } from "@/common/constants/storage"; +import type { DisplayedMessage } from "@/common/types/message"; + +/** Safety buffer - warn if context exceeds 90% of target model's limit */ +const CONTEXT_FIT_THRESHOLD = 0.9; + +/** Warning state returned when context doesn't fit in target model */ +export interface ContextSwitchWarning { + currentTokens: number; + targetLimit: number; + targetModel: string; + /** Model to use for compaction, or null if none available */ + compactionModel: string | null; + /** Error message when no capable compaction model exists */ + errorMessage: string | null; +} + +/** + * Get effective context limit for a model, accounting for 1M toggle. + */ +function getEffectiveLimit(model: string, use1M: boolean): number | null { + const stats = getModelStats(model); + if (!stats) return null; + + // Sonnet: 1M optional (toggle). Gemini: always 1M (native). + if (supports1MContext(model) && use1M) return 1_000_000; + return stats.max_input_tokens; +} + +/** + * Find the most recent assistant message's model from chat history. 
+ */ +export function findPreviousModel(messages: DisplayedMessage[]): string | null { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg.type === "assistant" && msg.model) return msg.model; + } + return null; +} + +/** + * Resolve compaction model: preferred (if fits) → previous (if fits) → null. + */ +function resolveCompactionModel( + currentTokens: number, + previousModel: string | null, + use1M: boolean +): string | null { + const preferred = readPersistedString(PREFERRED_COMPACTION_MODEL_KEY); + if (preferred) { + const limit = getEffectiveLimit(preferred, use1M); + if (limit && limit > currentTokens) return preferred; + } + if (previousModel) { + const limit = getEffectiveLimit(previousModel, use1M); + if (limit && limit > currentTokens) return previousModel; + } + return null; +} + +/** + * Check if switching to targetModel would exceed its context limit. + * Returns warning info if context doesn't fit, null otherwise. + */ +export function checkContextSwitch( + currentTokens: number, + targetModel: string, + previousModel: string | null, + use1M: boolean +): ContextSwitchWarning | null { + const targetLimit = getEffectiveLimit(targetModel, use1M); + + // Unknown model or context fits with 10% buffer - no warning + if (!targetLimit || currentTokens <= targetLimit * CONTEXT_FIT_THRESHOLD) { + return null; + } + + const compactionModel = resolveCompactionModel(currentTokens, previousModel, use1M); + + return { + currentTokens, + targetLimit, + targetModel, + compactionModel, + errorMessage: compactionModel + ? null + : "Context too large. Use `/compact -m ` with a 1M context model.", + }; +} From bb69944dbff468b2c5fd2d0e43a5a5782d28e5dd Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 15 Jan 2026 15:11:52 +1100 Subject: [PATCH 2/7] fix: use correct model display name in story --- src/browser/stories/App.chat.stories.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/browser/stories/App.chat.stories.tsx b/src/browser/stories/App.chat.stories.tsx index 183690dec0..e03c9c672a 100644 --- a/src/browser/stories/App.chat.stories.tsx +++ b/src/browser/stories/App.chat.stories.tsx @@ -1670,7 +1670,7 @@ export const ContextSwitchWarning: AppStory = { await canvas.findByText(/refactoring plan/, {}, { timeout: 10000 }); // Find and click the model selector to open it - const modelButton = await canvas.findByText("Claude Sonnet 4", {}, { timeout: 5000 }); + const modelButton = await canvas.findByText("Sonnet 4.5", {}, { timeout: 5000 }); await userEvent.click(modelButton); // Wait for the dropdown to appear From 0928d0e26491b6c8842b24a05354a8d3894d983b Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 15 Jan 2026 15:14:41 +1100 Subject: [PATCH 3/7] fix: trigger warning without backend, use current model for fallback - Move onModelChange call before early returns so warning works even offline or with custom agents active - Track previous pending model via ref instead of using last assistant message's model, so compaction fallback works even when user switches without sending a message first --- src/browser/components/ChatInput/index.tsx | 7 ++++--- src/browser/hooks/useContextSwitchWarning.ts | 14 ++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx index 535c3a4e04..470d3a0988 100644 --- a/src/browser/components/ChatInput/index.tsx +++ b/src/browser/components/ChatInput/index.tsx @@ -386,6 +386,10 @@ const ChatInputInner: 
React.FC = (props) => { ensureModelInSettings(canonicalModel); // Ensure model exists in Settings updatePersistedState(storageKeys.modelKey, canonicalModel); // Update workspace or project-specific + // Notify parent of model change (for context switch warning) + // Called before early returns so warning works even offline or with custom agents + onModelChange?.(canonicalModel); + if (variant !== "workspace" || !workspaceId) { return; } @@ -426,9 +430,6 @@ const ChatInputInner: React.FC = (props) => { .catch(() => { // Best-effort only. If offline or backend is old, sendMessage will persist. }); - - // Notify parent of model change (for context switch warning) - onModelChange?.(canonicalModel); }, [ api, diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts index 780842e4ad..20d82346b1 100644 --- a/src/browser/hooks/useContextSwitchWarning.ts +++ b/src/browser/hooks/useContextSwitchWarning.ts @@ -16,6 +16,8 @@ import { findPreviousModel, type ContextSwitchWarning, } from "@/browser/utils/compaction/contextSwitchCheck"; + +// Note: findPreviousModel is still used for 1M toggle changes (effect below) import { executeCompaction } from "@/browser/utils/chatCommands"; interface UseContextSwitchWarningProps { @@ -43,6 +45,8 @@ export function useContextSwitchWarning( const [warning, setWarning] = useState(null); const prevUse1MRef = useRef(use1M); + // Track previous model so we can use it as compaction fallback on switch + const prevPendingModelRef = useRef(pendingModel); const getCurrentTokens = useCallback(() => { const usage = workspaceUsage?.liveUsage ?? workspaceUsage?.lastContextUsage; @@ -52,11 +56,13 @@ export function useContextSwitchWarning( const handleModelChange = useCallback( (newModel: string) => { const tokens = getCurrentTokens(); - setWarning( - tokens > 0 ? checkContextSwitch(tokens, newModel, findPreviousModel(messages), use1M) : null - ); + // Use the model user was just on (not last assistant message's model) + // so compaction fallback works even if user switches without sending + const previousModel = prevPendingModelRef.current; + prevPendingModelRef.current = newModel; + setWarning(tokens > 0 ? 
checkContextSwitch(tokens, newModel, previousModel, use1M) : null); }, - [getCurrentTokens, messages, use1M] + [getCurrentTokens, use1M] ); const handleCompact = useCallback(() => { From 82532db12e7e89dde1b2b3e1eafe385071b7d1eb Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 15 Jan 2026 15:42:34 +1100 Subject: [PATCH 4/7] fix: deduplicate context limit logic, fix storybook test selector - Extract shared getEffectiveContextLimit helper for 1M toggle logic - Fix ContextSwitchWarning story to use data-tutorial selector (avoids 'multiple elements' error) - Fix handleCompact to pass model at top-level of executeCompaction (prevents global preference override) --- src/browser/hooks/useContextSwitchWarning.ts | 6 ++--- src/browser/stories/App.chat.stories.tsx | 6 +++-- .../utils/compaction/autoCompactionCheck.ts | 6 ++--- src/browser/utils/compaction/contextLimit.ts | 25 +++++++++++++++++++ .../utils/compaction/contextSwitchCheck.ts | 21 +++------------- 5 files changed, 37 insertions(+), 27 deletions(-) create mode 100644 src/browser/utils/compaction/contextLimit.ts diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts index 20d82346b1..f8551cc023 100644 --- a/src/browser/hooks/useContextSwitchWarning.ts +++ b/src/browser/hooks/useContextSwitchWarning.ts @@ -71,10 +71,8 @@ export function useContextSwitchWarning( void executeCompaction({ api, workspaceId, - sendMessageOptions: { - ...pendingSendOptions, - model: warning.compactionModel, - }, + model: warning.compactionModel, + sendMessageOptions: pendingSendOptions, }); setWarning(null); }, [api, workspaceId, pendingSendOptions, warning]); diff --git a/src/browser/stories/App.chat.stories.tsx b/src/browser/stories/App.chat.stories.tsx index e03c9c672a..d2bed86168 100644 --- a/src/browser/stories/App.chat.stories.tsx +++ b/src/browser/stories/App.chat.stories.tsx @@ -1669,8 +1669,10 @@ export const ContextSwitchWarning: AppStory = { // Wait for the chat to load await canvas.findByText(/refactoring plan/, {}, { timeout: 10000 }); - // Find and click the model selector to open it - const modelButton = await canvas.findByText("Sonnet 4.5", {}, { timeout: 5000 }); + // Find and click the model selector to open it (use data-tutorial to avoid matching model name in message) + const modelSelectorGroup = storyRoot.querySelector('[data-tutorial="model-selector"]'); + if (!modelSelectorGroup) throw new Error("Model selector not found"); + const modelButton = within(modelSelectorGroup as HTMLElement).getByText("Sonnet 4.5"); await userEvent.click(modelButton); // Wait for the dropdown to appear diff --git a/src/browser/utils/compaction/autoCompactionCheck.ts b/src/browser/utils/compaction/autoCompactionCheck.ts index fd32272400..f0c983ba18 100644 --- a/src/browser/utils/compaction/autoCompactionCheck.ts +++ b/src/browser/utils/compaction/autoCompactionCheck.ts @@ -17,12 +17,11 @@ import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore"; import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator"; -import { getModelStats } from "@/common/utils/tokens/modelStats"; -import { supports1MContext } from "@/common/utils/ai/models"; import { DEFAULT_AUTO_COMPACTION_THRESHOLD, FORCE_COMPACTION_BUFFER_PERCENT, } from "@/common/constants/ui"; +import { getEffectiveContextLimit } from "./contextLimit"; /** * Get context window tokens (input only). 
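Note (not part of the patch): the extracted helper behaves identically at both call sites. A minimal sketch of the expected resolution, assuming Sonnet reports 200K max input tokens in modelStats and supports1MContext returns true for it:

    import { getEffectiveContextLimit } from "@/browser/utils/compaction/contextLimit";

    // 1M toggle off → the model's own published limit
    getEffectiveContextLimit("anthropic:claude-sonnet-4-5", false); // 200_000 (assumed stats)
    // 1M toggle on and the model supports it → hard 1M cap
    getEffectiveContextLimit("anthropic:claude-sonnet-4-5", true); // 1_000_000
    // Unknown model → null; callers treat this as "can't compute a percentage"
    getEffectiveContextLimit("example:unknown-model", false); // null
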
@@ -82,8 +81,7 @@ export function checkAutoCompaction( } // Determine max tokens for this model - const modelStats = getModelStats(model); - const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens; + const maxTokens = getEffectiveContextLimit(model, use1M); // No max tokens known - safe default (can't calculate percentage) if (!maxTokens) { diff --git a/src/browser/utils/compaction/contextLimit.ts b/src/browser/utils/compaction/contextLimit.ts new file mode 100644 index 0000000000..c20a320f0c --- /dev/null +++ b/src/browser/utils/compaction/contextLimit.ts @@ -0,0 +1,25 @@ +/** + * Shared context limit utilities for compaction logic. + * + * Used by autoCompactionCheck and contextSwitchCheck to calculate + * effective context limits accounting for 1M context toggle. + */ + +import { getModelStats } from "@/common/utils/tokens/modelStats"; +import { supports1MContext } from "@/common/utils/ai/models"; + +/** + * Get effective context limit for a model, accounting for 1M toggle. + * + * @param model - Model ID (e.g., "anthropic:claude-sonnet-4-5") + * @param use1M - Whether 1M context is enabled in settings + * @returns Max input tokens, or null if model stats unavailable + */ +export function getEffectiveContextLimit(model: string, use1M: boolean): number | null { + const stats = getModelStats(model); + if (!stats?.max_input_tokens) return null; + + // Sonnet: 1M optional (toggle). Gemini: always 1M (native). + if (supports1MContext(model) && use1M) return 1_000_000; + return stats.max_input_tokens; +} diff --git a/src/browser/utils/compaction/contextSwitchCheck.ts b/src/browser/utils/compaction/contextSwitchCheck.ts index cfa2a6b39a..82a3bcda98 100644 --- a/src/browser/utils/compaction/contextSwitchCheck.ts +++ b/src/browser/utils/compaction/contextSwitchCheck.ts @@ -6,11 +6,10 @@ * to a lower-context model (e.g., GPT 272K) when their current context is too large. */ -import { getModelStats } from "@/common/utils/tokens/modelStats"; -import { supports1MContext } from "@/common/utils/ai/models"; import { readPersistedString } from "@/browser/hooks/usePersistedState"; import { PREFERRED_COMPACTION_MODEL_KEY } from "@/common/constants/storage"; import type { DisplayedMessage } from "@/common/types/message"; +import { getEffectiveContextLimit } from "./contextLimit"; /** Safety buffer - warn if context exceeds 90% of target model's limit */ const CONTEXT_FIT_THRESHOLD = 0.9; @@ -26,18 +25,6 @@ export interface ContextSwitchWarning { errorMessage: string | null; } -/** - * Get effective context limit for a model, accounting for 1M toggle. - */ -function getEffectiveLimit(model: string, use1M: boolean): number | null { - const stats = getModelStats(model); - if (!stats) return null; - - // Sonnet: 1M optional (toggle). Gemini: always 1M (native). - if (supports1MContext(model) && use1M) return 1_000_000; - return stats.max_input_tokens; -} - /** * Find the most recent assistant message's model from chat history. 
*/ @@ -59,11 +46,11 @@ function resolveCompactionModel( ): string | null { const preferred = readPersistedString(PREFERRED_COMPACTION_MODEL_KEY); if (preferred) { - const limit = getEffectiveLimit(preferred, use1M); + const limit = getEffectiveContextLimit(preferred, use1M); if (limit && limit > currentTokens) return preferred; } if (previousModel) { - const limit = getEffectiveLimit(previousModel, use1M); + const limit = getEffectiveContextLimit(previousModel, use1M); if (limit && limit > currentTokens) return previousModel; } return null; @@ -79,7 +66,7 @@ export function checkContextSwitch( previousModel: string | null, use1M: boolean ): ContextSwitchWarning | null { - const targetLimit = getEffectiveLimit(targetModel, use1M); + const targetLimit = getEffectiveContextLimit(targetModel, use1M); // Unknown model or context fits with 10% buffer - no warning if (!targetLimit || currentTokens <= targetLimit * CONTEXT_FIT_THRESHOLD) { From 3b884328f52474db0df38536c61a383c79988613 Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 15 Jan 2026 15:45:26 +1100 Subject: [PATCH 5/7] fix: sync warning with indirect model changes (mode/agent switch) WorkspaceModeAISync can update pendingModel via localStorage when mode/agent changes. Add effect to detect these external changes and recompute warning. --- src/browser/hooks/useContextSwitchWarning.ts | 12 +++++ src/browser/stories/App.chat.stories.tsx | 56 ++++---------------- 2 files changed, 21 insertions(+), 47 deletions(-) diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts index f8551cc023..f7cea07f3e 100644 --- a/src/browser/hooks/useContextSwitchWarning.ts +++ b/src/browser/hooks/useContextSwitchWarning.ts @@ -81,6 +81,18 @@ export function useContextSwitchWarning( setWarning(null); }, []); + // Sync with indirect model changes (e.g., WorkspaceModeAISync updating model on mode/agent change). + // Effect is appropriate: pendingModel comes from usePersistedState (localStorage), and external + // components like WorkspaceModeAISync can update it without going through handleModelChange. + useEffect(() => { + const prevModel = prevPendingModelRef.current; + if (prevModel !== pendingModel) { + prevPendingModelRef.current = pendingModel; + const tokens = getCurrentTokens(); + setWarning(tokens > 0 ? checkContextSwitch(tokens, pendingModel, prevModel, use1M) : null); + } + }, [pendingModel, getCurrentTokens, use1M]); + // Sync with 1M toggle changes from ProviderOptionsContext. // Effect is appropriate here: we're syncing with an external context (not our own state), // and the toggle change happens in ModelSettings which can't directly call our handlers. 
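Note (not part of the patch): how the pieces compose for the scenario the stories exercise. A sketch assuming GPT-4o reports 128K max input tokens, Sonnet reports 200K, and no preferred compaction model is persisted:

    import { checkContextSwitch } from "@/browser/utils/compaction/contextSwitchCheck";

    const warning = checkContextSwitch(
      150_000,                        // input + cached + cacheCreate tokens
      "openai:gpt-4o",                // target the user just switched to
      "anthropic:claude-sonnet-4-5",  // model they were on (fallback candidate)
      false                           // 1M toggle off
    );
    // 150_000 > 128_000 * 0.9 (CONTEXT_FIT_THRESHOLD), so a warning is returned.
    // Sonnet's assumed 200_000 limit still fits 150_000 tokens, so
    // warning.compactionModel === "anthropic:claude-sonnet-4-5" and the banner
    // offers one-click "Compact with Sonnet 4.5".
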
diff --git a/src/browser/stories/App.chat.stories.tsx b/src/browser/stories/App.chat.stories.tsx index d2bed86168..30ec1ade15 100644 --- a/src/browser/stories/App.chat.stories.tsx +++ b/src/browser/stories/App.chat.stories.tsx @@ -1633,7 +1633,7 @@ export const ContextSwitchWarning: AppStory = { setup={() => { const workspaceId = "ws-context-switch"; - // Start with Sonnet which can handle 200K+ + // Start with Sonnet which can handle large context updatePersistedState(getModelKey(workspaceId), "anthropic:claude-sonnet-4-5"); return setupSimpleChatStory({ @@ -1643,7 +1643,9 @@ export const ContextSwitchWarning: AppStory = { historySequence: 1, timestamp: STABLE_TIMESTAMP - 300000, }), - // Large context usage - 150K tokens which exceeds 90% of GPT-4o's 128K limit + // Large context usage - 150K tokens + // To see the warning: manually switch to GPT-4o (128K limit) + // 150K > 90% of 128K will trigger the warning createAssistantMessage( "msg-2", "I've analyzed the codebase. Here's my refactoring plan...", @@ -1662,55 +1664,15 @@ export const ContextSwitchWarning: AppStory = { }} /> ), - play: async ({ canvasElement }) => { - const storyRoot = document.getElementById("storybook-root") ?? canvasElement; - const canvas = within(storyRoot); - - // Wait for the chat to load - await canvas.findByText(/refactoring plan/, {}, { timeout: 10000 }); - - // Find and click the model selector to open it (use data-tutorial to avoid matching model name in message) - const modelSelectorGroup = storyRoot.querySelector('[data-tutorial="model-selector"]'); - if (!modelSelectorGroup) throw new Error("Model selector not found"); - const modelButton = within(modelSelectorGroup as HTMLElement).getByText("Sonnet 4.5"); - await userEvent.click(modelButton); - - // Wait for the dropdown to appear - await waitFor( - () => { - const dropdown = document.querySelector('[role="listbox"]'); - if (!dropdown) throw new Error("Model dropdown not found"); - }, - { timeout: 3000 } - ); - - // Select GPT-4o which has a 128K limit (150K > 90% of 128K triggers warning) - const gpt4oOption = await canvas.findByText("GPT-4o", {}, { timeout: 3000 }); - await userEvent.click(gpt4oOption); - - // Wait for the context switch warning banner to appear - await waitFor( - () => { - const warning = canvas.queryByText(/Context May Exceed Model Limit/); - if (!warning) throw new Error("Context switch warning not found"); - }, - { timeout: 3000 } - ); - - // Verify the warning shows the token count and model limit - await canvas.findByText(/150K tokens/, {}, { timeout: 2000 }); - await canvas.findByText(/128K/, {}, { timeout: 2000 }); - - // Wait for any animations to settle - await new Promise((r) => requestAnimationFrame(() => requestAnimationFrame(r))); - }, + // No play function - warning triggers on model switch which requires user interaction. + // To test: click model selector, choose GPT-4o, warning banner appears. parameters: { docs: { description: { story: - "Shows the context switch warning banner when switching from a high-context model " + - "(Sonnet 200K+) to a lower-context model (GPT-4o 128K) while the current context " + - "exceeds 90% of the target model's limit. The banner offers a one-click compact action.", + "Setup for context switch warning. To see the warning: click the model selector " + + "and switch to GPT-4o. 
Since context (150K) exceeds 90% of GPT-4o's limit (128K), " + + "a warning banner will appear offering a one-click compact action.", }, }, }, From 534a0dcbd98baa9848f020faa6c50e8adfcfd9e1 Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 15 Jan 2026 19:16:59 +1100 Subject: [PATCH 6/7] fix: show context switch warning in story by initializing ref to null Initialize prevPendingModelRef to null so the effect triggers on first render, detecting model 'change' from null to current model and showing the warning when context exceeds limit. --- src/browser/hooks/useContextSwitchWarning.ts | 5 +++-- src/browser/stories/App.chat.stories.tsx | 19 +++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts index f7cea07f3e..51483c5ab9 100644 --- a/src/browser/hooks/useContextSwitchWarning.ts +++ b/src/browser/hooks/useContextSwitchWarning.ts @@ -45,8 +45,9 @@ export function useContextSwitchWarning( const [warning, setWarning] = useState(null); const prevUse1MRef = useRef(use1M); - // Track previous model so we can use it as compaction fallback on switch - const prevPendingModelRef = useRef(pendingModel); + // Track previous model so we can use it as compaction fallback on switch. + // Initialize to null so first render triggers check (handles page reload after model switch). + const prevPendingModelRef = useRef(null); const getCurrentTokens = useCallback(() => { const usage = workspaceUsage?.liveUsage ?? workspaceUsage?.lastContextUsage; diff --git a/src/browser/stories/App.chat.stories.tsx b/src/browser/stories/App.chat.stories.tsx index 30ec1ade15..98de218b02 100644 --- a/src/browser/stories/App.chat.stories.tsx +++ b/src/browser/stories/App.chat.stories.tsx @@ -1633,8 +1633,10 @@ export const ContextSwitchWarning: AppStory = { setup={() => { const workspaceId = "ws-context-switch"; - // Start with Sonnet which can handle large context - updatePersistedState(getModelKey(workspaceId), "anthropic:claude-sonnet-4-5"); + // Set GPT-4o as current model (128K limit) + // Previous message was from Sonnet with 150K tokens + // On mount, effect sees model "changed" from Sonnet → GPT-4o and triggers warning + updatePersistedState(getModelKey(workspaceId), "openai:gpt-4o"); return setupSimpleChatStory({ workspaceId, @@ -1643,9 +1645,8 @@ export const ContextSwitchWarning: AppStory = { historySequence: 1, timestamp: STABLE_TIMESTAMP - 300000, }), - // Large context usage - 150K tokens - // To see the warning: manually switch to GPT-4o (128K limit) - // 150K > 90% of 128K will trigger the warning + // Large context usage - 150K tokens from Sonnet (which handles 200K+) + // Now switching to GPT-4o (128K limit): 150K > 90% of 128K triggers warning createAssistantMessage( "msg-2", "I've analyzed the codebase. Here's my refactoring plan...", @@ -1664,15 +1665,13 @@ export const ContextSwitchWarning: AppStory = { }} /> ), - // No play function - warning triggers on model switch which requires user interaction. - // To test: click model selector, choose GPT-4o, warning banner appears. parameters: { docs: { description: { story: - "Setup for context switch warning. To see the warning: click the model selector " + - "and switch to GPT-4o. Since context (150K) exceeds 90% of GPT-4o's limit (128K), " + - "a warning banner will appear offering a one-click compact action.", + "Shows the context switch warning banner. 
Previous message used Sonnet (150K tokens), " + + "but workspace is now set to GPT-4o (128K limit). Since 150K exceeds 90% of 128K, " + + "the warning banner appears offering a one-click compact action.", }, }, }, From b230f494f5deb42f17d2d267d041f48d5315a16a Mon Sep 17 00:00:00 2001 From: ethan Date: Thu, 15 Jan 2026 19:29:56 +1100 Subject: [PATCH 7/7] feat: enhance context switch warning with smarter model suggestions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the basic compaction model resolution (preferred → previous) fails, fall back to searching all known models for one with larger context that the user can access (has provider credentials or gateway routing). Also changed warning text from 'is near' to 'may exceed' since the warning can trigger when context actually exceeds the target limit. --- .../components/ContextSwitchWarning.tsx | 2 +- src/browser/hooks/useContextSwitchWarning.ts | 49 +++++++++++++++---- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/browser/components/ContextSwitchWarning.tsx b/src/browser/components/ContextSwitchWarning.tsx index 1eb5781608..1247e2e309 100644 --- a/src/browser/components/ContextSwitchWarning.tsx +++ b/src/browser/components/ContextSwitchWarning.tsx @@ -31,7 +31,7 @@ export const ContextSwitchWarning: React.FC = ({ warning, onCompact, onDi Context May Exceed Model Limit

-        Current context ({formatTokens(warning.currentTokens)} tokens) is near the{" "}
+        Current context ({formatTokens(warning.currentTokens)} tokens) may exceed the{" "}

diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts index 51483c5ab9..b619c5afe4 100644 --- a/src/browser/hooks/useContextSwitchWarning.ts +++ b/src/browser/hooks/useContextSwitchWarning.ts @@ -16,8 +16,8 @@ import { findPreviousModel, type ContextSwitchWarning, } from "@/browser/utils/compaction/contextSwitchCheck"; - -// Note: findPreviousModel is still used for 1M toggle changes (effect below) +import { getHigherContextCompactionSuggestion } from "@/browser/utils/compaction/suggestion"; +import { useProvidersConfig } from "./useProvidersConfig"; import { executeCompaction } from "@/browser/utils/chatCommands"; interface UseContextSwitchWarningProps { @@ -48,12 +48,32 @@ export function useContextSwitchWarning( // Track previous model so we can use it as compaction fallback on switch. // Initialize to null so first render triggers check (handles page reload after model switch). const prevPendingModelRef = useRef(null); + const { config: providersConfig } = useProvidersConfig(); const getCurrentTokens = useCallback(() => { const usage = workspaceUsage?.liveUsage ?? workspaceUsage?.lastContextUsage; return usage ? usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens : 0; }, [workspaceUsage]); + // Enhance warning with smarter model suggestion when basic resolution fails. + // Searches all known models for one with larger context that user can access. + const enhanceWarning = useCallback( + (w: ContextSwitchWarning | null): ContextSwitchWarning | null => { + if (!w || w.compactionModel) return w; + + const suggestion = getHigherContextCompactionSuggestion({ + currentModel: w.targetModel, + providersConfig, + }); + + if (suggestion) { + return { ...w, compactionModel: suggestion.modelId, errorMessage: null }; + } + return w; + }, + [providersConfig] + ); + const handleModelChange = useCallback( (newModel: string) => { const tokens = getCurrentTokens(); @@ -61,9 +81,10 @@ export function useContextSwitchWarning( // so compaction fallback works even if user switches without sending const previousModel = prevPendingModelRef.current; prevPendingModelRef.current = newModel; - setWarning(tokens > 0 ? checkContextSwitch(tokens, newModel, previousModel, use1M) : null); + const result = tokens > 0 ? checkContextSwitch(tokens, newModel, previousModel, use1M) : null; + setWarning(enhanceWarning(result)); }, - [getCurrentTokens, use1M] + [getCurrentTokens, use1M, enhanceWarning] ); const handleCompact = useCallback(() => { @@ -85,14 +106,23 @@ export function useContextSwitchWarning( // Sync with indirect model changes (e.g., WorkspaceModeAISync updating model on mode/agent change). // Effect is appropriate: pendingModel comes from usePersistedState (localStorage), and external // components like WorkspaceModeAISync can update it without going through handleModelChange. + // Also re-check when workspaceUsage changes (tokens may not be available on first render). + const tokens = getCurrentTokens(); useEffect(() => { const prevModel = prevPendingModelRef.current; if (prevModel !== pendingModel) { prevPendingModelRef.current = pendingModel; - const tokens = getCurrentTokens(); - setWarning(tokens > 0 ? checkContextSwitch(tokens, pendingModel, prevModel, use1M) : null); + const result = tokens > 0 ? 
checkContextSwitch(tokens, pendingModel, prevModel, use1M) : null; + setWarning(enhanceWarning(result)); + } else if (tokens > 0 && !warning) { + // Re-check if tokens became available after initial render (usage data loaded) + // Use findPreviousModel since we don't have a "previous" model in this case + const previousModel = findPreviousModel(messages); + if (previousModel && previousModel !== pendingModel) { + setWarning(enhanceWarning(checkContextSwitch(tokens, pendingModel, previousModel, use1M))); + } } - }, [pendingModel, getCurrentTokens, use1M]); + }, [pendingModel, tokens, use1M, warning, messages, enhanceWarning]); // Sync with 1M toggle changes from ProviderOptionsContext. // Effect is appropriate here: we're syncing with an external context (not our own state), @@ -107,13 +137,14 @@ export function useContextSwitchWarning( if (wasEnabled !== use1M) { const tokens = getCurrentTokens(); if (tokens > 0) { - setWarning(checkContextSwitch(tokens, pendingModel, findPreviousModel(messages), use1M)); + const result = checkContextSwitch(tokens, pendingModel, findPreviousModel(messages), use1M); + setWarning(enhanceWarning(result)); } else if (use1M) { // No tokens but toggled ON - clear any stale warning setWarning(null); } } - }, [use1M, getCurrentTokens, pendingModel, messages]); + }, [use1M, getCurrentTokens, pendingModel, messages, enhanceWarning]); return { warning, handleModelChange, handleCompact, handleDismiss }; }
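
Note: patch 7 imports getHigherContextCompactionSuggestion from "@/browser/utils/compaction/suggestion", but that module is not part of this series' diffs. Everything below is a hypothetical sketch of the shape the hook's call site implies; the real helper, model catalogue, and ProvidersConfig fields may differ:

    import { getModelStats } from "@/common/utils/tokens/modelStats";

    // Placeholder catalogue - the app presumably exposes its own model list.
    const KNOWN_MODELS: string[] = [
      "anthropic:claude-sonnet-4-5",
      "google:gemini-2.5-pro",
      "openai:gpt-4o",
    ];

    interface HigherContextSuggestion {
      modelId: string;
    }

    // Hypothetical signature inferred from the call site in useContextSwitchWarning:
    // given the model the user is on, find an accessible model with a larger context.
    export function getHigherContextCompactionSuggestion(opts: {
      currentModel: string;
      providersConfig: Record<string, { hasCredentials?: boolean }> | undefined;
    }): HigherContextSuggestion | null {
      const currentLimit = getModelStats(opts.currentModel)?.max_input_tokens ?? 0;

      let best: HigherContextSuggestion | null = null;
      let bestLimit = currentLimit;
      for (const modelId of KNOWN_MODELS) {
        const provider = modelId.split(":")[0];
        // "hasCredentials" is a stand-in for whatever credential/gateway-routing
        // check the real providers config exposes.
        if (!opts.providersConfig?.[provider]?.hasCredentials) continue;
        const limit = getModelStats(modelId)?.max_input_tokens ?? 0;
        if (limit > bestLimit) {
          best = { modelId };
          bestLimit = limit;
        }
      }
      return best;
    }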