diff --git a/src/browser/components/AIView.tsx b/src/browser/components/AIView.tsx
index 68db99bda3..eaa744956a 100644
--- a/src/browser/components/AIView.tsx
+++ b/src/browser/components/AIView.tsx
@@ -61,6 +61,8 @@ import { ConcurrentLocalWarning } from "./ConcurrentLocalWarning";
 import { BackgroundProcessesBanner } from "./BackgroundProcessesBanner";
 import { useBackgroundBashHandlers } from "@/browser/hooks/useBackgroundBashHandlers";
 import { checkAutoCompaction } from "@/browser/utils/compaction/autoCompactionCheck";
+import { useContextSwitchWarning } from "@/browser/hooks/useContextSwitchWarning";
+import { ContextSwitchWarning as ContextSwitchWarningBanner } from "./ContextSwitchWarning";
 import { executeCompaction, buildContinueMessage } from "@/browser/utils/chatCommands";
 import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
 import { useAutoCompactionSettings } from "../hooks/useAutoCompactionSettings";
@@ -166,6 +168,22 @@ const AIViewInner: React.FC = ({
   }, [workspaceState]);
   const { messages, canInterrupt, isCompacting, loading } = workspaceState;
 
+  // Context switch warning - shown when user switches to a model that can't fit current context
+  const {
+    warning: contextSwitchWarning,
+    handleModelChange,
+    handleCompact: handleContextSwitchCompact,
+    handleDismiss: handleContextSwitchDismiss,
+  } = useContextSwitchWarning({
+    workspaceId,
+    messages,
+    pendingModel,
+    use1M,
+    workspaceUsage,
+    api: api ?? undefined,
+    pendingSendOptions,
+  });
+
   // Apply message transformations:
   // 1. Merge consecutive identical stream errors
   //    (bash_output grouping is done at render-time, not as a transformation)
@@ -726,13 +744,22 @@ const AIViewInner: React.FC = ({
           )}
 
-          {shouldShowCompactionWarning && (
-            <CompactionWarning … />
-          )}
+          {contextSwitchWarning ? (
+            <ContextSwitchWarningBanner
+              warning={contextSwitchWarning}
+              onCompact={handleContextSwitchCompact}
+              onDismiss={handleContextSwitchDismiss}
+            />
+          ) : (
+            shouldShowCompactionWarning && (
+              <CompactionWarning … />
+            )
+          )}
           <ChatInput
             …
             onCheckReviews={handleCheckReviews}
             onDeleteReview={reviews.removeReview}
             onUpdateReviewNote={reviews.updateReviewNote}
+            onModelChange={handleModelChange}
           />
diff --git a/src/browser/components/ChatInput/index.tsx b/src/browser/components/ChatInput/index.tsx
index 99ba0a43c1..470d3a0988 100644
--- a/src/browser/components/ChatInput/index.tsx
+++ b/src/browser/components/ChatInput/index.tsx
@@ -136,6 +136,8 @@ const ChatInputInner: React.FC = (props) => {
     variant === "workspace" ? (props.hasQueuedCompaction ?? false) : false;
   // runtimeType for telemetry - defaults to "worktree" if not provided
   const runtimeType = variant === "workspace" ? (props.runtimeType ?? "worktree") : "worktree";
"worktree") : "worktree"; + // Callback for model changes (both variants support this) + const onModelChange = props.onModelChange; // Storage keys differ by variant const storageKeys = (() => { @@ -384,6 +386,10 @@ const ChatInputInner: React.FC = (props) => { ensureModelInSettings(canonicalModel); // Ensure model exists in Settings updatePersistedState(storageKeys.modelKey, canonicalModel); // Update workspace or project-specific + // Notify parent of model change (for context switch warning) + // Called before early returns so warning works even offline or with custom agents + onModelChange?.(canonicalModel); + if (variant !== "workspace" || !workspaceId) { return; } @@ -434,6 +440,7 @@ const ChatInputInner: React.FC = (props) => { thinkingLevel, variant, workspaceId, + onModelChange, ] ); @@ -1305,7 +1312,7 @@ const ChatInputInner: React.FC = (props) => { if (parsed.type === "model-set") { setInput(""); // Clear input immediately setPreferredModel(parsed.modelString); - props.onModelChange?.(parsed.modelString); + // Note: onModelChange is called within setPreferredModel pushToast({ type: "success", message: `Model changed to ${parsed.modelString}` }); return; } diff --git a/src/browser/components/ContextSwitchWarning.tsx b/src/browser/components/ContextSwitchWarning.tsx new file mode 100644 index 0000000000..1247e2e309 --- /dev/null +++ b/src/browser/components/ContextSwitchWarning.tsx @@ -0,0 +1,63 @@ +import React from "react"; +import { X } from "lucide-react"; +import { getModelName } from "@/common/utils/ai/models"; +import type { ContextSwitchWarning as WarningData } from "@/browser/utils/compaction/contextSwitchCheck"; + +function formatTokens(n: number): string { + if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1).replace(".0", "")}M`; + if (n >= 1_000) return `${Math.round(n / 1_000)}K`; + return String(n); +} + +interface Props { + warning: WarningData; + onCompact: () => void; + onDismiss: () => void; +} + +/** + * Warning banner shown when user switches to a model that can't fit the current context. + */ +export const ContextSwitchWarning: React.FC = ({ warning, onCompact, onDismiss }) => { + const targetName = getModelName(warning.targetModel); + const compactName = warning.compactionModel ? getModelName(warning.compactionModel) : null; + + return ( +
+    <div>
+      <div>
+        <div>
+          <span>⚠️</span>
+          <span>Context May Exceed Model Limit</span>
+        </div>
+        <div>
+          Current context ({formatTokens(warning.currentTokens)} tokens) may exceed the{" "}
+          <strong>{targetName}</strong> limit (
+          {formatTokens(warning.targetLimit)}). Consider compacting before sending.
+        </div>
+      </div>
+      <button onClick={onDismiss} aria-label="Dismiss">
+        <X size={14} />
+      </button>
+      <div>
+        {warning.errorMessage ? (
+          <span>{warning.errorMessage}</span>
+        ) : (
+          <button onClick={onCompact}>
+            Compact{compactName ? ` with ${compactName}` : ""}
+          </button>
+        )}
+      </div>
+    </div>
+  );
+};
diff --git a/src/browser/hooks/useContextSwitchWarning.ts b/src/browser/hooks/useContextSwitchWarning.ts
new file mode 100644
index 0000000000..b619c5afe4
--- /dev/null
+++ b/src/browser/hooks/useContextSwitchWarning.ts
@@ -0,0 +1,150 @@
+/**
+ * Hook for managing context switch warnings.
+ *
+ * Shows a warning when the user switches to a model that can't fit the current context.
+ * Handles model changes, 1M toggle changes, and provides compact/dismiss actions.
+ */
+
+import { useState, useRef, useEffect, useCallback } from "react";
+import type { RouterClient } from "@orpc/server";
+import type { AppRouter } from "@/node/orpc/router";
+import type { SendMessageOptions } from "@/common/orpc/types";
+import type { DisplayedMessage } from "@/common/types/message";
+import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
+import {
+  checkContextSwitch,
+  findPreviousModel,
+  type ContextSwitchWarning,
+} from "@/browser/utils/compaction/contextSwitchCheck";
+import { getHigherContextCompactionSuggestion } from "@/browser/utils/compaction/suggestion";
+import { useProvidersConfig } from "./useProvidersConfig";
+import { executeCompaction } from "@/browser/utils/chatCommands";
+
+interface UseContextSwitchWarningProps {
+  workspaceId: string;
+  messages: DisplayedMessage[];
+  pendingModel: string;
+  use1M: boolean;
+  workspaceUsage: WorkspaceUsageState | undefined;
+  api: RouterClient<AppRouter> | undefined;
+  pendingSendOptions: SendMessageOptions;
+}
+
+interface UseContextSwitchWarningResult {
+  warning: ContextSwitchWarning | null;
+  handleModelChange: (newModel: string) => void;
+  handleCompact: () => void;
+  handleDismiss: () => void;
+}
+
+export function useContextSwitchWarning(
+  props: UseContextSwitchWarningProps
+): UseContextSwitchWarningResult {
+  const { workspaceId, messages, pendingModel, use1M, workspaceUsage, api, pendingSendOptions } =
+    props;
+
+  const [warning, setWarning] = useState<ContextSwitchWarning | null>(null);
+  const prevUse1MRef = useRef(use1M);
+  // Track previous model so we can use it as compaction fallback on switch.
+  // Initialize to null so first render triggers check (handles page reload after model switch).
+  const prevPendingModelRef = useRef<string | null>(null);
+  const { config: providersConfig } = useProvidersConfig();
+
+  const getCurrentTokens = useCallback(() => {
+    const usage = workspaceUsage?.liveUsage ?? workspaceUsage?.lastContextUsage;
+    return usage ? usage.input.tokens + usage.cached.tokens + usage.cacheCreate.tokens : 0;
+  }, [workspaceUsage]);
+
+  // Enhance warning with smarter model suggestion when basic resolution fails.
+  // Searches all known models for one with larger context that user can access.
+  const enhanceWarning = useCallback(
+    (w: ContextSwitchWarning | null): ContextSwitchWarning | null => {
+      if (!w || w.compactionModel) return w;
+
+      const suggestion = getHigherContextCompactionSuggestion({
+        currentModel: w.targetModel,
+        providersConfig,
+      });
+
+      if (suggestion) {
+        return { ...w, compactionModel: suggestion.modelId, errorMessage: null };
+      }
+      return w;
+    },
+    [providersConfig]
+  );
+
+  const handleModelChange = useCallback(
+    (newModel: string) => {
+      const tokens = getCurrentTokens();
+      // Use the model user was just on (not last assistant message's model)
+      // so compaction fallback works even if user switches without sending
+      const previousModel = prevPendingModelRef.current;
+      prevPendingModelRef.current = newModel;
+      const result = tokens > 0 ? checkContextSwitch(tokens, newModel, previousModel, use1M) : null;
+      setWarning(enhanceWarning(result));
+    },
+    [getCurrentTokens, use1M, enhanceWarning]
+  );
+
+  const handleCompact = useCallback(() => {
+    if (!api || !warning?.compactionModel) return;
+
+    void executeCompaction({
+      api,
+      workspaceId,
+      model: warning.compactionModel,
+      sendMessageOptions: pendingSendOptions,
+    });
+    setWarning(null);
+  }, [api, workspaceId, pendingSendOptions, warning]);
+
+  const handleDismiss = useCallback(() => {
+    setWarning(null);
+  }, []);
+
+  // Sync with indirect model changes (e.g., WorkspaceModeAISync updating model on mode/agent change).
+  // Effect is appropriate: pendingModel comes from usePersistedState (localStorage), and external
+  // components like WorkspaceModeAISync can update it without going through handleModelChange.
+  // Also re-check when workspaceUsage changes (tokens may not be available on first render).
+  const tokens = getCurrentTokens();
+  useEffect(() => {
+    const prevModel = prevPendingModelRef.current;
+    if (prevModel !== pendingModel) {
+      prevPendingModelRef.current = pendingModel;
+      const result = tokens > 0 ? checkContextSwitch(tokens, pendingModel, prevModel, use1M) : null;
+      setWarning(enhanceWarning(result));
+    } else if (tokens > 0 && !warning) {
+      // Re-check if tokens became available after initial render (usage data loaded)
+      // Use findPreviousModel since we don't have a "previous" model in this case
+      const previousModel = findPreviousModel(messages);
+      if (previousModel && previousModel !== pendingModel) {
+        setWarning(enhanceWarning(checkContextSwitch(tokens, pendingModel, previousModel, use1M)));
+      }
+    }
+  }, [pendingModel, tokens, use1M, warning, messages, enhanceWarning]);
+
+  // Sync with 1M toggle changes from ProviderOptionsContext.
+  // Effect is appropriate here: we're syncing with an external context (not our own state),
+  // and the toggle change happens in ModelSettings which can't directly call our handlers.
+  useEffect(() => {
+    const wasEnabled = prevUse1MRef.current;
+    prevUse1MRef.current = use1M;
+
+    // Recompute warning when toggle changes (either direction)
+    // OFF → ON: may clear warning if context now fits
+    // ON → OFF: may show warning if context no longer fits
+    if (wasEnabled !== use1M) {
+      const tokens = getCurrentTokens();
+      if (tokens > 0) {
+        const result = checkContextSwitch(tokens, pendingModel, findPreviousModel(messages), use1M);
+        setWarning(enhanceWarning(result));
+      } else if (use1M) {
+        // No tokens but toggled ON - clear any stale warning
+        setWarning(null);
+      }
+    }
+  }, [use1M, getCurrentTokens, pendingModel, messages, enhanceWarning]);
+
+  return { warning, handleModelChange, handleCompact, handleDismiss };
+}
diff --git a/src/browser/stories/App.chat.stories.tsx b/src/browser/stories/App.chat.stories.tsx
index f1888cfc89..98de218b02 100644
--- a/src/browser/stories/App.chat.stories.tsx
+++ b/src/browser/stories/App.chat.stories.tsx
@@ -1620,3 +1620,59 @@ export const ToolHooksOutputExpanded: AppStory = {
     },
   },
 };
+
+/**
+ * Context switch warning banner - shows when switching to a model that can't fit current context.
+ *
+ * Scenario: Workspace has ~150K tokens of context. The user switches from Sonnet (200K+ limit)
+ * to GPT-4o (128K limit). Since 150K > 90% of 128K, the warning banner appears.
+ */
+export const ContextSwitchWarning: AppStory = {
+  render: () => (
+    <…
+      …={() => {
+        const workspaceId = "ws-context-switch";
+
+        // Set GPT-4o as current model (128K limit)
+        // Previous message was from Sonnet with 150K tokens
+        // On mount, effect sees model "changed" from Sonnet → GPT-4o and triggers warning
+        updatePersistedState(getModelKey(workspaceId), "openai:gpt-4o");
+
+        return setupSimpleChatStory({
+          workspaceId,
+          messages: [
+            createUserMessage("msg-1", "Help me refactor this large codebase", {
+              historySequence: 1,
+              timestamp: STABLE_TIMESTAMP - 300000,
+            }),
+            // Large context usage - 150K tokens from Sonnet (which handles 200K+)
+            // Now switching to GPT-4o (128K limit): 150K > 90% of 128K triggers warning
+            createAssistantMessage(
+              "msg-2",
+              "I've analyzed the codebase. Here's my refactoring plan...",
+              {
+                historySequence: 2,
+                timestamp: STABLE_TIMESTAMP - 290000,
+                model: "anthropic:claude-sonnet-4-5",
+                contextUsage: {
+                  inputTokens: 150000,
+                  outputTokens: 2000,
+                },
+              }
+            ),
+          ],
+        });
+      }}
+    />
+  ),
+  parameters: {
+    docs: {
+      description: {
+        story:
+          "Shows the context switch warning banner. Previous message used Sonnet (150K tokens), " +
+          "but workspace is now set to GPT-4o (128K limit). Since 150K exceeds 90% of 128K, " +
+          "the warning banner appears offering a one-click compact action.",
+      },
+    },
+  },
+};
diff --git a/src/browser/utils/compaction/autoCompactionCheck.ts b/src/browser/utils/compaction/autoCompactionCheck.ts
index fd32272400..f0c983ba18 100644
--- a/src/browser/utils/compaction/autoCompactionCheck.ts
+++ b/src/browser/utils/compaction/autoCompactionCheck.ts
@@ -17,12 +17,11 @@ import type { WorkspaceUsageState } from "@/browser/stores/WorkspaceStore";
 import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
-import { getModelStats } from "@/common/utils/tokens/modelStats";
-import { supports1MContext } from "@/common/utils/ai/models";
 import {
   DEFAULT_AUTO_COMPACTION_THRESHOLD,
   FORCE_COMPACTION_BUFFER_PERCENT,
 } from "@/common/constants/ui";
+import { getEffectiveContextLimit } from "./contextLimit";
 
 /**
  * Get context window tokens (input only).
@@ -82,8 +81,7 @@ export function checkAutoCompaction(
   }
 
   // Determine max tokens for this model
-  const modelStats = getModelStats(model);
-  const maxTokens = use1M && supports1MContext(model) ? 1_000_000 : modelStats?.max_input_tokens;
+  const maxTokens = getEffectiveContextLimit(model, use1M);
 
   // No max tokens known - safe default (can't calculate percentage)
   if (!maxTokens) {
diff --git a/src/browser/utils/compaction/contextLimit.ts b/src/browser/utils/compaction/contextLimit.ts
new file mode 100644
index 0000000000..c20a320f0c
--- /dev/null
+++ b/src/browser/utils/compaction/contextLimit.ts
@@ -0,0 +1,25 @@
+/**
+ * Shared context limit utilities for compaction logic.
+ *
+ * Used by autoCompactionCheck and contextSwitchCheck to calculate
+ * effective context limits accounting for 1M context toggle.
+ */
+
+import { getModelStats } from "@/common/utils/tokens/modelStats";
+import { supports1MContext } from "@/common/utils/ai/models";
+
+/**
+ * Get effective context limit for a model, accounting for 1M toggle.
+ *
+ * @param model - Model ID (e.g., "anthropic:claude-sonnet-4-5")
+ * @param use1M - Whether 1M context is enabled in settings
+ * @returns Max input tokens, or null if model stats unavailable
+ */
+export function getEffectiveContextLimit(model: string, use1M: boolean): number | null {
+  const stats = getModelStats(model);
+  if (!stats?.max_input_tokens) return null;
+
+  // Sonnet: 1M optional (toggle). Gemini: always 1M (native).
+  if (supports1MContext(model) && use1M) return 1_000_000;
+  return stats.max_input_tokens;
+}
diff --git a/src/browser/utils/compaction/contextSwitchCheck.ts b/src/browser/utils/compaction/contextSwitchCheck.ts
new file mode 100644
index 0000000000..82a3bcda98
--- /dev/null
+++ b/src/browser/utils/compaction/contextSwitchCheck.ts
@@ -0,0 +1,87 @@
+/**
+ * Context switch check utility
+ *
+ * Determines whether switching to a new model would exceed the model's context limit.
+ * Used to warn users before they switch from a high-context model (e.g., Gemini 1M)
+ * to a lower-context model (e.g., GPT 272K) when their current context is too large.
+ */
+
+import { readPersistedString } from "@/browser/hooks/usePersistedState";
+import { PREFERRED_COMPACTION_MODEL_KEY } from "@/common/constants/storage";
+import type { DisplayedMessage } from "@/common/types/message";
+import { getEffectiveContextLimit } from "./contextLimit";
+
+/** Safety buffer - warn if context exceeds 90% of target model's limit */
+const CONTEXT_FIT_THRESHOLD = 0.9;
+
+/** Warning state returned when context doesn't fit in target model */
+export interface ContextSwitchWarning {
+  currentTokens: number;
+  targetLimit: number;
+  targetModel: string;
+  /** Model to use for compaction, or null if none available */
+  compactionModel: string | null;
+  /** Error message when no capable compaction model exists */
+  errorMessage: string | null;
+}
+
+/**
+ * Find the most recent assistant message's model from chat history.
+ */
+export function findPreviousModel(messages: DisplayedMessage[]): string | null {
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg.type === "assistant" && msg.model) return msg.model;
+  }
+  return null;
+}
+
+/**
+ * Resolve compaction model: preferred (if fits) → previous (if fits) → null.
+ */
+function resolveCompactionModel(
+  currentTokens: number,
+  previousModel: string | null,
+  use1M: boolean
+): string | null {
+  const preferred = readPersistedString(PREFERRED_COMPACTION_MODEL_KEY);
+  if (preferred) {
+    const limit = getEffectiveContextLimit(preferred, use1M);
+    if (limit && limit > currentTokens) return preferred;
+  }
+  if (previousModel) {
+    const limit = getEffectiveContextLimit(previousModel, use1M);
+    if (limit && limit > currentTokens) return previousModel;
+  }
+  return null;
+}
+
+/**
+ * Check if switching to targetModel would exceed its context limit.
+ * Returns warning info if context doesn't fit, null otherwise.
+ */
+export function checkContextSwitch(
+  currentTokens: number,
+  targetModel: string,
+  previousModel: string | null,
+  use1M: boolean
+): ContextSwitchWarning | null {
+  const targetLimit = getEffectiveContextLimit(targetModel, use1M);
+
+  // Unknown model or context fits with 10% buffer - no warning
+  if (!targetLimit || currentTokens <= targetLimit * CONTEXT_FIT_THRESHOLD) {
+    return null;
+  }
+
+  const compactionModel = resolveCompactionModel(currentTokens, previousModel, use1M);
+
+  return {
+    currentTokens,
+    targetLimit,
+    targetModel,
+    compactionModel,
+    errorMessage: compactionModel
+      ? null
+      : "Context too large. Use `/compact -m <model>` with a 1M context model.",
+  };
+}
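
For reference, a minimal sketch of how the new `checkContextSwitch` helper is expected to behave in the Storybook scenario above, assuming the bundled model stats report roughly a 128K-token `max_input_tokens` for `openai:gpt-4o` and about 200K for `anthropic:claude-sonnet-4-5` (the exact limits come from `getModelStats`, not from this patch):

import { checkContextSwitch } from "@/browser/utils/compaction/contextSwitchCheck";

// ~150K tokens of context, switching to GPT-4o (~128K limit): 150K > 90% of 128K,
// so a warning is returned and the previous model is offered as the compaction fallback
// (unless a preferred compaction model is persisted and also fits).
const warning = checkContextSwitch(150_000, "openai:gpt-4o", "anthropic:claude-sonnet-4-5", false);
// warning?.targetLimit      -> ~128_000
// warning?.compactionModel  -> "anthropic:claude-sonnet-4-5"

// Same context, but the target model's effective limit is large enough: no warning.
const ok = checkContextSwitch(150_000, "anthropic:claude-sonnet-4-5", null, false);
// ok -> null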