From 23d1497908e469cdf0faa674c6e70204bd992062 Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 5 Jun 2026 16:36:28 -0500 Subject: [PATCH 1/5] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20Fast/Slow=20se?= =?UTF-8?q?rvice-tier=20control=20and=20/fast=20/slow=20one-shots?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a lightning-bolt service-tier control to the chat input (right of the thinking slider) for models that support service_tier (OpenAI/GPT today). The bolt glows orange for Fast (priority), turns blue for Slow (flex), and is neutral grey for Auto/default. Clicking opens a per-chat override menu. Also add /fast and /slow one-shot slash commands that reuse the existing model-oneshot send path, with TipsCarousel discovery. Uses Fast/Slow wording to stay provider-agnostic for future models. --- .../ServiceTierPicker.test.tsx | 114 +++++++++ .../ServiceTierPicker/ServiceTierPicker.tsx | 225 ++++++++++++++++++ src/browser/features/ChatInput/index.tsx | 43 +++- .../features/ChatInput/placeholderTips.ts | 2 + src/browser/hooks/useSendMessageOptions.ts | 14 +- src/browser/hooks/useServiceTier.ts | 27 +++ src/browser/styles/globals.css | 6 + src/browser/utils/messages/sendOptions.ts | 18 +- .../utils/slashCommands/parser.test.ts | 26 ++ src/browser/utils/slashCommands/parser.ts | 23 +- .../utils/slashCommands/suggestions.test.ts | 14 ++ .../utils/slashCommands/suggestions.ts | 21 ++ src/browser/utils/slashCommands/types.ts | 3 + src/common/constants/storage.ts | 10 + src/common/utils/ai/serviceTier.test.ts | 100 ++++++++ src/common/utils/ai/serviceTier.ts | 93 ++++++++ 16 files changed, 728 insertions(+), 11 deletions(-) create mode 100644 src/browser/components/ServiceTierPicker/ServiceTierPicker.test.tsx create mode 100644 src/browser/components/ServiceTierPicker/ServiceTierPicker.tsx create mode 100644 src/browser/hooks/useServiceTier.ts create mode 100644 src/common/utils/ai/serviceTier.test.ts create mode 100644 
src/common/utils/ai/serviceTier.ts diff --git a/src/browser/components/ServiceTierPicker/ServiceTierPicker.test.tsx b/src/browser/components/ServiceTierPicker/ServiceTierPicker.test.tsx new file mode 100644 index 0000000000..2b4d9f7daa --- /dev/null +++ b/src/browser/components/ServiceTierPicker/ServiceTierPicker.test.tsx @@ -0,0 +1,114 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { cleanup, fireEvent, render, waitFor } from "@testing-library/react"; +import { installDom } from "../../../../tests/ui/dom"; + +import { TooltipProvider } from "@/browser/components/Tooltip/Tooltip"; +import { getServiceTierKey } from "@/common/constants/storage"; +import { ServiceTierPicker } from "./ServiceTierPicker"; + +const OPENAI_MODEL = "openai:gpt-5.5"; +const ANTHROPIC_MODEL = "anthropic:claude-haiku-4-5"; +const SCOPE = "ws-service-tier-test"; + +let cleanupDom: (() => void) | null = null; + +function renderPicker(modelString: string) { + return render( + + + + ); +} + +describe("ServiceTierPicker", () => { + beforeEach(() => { + cleanupDom = installDom(); + globalThis.window.localStorage.clear(); + }); + + afterEach(() => { + cleanup(); + cleanupDom?.(); + cleanupDom = null; + }); + + test("renders nothing for models without service-tier support", () => { + const { queryByTestId } = renderPicker(ANTHROPIC_MODEL); + expect(queryByTestId("service-tier-trigger")).toBeNull(); + }); + + test("shows the neutral (default) state for a supported model with no override", () => { + const { getByTestId } = renderPicker(OPENAI_MODEL); + const trigger = getByTestId("service-tier-trigger"); + expect(trigger.getAttribute("data-service-tier")).toBe("default"); + }); + + test("opens a menu and applies the Fast override", async () => { + const { getByTestId, queryByTestId, getAllByTestId } = renderPicker(OPENAI_MODEL); + + // Menu is closed initially. 
+ expect(queryByTestId("service-tier-option")).toBeNull(); + + fireEvent.click(getByTestId("service-tier-trigger")); + + await waitFor(() => { + expect(getAllByTestId("service-tier-option").length).toBe(3); + }); + + const fast = getAllByTestId("service-tier-option").find( + (el) => el.getAttribute("data-speed") === "fast" + ); + expect(fast).toBeTruthy(); + fireEvent.click(fast!); + + await waitFor(() => { + expect(getByTestId("service-tier-trigger").getAttribute("data-service-tier")).toBe("fast"); + }); + + // Override is persisted under the scoped key as the provider wire value. + expect(globalThis.window.localStorage.getItem(getServiceTierKey(SCOPE))).toBe( + JSON.stringify("priority") + ); + // Menu closes after selection. + expect(queryByTestId("service-tier-option")).toBeNull(); + }); + + test("applies the Slow override", async () => { + const { getByTestId, getAllByTestId } = renderPicker(OPENAI_MODEL); + fireEvent.click(getByTestId("service-tier-trigger")); + + await waitFor(() => expect(getAllByTestId("service-tier-option").length).toBe(3)); + const slow = getAllByTestId("service-tier-option").find( + (el) => el.getAttribute("data-speed") === "slow" + ); + fireEvent.click(slow!); + + await waitFor(() => { + expect(getByTestId("service-tier-trigger").getAttribute("data-service-tier")).toBe("slow"); + }); + expect(globalThis.window.localStorage.getItem(getServiceTierKey(SCOPE))).toBe( + JSON.stringify("flex") + ); + }); + + test("selecting Auto clears an existing override", async () => { + // Seed an existing Fast override. 
+ globalThis.window.localStorage.setItem(getServiceTierKey(SCOPE), JSON.stringify("priority")); + + const { getByTestId, getAllByTestId } = renderPicker(OPENAI_MODEL); + expect(getByTestId("service-tier-trigger").getAttribute("data-service-tier")).toBe("fast"); + + fireEvent.click(getByTestId("service-tier-trigger")); + await waitFor(() => expect(getAllByTestId("service-tier-option").length).toBe(3)); + const auto = getAllByTestId("service-tier-option").find( + (el) => el.getAttribute("data-speed") === "default" + ); + fireEvent.click(auto!); + + await waitFor(() => { + expect(getByTestId("service-tier-trigger").getAttribute("data-service-tier")).toBe("default"); + }); + // Auto clears the override entirely (key removed), so the provider/global default applies. + expect(globalThis.window.localStorage.getItem(getServiceTierKey(SCOPE))).toBeNull(); + }); +}); diff --git a/src/browser/components/ServiceTierPicker/ServiceTierPicker.tsx b/src/browser/components/ServiceTierPicker/ServiceTierPicker.tsx new file mode 100644 index 0000000000..2f256e09b1 --- /dev/null +++ b/src/browser/components/ServiceTierPicker/ServiceTierPicker.tsx @@ -0,0 +1,225 @@ +import React, { useCallback, useEffect, useRef, useState } from "react"; +import { Check, Zap } from "lucide-react"; + +import { cn } from "@/common/lib/utils"; +import { type ServiceTier } from "@/common/config/schemas/providersConfig"; +import { + getServiceTierSpeed, + SERVICE_TIER_FAST, + SERVICE_TIER_SLOW, + supportsServiceTier, + type ServiceTierSpeed, +} from "@/common/utils/ai/serviceTier"; +import { useServiceTier } from "@/browser/hooks/useServiceTier"; +import { Tooltip, TooltipContent, TooltipTrigger } from "../Tooltip/Tooltip"; +import { stopKeyboardPropagation } from "@/browser/utils/events"; + +interface ServiceTierPickerProps { + /** Canonical model string used to gate visibility (only shown for supporting models). 
*/ + modelString: string; + /** Workspace id (workspace view) or project scope id (creation view). */ + scopeId: string; + className?: string; +} + +interface ServiceTierOption { + speed: ServiceTierSpeed; + /** null clears the override (falls back to the provider/global default). */ + tier: ServiceTier | null; + label: string; + description: string; +} + +// "Fast"/"Slow"/"Auto" wording keeps the control provider-agnostic even though +// only OpenAI honors service_tier today. +const OPTIONS: readonly ServiceTierOption[] = [ + { speed: "default", tier: null, label: "Auto", description: "Provider default speed" }, + { + speed: "fast", + tier: SERVICE_TIER_FAST, + label: "Fast", + description: "Prioritize low latency (higher cost)", + }, + { + speed: "slow", + tier: SERVICE_TIER_SLOW, + label: "Slow", + description: "Prioritize lower cost (higher latency)", + }, +]; + +/** CSS variable for the active speed, or undefined for the neutral (grey) state. */ +function getSpeedColorVar(speed: ServiceTierSpeed): string | undefined { + if (speed === "fast") return "var(--color-service-tier-fast)"; + if (speed === "slow") return "var(--color-service-tier-slow)"; + return undefined; +} + +/** + * Lightning-bolt control for the chat-specific service-tier (speed) override. + * + * - Fast → bolt glows orange, Slow → bolt turns blue, Auto/default → neutral grey. + * - Clicking opens a small keyboard-navigable menu that sets the per-chat override. + * + * Rendered only for models that support service tiers (OpenAI/GPT today). Uses + * conditional rendering (not a Radix portal) so it stays testable under happy-dom. 
+ */ +export const ServiceTierPicker: React.FC = (props) => { + const [serviceTier, setServiceTier] = useServiceTier(props.scopeId); + const [isOpen, setIsOpen] = useState(false); + const [highlightedIndex, setHighlightedIndex] = useState(-1); + + const containerRef = useRef(null); + const dropdownRef = useRef(null); + + const currentSpeed = getServiceTierSpeed(serviceTier); + + const closePicker = useCallback(() => { + setIsOpen(false); + setHighlightedIndex(-1); + }, []); + + const openPicker = useCallback(() => { + setIsOpen(true); + const currentIndex = OPTIONS.findIndex((opt) => opt.speed === currentSpeed); + setHighlightedIndex(currentIndex >= 0 ? currentIndex : 0); + requestAnimationFrame(() => dropdownRef.current?.focus()); + }, [currentSpeed]); + + const handleSelect = useCallback( + (option: ServiceTierOption) => { + setServiceTier(option.tier); + closePicker(); + }, + [closePicker, setServiceTier] + ); + + // Close when clicking outside the control. + useEffect(() => { + if (!isOpen) { + return; + } + const handleClickOutside = (e: MouseEvent) => { + if (containerRef.current?.contains(e.target as Node)) { + return; + } + closePicker(); + }; + document.addEventListener("mousedown", handleClickOutside); + return () => document.removeEventListener("mousedown", handleClickOutside); + }, [closePicker, isOpen]); + + const handleDropdownKeyDown = (e: React.KeyboardEvent) => { + if (e.key === "Escape") { + e.preventDefault(); + stopKeyboardPropagation(e); + closePicker(); + return; + } + if (e.key === "Enter") { + e.preventDefault(); + const option = OPTIONS[highlightedIndex >= 0 ? 
highlightedIndex : 0]; + if (option) { + handleSelect(option); + } + return; + } + if (e.key === "ArrowDown") { + e.preventDefault(); + setHighlightedIndex((prev) => Math.min(prev + 1, OPTIONS.length - 1)); + return; + } + if (e.key === "ArrowUp") { + e.preventDefault(); + setHighlightedIndex((prev) => Math.max(prev - 1, 0)); + return; + } + }; + + // Only models that honor service tiers expose this affordance. + if (!supportsServiceTier(props.modelString)) { + return null; + } + + const activeColor = getSpeedColorVar(currentSpeed); + const activeLabel = OPTIONS.find((opt) => opt.speed === currentSpeed)?.label ?? "Auto"; + + return ( +
+ + + + + + Service tier: {activeLabel}. Sets request speed for + this chat. Saved per workspace. + + + + {isOpen && ( +
+
+ {OPTIONS.map((option, index) => { + const isHighlighted = index === highlightedIndex; + const isSelected = option.speed === currentSpeed; + const color = getSpeedColorVar(option.speed); + return ( +
setHighlightedIndex(index)} + onClick={() => handleSelect(option)} + > + +
+
{option.label}
+
{option.description}
+
+ {isSelected && } +
+ ); + })} +
+
+ )} +
+ ); +}; diff --git a/src/browser/features/ChatInput/index.tsx b/src/browser/features/ChatInput/index.tsx index e0213d2075..47741bfc8a 100644 --- a/src/browser/features/ChatInput/index.tsx +++ b/src/browser/features/ChatInput/index.tsx @@ -31,6 +31,7 @@ import { useWorkspaceContext } from "@/browser/contexts/WorkspaceContext"; import { useProjectContext } from "@/browser/contexts/ProjectContext"; import { useAgent } from "@/browser/contexts/AgentContext"; import { ThinkingSliderComponent } from "@/browser/components/ThinkingSlider/ThinkingSlider"; +import { ServiceTierPicker } from "@/browser/components/ServiceTierPicker/ServiceTierPicker"; import { getAllowedRuntimeModesForUi, isParsedRuntimeAllowedByPolicy, @@ -39,7 +40,8 @@ import { usePolicy } from "@/browser/contexts/PolicyContext"; import { useAPI } from "@/browser/contexts/API"; import { useThinkingLevel } from "@/browser/hooks/useThinkingLevel"; import { useExperimentValue } from "@/browser/hooks/useExperiments"; -import { normalizeSelectedModel } from "@/common/utils/ai/models"; +import { normalizeSelectedModel, getModelName } from "@/common/utils/ai/models"; +import { supportsServiceTier, withServiceTierOverride } from "@/common/utils/ai/serviceTier"; import { useAdditionalSystemContextHydrated, useAdditionalSystemContextSnapshot, @@ -713,10 +715,11 @@ const ChatInputInner: React.FC = (props) => { }, [variant, startTutorial]); // Get current send message options from shared hook (must be at component top level) - // For creation variant, use project-scoped key; for workspace, use workspace ID - const sendMessageOptions = useSendMessageOptions( - variant === "workspace" ? props.workspaceId : getProjectScopeId(creationParentProjectPath) - ); + // For creation variant, use project-scoped key; for workspace, use workspace ID. + // Shared so the service-tier override and send options resolve the same scope. + const sendOptionsScopeId = + variant === "workspace" ? 
props.workspaceId : getProjectScopeId(creationParentProjectPath); + const sendMessageOptions = useSendMessageOptions(sendOptionsScopeId); const additionalSystemContext = useAdditionalSystemContextSnapshot( variant === "workspace" ? props.workspaceId : "" ); @@ -2483,6 +2486,18 @@ const ChatInputInner: React.FC = (props) => { const modelOverride = modelOneShot?.modelString; + // /fast and /slow one-shot tier override only applies to models that honor service + // tiers (OpenAI today). Block with a clear message (preserving the composer) rather + // than silently dropping the tier when the active model can't use it. + const tierOverride = modelOneShot?.serviceTier; + if (tierOverride && !supportsServiceTier(modelOverride ?? baseModel)) { + pushToast({ + type: "error", + message: `Fast/Slow isn't supported by ${getModelName(modelOverride ?? baseModel)}`, + }); + return; + } + // Regular message (or / one-shot override) - send directly via API const messageTextForSend = modelOneShot?.message ?? skillInvocation?.userText ?? messageText; const skillMuxMetadata = skillInvocation @@ -2661,11 +2676,24 @@ const ChatInputInner: React.FC = (props) => { : undefined; const goalInterventionPolicy = overrides?.goalInterventionPolicy; + // One-shot /fast or /slow rides in providerOptions for this message only, + // layering over any persisted per-chat service tier from useSendMessageOptions. + const oneshotProviderOptions = tierOverride + ? { + providerOptions: withServiceTierOverride( + sendMessageOptions.providerOptions ?? {}, + tierOverride, + modelOverride ?? baseModel + ), + } + : {}; + const sendOptions = { ...sendMessageOptions, ...compactionOptions, ...(modelOverride ? { model: modelOverride } : {}), ...(thinkingOverride ? { thinkingLevel: thinkingOverride } : {}), + ...oneshotProviderOptions, ...(modelOneShot ? { skipAiSettingsPersistence: true } : {}), ...(goalInterventionPolicy ? 
{ goalInterventionPolicy } : {}), ...(overrides?.queueDispatchMode @@ -3212,6 +3240,11 @@ const ChatInputInner: React.FC = (props) => { > + + {/* Service-tier (Fast/Slow) speed override. Renders its own root only for + models that support service tiers (OpenAI/GPT today); otherwise it returns + null and occupies no layout space (no stray flex gap). */} +
to ask a side question without nudging the agent", "Try /haiku to send just this message on a different model", "Try /+high to crank up reasoning for this message only", + "Try /fast to send one message on a faster service tier", + "Try /slow to send one message on a cheaper service tier", "Try /compact to summarize the conversation when context gets tight", "Try /fork to branch this chat into a new workspace", "Try /plan to view or edit the current plan inline", diff --git a/src/browser/hooks/useSendMessageOptions.ts b/src/browser/hooks/useSendMessageOptions.ts index 53664d1061..3a52103bfc 100644 --- a/src/browser/hooks/useSendMessageOptions.ts +++ b/src/browser/hooks/useSendMessageOptions.ts @@ -9,6 +9,8 @@ import { import { DEFAULT_MODEL_KEY, getModelKey } from "@/common/constants/storage"; import type { SendMessageOptions } from "@/common/orpc/types"; import { useProviderOptions } from "./useProviderOptions"; +import { useServiceTier } from "./useServiceTier"; +import { withServiceTierOverride } from "@/common/utils/ai/serviceTier"; import { useExperimentOverrideValue } from "./useExperiments"; import { EXPERIMENT_IDS } from "@/common/constants/experiments"; import { useWorkspaceContext } from "@/browser/contexts/WorkspaceContext"; @@ -74,11 +76,21 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi metadataSettings.model ?? defaultModel ); + // Per-chat service-tier override (Fast/Slow). It rides along in providerOptions so the + // backend applies it per request without persisting to workspace metadata. Only attached + // for models that honor service tiers (OpenAI today). 
+ const [serviceTierOverride] = useServiceTier(workspaceId); + const effectiveProviderOptions = withServiceTierOverride( + providerOptions, + serviceTierOverride, + baseModel + ); + const options = buildSendMessageOptions({ agentId, thinkingLevel, model: baseModel, - providerOptions, + providerOptions: effectiveProviderOptions, experiments: { programmaticToolCalling, programmaticToolCallingExclusive, diff --git a/src/browser/hooks/useServiceTier.ts b/src/browser/hooks/useServiceTier.ts new file mode 100644 index 0000000000..1757bb2598 --- /dev/null +++ b/src/browser/hooks/useServiceTier.ts @@ -0,0 +1,27 @@ +import { type ServiceTier } from "@/common/config/schemas/providersConfig"; +import { getServiceTierKey } from "@/common/constants/storage"; +import { usePersistedState } from "./usePersistedState"; + +/** + * Chat-specific (per workspace/project scope) service-tier override. + * + * `null` means "no override" — the provider/global default applies. Backed by + * localStorage (keyed by scope) with cross-component sync so the chat-input bolt + * and the send path stay in agreement without prop drilling. + * + * Unlike thinking level, this is intentionally NOT persisted to backend metadata: + * the tier rides along with each send via `providerOptions.openai.serviceTier`, + * so localStorage is the single source of truth (mirroring the other provider + * option toggles like Anthropic 1M context). 
+ * + * @param scopeId workspaceId (workspace view) or a project scope id (creation view) + * @returns `[serviceTier, setServiceTier]` tuple + */ +export function useServiceTier(scopeId: string) { + const [serviceTier, setServiceTier] = usePersistedState( + getServiceTierKey(scopeId), + null, + { listener: true } + ); + return [serviceTier, setServiceTier] as const; +} diff --git a/src/browser/styles/globals.css b/src/browser/styles/globals.css index 448b3ce33f..b6bae911a7 100644 --- a/src/browser/styles/globals.css +++ b/src/browser/styles/globals.css @@ -88,6 +88,12 @@ --color-thinking-mode-light: hsl(271 76% 65%); --color-thinking-border: hsl(271 76% 53%); + /* Service tier (Fast/Slow speed override). + Fast = energetic orange (low latency); Slow = calm blue (lower cost). + Defined once at the theme root and inherited by all themes. */ + --color-service-tier-fast: hsl(28 96% 54%); + --color-service-tier-slow: hsl(206 90% 56%); + /* Runtime icon colors (matches Tailwind blue-500/purple-500) */ --color-runtime-ssh: #3b82f6; --color-runtime-ssh-text: #60a5fa; /* blue-400 */ diff --git a/src/browser/utils/messages/sendOptions.ts b/src/browser/utils/messages/sendOptions.ts index 4d6cbeac67..3847f5fb22 100644 --- a/src/browser/utils/messages/sendOptions.ts +++ b/src/browser/utils/messages/sendOptions.ts @@ -3,6 +3,7 @@ import { getModelKey, getThinkingLevelByModelKey, getThinkingLevelKey, + getServiceTierKey, getDisableWorkspaceAgentsKey, } from "@/common/constants/storage"; import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState"; @@ -14,6 +15,8 @@ import { import type { SendMessageOptions } from "@/common/orpc/types"; import type { ThinkingLevel } from "@/common/types/thinking"; import type { MuxProviderOptions } from "@/common/types/providerOptions"; +import type { ServiceTier } from "@/common/config/schemas/providersConfig"; +import { withServiceTierOverride } from "@/common/utils/ai/serviceTier"; import { WORKSPACE_DEFAULTS } 
from "@/constants/workspaceDefaults"; import { isExperimentEnabled } from "@/browser/hooks/useExperiments"; import { EXPERIMENT_IDS } from "@/common/constants/experiments"; @@ -66,6 +69,19 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio const providerOptions = getProviderOptions(); + // Mirror useSendMessageOptions: attach the per-chat service-tier override for models + // that honor it (OpenAI today) so non-React send paths (resume, idle-compaction, plan + // execution) stay consistent with interactive sends. + const serviceTierOverride = readPersistedState( + getServiceTierKey(workspaceId), + null + ); + const effectiveProviderOptions = withServiceTierOverride( + providerOptions, + serviceTierOverride, + baseModel + ); + const disableWorkspaceAgents = readPersistedState( getDisableWorkspaceAgentsKey(workspaceId), false @@ -75,7 +91,7 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio model: baseModel, agentId, thinkingLevel, - providerOptions, + providerOptions: effectiveProviderOptions, disableWorkspaceAgents, experiments: { programmaticToolCalling: isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING), diff --git a/src/browser/utils/slashCommands/parser.test.ts b/src/browser/utils/slashCommands/parser.test.ts index 427673bc26..51a9aff68d 100644 --- a/src/browser/utils/slashCommands/parser.test.ts +++ b/src/browser/utils/slashCommands/parser.test.ts @@ -111,6 +111,32 @@ describe("commandParser", () => { expectParse("/sonnet ", { type: "model-help" }); // whitespace only }); + it("should parse /fast and /slow as service-tier one-shots", () => { + expectParse("/fast ship it", { + type: "model-oneshot", + serviceTier: "priority", + message: "ship it", + }); + expectParse("/slow take your time", { + type: "model-oneshot", + serviceTier: "flex", + message: "take your time", + }); + }); + + it("should preserve multiline messages for service-tier one-shots", () => { + expectParse("/fast first 
line\nsecond line", { + type: "model-oneshot", + serviceTier: "priority", + message: "first line\nsecond line", + }); + }); + + it("should return model-help for /fast or /slow without a message", () => { + expectParse("/fast", { type: "model-help" }); + expectParse("/slow ", { type: "model-help" }); // whitespace only + }); + it("should return unknown-command for unknown aliases", () => { expectParse("/xyz do something", { type: "unknown-command", diff --git a/src/browser/utils/slashCommands/parser.ts b/src/browser/utils/slashCommands/parser.ts index 99f2492c04..dfa4d8e637 100644 --- a/src/browser/utils/slashCommands/parser.ts +++ b/src/browser/utils/slashCommands/parser.ts @@ -7,6 +7,8 @@ import { SLASH_COMMAND_DEFINITION_MAP } from "./registry"; import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels"; import { normalizeModelInput } from "@/browser/utils/models/normalizeModelInput"; import { parseThinkingInput, type ParsedThinkingInput } from "@/common/types/thinking"; +import { getServiceTierForCommandKey } from "@/common/utils/ai/serviceTier"; +import type { ServiceTier } from "@/common/config/schemas/providersConfig"; /** * Parse a raw command string into a structured command @@ -118,7 +120,7 @@ export function getSlashCommandDefinitions(): readonly SlashCommandDefinition[] } /** - * Parse a oneshot command key into model + thinking overrides. + * Parse a oneshot command key into model / thinking / service-tier overrides. 
* * Supported forms: * - "haiku" → model override only (existing behavior) @@ -126,12 +128,25 @@ export function getSlashCommandDefinitions(): readonly SlashCommandDefinition[] * - "haiku+medium" → model + named thinking level * - "+0" → thinking-only override (use current model) * - "+high" → thinking-only override with named level + * - "fast"/"slow" → service-tier-only override (use current model) + * + * `/fast` and `/slow` deliberately reuse the model-oneshot path so message + * extraction, the bare-key → help fallback, and the rendered command prefix all + * behave exactly like `/` one-shots. * * Returns null if the key doesn't match any valid oneshot pattern. */ -function parseOneshotCommandKey( - key: string -): { modelString?: string; thinkingLevel?: ParsedThinkingInput } | null { +function parseOneshotCommandKey(key: string): { + modelString?: string; + thinkingLevel?: ParsedThinkingInput; + serviceTier?: ServiceTier; +} | null { + // Service-tier one-shots (/fast, /slow) carry no model/thinking change. 
+ const serviceTier = getServiceTierForCommandKey(key); + if (serviceTier) { + return { serviceTier }; + } + const plusIndex = key.indexOf("+"); if (plusIndex === -1) { diff --git a/src/browser/utils/slashCommands/suggestions.test.ts b/src/browser/utils/slashCommands/suggestions.test.ts index d20271d150..d74c1ab209 100644 --- a/src/browser/utils/slashCommands/suggestions.test.ts +++ b/src/browser/utils/slashCommands/suggestions.test.ts @@ -52,6 +52,20 @@ describe("getSlashCommandSuggestions", () => { expect(labels).toContain("/model"); }); + it("suggests /fast and /slow service-tier one-shots", () => { + const suggestions = getSlashCommandSuggestions("/"); + const labels = suggestions.map((s) => s.display); + + expect(labels).toContain("/fast"); + expect(labels).toContain("/slow"); + }); + + it("filters service-tier one-shots by prefix", () => { + const suggestions = getSlashCommandSuggestions("/fa"); + expect(suggestions.map((s) => s.display)).toContain("/fast"); + expect(suggestions.map((s) => s.display)).not.toContain("/slow"); + }); + it("includes agent skills when provided in context", () => { const suggestions = getSlashCommandSuggestions("/", { agentSkills: [ diff --git a/src/browser/utils/slashCommands/suggestions.ts b/src/browser/utils/slashCommands/suggestions.ts index a195563571..5b3931bc29 100644 --- a/src/browser/utils/slashCommands/suggestions.ts +++ b/src/browser/utils/slashCommands/suggestions.ts @@ -5,6 +5,7 @@ import { matchesNameBySegmentPrefix } from "@/browser/utils/suggestionMatching"; import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels"; import { formatModelDisplayName } from "@/common/utils/ai/modelDisplay"; +import { SERVICE_TIER_COMMAND_KEYS } from "@/common/utils/ai/serviceTier"; import { getSlashCommandDefinitions } from "./parser"; import { isSlashCommandVisible, SLASH_COMMAND_DEFINITION_MAP } from "./registry"; import type { @@ -88,6 +89,7 @@ function buildTopLevelSuggestions( .filter((workflow) => 
!SLASH_COMMAND_DEFINITION_MAP.has(workflow.name)) .filter((workflow) => !skillNames.has(workflow.name)) .filter((workflow) => !Object.hasOwn(MODEL_ABBREVIATIONS, workflow.name)) + .filter((workflow) => !SERVICE_TIER_COMMAND_KEYS.includes(workflow.name as never)) .map((workflow) => ({ key: workflow.name, description: `${workflow.description} (${workflow.scope} workflow)`, @@ -126,11 +128,30 @@ function buildTopLevelSuggestions( }) ); + // Service-tier one-shot suggestions (/fast, /slow). These reuse the model-oneshot + // send path; "Fast"/"Slow" wording keeps them provider-agnostic for future models. + const serviceTierDefinitions: SuggestionDefinition[] = [ + { key: "fast", description: "Send one message on the Fast service tier (lower latency)" }, + { key: "slow", description: "Send one message on the Slow service tier (lower cost)" }, + ]; + + const serviceTierSuggestions = filterAndMapSuggestions( + serviceTierDefinitions, + partial, + (definition) => ({ + id: `model-oneshot:${definition.key}`, + display: `/${definition.key}`, + description: definition.description, + replacement: `/${definition.key} `, + }) + ); + return [ ...commandSuggestions, ...skillSuggestions, ...workflowSuggestions, ...modelAliasSuggestions, + ...serviceTierSuggestions, ]; } diff --git a/src/browser/utils/slashCommands/types.ts b/src/browser/utils/slashCommands/types.ts index 6fc84f8476..7a933eb102 100644 --- a/src/browser/utils/slashCommands/types.ts +++ b/src/browser/utils/slashCommands/types.ts @@ -13,6 +13,7 @@ import type { ExperimentId } from "@/common/constants/experiments"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; import type { WorkflowDefinitionDescriptor } from "@/common/types/workflow"; import type { ParsedThinkingInput } from "@/common/types/thinking"; +import type { ServiceTier } from "@/common/config/schemas/providersConfig"; export type ParsedCommand = | { type: "model-set"; modelString: string } @@ -22,6 +23,8 @@ export type ParsedCommand = 
modelString?: string; /** One-shot thinking level override — named (ThinkingLevel) or numeric index (resolved at send time against the model's policy). */ thinkingLevel?: ParsedThinkingInput; + /** One-shot service-tier (Fast/Slow) override applied for this message only (e.g. "/fast", "/slow"). */ + serviceTier?: ServiceTier; message: string; } | { type: "model-help" } diff --git a/src/common/constants/storage.ts b/src/common/constants/storage.ts index 793180b9a5..8f49500de7 100644 --- a/src/common/constants/storage.ts +++ b/src/common/constants/storage.ts @@ -178,6 +178,15 @@ export function getThinkingLevelKey(scopeId: string): string { return `thinkingLevel:${scopeId}`; } +/** + * Get the localStorage key for the chat-specific service-tier override per scope + * (workspace/project). `null` means no override (use the provider/global default). + * Format: "serviceTier:{scopeId}" + */ +export function getServiceTierKey(scopeId: string): string { + return `serviceTier:${scopeId}`; +} + /** * Get the localStorage key for per-agent workspace AI overrides cache. 
* Format: "workspaceAiSettingsByAgent:{workspaceId}" @@ -746,6 +755,7 @@ const PERSISTENT_WORKSPACE_KEY_FUNCTIONS: Array<(workspaceId: string) => string> getAgentIdKey, getPinnedAgentIdKey, getThinkingLevelKey, + getServiceTierKey, getReviewStateKey, getHunkFirstSeenKey, getReviewExpandStateKey, diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts new file mode 100644 index 0000000000..2ded4c0e6a --- /dev/null +++ b/src/common/utils/ai/serviceTier.test.ts @@ -0,0 +1,100 @@ +import { describe, it, expect } from "bun:test"; +import type { MuxProviderOptions } from "@/common/types/providerOptions"; +import { + getServiceTierForCommandKey, + getServiceTierSpeed, + getServiceTierSpeedLabel, + SERVICE_TIER_FAST, + SERVICE_TIER_SLOW, + supportsServiceTier, + withServiceTierOverride, +} from "./serviceTier"; + +const OPENAI_MODEL = "openai:gpt-5.5"; +const ANTHROPIC_MODEL = "anthropic:claude-haiku-4-5"; + +describe("serviceTier helpers", () => { + describe("getServiceTierForCommandKey", () => { + it("maps /fast and /slow to provider wire tiers", () => { + expect(getServiceTierForCommandKey("fast")).toBe(SERVICE_TIER_FAST); + expect(getServiceTierForCommandKey("slow")).toBe(SERVICE_TIER_SLOW); + }); + + it("uses OpenAI priority/flex as the Fast/Slow wire values", () => { + expect(SERVICE_TIER_FAST).toBe("priority"); + expect(SERVICE_TIER_SLOW).toBe("flex"); + }); + + it("returns null for non service-tier keys", () => { + expect(getServiceTierForCommandKey("haiku")).toBeNull(); + expect(getServiceTierForCommandKey("compact")).toBeNull(); + expect(getServiceTierForCommandKey("")).toBeNull(); + }); + }); + + describe("getServiceTierSpeed", () => { + it("collapses concrete tiers into UI speed buckets", () => { + expect(getServiceTierSpeed("priority")).toBe("fast"); + expect(getServiceTierSpeed("flex")).toBe("slow"); + }); + + it("treats auto/default/absent as the neutral default", () => { + 
expect(getServiceTierSpeed("auto")).toBe("default"); + expect(getServiceTierSpeed("default")).toBe("default"); + expect(getServiceTierSpeed(null)).toBe("default"); + expect(getServiceTierSpeed(undefined)).toBe("default"); + }); + }); + + describe("getServiceTierSpeedLabel", () => { + it("renders provider-agnostic labels", () => { + expect(getServiceTierSpeedLabel("fast")).toBe("Fast"); + expect(getServiceTierSpeedLabel("slow")).toBe("Slow"); + expect(getServiceTierSpeedLabel("default")).toBe("Auto"); + }); + }); + + describe("supportsServiceTier", () => { + it("is supported only for OpenAI models today", () => { + expect(supportsServiceTier(OPENAI_MODEL)).toBe(true); + expect(supportsServiceTier(ANTHROPIC_MODEL)).toBe(false); + expect(supportsServiceTier("google:gemini-3.1-pro-preview")).toBe(false); + }); + }); + + describe("withServiceTierOverride", () => { + it("attaches the tier under openai for supported models", () => { + const result = withServiceTierOverride({}, SERVICE_TIER_FAST, OPENAI_MODEL); + expect(result.openai?.serviceTier).toBe("priority"); + }); + + it("preserves other openai provider options", () => { + const result = withServiceTierOverride( + { openai: { wireFormat: "responses" } }, + SERVICE_TIER_SLOW, + OPENAI_MODEL + ); + expect(result.openai?.serviceTier).toBe("flex"); + expect(result.openai?.wireFormat).toBe("responses"); + }); + + it("returns options unchanged when there is no override", () => { + const input = { anthropic: { use1MContext: true } }; + expect(withServiceTierOverride(input, null, OPENAI_MODEL)).toBe(input); + expect(withServiceTierOverride(input, undefined, OPENAI_MODEL)).toBe(input); + }); + + it("never attaches a tier for unsupported models", () => { + const input = {}; + const result = withServiceTierOverride(input, SERVICE_TIER_FAST, ANTHROPIC_MODEL); + expect(result).toBe(input); + expect(result.openai).toBeUndefined(); + }); + + it("does not mutate the input options", () => { + const input: MuxProviderOptions = { 
openai: { wireFormat: "responses" } }; + withServiceTierOverride(input, SERVICE_TIER_FAST, OPENAI_MODEL); + expect(input.openai?.serviceTier).toBeUndefined(); + }); + }); +}); diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts new file mode 100644 index 0000000000..56a0785364 --- /dev/null +++ b/src/common/utils/ai/serviceTier.ts @@ -0,0 +1,93 @@ +/** + * Service-tier helpers shared across the send path, slash commands, and UI. + * + * A "service tier" tells the provider how to schedule a request. OpenAI exposes + * this as `service_tier` (e.g. `priority` for low latency, `flex` for cheaper but + * slower). To keep the product generic for future providers, we surface it in the + * UI as **Fast** / **Slow** rather than the provider-specific wire values. + * + * Mapping (the only place this translation should live): + * - Fast → `priority` (low latency, higher cost) + * - Slow → `flex` (lower cost, higher latency) + * - Auto → no override (falls back to the provider/global default) + */ + +import { type ServiceTier } from "@/common/config/schemas/providersConfig"; +import type { MuxProviderOptions } from "@/common/types/providerOptions"; +import { getModelProvider } from "./models"; + +/** Wire value for the user-facing "Fast" speed. */ +export const SERVICE_TIER_FAST: ServiceTier = "priority"; +/** Wire value for the user-facing "Slow" speed. */ +export const SERVICE_TIER_SLOW: ServiceTier = "flex"; + +/** Generic, provider-agnostic speed buckets used for UI state and styling. */ +export type ServiceTierSpeed = "fast" | "slow" | "default"; + +/** Collapse a concrete service tier (or absence of one) into a UI speed bucket. */ +export function getServiceTierSpeed(tier: ServiceTier | null | undefined): ServiceTierSpeed { + if (tier === SERVICE_TIER_FAST) return "fast"; + if (tier === SERVICE_TIER_SLOW) return "slow"; + // "auto" / "default" / null / undefined all render as the neutral (grey) state. 
+ return "default"; +} + +/** Human-readable label for a speed bucket. */ +export function getServiceTierSpeedLabel(speed: ServiceTierSpeed): string { + switch (speed) { + case "fast": + return "Fast"; + case "slow": + return "Slow"; + case "default": + return "Auto"; + } +} + +/** + * Slash-command keys that map to a one-shot service tier (e.g. `/fast`, `/slow`). + * Kept as a const map so the parser, suggestions, and workflow-collision guards + * stay in sync from a single source. + */ +export const SERVICE_TIER_COMMAND_KEYS = ["fast", "slow"] as const; +export type ServiceTierCommandKey = (typeof SERVICE_TIER_COMMAND_KEYS)[number]; + +/** Resolve a slash-command key into its service tier, or null when it isn't one. */ +export function getServiceTierForCommandKey(key: string): ServiceTier | null { + if (key === "fast") return SERVICE_TIER_FAST; + if (key === "slow") return SERVICE_TIER_SLOW; + return null; +} + +/** + * Whether a model honors a chat-level service-tier override. + * + * Today only OpenAI (GPT-class) models support `service_tier`, so we gate on the + * provider. This is intentionally a single helper so the UI affordance, the send + * path, and future providers all share one definition of "supported". + */ +export function supportsServiceTier(modelString: string): boolean { + return getModelProvider(modelString) === "openai"; +} + +/** + * Merge a service-tier override into provider options for a given model. + * + * Returns the options unchanged when there is no override or the model can't use + * service tiers, so a stale override never leaks onto an unsupported request. + * Centralized here so every send path (interactive hook, non-React storage path, + * and one-shot `/fast` `/slow`) applies the override identically. 
+ */ +export function withServiceTierOverride( + providerOptions: MuxProviderOptions, + serviceTier: ServiceTier | null | undefined, + modelString: string +): MuxProviderOptions { + if (!serviceTier || !supportsServiceTier(modelString)) { + return providerOptions; + } + return { + ...providerOptions, + openai: { ...providerOptions.openai, serviceTier }, + }; +} From 8c7fdf93cb86167a44d2d565355211b07813712a Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 5 Jun 2026 16:46:09 -0500 Subject: [PATCH 2/5] fix: restrict service tier to direct/passthrough OpenAI routes Addresses Codex P2: non-passthrough gateway-routed OpenAI models (e.g. openrouter:openai/gpt-5) canonicalize to 'openai' but the backend drops providerOptions.openai.serviceTier on those routes. supportsServiceTier now only returns true for direct openai:* models and passthrough gateways (mux-gateway), so the UI never advertises a no-op Fast/Slow override. --- src/common/utils/ai/serviceTier.test.ts | 15 +++++++++++++- src/common/utils/ai/serviceTier.ts | 26 ++++++++++++++++++++----- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts index 2ded4c0e6a..dbdeeea289 100644 --- a/src/common/utils/ai/serviceTier.test.ts +++ b/src/common/utils/ai/serviceTier.test.ts @@ -55,11 +55,24 @@ describe("serviceTier helpers", () => { }); describe("supportsServiceTier", () => { - it("is supported only for OpenAI models today", () => { + it("is supported for direct OpenAI models", () => { expect(supportsServiceTier(OPENAI_MODEL)).toBe(true); expect(supportsServiceTier(ANTHROPIC_MODEL)).toBe(false); expect(supportsServiceTier("google:gemini-3.1-pro-preview")).toBe(false); }); + + it("is NOT supported for non-passthrough gateway-routed OpenAI models", () => { + // openrouter:openai/gpt-5 canonicalizes to "openai", but openrouter is a + // non-passthrough gateway, so the backend drops serviceTier — a silent no-op. 
+ expect(supportsServiceTier("openrouter:openai/gpt-5")).toBe(false); + // github-copilot is another non-passthrough gateway (canonical github-copilot). + expect(supportsServiceTier("github-copilot:gpt-5.5")).toBe(false); + }); + + it("is supported for passthrough gateway-routed OpenAI models", () => { + // mux-gateway is a passthrough gateway: it forwards openai provider options. + expect(supportsServiceTier("mux-gateway:openai/gpt-4o")).toBe(true); + }); }); describe("withServiceTierOverride", () => { diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts index 56a0785364..61713e6a4a 100644 --- a/src/common/utils/ai/serviceTier.ts +++ b/src/common/utils/ai/serviceTier.ts @@ -14,7 +14,8 @@ import { type ServiceTier } from "@/common/config/schemas/providersConfig"; import type { MuxProviderOptions } from "@/common/types/providerOptions"; -import { getModelProvider } from "./models"; +import { PROVIDER_DEFINITIONS } from "@/common/constants/providers"; +import { getExplicitGatewayPrefix, getModelProvider } from "./models"; /** Wire value for the user-facing "Fast" speed. */ export const SERVICE_TIER_FAST: ServiceTier = "priority"; @@ -62,12 +63,27 @@ export function getServiceTierForCommandKey(key: string): ServiceTier | null { /** * Whether a model honors a chat-level service-tier override. * - * Today only OpenAI (GPT-class) models support `service_tier`, so we gate on the - * provider. This is intentionally a single helper so the UI affordance, the send - * path, and future providers all share one definition of "supported". + * Today only OpenAI (GPT-class) models support `service_tier`. Critically, the + * backend only forwards `providerOptions.openai.serviceTier` when the request is + * routed either directly to OpenAI or through a *passthrough* gateway. Non-passthrough + * gateways (e.g. 
openrouter, github-copilot) drop the field, so a model like + * `openrouter:openai/gpt-5` — which canonicalizes to `openai` — would silently ignore + * the tier. We mirror that routing here so the UI never advertises a no-op override. + * + * This is intentionally a single helper so the UI affordance, the send path, and + * future providers all share one definition of "supported". */ export function supportsServiceTier(modelString: string): boolean { - return getModelProvider(modelString) === "openai"; + if (getModelProvider(modelString) !== "openai") { + return false; + } + const gatewayPrefix = getExplicitGatewayPrefix(modelString); + if (gatewayPrefix) { + // Only passthrough gateways forward OpenAI provider options to the request. + const def = PROVIDER_DEFINITIONS[gatewayPrefix]; + return def != null && "passthrough" in def && def.passthrough === true; + } + return true; } /** From 615aa4883dfeec7885bcab67c64572fa0672a6eb Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 5 Jun 2026 16:53:15 -0500 Subject: [PATCH 3/5] fix: carry creation-time service tier into new workspace Addresses Codex P2: syncCreationPreferences() copied model/agent/thinking from the project scope to the new workspace but not the service-tier override, so a Fast/Slow choice made during creation reverted to Auto after the workspace opened. Now copies getServiceTierKey(projectScope) -> getServiceTierKey(workspace), mirroring thinking level. Extends the creation success test to assert the sync. 
--- .../features/ChatInput/useCreationWorkspace.test.tsx | 8 ++++++++ .../features/ChatInput/useCreationWorkspace.ts | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/src/browser/features/ChatInput/useCreationWorkspace.test.tsx b/src/browser/features/ChatInput/useCreationWorkspace.test.tsx index a9c414eb6a..644b155ad1 100644 --- a/src/browser/features/ChatInput/useCreationWorkspace.test.tsx +++ b/src/browser/features/ChatInput/useCreationWorkspace.test.tsx @@ -15,6 +15,7 @@ import { getPendingScopeId, getPendingWorkspaceSendErrorKey, getProjectScopeId, + getServiceTierKey, getThinkingLevelKey, } from "@/common/constants/storage"; import type { WorkspaceChatMessage } from "@/common/orpc/types"; @@ -709,6 +710,8 @@ describe("useCreationWorkspace", () => { persistedPreferences[getAgentIdKey(getProjectScopeId(TEST_PROJECT_PATH))] = "plan"; // Set model preference for the project scope (read by getSendOptionsFromStorage) persistedPreferences[getModelKey(getProjectScopeId(TEST_PROJECT_PATH))] = "gpt-4"; + // Chat-specific service-tier override chosen during creation must follow the workspace. + persistedPreferences[getServiceTierKey(getProjectScopeId(TEST_PROJECT_PATH))] = "priority"; draftSettingsState = createDraftSettingsHarness({ selectedRuntime: { mode: "ssh", host: "example.com" }, @@ -769,6 +772,11 @@ describe("useCreationWorkspace", () => { // Thinking is workspace-scoped, but this test doesn't set a project-scoped thinking preference. expect(updatePersistedStateCalls).toContainEqual([pendingInputKey, ""]); expect(updatePersistedStateCalls).toContainEqual([pendingImagesKey, undefined]); + // The creation-time service-tier override is carried into the new workspace scope. 
+ expect(updatePersistedStateCalls).toContainEqual([ + getServiceTierKey(TEST_WORKSPACE_ID), + "priority", + ]); }); test("handleSend creates workspace and applies initial goal command without sending chat text", async () => { diff --git a/src/browser/features/ChatInput/useCreationWorkspace.ts b/src/browser/features/ChatInput/useCreationWorkspace.ts index 3c0c5e44ca..6345cf4537 100644 --- a/src/browser/features/ChatInput/useCreationWorkspace.ts +++ b/src/browser/features/ChatInput/useCreationWorkspace.ts @@ -10,6 +10,7 @@ import type { import type { RuntimeChoice } from "@/browser/utils/runtimeUi"; import { buildRuntimeConfig, RUNTIME_MODE } from "@/common/types/runtime"; import type { ThinkingLevel } from "@/common/types/thinking"; +import type { ServiceTier } from "@/common/config/schemas/providersConfig"; import { useDraftWorkspaceSettings } from "@/browser/hooks/useDraftWorkspaceSettings"; import { setWorkspaceModelWithOrigin } from "@/browser/utils/modelChange"; import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState"; @@ -22,6 +23,7 @@ import { getNotifyOnResponseAutoEnableKey, getNotifyOnResponseKey, getThinkingLevelKey, + getServiceTierKey, getWorkspaceAISettingsByAgentKey, getPendingScopeId, getDraftScopeId, @@ -113,6 +115,16 @@ function syncCreationPreferences(projectPath: string, workspaceId: string): void updatePersistedState(getThinkingLevelKey(workspaceId), projectThinkingLevel); } + // Carry the chat-specific service-tier (Fast/Slow) override chosen during creation + // into the new workspace so the first and subsequent messages stay consistent. + const projectServiceTier = readPersistedState( + getServiceTierKey(projectScopeId), + null + ); + if (projectServiceTier !== null) { + updatePersistedState(getServiceTierKey(workspaceId), projectServiceTier); + } + if (projectModel) { const effectiveThinking: ThinkingLevel = projectThinkingLevel ?? 
"off"; From 6f75162084cace2a7e0c4e9ddc7858a53000eb94 Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 5 Jun 2026 18:02:18 -0500 Subject: [PATCH 4/5] refactor: drop /fast /slow one-shots, keep bolt service-tier control The /fast and /slow slash commands were standalone service-tier-only one-shots that could not compose with the existing /[+thinking] one-shot syntax. Remove them entirely (parser, suggestions, tips, the ParsedCommand serviceTier field, the index.tsx one-shot guard/merge, and the command-key helpers) while keeping the bolt UI control and the per-chat service-tier override plumbing. --- src/browser/features/ChatInput/index.tsx | 28 +------------------ .../features/ChatInput/placeholderTips.ts | 2 -- .../utils/slashCommands/parser.test.ts | 26 ----------------- src/browser/utils/slashCommands/parser.ts | 23 +++------------ .../utils/slashCommands/suggestions.test.ts | 14 ---------- .../utils/slashCommands/suggestions.ts | 21 -------------- src/browser/utils/slashCommands/types.ts | 3 -- src/common/utils/ai/serviceTier.test.ts | 19 ------------- src/common/utils/ai/serviceTier.ts | 21 ++------------ 9 files changed, 8 insertions(+), 149 deletions(-) diff --git a/src/browser/features/ChatInput/index.tsx b/src/browser/features/ChatInput/index.tsx index 47741bfc8a..7efd3908ab 100644 --- a/src/browser/features/ChatInput/index.tsx +++ b/src/browser/features/ChatInput/index.tsx @@ -40,8 +40,7 @@ import { usePolicy } from "@/browser/contexts/PolicyContext"; import { useAPI } from "@/browser/contexts/API"; import { useThinkingLevel } from "@/browser/hooks/useThinkingLevel"; import { useExperimentValue } from "@/browser/hooks/useExperiments"; -import { normalizeSelectedModel, getModelName } from "@/common/utils/ai/models"; -import { supportsServiceTier, withServiceTierOverride } from "@/common/utils/ai/serviceTier"; +import { normalizeSelectedModel } from "@/common/utils/ai/models"; import { useAdditionalSystemContextHydrated, useAdditionalSystemContextSnapshot, @@ 
-2486,18 +2485,6 @@ const ChatInputInner: React.FC = (props) => { const modelOverride = modelOneShot?.modelString; - // /fast and /slow one-shot tier override only applies to models that honor service - // tiers (OpenAI today). Block with a clear message (preserving the composer) rather - // than silently dropping the tier when the active model can't use it. - const tierOverride = modelOneShot?.serviceTier; - if (tierOverride && !supportsServiceTier(modelOverride ?? baseModel)) { - pushToast({ - type: "error", - message: `Fast/Slow isn't supported by ${getModelName(modelOverride ?? baseModel)}`, - }); - return; - } - // Regular message (or / one-shot override) - send directly via API const messageTextForSend = modelOneShot?.message ?? skillInvocation?.userText ?? messageText; const skillMuxMetadata = skillInvocation @@ -2676,24 +2663,11 @@ const ChatInputInner: React.FC = (props) => { : undefined; const goalInterventionPolicy = overrides?.goalInterventionPolicy; - // One-shot /fast or /slow rides in providerOptions for this message only, - // layering over any persisted per-chat service tier from useSendMessageOptions. - const oneshotProviderOptions = tierOverride - ? { - providerOptions: withServiceTierOverride( - sendMessageOptions.providerOptions ?? {}, - tierOverride, - modelOverride ?? baseModel - ), - } - : {}; - const sendOptions = { ...sendMessageOptions, ...compactionOptions, ...(modelOverride ? { model: modelOverride } : {}), ...(thinkingOverride ? { thinkingLevel: thinkingOverride } : {}), - ...oneshotProviderOptions, ...(modelOneShot ? { skipAiSettingsPersistence: true } : {}), ...(goalInterventionPolicy ? 
{ goalInterventionPolicy } : {}), ...(overrides?.queueDispatchMode diff --git a/src/browser/features/ChatInput/placeholderTips.ts b/src/browser/features/ChatInput/placeholderTips.ts index a1b09e0a4f..83621780ca 100644 --- a/src/browser/features/ChatInput/placeholderTips.ts +++ b/src/browser/features/ChatInput/placeholderTips.ts @@ -51,8 +51,6 @@ export const PLACEHOLDER_TIPS: readonly string[] = [ "Try /btw to ask a side question without nudging the agent", "Try /haiku to send just this message on a different model", "Try /+high to crank up reasoning for this message only", - "Try /fast to send one message on a faster service tier", - "Try /slow to send one message on a cheaper service tier", "Try /compact to summarize the conversation when context gets tight", "Try /fork to branch this chat into a new workspace", "Try /plan to view or edit the current plan inline", diff --git a/src/browser/utils/slashCommands/parser.test.ts b/src/browser/utils/slashCommands/parser.test.ts index 51a9aff68d..427673bc26 100644 --- a/src/browser/utils/slashCommands/parser.test.ts +++ b/src/browser/utils/slashCommands/parser.test.ts @@ -111,32 +111,6 @@ describe("commandParser", () => { expectParse("/sonnet ", { type: "model-help" }); // whitespace only }); - it("should parse /fast and /slow as service-tier one-shots", () => { - expectParse("/fast ship it", { - type: "model-oneshot", - serviceTier: "priority", - message: "ship it", - }); - expectParse("/slow take your time", { - type: "model-oneshot", - serviceTier: "flex", - message: "take your time", - }); - }); - - it("should preserve multiline messages for service-tier one-shots", () => { - expectParse("/fast first line\nsecond line", { - type: "model-oneshot", - serviceTier: "priority", - message: "first line\nsecond line", - }); - }); - - it("should return model-help for /fast or /slow without a message", () => { - expectParse("/fast", { type: "model-help" }); - expectParse("/slow ", { type: "model-help" }); // whitespace only - 
}); - it("should return unknown-command for unknown aliases", () => { expectParse("/xyz do something", { type: "unknown-command", diff --git a/src/browser/utils/slashCommands/parser.ts b/src/browser/utils/slashCommands/parser.ts index dfa4d8e637..99f2492c04 100644 --- a/src/browser/utils/slashCommands/parser.ts +++ b/src/browser/utils/slashCommands/parser.ts @@ -7,8 +7,6 @@ import { SLASH_COMMAND_DEFINITION_MAP } from "./registry"; import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels"; import { normalizeModelInput } from "@/browser/utils/models/normalizeModelInput"; import { parseThinkingInput, type ParsedThinkingInput } from "@/common/types/thinking"; -import { getServiceTierForCommandKey } from "@/common/utils/ai/serviceTier"; -import type { ServiceTier } from "@/common/config/schemas/providersConfig"; /** * Parse a raw command string into a structured command @@ -120,7 +118,7 @@ export function getSlashCommandDefinitions(): readonly SlashCommandDefinition[] } /** - * Parse a oneshot command key into model / thinking / service-tier overrides. + * Parse a oneshot command key into model + thinking overrides. * * Supported forms: * - "haiku" → model override only (existing behavior) @@ -128,25 +126,12 @@ export function getSlashCommandDefinitions(): readonly SlashCommandDefinition[] * - "haiku+medium" → model + named thinking level * - "+0" → thinking-only override (use current model) * - "+high" → thinking-only override with named level - * - "fast"/"slow" → service-tier-only override (use current model) - * - * `/fast` and `/slow` deliberately reuse the model-oneshot path so message - * extraction, the bare-key → help fallback, and the rendered command prefix all - * behave exactly like `/` one-shots. * * Returns null if the key doesn't match any valid oneshot pattern. 
*/ -function parseOneshotCommandKey(key: string): { - modelString?: string; - thinkingLevel?: ParsedThinkingInput; - serviceTier?: ServiceTier; -} | null { - // Service-tier one-shots (/fast, /slow) carry no model/thinking change. - const serviceTier = getServiceTierForCommandKey(key); - if (serviceTier) { - return { serviceTier }; - } - +function parseOneshotCommandKey( + key: string +): { modelString?: string; thinkingLevel?: ParsedThinkingInput } | null { const plusIndex = key.indexOf("+"); if (plusIndex === -1) { diff --git a/src/browser/utils/slashCommands/suggestions.test.ts b/src/browser/utils/slashCommands/suggestions.test.ts index d74c1ab209..d20271d150 100644 --- a/src/browser/utils/slashCommands/suggestions.test.ts +++ b/src/browser/utils/slashCommands/suggestions.test.ts @@ -52,20 +52,6 @@ describe("getSlashCommandSuggestions", () => { expect(labels).toContain("/model"); }); - it("suggests /fast and /slow service-tier one-shots", () => { - const suggestions = getSlashCommandSuggestions("/"); - const labels = suggestions.map((s) => s.display); - - expect(labels).toContain("/fast"); - expect(labels).toContain("/slow"); - }); - - it("filters service-tier one-shots by prefix", () => { - const suggestions = getSlashCommandSuggestions("/fa"); - expect(suggestions.map((s) => s.display)).toContain("/fast"); - expect(suggestions.map((s) => s.display)).not.toContain("/slow"); - }); - it("includes agent skills when provided in context", () => { const suggestions = getSlashCommandSuggestions("/", { agentSkills: [ diff --git a/src/browser/utils/slashCommands/suggestions.ts b/src/browser/utils/slashCommands/suggestions.ts index 5b3931bc29..a195563571 100644 --- a/src/browser/utils/slashCommands/suggestions.ts +++ b/src/browser/utils/slashCommands/suggestions.ts @@ -5,7 +5,6 @@ import { matchesNameBySegmentPrefix } from "@/browser/utils/suggestionMatching"; import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels"; import { formatModelDisplayName } from 
"@/common/utils/ai/modelDisplay"; -import { SERVICE_TIER_COMMAND_KEYS } from "@/common/utils/ai/serviceTier"; import { getSlashCommandDefinitions } from "./parser"; import { isSlashCommandVisible, SLASH_COMMAND_DEFINITION_MAP } from "./registry"; import type { @@ -89,7 +88,6 @@ function buildTopLevelSuggestions( .filter((workflow) => !SLASH_COMMAND_DEFINITION_MAP.has(workflow.name)) .filter((workflow) => !skillNames.has(workflow.name)) .filter((workflow) => !Object.hasOwn(MODEL_ABBREVIATIONS, workflow.name)) - .filter((workflow) => !SERVICE_TIER_COMMAND_KEYS.includes(workflow.name as never)) .map((workflow) => ({ key: workflow.name, description: `${workflow.description} (${workflow.scope} workflow)`, @@ -128,30 +126,11 @@ function buildTopLevelSuggestions( }) ); - // Service-tier one-shot suggestions (/fast, /slow). These reuse the model-oneshot - // send path; "Fast"/"Slow" wording keeps them provider-agnostic for future models. - const serviceTierDefinitions: SuggestionDefinition[] = [ - { key: "fast", description: "Send one message on the Fast service tier (lower latency)" }, - { key: "slow", description: "Send one message on the Slow service tier (lower cost)" }, - ]; - - const serviceTierSuggestions = filterAndMapSuggestions( - serviceTierDefinitions, - partial, - (definition) => ({ - id: `model-oneshot:${definition.key}`, - display: `/${definition.key}`, - description: definition.description, - replacement: `/${definition.key} `, - }) - ); - return [ ...commandSuggestions, ...skillSuggestions, ...workflowSuggestions, ...modelAliasSuggestions, - ...serviceTierSuggestions, ]; } diff --git a/src/browser/utils/slashCommands/types.ts b/src/browser/utils/slashCommands/types.ts index 7a933eb102..6fc84f8476 100644 --- a/src/browser/utils/slashCommands/types.ts +++ b/src/browser/utils/slashCommands/types.ts @@ -13,7 +13,6 @@ import type { ExperimentId } from "@/common/constants/experiments"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; import 
type { WorkflowDefinitionDescriptor } from "@/common/types/workflow"; import type { ParsedThinkingInput } from "@/common/types/thinking"; -import type { ServiceTier } from "@/common/config/schemas/providersConfig"; export type ParsedCommand = | { type: "model-set"; modelString: string } @@ -23,8 +22,6 @@ export type ParsedCommand = modelString?: string; /** One-shot thinking level override — named (ThinkingLevel) or numeric index (resolved at send time against the model's policy). */ thinkingLevel?: ParsedThinkingInput; - /** One-shot service-tier (Fast/Slow) override applied for this message only (e.g. "/fast", "/slow"). */ - serviceTier?: ServiceTier; message: string; } | { type: "model-help" } diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts index dbdeeea289..ece6080ba6 100644 --- a/src/common/utils/ai/serviceTier.test.ts +++ b/src/common/utils/ai/serviceTier.test.ts @@ -1,7 +1,6 @@ import { describe, it, expect } from "bun:test"; import type { MuxProviderOptions } from "@/common/types/providerOptions"; import { - getServiceTierForCommandKey, getServiceTierSpeed, getServiceTierSpeedLabel, SERVICE_TIER_FAST, @@ -14,24 +13,6 @@ const OPENAI_MODEL = "openai:gpt-5.5"; const ANTHROPIC_MODEL = "anthropic:claude-haiku-4-5"; describe("serviceTier helpers", () => { - describe("getServiceTierForCommandKey", () => { - it("maps /fast and /slow to provider wire tiers", () => { - expect(getServiceTierForCommandKey("fast")).toBe(SERVICE_TIER_FAST); - expect(getServiceTierForCommandKey("slow")).toBe(SERVICE_TIER_SLOW); - }); - - it("uses OpenAI priority/flex as the Fast/Slow wire values", () => { - expect(SERVICE_TIER_FAST).toBe("priority"); - expect(SERVICE_TIER_SLOW).toBe("flex"); - }); - - it("returns null for non service-tier keys", () => { - expect(getServiceTierForCommandKey("haiku")).toBeNull(); - expect(getServiceTierForCommandKey("compact")).toBeNull(); - expect(getServiceTierForCommandKey("")).toBeNull(); - }); - }); - 
describe("getServiceTierSpeed", () => { it("collapses concrete tiers into UI speed buckets", () => { expect(getServiceTierSpeed("priority")).toBe("fast"); diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts index 61713e6a4a..7608af8a0f 100644 --- a/src/common/utils/ai/serviceTier.ts +++ b/src/common/utils/ai/serviceTier.ts @@ -1,5 +1,5 @@ /** - * Service-tier helpers shared across the send path, slash commands, and UI. + * Service-tier helpers shared across the send path and UI. * * A "service tier" tells the provider how to schedule a request. OpenAI exposes * this as `service_tier` (e.g. `priority` for low latency, `flex` for cheaper but @@ -45,21 +45,6 @@ export function getServiceTierSpeedLabel(speed: ServiceTierSpeed): string { } } -/** - * Slash-command keys that map to a one-shot service tier (e.g. `/fast`, `/slow`). - * Kept as a const map so the parser, suggestions, and workflow-collision guards - * stay in sync from a single source. - */ -export const SERVICE_TIER_COMMAND_KEYS = ["fast", "slow"] as const; -export type ServiceTierCommandKey = (typeof SERVICE_TIER_COMMAND_KEYS)[number]; - -/** Resolve a slash-command key into its service tier, or null when it isn't one. */ -export function getServiceTierForCommandKey(key: string): ServiceTier | null { - if (key === "fast") return SERVICE_TIER_FAST; - if (key === "slow") return SERVICE_TIER_SLOW; - return null; -} - /** * Whether a model honors a chat-level service-tier override. * @@ -91,8 +76,8 @@ export function supportsServiceTier(modelString: string): boolean { * * Returns the options unchanged when there is no override or the model can't use * service tiers, so a stale override never leaks onto an unsupported request. - * Centralized here so every send path (interactive hook, non-React storage path, - * and one-shot `/fast` `/slow`) applies the override identically. 
+ * Centralized here so every send path (interactive hook and non-React storage path) + * applies the override identically. */ export function withServiceTierOverride( providerOptions: MuxProviderOptions, From c180454cbfe22f6ddd079fdd2816e088bfb33f9c Mon Sep 17 00:00:00 2001 From: Ammar Date: Fri, 5 Jun 2026 18:12:23 -0500 Subject: [PATCH 5/5] fix: compose per-chat service tier with /<model> one-shots Re-merge the persisted Fast/Slow tier against the effective (one-shot) model at send time. useSendMessageOptions bakes the tier against the saved model, so a /<model> one-shot to an OpenAI model previously dropped a tier set while the saved model was non-OpenAI. withServiceTierOverride is now authoritative: it attaches the tier for supported models and strips any stale tier when the effective model can't honor it. --- src/browser/features/ChatInput/index.tsx | 18 ++++++++++++++++++ src/common/utils/ai/serviceTier.test.ts | 13 +++++++++++++ src/common/utils/ai/serviceTier.ts | 19 +++++++++++++------ 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/browser/features/ChatInput/index.tsx b/src/browser/features/ChatInput/index.tsx index 7efd3908ab..ceb5ce86e2 100644 --- a/src/browser/features/ChatInput/index.tsx +++ b/src/browser/features/ChatInput/index.tsx @@ -39,8 +39,10 @@ import { import { usePolicy } from "@/browser/contexts/PolicyContext"; import { useAPI } from "@/browser/contexts/API"; import { useThinkingLevel } from "@/browser/hooks/useThinkingLevel"; +import { useServiceTier } from "@/browser/hooks/useServiceTier"; import { useExperimentValue } from "@/browser/hooks/useExperiments"; import { normalizeSelectedModel } from "@/common/utils/ai/models"; +import { withServiceTierOverride } from "@/common/utils/ai/serviceTier"; import { useAdditionalSystemContextHydrated, useAdditionalSystemContextSnapshot, @@ -719,6 +721,10 @@ const ChatInputInner: React.FC = (props) => { const sendOptionsScopeId = variant === "workspace" ? 
props.workspaceId : getProjectScopeId(creationParentProjectPath); const sendMessageOptions = useSendMessageOptions(sendOptionsScopeId); + // The persisted per-chat service tier is also read here so a /<model> one-shot can + // re-merge it against the effective (overridden) model. useSendMessageOptions bakes the + // tier against the saved model, which may differ from a one-shot model override. + const [serviceTierOverride] = useServiceTier(sendOptionsScopeId); const additionalSystemContext = useAdditionalSystemContextSnapshot( variant === "workspace" ? props.workspaceId : "" ); @@ -2667,6 +2673,18 @@ const ChatInputInner: React.FC = (props) => { ...sendMessageOptions, ...compactionOptions, ...(modelOverride ? { model: modelOverride } : {}), + // Re-merge the per-chat service tier against the one-shot model so Fast/Slow + // composes with /<model>[+thinking]: useSendMessageOptions baked the tier against + // the saved model, which may not match (or support) the one-shot model override. + ...(modelOverride + ? { + providerOptions: withServiceTierOverride( + sendMessageOptions.providerOptions ?? {}, + serviceTierOverride, + modelOverride + ), + } + : {}), ...(thinkingOverride ? { thinkingLevel: thinkingOverride } : {}), ...(modelOneShot ? { skipAiSettingsPersistence: true } : {}), ...(goalInterventionPolicy ? { goalInterventionPolicy } : {}), diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts index ece6080ba6..7ad5bfdafd 100644 --- a/src/common/utils/ai/serviceTier.test.ts +++ b/src/common/utils/ai/serviceTier.test.ts @@ -85,6 +85,19 @@ describe("serviceTier helpers", () => { expect(result.openai).toBeUndefined(); }); + it("strips a stale tier when re-merged against an unsupported model", () => { + // Simulates a /<model> one-shot switching from an OpenAI saved model (tier baked in) + // to a non-OpenAI model: the tier must not ride along on the Anthropic request. 
+ const input: MuxProviderOptions = { + openai: { wireFormat: "responses", serviceTier: "priority" }, + }; + const result = withServiceTierOverride(input, SERVICE_TIER_FAST, ANTHROPIC_MODEL); + expect(result.openai?.serviceTier).toBeUndefined(); + expect(result.openai?.wireFormat).toBe("responses"); + // Input is left untouched. + expect(input.openai?.serviceTier).toBe("priority"); + }); + it("does not mutate the input options", () => { const input: MuxProviderOptions = { openai: { wireFormat: "responses" } }; withServiceTierOverride(input, SERVICE_TIER_FAST, OPENAI_MODEL); diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts index 7608af8a0f..f98cee1c9d 100644 --- a/src/common/utils/ai/serviceTier.ts +++ b/src/common/utils/ai/serviceTier.ts @@ -72,12 +72,14 @@ export function supportsServiceTier(modelString: string): boolean { } /** - * Merge a service-tier override into provider options for a given model. + * Reconcile a service-tier override against the *effective* model for a request. * - * Returns the options unchanged when there is no override or the model can't use - * service tiers, so a stale override never leaks onto an unsupported request. - * Centralized here so every send path (interactive hook and non-React storage path) - * applies the override identically. + * This is authoritative: it sets the tier when the model supports it and an override + * is present, and otherwise strips any previously-attached tier. The strip matters for + * composition with `/<model>` one-shots — a tier baked against the saved model must not + * linger when the one-shot switches to a model that can't honor it (and conversely, a + * tier dropped against a non-OpenAI saved model gets re-applied once the effective model + * is OpenAI). Centralized here so every send path applies the override identically. 
*/ export function withServiceTierOverride( providerOptions: MuxProviderOptions, @@ -85,7 +87,12 @@ export function withServiceTierOverride( modelString: string ): MuxProviderOptions { if (!serviceTier || !supportsServiceTier(modelString)) { - return providerOptions; + // No override, or the model can't use service tiers: ensure no stale tier rides along. + if (providerOptions.openai?.serviceTier == null) { + return providerOptions; + } + const { serviceTier: _omit, ...openaiRest } = providerOptions.openai; + return { ...providerOptions, openai: openaiRest }; } return { ...providerOptions,