= (props) => {
>
+
+ {/* Service-tier (Fast/Slow) speed override. Renders its own root only for
+ models that support service tiers (OpenAI/GPT today); otherwise it returns
+ null and occupies no layout space (no stray flex gap). */}
+
{
persistedPreferences[getAgentIdKey(getProjectScopeId(TEST_PROJECT_PATH))] = "plan";
// Set model preference for the project scope (read by getSendOptionsFromStorage)
persistedPreferences[getModelKey(getProjectScopeId(TEST_PROJECT_PATH))] = "gpt-4";
+ // Chat-specific service-tier override chosen during creation must follow the workspace.
+ persistedPreferences[getServiceTierKey(getProjectScopeId(TEST_PROJECT_PATH))] = "priority";
draftSettingsState = createDraftSettingsHarness({
selectedRuntime: { mode: "ssh", host: "example.com" },
@@ -769,6 +772,11 @@ describe("useCreationWorkspace", () => {
// Thinking is workspace-scoped, but this test doesn't set a project-scoped thinking preference.
expect(updatePersistedStateCalls).toContainEqual([pendingInputKey, ""]);
expect(updatePersistedStateCalls).toContainEqual([pendingImagesKey, undefined]);
+ // The creation-time service-tier override is carried into the new workspace scope.
+ expect(updatePersistedStateCalls).toContainEqual([
+ getServiceTierKey(TEST_WORKSPACE_ID),
+ "priority",
+ ]);
});
test("handleSend creates workspace and applies initial goal command without sending chat text", async () => {
diff --git a/src/browser/features/ChatInput/useCreationWorkspace.ts b/src/browser/features/ChatInput/useCreationWorkspace.ts
index 3c0c5e44ca..6345cf4537 100644
--- a/src/browser/features/ChatInput/useCreationWorkspace.ts
+++ b/src/browser/features/ChatInput/useCreationWorkspace.ts
@@ -10,6 +10,7 @@ import type {
import type { RuntimeChoice } from "@/browser/utils/runtimeUi";
import { buildRuntimeConfig, RUNTIME_MODE } from "@/common/types/runtime";
import type { ThinkingLevel } from "@/common/types/thinking";
+import type { ServiceTier } from "@/common/config/schemas/providersConfig";
import { useDraftWorkspaceSettings } from "@/browser/hooks/useDraftWorkspaceSettings";
import { setWorkspaceModelWithOrigin } from "@/browser/utils/modelChange";
import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState";
@@ -22,6 +23,7 @@ import {
getNotifyOnResponseAutoEnableKey,
getNotifyOnResponseKey,
getThinkingLevelKey,
+ getServiceTierKey,
getWorkspaceAISettingsByAgentKey,
getPendingScopeId,
getDraftScopeId,
@@ -113,6 +115,16 @@ function syncCreationPreferences(projectPath: string, workspaceId: string): void
updatePersistedState(getThinkingLevelKey(workspaceId), projectThinkingLevel);
}
+ // Carry the chat-specific service-tier (Fast/Slow) override chosen during creation
+ // into the new workspace so the first and subsequent messages stay consistent.
+ const projectServiceTier = readPersistedState(
+ getServiceTierKey(projectScopeId),
+ null
+ );
+ if (projectServiceTier !== null) {
+ updatePersistedState(getServiceTierKey(workspaceId), projectServiceTier);
+ }
+
if (projectModel) {
const effectiveThinking: ThinkingLevel = projectThinkingLevel ?? "off";
diff --git a/src/browser/hooks/useSendMessageOptions.ts b/src/browser/hooks/useSendMessageOptions.ts
index 53664d1061..3a52103bfc 100644
--- a/src/browser/hooks/useSendMessageOptions.ts
+++ b/src/browser/hooks/useSendMessageOptions.ts
@@ -9,6 +9,8 @@ import {
import { DEFAULT_MODEL_KEY, getModelKey } from "@/common/constants/storage";
import type { SendMessageOptions } from "@/common/orpc/types";
import { useProviderOptions } from "./useProviderOptions";
+import { useServiceTier } from "./useServiceTier";
+import { withServiceTierOverride } from "@/common/utils/ai/serviceTier";
import { useExperimentOverrideValue } from "./useExperiments";
import { EXPERIMENT_IDS } from "@/common/constants/experiments";
import { useWorkspaceContext } from "@/browser/contexts/WorkspaceContext";
@@ -74,11 +76,21 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi
metadataSettings.model ?? defaultModel
);
+ // Per-chat service-tier override (Fast/Slow). It rides along in providerOptions so the
+ // backend applies it per request without persisting to workspace metadata. Only attached
+ // for models that honor service tiers (OpenAI today).
+ const [serviceTierOverride] = useServiceTier(workspaceId);
+ const effectiveProviderOptions = withServiceTierOverride(
+ providerOptions,
+ serviceTierOverride,
+ baseModel
+ );
+
const options = buildSendMessageOptions({
agentId,
thinkingLevel,
model: baseModel,
- providerOptions,
+ providerOptions: effectiveProviderOptions,
experiments: {
programmaticToolCalling,
programmaticToolCallingExclusive,
diff --git a/src/browser/hooks/useServiceTier.ts b/src/browser/hooks/useServiceTier.ts
new file mode 100644
index 0000000000..1757bb2598
--- /dev/null
+++ b/src/browser/hooks/useServiceTier.ts
@@ -0,0 +1,27 @@
+import { type ServiceTier } from "@/common/config/schemas/providersConfig";
+import { getServiceTierKey } from "@/common/constants/storage";
+import { usePersistedState } from "./usePersistedState";
+
+/**
+ * Chat-specific (per workspace/project scope) service-tier override.
+ *
+ * `null` means "no override" — the provider/global default applies. Backed by
+ * localStorage (keyed by scope) with cross-component sync so the chat-input bolt
+ * and the send path stay in agreement without prop drilling.
+ *
+ * Unlike thinking level, this is intentionally NOT persisted to backend metadata: the
+ * tier rides along with each send via `providerOptions.openai.serviceTier`, so localStorage
+ * is the single source of truth (mirroring other provider toggles like Anthropic 1M context).
+ *
+ * @param scopeId workspaceId (workspace view) or a project scope id (creation view)
+ * @returns `[serviceTier, setServiceTier]`; the explicit type argument keeps the
+ *   value typed as `ServiceTier | null` instead of inferring `null` from the default
+ */
+export function useServiceTier(scopeId: string) {
+  const [serviceTier, setServiceTier] = usePersistedState<ServiceTier | null>(
+    getServiceTierKey(scopeId),
+    null,
+    { listener: true }
+  );
+  return [serviceTier, setServiceTier] as const;
+}
diff --git a/src/browser/styles/globals.css b/src/browser/styles/globals.css
index 448b3ce33f..b6bae911a7 100644
--- a/src/browser/styles/globals.css
+++ b/src/browser/styles/globals.css
@@ -88,6 +88,12 @@
--color-thinking-mode-light: hsl(271 76% 65%);
--color-thinking-border: hsl(271 76% 53%);
+ /* Service tier (Fast/Slow speed override).
+ Fast = energetic orange (low latency); Slow = calm blue (lower cost).
+ Defined once at the theme root and inherited by all themes. */
+ --color-service-tier-fast: hsl(28 96% 54%);
+ --color-service-tier-slow: hsl(206 90% 56%);
+
/* Runtime icon colors (matches Tailwind blue-500/purple-500) */
--color-runtime-ssh: #3b82f6;
--color-runtime-ssh-text: #60a5fa; /* blue-400 */
diff --git a/src/browser/utils/messages/sendOptions.ts b/src/browser/utils/messages/sendOptions.ts
index 4d6cbeac67..3847f5fb22 100644
--- a/src/browser/utils/messages/sendOptions.ts
+++ b/src/browser/utils/messages/sendOptions.ts
@@ -3,6 +3,7 @@ import {
getModelKey,
getThinkingLevelByModelKey,
getThinkingLevelKey,
+ getServiceTierKey,
getDisableWorkspaceAgentsKey,
} from "@/common/constants/storage";
import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState";
@@ -14,6 +15,8 @@ import {
import type { SendMessageOptions } from "@/common/orpc/types";
import type { ThinkingLevel } from "@/common/types/thinking";
import type { MuxProviderOptions } from "@/common/types/providerOptions";
+import type { ServiceTier } from "@/common/config/schemas/providersConfig";
+import { withServiceTierOverride } from "@/common/utils/ai/serviceTier";
import { WORKSPACE_DEFAULTS } from "@/constants/workspaceDefaults";
import { isExperimentEnabled } from "@/browser/hooks/useExperiments";
import { EXPERIMENT_IDS } from "@/common/constants/experiments";
@@ -66,6 +69,19 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio
const providerOptions = getProviderOptions();
+ // Mirror useSendMessageOptions: attach the per-chat service-tier override for models
+ // that honor it (OpenAI today) so non-React send paths (resume, idle-compaction, plan
+ // execution) stay consistent with interactive sends.
+ const serviceTierOverride = readPersistedState(
+ getServiceTierKey(workspaceId),
+ null
+ );
+ const effectiveProviderOptions = withServiceTierOverride(
+ providerOptions,
+ serviceTierOverride,
+ baseModel
+ );
+
const disableWorkspaceAgents = readPersistedState(
getDisableWorkspaceAgentsKey(workspaceId),
false
@@ -75,7 +91,7 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio
model: baseModel,
agentId,
thinkingLevel,
- providerOptions,
+ providerOptions: effectiveProviderOptions,
disableWorkspaceAgents,
experiments: {
programmaticToolCalling: isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING),
diff --git a/src/common/constants/storage.ts b/src/common/constants/storage.ts
index 793180b9a5..8f49500de7 100644
--- a/src/common/constants/storage.ts
+++ b/src/common/constants/storage.ts
@@ -178,6 +178,15 @@ export function getThinkingLevelKey(scopeId: string): string {
return `thinkingLevel:${scopeId}`;
}
+/**
+ * Get the localStorage key for the chat-specific service-tier override of one scope
+ * (workspace or project). A stored `null` means no override (provider/global default).
+ * Format: "serviceTier:{scopeId}"
+ */
+export function getServiceTierKey(scopeId: string): string {
+ return `serviceTier:${scopeId}`;
+}
+
/**
* Get the localStorage key for per-agent workspace AI overrides cache.
* Format: "workspaceAiSettingsByAgent:{workspaceId}"
@@ -746,6 +755,7 @@ const PERSISTENT_WORKSPACE_KEY_FUNCTIONS: Array<(workspaceId: string) => string>
getAgentIdKey,
getPinnedAgentIdKey,
getThinkingLevelKey,
+ getServiceTierKey,
getReviewStateKey,
getHunkFirstSeenKey,
getReviewExpandStateKey,
diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts
new file mode 100644
index 0000000000..7ad5bfdafd
--- /dev/null
+++ b/src/common/utils/ai/serviceTier.test.ts
@@ -0,0 +1,107 @@
+import { describe, it, expect } from "bun:test";
+import type { MuxProviderOptions } from "@/common/types/providerOptions";
+import {
+ getServiceTierSpeed,
+ getServiceTierSpeedLabel,
+ SERVICE_TIER_FAST,
+ SERVICE_TIER_SLOW,
+ supportsServiceTier,
+ withServiceTierOverride,
+} from "./serviceTier";
+
+const OPENAI_MODEL = "openai:gpt-5.5";
+const ANTHROPIC_MODEL = "anthropic:claude-haiku-4-5";
+
+describe("serviceTier helpers", () => {
+ describe("getServiceTierSpeed", () => {
+ it("collapses concrete tiers into UI speed buckets", () => {
+ expect(getServiceTierSpeed("priority")).toBe("fast");
+ expect(getServiceTierSpeed("flex")).toBe("slow");
+ });
+
+ it("treats auto/default/absent as the neutral default", () => {
+ expect(getServiceTierSpeed("auto")).toBe("default");
+ expect(getServiceTierSpeed("default")).toBe("default");
+ expect(getServiceTierSpeed(null)).toBe("default");
+ expect(getServiceTierSpeed(undefined)).toBe("default");
+ });
+ });
+
+ describe("getServiceTierSpeedLabel", () => {
+ it("renders provider-agnostic labels", () => {
+ expect(getServiceTierSpeedLabel("fast")).toBe("Fast");
+ expect(getServiceTierSpeedLabel("slow")).toBe("Slow");
+ expect(getServiceTierSpeedLabel("default")).toBe("Auto");
+ });
+ });
+
+ describe("supportsServiceTier", () => {
+ it("is supported for direct OpenAI models", () => {
+ expect(supportsServiceTier(OPENAI_MODEL)).toBe(true);
+ expect(supportsServiceTier(ANTHROPIC_MODEL)).toBe(false);
+ expect(supportsServiceTier("google:gemini-3.1-pro-preview")).toBe(false);
+ });
+
+ it("is NOT supported for non-passthrough gateway-routed OpenAI models", () => {
+ // openrouter:openai/gpt-5 canonicalizes to "openai", but openrouter is a
+ // non-passthrough gateway, so the backend drops serviceTier — a silent no-op.
+ expect(supportsServiceTier("openrouter:openai/gpt-5")).toBe(false);
+ // github-copilot is another non-passthrough gateway (canonical github-copilot).
+ expect(supportsServiceTier("github-copilot:gpt-5.5")).toBe(false);
+ });
+
+ it("is supported for passthrough gateway-routed OpenAI models", () => {
+ // mux-gateway is a passthrough gateway: it forwards openai provider options.
+ expect(supportsServiceTier("mux-gateway:openai/gpt-4o")).toBe(true);
+ });
+ });
+
+ describe("withServiceTierOverride", () => {
+ it("attaches the tier under openai for supported models", () => {
+ const result = withServiceTierOverride({}, SERVICE_TIER_FAST, OPENAI_MODEL);
+ expect(result.openai?.serviceTier).toBe("priority");
+ });
+
+ it("preserves other openai provider options", () => {
+ const result = withServiceTierOverride(
+ { openai: { wireFormat: "responses" } },
+ SERVICE_TIER_SLOW,
+ OPENAI_MODEL
+ );
+ expect(result.openai?.serviceTier).toBe("flex");
+ expect(result.openai?.wireFormat).toBe("responses");
+ });
+
+ it("returns options unchanged when there is no override", () => {
+ const input = { anthropic: { use1MContext: true } };
+ expect(withServiceTierOverride(input, null, OPENAI_MODEL)).toBe(input);
+ expect(withServiceTierOverride(input, undefined, OPENAI_MODEL)).toBe(input);
+ });
+
+ it("never attaches a tier for unsupported models", () => {
+ const input = {};
+ const result = withServiceTierOverride(input, SERVICE_TIER_FAST, ANTHROPIC_MODEL);
+ expect(result).toBe(input);
+ expect(result.openai).toBeUndefined();
+ });
+
+ it("strips a stale tier when re-merged against an unsupported model", () => {
+ // Simulates a `/` one-shot (per-request model switch) going from an OpenAI saved model
+ // (tier baked in) to a non-OpenAI model: the tier must not reach the Anthropic request.
+ const input: MuxProviderOptions = {
+ openai: { wireFormat: "responses", serviceTier: "priority" },
+ };
+ const result = withServiceTierOverride(input, SERVICE_TIER_FAST, ANTHROPIC_MODEL);
+ expect(result.openai?.serviceTier).toBeUndefined();
+ expect(result.openai?.wireFormat).toBe("responses");
+ // The strip happens on a copy: the caller's input still carries its tier.
+ expect(input.openai?.serviceTier).toBe("priority");
+ });
+
+ it("does not mutate the input options", () => {
+ const input: MuxProviderOptions = { openai: { wireFormat: "responses" } };
+ withServiceTierOverride(input, SERVICE_TIER_FAST, OPENAI_MODEL);
+ expect(input.openai?.serviceTier).toBeUndefined();
+ });
+ });
+});
diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts
new file mode 100644
index 0000000000..f98cee1c9d
--- /dev/null
+++ b/src/common/utils/ai/serviceTier.ts
@@ -0,0 +1,101 @@
+/**
+ * Service-tier helpers shared across the send path and UI.
+ *
+ * A "service tier" tells the provider how to schedule a request. OpenAI exposes
+ * this as `service_tier` (e.g. `priority` for low latency, `flex` for cheaper but
+ * slower). To keep the product generic for future providers, we surface it in the
+ * UI as **Fast** / **Slow** rather than the provider-specific wire values.
+ *
+ * Mapping (the only place this translation should live):
+ * - Fast → `priority` (low latency, higher cost)
+ * - Slow → `flex` (lower cost, higher latency)
+ * - Auto → no override (falls back to the provider/global default)
+ */
+
+import { type ServiceTier } from "@/common/config/schemas/providersConfig";
+import type { MuxProviderOptions } from "@/common/types/providerOptions";
+import { PROVIDER_DEFINITIONS } from "@/common/constants/providers";
+import { getExplicitGatewayPrefix, getModelProvider } from "./models";
+
+/** Wire value for the user-facing "Fast" speed (`priority`: low latency, higher cost). */
+export const SERVICE_TIER_FAST: ServiceTier = "priority";
+/** Wire value for the user-facing "Slow" speed (`flex`: lower cost, higher latency). */
+export const SERVICE_TIER_SLOW: ServiceTier = "flex";
+
+/** Provider-agnostic speed buckets for UI state and styling; "default" is the neutral one. */
+export type ServiceTierSpeed = "fast" | "slow" | "default";
+
+/** Map a concrete service tier (or the lack of one) onto the speed bucket the UI renders. */
+export function getServiceTierSpeed(tier: ServiceTier | null | undefined): ServiceTierSpeed {
+  // Only the two explicit wire values get their own bucket; "auto" / "default" /
+  // null / undefined all share the neutral (grey) "default" bucket.
+  if (tier === SERVICE_TIER_SLOW) return "slow";
+  return tier === SERVICE_TIER_FAST ? "fast" : "default";
+}
+
+/** Display text shown to the user for a speed bucket. */
+export function getServiceTierSpeedLabel(speed: ServiceTierSpeed): string {
+  // A record keyed by the union keeps the lookup exhaustive: adding a bucket to
+  // `ServiceTierSpeed` without a label here becomes a compile error.
+  const labels: Record<ServiceTierSpeed, string> = {
+    fast: "Fast",
+    slow: "Slow",
+    default: "Auto",
+  };
+  return labels[speed];
+}
+
+/**
+ * Decide whether a chat-level service-tier override has any effect for a model.
+ *
+ * Two conditions must hold. First, the model must resolve to the `openai` provider —
+ * the only one with `service_tier` support today. Second, the request must actually
+ * carry `providerOptions.openai.serviceTier` to the provider: the backend forwards it
+ * for direct OpenAI routing and for *passthrough* gateways, while non-passthrough
+ * gateways (e.g. openrouter, github-copilot) drop it. `openrouter:openai/gpt-5`
+ * therefore resolves to `openai` yet is reported as unsupported.
+ *
+ * The UI affordance and the send path both rely on this one predicate, so the UI
+ * never offers an override that would silently do nothing.
+ */
+export function supportsServiceTier(modelString: string): boolean {
+  const isOpenAiModel = getModelProvider(modelString) === "openai";
+  if (!isOpenAiModel) return false;
+
+  // Direct (non-gateway) OpenAI routing always qualifies.
+  const gateway = getExplicitGatewayPrefix(modelString);
+  if (!gateway) return true;
+
+  // Gateway-routed: only a gateway declared `passthrough` forwards the option.
+  const definition = PROVIDER_DEFINITIONS[gateway];
+  return definition != null && "passthrough" in definition && definition.passthrough === true;
+}
+
+/**
+ * Produce provider options whose OpenAI service tier matches the *effective* model.
+ *
+ * The result is authoritative in both directions. With an override and a model that
+ * supports it, the tier is written to `openai.serviceTier`. In every other case any
+ * tier already present is removed, so a tier attached for a saved model cannot leak
+ * into a one-shot request that targets a model unable to honor it; calling this again
+ * with a supporting model re-applies the tier. The input object is never mutated, and
+ * it is returned as-is when there is nothing to add or remove.
+ */
+export function withServiceTierOverride(
+  providerOptions: MuxProviderOptions,
+  serviceTier: ServiceTier | null | undefined,
+  modelString: string
+): MuxProviderOptions {
+  const currentOpenAI = providerOptions.openai;
+  if (serviceTier && supportsServiceTier(modelString)) {
+    // Shallow copies keep the caller's objects untouched.
+    return { ...providerOptions, openai: { ...currentOpenAI, serviceTier } };
+  }
+
+  // No usable override: only rebuild the options if a stale tier has to be dropped.
+  if (currentOpenAI?.serviceTier == null) {
+    return providerOptions;
+  }
+  const { serviceTier: _staleTier, ...openaiWithoutTier } = currentOpenAI;
+  return { ...providerOptions, openai: openaiWithoutTier };
+}