From 23d1497908e469cdf0faa674c6e70204bd992062 Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 5 Jun 2026 16:36:28 -0500
Subject: [PATCH 1/5] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20Fast/Slow=20se?=
 =?UTF-8?q?rvice-tier=20control=20and=20/fast=20/slow=20one-shots?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a lightning-bolt service-tier control to the chat input (right of the
thinking slider) for models that support service_tier (OpenAI/GPT today).
The bolt glows orange for Fast (priority), turns blue for Slow (flex), and
is neutral grey for Auto/default. Clicking opens a per-chat override menu.

Also add /fast and /slow one-shot slash commands that reuse the existing
model-oneshot send path, with TipsCarousel discovery. Uses Fast/Slow wording
to stay provider-agnostic for future models.
---
 .../ServiceTierPicker.test.tsx                | 114 +++++++++
 .../ServiceTierPicker/ServiceTierPicker.tsx   | 225 ++++++++++++++++++
 src/browser/features/ChatInput/index.tsx      |  43 +++-
 .../features/ChatInput/placeholderTips.ts     |   2 +
 src/browser/hooks/useSendMessageOptions.ts    |  14 +-
 src/browser/hooks/useServiceTier.ts           |  27 +++
 src/browser/styles/globals.css                |   6 +
 src/browser/utils/messages/sendOptions.ts     |  18 +-
 .../utils/slashCommands/parser.test.ts        |  26 ++
 src/browser/utils/slashCommands/parser.ts     |  23 +-
 .../utils/slashCommands/suggestions.test.ts   |  14 ++
 .../utils/slashCommands/suggestions.ts        |  21 ++
 src/browser/utils/slashCommands/types.ts      |   3 +
 src/common/constants/storage.ts               |  10 +
 src/common/utils/ai/serviceTier.test.ts       | 100 ++++++++
 src/common/utils/ai/serviceTier.ts            |  93 ++++++++
 16 files changed, 728 insertions(+), 11 deletions(-)
 create mode 100644 src/browser/components/ServiceTierPicker/ServiceTierPicker.test.tsx
 create mode 100644 src/browser/components/ServiceTierPicker/ServiceTierPicker.tsx
 create mode 100644 src/browser/hooks/useServiceTier.ts
 create mode 100644 src/common/utils/ai/serviceTier.test.ts
 create mode 100644 src/common/utils/ai/serviceTier.ts
diff --git a/src/browser/components/ServiceTierPicker/ServiceTierPicker.test.tsx b/src/browser/components/ServiceTierPicker/ServiceTierPicker.test.tsx
new file mode 100644
index 0000000000..2b4d9f7daa
--- /dev/null
+++ b/src/browser/components/ServiceTierPicker/ServiceTierPicker.test.tsx
@@ -0,0 +1,114 @@
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+import { cleanup, fireEvent, render, waitFor } from "@testing-library/react";
+import { installDom } from "../../../../tests/ui/dom";
+
+import { TooltipProvider } from "@/browser/components/Tooltip/Tooltip";
+import { getServiceTierKey } from "@/common/constants/storage";
+import { ServiceTierPicker } from "./ServiceTierPicker";
+
+const OPENAI_MODEL = "openai:gpt-5.5";
+const ANTHROPIC_MODEL = "anthropic:claude-haiku-4-5";
+const SCOPE = "ws-service-tier-test";
+
+let cleanupDom: (() => void) | null = null;
+
+/**
+ * Renders the picker for `modelString` under the shared test scope, inside a TooltipProvider.
+ */
+function renderPicker(modelString: string) {
+  const picker = <ServiceTierPicker modelString={modelString} scopeId={SCOPE} />;
+  return render(<TooltipProvider>{picker}</TooltipProvider>);
+}
+
+describe("ServiceTierPicker", () => {
+  beforeEach(() => {
+    cleanupDom = installDom();
+    globalThis.window.localStorage.clear();
+  });
+
+  afterEach(() => {
+    cleanup();
+    cleanupDom?.();
+    cleanupDom = null;
+  });
+
+  test("renders nothing for models without service-tier support", () => {
+    const { queryByTestId } = renderPicker(ANTHROPIC_MODEL);
+    expect(queryByTestId("service-tier-trigger")).toBeNull();
+  });
+
+  test("shows the neutral (default) state for a supported model with no override", () => {
+    const { getByTestId } = renderPicker(OPENAI_MODEL);
+    const trigger = getByTestId("service-tier-trigger");
+    expect(trigger.getAttribute("data-service-tier")).toBe("default");
+  });
+
+  test("opens a menu and applies the Fast override", async () => {
+    const { getByTestId, queryByTestId, getAllByTestId } = renderPicker(OPENAI_MODEL);
+
+    // Options are only rendered while the menu is open, so none exist yet.
+    expect(queryByTestId("service-tier-option")).toBeNull();
+
+    fireEvent.click(getByTestId("service-tier-trigger"));
+
+    await waitFor(() => {
+      expect(getAllByTestId("service-tier-option").length).toBe(3);
+    });
+
+    const fast = getAllByTestId("service-tier-option").find(
+      (el) => el.getAttribute("data-speed") === "fast"
+    );
+    expect(fast).toBeTruthy();
+    fireEvent.click(fast!);
+
+    await waitFor(() => {
+      expect(getByTestId("service-tier-trigger").getAttribute("data-service-tier")).toBe("fast");
+    });
+
+    // Persisted under the scoped key as the tier value ("priority"), not the "fast" UI speed.
+    expect(globalThis.window.localStorage.getItem(getServiceTierKey(SCOPE))).toBe(
+      JSON.stringify("priority")
+    );
+    // Menu closes after selection.
+    expect(queryByTestId("service-tier-option")).toBeNull();
+  });
+
+  test("applies the Slow override", async () => {
+    const { getByTestId, getAllByTestId } = renderPicker(OPENAI_MODEL);
+    fireEvent.click(getByTestId("service-tier-trigger"));
+    await waitFor(() => expect(getAllByTestId("service-tier-option").length).toBe(3));
+    const slow = getAllByTestId("service-tier-option").find(
+      (el) => el.getAttribute("data-speed") === "slow"
+    );
+    expect(slow).toBeTruthy();
+    fireEvent.click(slow!);
+
+    await waitFor(() => {
+      expect(getByTestId("service-tier-trigger").getAttribute("data-service-tier")).toBe("slow");
+    });
+    expect(globalThis.window.localStorage.getItem(getServiceTierKey(SCOPE))).toBe(
+      JSON.stringify("flex")
+    );
+  });
+
+  test("selecting Auto clears an existing override", async () => {
+    // Seed an existing Fast override ("priority") before the first render.
+    globalThis.window.localStorage.setItem(getServiceTierKey(SCOPE), JSON.stringify("priority"));
+    const { getByTestId, getAllByTestId } = renderPicker(OPENAI_MODEL);
+    expect(getByTestId("service-tier-trigger").getAttribute("data-service-tier")).toBe("fast");
+
+    fireEvent.click(getByTestId("service-tier-trigger"));
+    await waitFor(() => expect(getAllByTestId("service-tier-option").length).toBe(3));
+    const auto = getAllByTestId("service-tier-option").find(
+      (el) => el.getAttribute("data-speed") === "default"
+    );
+    expect(auto).toBeTruthy();
+    fireEvent.click(auto!);
+
+    await waitFor(() => {
+      expect(getByTestId("service-tier-trigger").getAttribute("data-service-tier")).toBe("default");
+    });
+    // Auto clears the override entirely (key removed), so the provider/global default applies.
+    expect(globalThis.window.localStorage.getItem(getServiceTierKey(SCOPE))).toBeNull();
+  });
+});
diff --git a/src/browser/components/ServiceTierPicker/ServiceTierPicker.tsx b/src/browser/components/ServiceTierPicker/ServiceTierPicker.tsx
new file mode 100644
index 0000000000..2f256e09b1
--- /dev/null
+++ b/src/browser/components/ServiceTierPicker/ServiceTierPicker.tsx
@@ -0,0 +1,225 @@
+import React, { useCallback, useEffect, useRef, useState } from "react";
+import { Check, Zap } from "lucide-react";
+
+import { cn } from "@/common/lib/utils";
+import { type ServiceTier } from "@/common/config/schemas/providersConfig";
+import {
+  getServiceTierSpeed,
+  SERVICE_TIER_FAST,
+  SERVICE_TIER_SLOW,
+  supportsServiceTier,
+  type ServiceTierSpeed,
+} from "@/common/utils/ai/serviceTier";
+import { useServiceTier } from "@/browser/hooks/useServiceTier";
+import { Tooltip, TooltipContent, TooltipTrigger } from "../Tooltip/Tooltip";
+import { stopKeyboardPropagation } from "@/browser/utils/events";
+
+interface ServiceTierPickerProps {
+  /** Canonical model string; the picker renders nothing unless it supports service tiers. */
+  modelString: string;
+  /** Workspace id (workspace view) or project scope id (creation view) keying the override. */
+  scopeId: string;
+  className?: string;
+}
+
+interface ServiceTierOption {
+  speed: ServiceTierSpeed;
+  /** Tier stored on selection; null clears the override (provider/global default applies). */
+  tier: ServiceTier | null;
+  label: string;
+  description: string;
+}
+
+// "Fast"/"Slow"/"Auto" wording keeps the control provider-agnostic even though only
+// OpenAI honors service_tier today. Array order is the menu's top-to-bottom order.
+const OPTIONS: readonly ServiceTierOption[] = [
+  { speed: "default", tier: null, label: "Auto", description: "Provider default speed" },
+  {
+    speed: "fast",
+    tier: SERVICE_TIER_FAST,
+    label: "Fast",
+    description: "Prioritize low latency (higher cost)",
+  },
+  {
+    speed: "slow",
+    tier: SERVICE_TIER_SLOW,
+    label: "Slow",
+    description: "Prioritize lower cost (higher latency)",
+  },
+];
+
+/** Theme color variable for an active speed; undefined leaves the neutral (grey) styling. */
+function getSpeedColorVar(speed: ServiceTierSpeed): string | undefined {
+  const isFast = speed === "fast";
+  if (!isFast && speed !== "slow") return undefined;
+  return isFast ? "var(--color-service-tier-fast)" : "var(--color-service-tier-slow)";
+}
+
+/**
+ * Lightning-bolt control for the chat-specific service-tier (speed) override.
+ *
+ * - Fast → bolt glows orange, Slow → bolt turns blue, Auto/default → neutral grey.
+ * - Clicking opens a small keyboard-navigable menu that sets the per-chat override.
+ * - Arrows move the highlight; Enter/Space select and Escape closes, refocusing the bolt.
+ *
+ * Rendered only for models that support service tiers (OpenAI/GPT today). Uses
+ * conditional rendering (not a Radix portal) so it stays testable under happy-dom.
+ */
+export const ServiceTierPicker: React.FC<ServiceTierPickerProps> = (props) => {
+  const [serviceTier, setServiceTier] = useServiceTier(props.scopeId);
+  const [isOpen, setIsOpen] = useState(false);
+  const [highlightedIndex, setHighlightedIndex] = useState(-1);
+
+  const containerRef = useRef<HTMLDivElement>(null);
+  const triggerRef = useRef<HTMLButtonElement>(null);
+  const dropdownRef = useRef<HTMLDivElement>(null);
+
+  const currentSpeed = getServiceTierSpeed(serviceTier);
+
+  const closePicker = useCallback(() => {
+    setIsOpen(false);
+    setHighlightedIndex(-1);
+  }, []);
+
+  const openPicker = useCallback(() => {
+    setIsOpen(true);
+    const currentIndex = OPTIONS.findIndex((opt) => opt.speed === currentSpeed);
+    setHighlightedIndex(currentIndex >= 0 ? currentIndex : 0);
+    requestAnimationFrame(() => dropdownRef.current?.focus());
+  }, [currentSpeed]);
+
+  const handleSelect = useCallback(
+    (option: ServiceTierOption) => {
+      setServiceTier(option.tier);
+      closePicker();
+    },
+    [closePicker, setServiceTier]
+  );
+
+  // Close when clicking outside the control.
+  useEffect(() => {
+    if (!isOpen) return;
+    const handleClickOutside = (e: MouseEvent) => {
+      if (containerRef.current?.contains(e.target as Node)) return;
+      closePicker();
+    };
+    document.addEventListener("mousedown", handleClickOutside);
+    return () => document.removeEventListener("mousedown", handleClickOutside);
+  }, [closePicker, isOpen]);
+
+  const handleDropdownKeyDown = (e: React.KeyboardEvent<HTMLDivElement>) => {
+    if (e.key === "Escape") {
+      e.preventDefault();
+      stopKeyboardPropagation(e);
+      closePicker();
+      // The focused menu unmounts on close; hand focus back to the bolt.
+      triggerRef.current?.focus();
+      return;
+    }
+    // Space activates menu items too, matching the ARIA menu keyboard convention.
+    if (e.key === "Enter" || e.key === " ") {
+      e.preventDefault();
+      const option = OPTIONS[highlightedIndex >= 0 ? highlightedIndex : 0];
+      if (option) {
+        handleSelect(option);
+        triggerRef.current?.focus();
+      }
+      return;
+    }
+    if (e.key === "ArrowDown") {
+      e.preventDefault();
+      setHighlightedIndex((prev) => Math.min(prev + 1, OPTIONS.length - 1));
+      return;
+    }
+    if (e.key === "ArrowUp") {
+      e.preventDefault();
+      setHighlightedIndex((prev) => Math.max(prev - 1, 0));
+    }
+  };
+
+  // Only models that honor service tiers expose this affordance.
+  if (!supportsServiceTier(props.modelString)) return null;
+
+  const activeColor = getSpeedColorVar(currentSpeed);
+  const activeLabel = OPTIONS.find((opt) => opt.speed === currentSpeed)?.label ?? "Auto";
+
+  return (
+    <div ref={containerRef} className={cn("relative flex items-center", props.className)}>
+      <Tooltip>
+        <TooltipTrigger asChild>
+          <button
+            ref={triggerRef}
+            type="button"
+            onClick={() => (isOpen ? closePicker() : openPicker())}
+            data-testid="service-tier-trigger"
+            data-service-tier={currentSpeed}
+            aria-haspopup="menu"
+            aria-expanded={isOpen}
+            aria-label={`Service tier: ${activeLabel}. Click to change.`}
+            className={cn(
+              "flex h-4 w-4 items-center justify-center rounded-sm transition-colors",
+              activeColor ? "" : "text-muted hover:text-foreground hover:bg-hover"
+            )}
+            style={
+              activeColor
+                ? {
+                    color: activeColor,
+                    // Orange "glow" for Fast; a softer halo for Slow.
+                    filter: `drop-shadow(0 0 ${currentSpeed === "fast" ? "5px" : "3px"} ${activeColor})`,
+                  }
+                : undefined
+            }
+          >
+            <Zap className="h-3 w-3" />
+          </button>
+        </TooltipTrigger>
+        <TooltipContent align="center">
+          Service tier: <span className="font-medium">{activeLabel}</span>. Sets request speed for
+          this chat. Saved per workspace.
+        </TooltipContent>
+      </Tooltip>
+
+      {isOpen && (
+        <div
+          ref={dropdownRef}
+          tabIndex={-1}
+          role="menu"
+          onKeyDown={handleDropdownKeyDown}
+          className="bg-separator border-border-light absolute bottom-full left-0 z-[1020] mb-1 min-w-48 overflow-hidden rounded border shadow-[0_4px_12px_rgba(0,0,0,0.3)] outline-none"
+        >
+          <div className="py-1">
+            {OPTIONS.map((option, index) => {
+              const isHighlighted = index === highlightedIndex;
+              const isSelected = option.speed === currentSpeed;
+              const color = getSpeedColorVar(option.speed);
+              return (
+                <div
+                  key={option.speed}
+                  role="menuitemradio"
+                  aria-checked={isSelected}
+                  tabIndex={-1}
+                  data-testid="service-tier-option"
+                  data-speed={option.speed}
+                  className={cn(
+                    "flex cursor-pointer items-center gap-2.5 px-2.5 py-1.5 transition-colors duration-100",
+                    isHighlighted ? "bg-hover text-foreground" : "bg-transparent hover:bg-hover",
+                    isSelected ? "text-foreground" : "text-light hover:text-foreground"
+                  )}
+                  onMouseEnter={() => setHighlightedIndex(index)}
+                  onClick={() => handleSelect(option)}
+                >
+                  <Zap className="h-3.5 w-3.5 shrink-0" style={color ? { color } : undefined} />
+                  <div className="min-w-0 flex-1">
+                    <div className="text-[11px] font-medium">{option.label}</div>
+                    <div className="text-muted-light text-[10px]">{option.description}</div>
+                  </div>
+                  {isSelected && <Check className="text-accent h-3.5 w-3.5 shrink-0" />}
+                </div>
+              );
+            })}
+          </div>
+        </div>
+      )}
+    </div>
+  );
+};
diff --git a/src/browser/features/ChatInput/index.tsx b/src/browser/features/ChatInput/index.tsx
index e0213d2075..47741bfc8a 100644
--- a/src/browser/features/ChatInput/index.tsx
+++ b/src/browser/features/ChatInput/index.tsx
@@ -31,6 +31,7 @@ import { useWorkspaceContext } from "@/browser/contexts/WorkspaceContext";
 import { useProjectContext } from "@/browser/contexts/ProjectContext";
 import { useAgent } from "@/browser/contexts/AgentContext";
 import { ThinkingSliderComponent } from "@/browser/components/ThinkingSlider/ThinkingSlider";
+import { ServiceTierPicker } from "@/browser/components/ServiceTierPicker/ServiceTierPicker";
 import {
   getAllowedRuntimeModesForUi,
   isParsedRuntimeAllowedByPolicy,
@@ -39,7 +40,8 @@ import { usePolicy } from "@/browser/contexts/PolicyContext";
 import { useAPI } from "@/browser/contexts/API";
 import { useThinkingLevel } from "@/browser/hooks/useThinkingLevel";
 import { useExperimentValue } from "@/browser/hooks/useExperiments";
-import { normalizeSelectedModel } from "@/common/utils/ai/models";
+import { normalizeSelectedModel, getModelName } from "@/common/utils/ai/models";
+import { supportsServiceTier, withServiceTierOverride } from "@/common/utils/ai/serviceTier";
 import {
   useAdditionalSystemContextHydrated,
   useAdditionalSystemContextSnapshot,
@@ -713,10 +715,11 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
   }, [variant, startTutorial]);
 
   // Get current send message options from shared hook (must be at component top level)
-  // For creation variant, use project-scoped key; for workspace, use workspace ID
-  const sendMessageOptions = useSendMessageOptions(
-    variant === "workspace" ? props.workspaceId : getProjectScopeId(creationParentProjectPath)
-  );
+  // For creation variant, use project-scoped key; for workspace, use workspace ID.
+  // Shared so the service-tier override and send options resolve the same scope.
+  const sendOptionsScopeId =
+    variant === "workspace" ? props.workspaceId : getProjectScopeId(creationParentProjectPath);
+  const sendMessageOptions = useSendMessageOptions(sendOptionsScopeId);
   const additionalSystemContext = useAdditionalSystemContextSnapshot(
     variant === "workspace" ? props.workspaceId : ""
   );
@@ -2483,6 +2486,18 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
 
       const modelOverride = modelOneShot?.modelString;
 
+      // /fast and /slow one-shot tier override only applies to models that honor service
+      // tiers (OpenAI today). Block with a clear message (preserving the composer) rather
+      // than silently dropping the tier when the active model can't use it.
+      const tierOverride = modelOneShot?.serviceTier;
+      if (tierOverride && !supportsServiceTier(modelOverride ?? baseModel)) {
+        pushToast({
+          type: "error",
+          message: `Fast/Slow isn't supported by ${getModelName(modelOverride ?? baseModel)}`,
+        });
+        return;
+      }
+
       // Regular message (or /<model-alias> one-shot override) - send directly via API
       const messageTextForSend = modelOneShot?.message ?? skillInvocation?.userText ?? messageText;
       const skillMuxMetadata = skillInvocation
@@ -2661,11 +2676,24 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
             : undefined;
         const goalInterventionPolicy = overrides?.goalInterventionPolicy;
 
+        // One-shot /fast or /slow rides in providerOptions for this message only,
+        // layering over any persisted per-chat service tier from useSendMessageOptions.
+        const oneshotProviderOptions = tierOverride
+          ? {
+              providerOptions: withServiceTierOverride(
+                sendMessageOptions.providerOptions ?? {},
+                tierOverride,
+                modelOverride ?? baseModel
+              ),
+            }
+          : {};
+
         const sendOptions = {
           ...sendMessageOptions,
           ...compactionOptions,
           ...(modelOverride ? { model: modelOverride } : {}),
           ...(thinkingOverride ? { thinkingLevel: thinkingOverride } : {}),
+          ...oneshotProviderOptions,
           ...(modelOneShot ? { skipAiSettingsPersistence: true } : {}),
           ...(goalInterventionPolicy ? { goalInterventionPolicy } : {}),
           ...(overrides?.queueDispatchMode
@@ -3212,6 +3240,11 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
                 >
                   <ThinkingSliderComponent modelString={baseModel} />
                 </div>
+
+                {/* Service-tier (Fast/Slow) speed override. Renders its own root only for
+                    models that support service tiers (OpenAI/GPT today); otherwise it returns
+                    null and occupies no layout space (no stray flex gap). */}
+                <ServiceTierPicker modelString={baseModel} scopeId={sendOptionsScopeId} />
               </div>
 
               <div
diff --git a/src/browser/features/ChatInput/placeholderTips.ts b/src/browser/features/ChatInput/placeholderTips.ts
index 83621780ca..a1b09e0a4f 100644
--- a/src/browser/features/ChatInput/placeholderTips.ts
+++ b/src/browser/features/ChatInput/placeholderTips.ts
@@ -51,6 +51,8 @@ export const PLACEHOLDER_TIPS: readonly string[] = [
   "Try /btw <question> to ask a side question without nudging the agent",
   "Try /haiku <msg> to send just this message on a different model",
   "Try /+high <msg> to crank up reasoning for this message only",
+  "Try /fast <msg> to send one message on a faster service tier",
+  "Try /slow <msg> to send one message on a cheaper service tier",
   "Try /compact to summarize the conversation when context gets tight",
   "Try /fork <start> to branch this chat into a new workspace",
   "Try /plan to view or edit the current plan inline",
diff --git a/src/browser/hooks/useSendMessageOptions.ts b/src/browser/hooks/useSendMessageOptions.ts
index 53664d1061..3a52103bfc 100644
--- a/src/browser/hooks/useSendMessageOptions.ts
+++ b/src/browser/hooks/useSendMessageOptions.ts
@@ -9,6 +9,8 @@ import {
 import { DEFAULT_MODEL_KEY, getModelKey } from "@/common/constants/storage";
 import type { SendMessageOptions } from "@/common/orpc/types";
 import { useProviderOptions } from "./useProviderOptions";
+import { useServiceTier } from "./useServiceTier";
+import { withServiceTierOverride } from "@/common/utils/ai/serviceTier";
 import { useExperimentOverrideValue } from "./useExperiments";
 import { EXPERIMENT_IDS } from "@/common/constants/experiments";
 import { useWorkspaceContext } from "@/browser/contexts/WorkspaceContext";
@@ -74,11 +76,21 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi
     metadataSettings.model ?? defaultModel
   );
 
+  // Per-chat service-tier override (Fast/Slow). It rides along in providerOptions so the
+  // backend applies it per request without persisting to workspace metadata. Only attached
+  // for models that honor service tiers (OpenAI today).
+  const [serviceTierOverride] = useServiceTier(workspaceId);
+  const effectiveProviderOptions = withServiceTierOverride(
+    providerOptions,
+    serviceTierOverride,
+    baseModel
+  );
+
   const options = buildSendMessageOptions({
     agentId,
     thinkingLevel,
     model: baseModel,
-    providerOptions,
+    providerOptions: effectiveProviderOptions,
     experiments: {
       programmaticToolCalling,
       programmaticToolCallingExclusive,
diff --git a/src/browser/hooks/useServiceTier.ts b/src/browser/hooks/useServiceTier.ts
new file mode 100644
index 0000000000..1757bb2598
--- /dev/null
+++ b/src/browser/hooks/useServiceTier.ts
@@ -0,0 +1,27 @@
+import { type ServiceTier } from "@/common/config/schemas/providersConfig";
+import { getServiceTierKey } from "@/common/constants/storage";
+import { usePersistedState } from "./usePersistedState";
+
+/**
+ * Per-scope (workspace or project) service-tier override for the chat input.
+ *
+ * Backed by localStorage under `getServiceTierKey(scopeId)` with cross-component
+ * sync, so the chat-input bolt and the send path stay in agreement without prop
+ * drilling. `null` means "no override" — the provider/global default applies.
+ *
+ * Unlike thinking level, this is intentionally NOT persisted to backend metadata:
+ * the tier rides along with each send via `providerOptions.openai.serviceTier`,
+ * so localStorage is the single source of truth (mirroring the other provider
+ * option toggles like Anthropic 1M context).
+ *
+ * @param scopeId workspaceId (workspace view) or a project scope id (creation view)
+ * @returns `[serviceTier, setServiceTier]` tuple
+ */
+export function useServiceTier(scopeId: string) {
+  const storageKey = getServiceTierKey(scopeId);
+  const [tier, setTier] = usePersistedState<ServiceTier | null>(storageKey, null, {
+    listener: true,
+  });
+
+  return [tier, setTier] as const;
+}
diff --git a/src/browser/styles/globals.css b/src/browser/styles/globals.css
index 448b3ce33f..b6bae911a7 100644
--- a/src/browser/styles/globals.css
+++ b/src/browser/styles/globals.css
@@ -88,6 +88,12 @@
     --color-thinking-mode-light: hsl(271 76% 65%);
     --color-thinking-border: hsl(271 76% 53%);
 
+    /* Service tier (Fast/Slow speed override).
+       Fast = energetic orange (low latency); Slow = calm blue (lower cost).
+       Defined once at the theme root and inherited by all themes. */
+    --color-service-tier-fast: hsl(28 96% 54%);
+    --color-service-tier-slow: hsl(206 90% 56%);
+
     /* Runtime icon colors (matches Tailwind blue-500/purple-500) */
     --color-runtime-ssh: #3b82f6;
     --color-runtime-ssh-text: #60a5fa; /* blue-400 */
diff --git a/src/browser/utils/messages/sendOptions.ts b/src/browser/utils/messages/sendOptions.ts
index 4d6cbeac67..3847f5fb22 100644
--- a/src/browser/utils/messages/sendOptions.ts
+++ b/src/browser/utils/messages/sendOptions.ts
@@ -3,6 +3,7 @@ import {
   getModelKey,
   getThinkingLevelByModelKey,
   getThinkingLevelKey,
+  getServiceTierKey,
   getDisableWorkspaceAgentsKey,
 } from "@/common/constants/storage";
 import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState";
@@ -14,6 +15,8 @@ import {
 import type { SendMessageOptions } from "@/common/orpc/types";
 import type { ThinkingLevel } from "@/common/types/thinking";
 import type { MuxProviderOptions } from "@/common/types/providerOptions";
+import type { ServiceTier } from "@/common/config/schemas/providersConfig";
+import { withServiceTierOverride } from "@/common/utils/ai/serviceTier";
 import { WORKSPACE_DEFAULTS } from "@/constants/workspaceDefaults";
 import { isExperimentEnabled } from "@/browser/hooks/useExperiments";
 import { EXPERIMENT_IDS } from "@/common/constants/experiments";
@@ -66,6 +69,19 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio
 
   const providerOptions = getProviderOptions();
 
+  // Mirror useSendMessageOptions: attach the per-chat service-tier override for models
+  // that honor it (OpenAI today) so non-React send paths (resume, idle-compaction, plan
+  // execution) stay consistent with interactive sends.
+  const serviceTierOverride = readPersistedState<ServiceTier | null>(
+    getServiceTierKey(workspaceId),
+    null
+  );
+  const effectiveProviderOptions = withServiceTierOverride(
+    providerOptions,
+    serviceTierOverride,
+    baseModel
+  );
+
   const disableWorkspaceAgents = readPersistedState<boolean>(
     getDisableWorkspaceAgentsKey(workspaceId),
     false
@@ -75,7 +91,7 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio
     model: baseModel,
     agentId,
     thinkingLevel,
-    providerOptions,
+    providerOptions: effectiveProviderOptions,
     disableWorkspaceAgents,
     experiments: {
       programmaticToolCalling: isExperimentEnabled(EXPERIMENT_IDS.PROGRAMMATIC_TOOL_CALLING),
diff --git a/src/browser/utils/slashCommands/parser.test.ts b/src/browser/utils/slashCommands/parser.test.ts
index 427673bc26..51a9aff68d 100644
--- a/src/browser/utils/slashCommands/parser.test.ts
+++ b/src/browser/utils/slashCommands/parser.test.ts
@@ -111,6 +111,32 @@ describe("commandParser", () => {
       expectParse("/sonnet  ", { type: "model-help" }); // whitespace only
     });
 
+    it("should parse /fast and /slow as service-tier one-shots", () => {
+      expectParse("/fast ship it", {
+        type: "model-oneshot",
+        serviceTier: "priority",
+        message: "ship it",
+      });
+      expectParse("/slow take your time", {
+        type: "model-oneshot",
+        serviceTier: "flex",
+        message: "take your time",
+      });
+    });
+
+    it("should preserve multiline messages for service-tier one-shots", () => {
+      expectParse("/fast first line\nsecond line", {
+        type: "model-oneshot",
+        serviceTier: "priority",
+        message: "first line\nsecond line",
+      });
+    });
+
+    it("should return model-help for /fast or /slow without a message", () => {
+      expectParse("/fast", { type: "model-help" });
+      expectParse("/slow   ", { type: "model-help" }); // whitespace only
+    });
+
     it("should return unknown-command for unknown aliases", () => {
       expectParse("/xyz do something", {
         type: "unknown-command",
diff --git a/src/browser/utils/slashCommands/parser.ts b/src/browser/utils/slashCommands/parser.ts
index 99f2492c04..dfa4d8e637 100644
--- a/src/browser/utils/slashCommands/parser.ts
+++ b/src/browser/utils/slashCommands/parser.ts
@@ -7,6 +7,8 @@ import { SLASH_COMMAND_DEFINITION_MAP } from "./registry";
 import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels";
 import { normalizeModelInput } from "@/browser/utils/models/normalizeModelInput";
 import { parseThinkingInput, type ParsedThinkingInput } from "@/common/types/thinking";
+import { getServiceTierForCommandKey } from "@/common/utils/ai/serviceTier";
+import type { ServiceTier } from "@/common/config/schemas/providersConfig";
 
 /**
  * Parse a raw command string into a structured command
@@ -118,7 +120,7 @@ export function getSlashCommandDefinitions(): readonly SlashCommandDefinition[]
 }
 
 /**
- * Parse a oneshot command key into model + thinking overrides.
+ * Parse a oneshot command key into model / thinking / service-tier overrides.
  *
  * Supported forms:
  * - "haiku"        → model override only (existing behavior)
@@ -126,12 +128,25 @@ export function getSlashCommandDefinitions(): readonly SlashCommandDefinition[]
  * - "haiku+medium" → model + named thinking level
  * - "+0"           → thinking-only override (use current model)
  * - "+high"        → thinking-only override with named level
+ * - "fast"/"slow"  → service-tier-only override (use current model)
+ *
+ * `/fast` and `/slow` deliberately reuse the model-oneshot path so message
+ * extraction, the bare-key → help fallback, and the rendered command prefix all
+ * behave exactly like `/<model>` one-shots.
  *
  * Returns null if the key doesn't match any valid oneshot pattern.
  */
-function parseOneshotCommandKey(
-  key: string
-): { modelString?: string; thinkingLevel?: ParsedThinkingInput } | null {
+function parseOneshotCommandKey(key: string): {
+  modelString?: string;
+  thinkingLevel?: ParsedThinkingInput;
+  serviceTier?: ServiceTier;
+} | null {
+  // Service-tier one-shots (/fast, /slow) carry no model/thinking change.
+  const serviceTier = getServiceTierForCommandKey(key);
+  if (serviceTier) {
+    return { serviceTier };
+  }
+
   const plusIndex = key.indexOf("+");
 
   if (plusIndex === -1) {
diff --git a/src/browser/utils/slashCommands/suggestions.test.ts b/src/browser/utils/slashCommands/suggestions.test.ts
index d20271d150..d74c1ab209 100644
--- a/src/browser/utils/slashCommands/suggestions.test.ts
+++ b/src/browser/utils/slashCommands/suggestions.test.ts
@@ -52,6 +52,20 @@ describe("getSlashCommandSuggestions", () => {
     expect(labels).toContain("/model");
   });
 
+  it("suggests /fast and /slow service-tier one-shots", () => {
+    const suggestions = getSlashCommandSuggestions("/");
+    const labels = suggestions.map((s) => s.display);
+
+    expect(labels).toContain("/fast");
+    expect(labels).toContain("/slow");
+  });
+
+  it("filters service-tier one-shots by prefix", () => {
+    const suggestions = getSlashCommandSuggestions("/fa");
+    expect(suggestions.map((s) => s.display)).toContain("/fast");
+    expect(suggestions.map((s) => s.display)).not.toContain("/slow");
+  });
+
   it("includes agent skills when provided in context", () => {
     const suggestions = getSlashCommandSuggestions("/", {
       agentSkills: [
diff --git a/src/browser/utils/slashCommands/suggestions.ts b/src/browser/utils/slashCommands/suggestions.ts
index a195563571..5b3931bc29 100644
--- a/src/browser/utils/slashCommands/suggestions.ts
+++ b/src/browser/utils/slashCommands/suggestions.ts
@@ -5,6 +5,7 @@
 import { matchesNameBySegmentPrefix } from "@/browser/utils/suggestionMatching";
 import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels";
 import { formatModelDisplayName } from "@/common/utils/ai/modelDisplay";
+import { SERVICE_TIER_COMMAND_KEYS } from "@/common/utils/ai/serviceTier";
 import { getSlashCommandDefinitions } from "./parser";
 import { isSlashCommandVisible, SLASH_COMMAND_DEFINITION_MAP } from "./registry";
 import type {
@@ -88,6 +89,7 @@ function buildTopLevelSuggestions(
     .filter((workflow) => !SLASH_COMMAND_DEFINITION_MAP.has(workflow.name))
     .filter((workflow) => !skillNames.has(workflow.name))
     .filter((workflow) => !Object.hasOwn(MODEL_ABBREVIATIONS, workflow.name))
+    .filter((workflow) => !SERVICE_TIER_COMMAND_KEYS.includes(workflow.name as never))
     .map((workflow) => ({
       key: workflow.name,
       description: `${workflow.description} (${workflow.scope} workflow)`,
@@ -126,11 +128,30 @@ function buildTopLevelSuggestions(
     })
   );
 
+  // Service-tier one-shot suggestions (/fast, /slow). These reuse the model-oneshot
+  // send path; "Fast"/"Slow" wording keeps them provider-agnostic for future models.
+  const serviceTierDefinitions: SuggestionDefinition[] = [
+    { key: "fast", description: "Send one message on the Fast service tier (lower latency)" },
+    { key: "slow", description: "Send one message on the Slow service tier (lower cost)" },
+  ];
+
+  const serviceTierSuggestions = filterAndMapSuggestions(
+    serviceTierDefinitions,
+    partial,
+    (definition) => ({
+      id: `model-oneshot:${definition.key}`,
+      display: `/${definition.key}`,
+      description: definition.description,
+      replacement: `/${definition.key} `,
+    })
+  );
+
   return [
     ...commandSuggestions,
     ...skillSuggestions,
     ...workflowSuggestions,
     ...modelAliasSuggestions,
+    ...serviceTierSuggestions,
   ];
 }
 
diff --git a/src/browser/utils/slashCommands/types.ts b/src/browser/utils/slashCommands/types.ts
index 6fc84f8476..7a933eb102 100644
--- a/src/browser/utils/slashCommands/types.ts
+++ b/src/browser/utils/slashCommands/types.ts
@@ -13,6 +13,7 @@ import type { ExperimentId } from "@/common/constants/experiments";
 import type { AgentSkillDescriptor } from "@/common/types/agentSkill";
 import type { WorkflowDefinitionDescriptor } from "@/common/types/workflow";
 import type { ParsedThinkingInput } from "@/common/types/thinking";
+import type { ServiceTier } from "@/common/config/schemas/providersConfig";
 
 export type ParsedCommand =
   | { type: "model-set"; modelString: string }
@@ -22,6 +23,8 @@ export type ParsedCommand =
       modelString?: string;
       /** One-shot thinking level override — named (ThinkingLevel) or numeric index (resolved at send time against the model's policy). */
       thinkingLevel?: ParsedThinkingInput;
+      /** One-shot service-tier (Fast/Slow) override applied for this message only (e.g. "/fast", "/slow"). */
+      serviceTier?: ServiceTier;
       message: string;
     }
   | { type: "model-help" }
diff --git a/src/common/constants/storage.ts b/src/common/constants/storage.ts
index 793180b9a5..8f49500de7 100644
--- a/src/common/constants/storage.ts
+++ b/src/common/constants/storage.ts
@@ -178,6 +178,15 @@ export function getThinkingLevelKey(scopeId: string): string {
   return `thinkingLevel:${scopeId}`;
 }
 
+/**
+ * Get the localStorage key for the chat-specific service-tier override per scope
+ * (workspace/project). `null` means no override (use the provider/global default).
+ * Format: "serviceTier:{scopeId}"
+ */
+export function getServiceTierKey(scopeId: string): string {
+  return `serviceTier:${scopeId}`;
+}
+
 /**
  * Get the localStorage key for per-agent workspace AI overrides cache.
  * Format: "workspaceAiSettingsByAgent:{workspaceId}"
@@ -746,6 +755,7 @@ const PERSISTENT_WORKSPACE_KEY_FUNCTIONS: Array<(workspaceId: string) => string>
   getAgentIdKey,
   getPinnedAgentIdKey,
   getThinkingLevelKey,
+  getServiceTierKey,
   getReviewStateKey,
   getHunkFirstSeenKey,
   getReviewExpandStateKey,
diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts
new file mode 100644
index 0000000000..2ded4c0e6a
--- /dev/null
+++ b/src/common/utils/ai/serviceTier.test.ts
@@ -0,0 +1,100 @@
+import { describe, it, expect } from "bun:test";
+import type { MuxProviderOptions } from "@/common/types/providerOptions";
+import {
+  getServiceTierForCommandKey,
+  getServiceTierSpeed,
+  getServiceTierSpeedLabel,
+  SERVICE_TIER_FAST,
+  SERVICE_TIER_SLOW,
+  supportsServiceTier,
+  withServiceTierOverride,
+} from "./serviceTier";
+
+const OPENAI_MODEL = "openai:gpt-5.5";
+const ANTHROPIC_MODEL = "anthropic:claude-haiku-4-5";
+
+describe("serviceTier helpers", () => {
+  describe("getServiceTierForCommandKey", () => {
+    it("maps /fast and /slow to provider wire tiers", () => {
+      expect(getServiceTierForCommandKey("fast")).toBe(SERVICE_TIER_FAST);
+      expect(getServiceTierForCommandKey("slow")).toBe(SERVICE_TIER_SLOW);
+    });
+
+    it("uses OpenAI priority/flex as the Fast/Slow wire values", () => {
+      expect(SERVICE_TIER_FAST).toBe("priority");
+      expect(SERVICE_TIER_SLOW).toBe("flex");
+    });
+
+    it("returns null for non service-tier keys", () => {
+      expect(getServiceTierForCommandKey("haiku")).toBeNull();
+      expect(getServiceTierForCommandKey("compact")).toBeNull();
+      expect(getServiceTierForCommandKey("")).toBeNull();
+    });
+  });
+
+  describe("getServiceTierSpeed", () => {
+    it("collapses concrete tiers into UI speed buckets", () => {
+      expect(getServiceTierSpeed("priority")).toBe("fast");
+      expect(getServiceTierSpeed("flex")).toBe("slow");
+    });
+
+    it("treats auto/default/absent as the neutral default", () => {
+      expect(getServiceTierSpeed("auto")).toBe("default");
+      expect(getServiceTierSpeed("default")).toBe("default");
+      expect(getServiceTierSpeed(null)).toBe("default");
+      expect(getServiceTierSpeed(undefined)).toBe("default");
+    });
+  });
+
+  describe("getServiceTierSpeedLabel", () => {
+    it("renders provider-agnostic labels", () => {
+      expect(getServiceTierSpeedLabel("fast")).toBe("Fast");
+      expect(getServiceTierSpeedLabel("slow")).toBe("Slow");
+      expect(getServiceTierSpeedLabel("default")).toBe("Auto");
+    });
+  });
+
+  describe("supportsServiceTier", () => {
+    it("is supported only for OpenAI models today", () => {
+      expect(supportsServiceTier(OPENAI_MODEL)).toBe(true);
+      expect(supportsServiceTier(ANTHROPIC_MODEL)).toBe(false);
+      expect(supportsServiceTier("google:gemini-3.1-pro-preview")).toBe(false);
+    });
+  });
+
+  describe("withServiceTierOverride", () => {
+    it("attaches the tier under openai for supported models", () => {
+      const result = withServiceTierOverride({}, SERVICE_TIER_FAST, OPENAI_MODEL);
+      expect(result.openai?.serviceTier).toBe("priority");
+    });
+
+    it("preserves other openai provider options", () => {
+      const result = withServiceTierOverride(
+        { openai: { wireFormat: "responses" } },
+        SERVICE_TIER_SLOW,
+        OPENAI_MODEL
+      );
+      expect(result.openai?.serviceTier).toBe("flex");
+      expect(result.openai?.wireFormat).toBe("responses");
+    });
+
+    it("returns options unchanged when there is no override", () => {
+      const input = { anthropic: { use1MContext: true } };
+      expect(withServiceTierOverride(input, null, OPENAI_MODEL)).toBe(input);
+      expect(withServiceTierOverride(input, undefined, OPENAI_MODEL)).toBe(input);
+    });
+
+    it("never attaches a tier for unsupported models", () => {
+      const input = {};
+      const result = withServiceTierOverride(input, SERVICE_TIER_FAST, ANTHROPIC_MODEL);
+      expect(result).toBe(input);
+      expect(result.openai).toBeUndefined();
+    });
+
+    it("does not mutate the input options", () => {
+      const input: MuxProviderOptions = { openai: { wireFormat: "responses" } };
+      withServiceTierOverride(input, SERVICE_TIER_FAST, OPENAI_MODEL);
+      expect(input.openai?.serviceTier).toBeUndefined();
+    });
+  });
+});
diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts
new file mode 100644
index 0000000000..56a0785364
--- /dev/null
+++ b/src/common/utils/ai/serviceTier.ts
@@ -0,0 +1,93 @@
+/**
+ * Service-tier helpers shared across the send path, slash commands, and UI.
+ *
+ * A "service tier" tells the provider how to schedule a request. OpenAI exposes
+ * this as `service_tier` (e.g. `priority` for low latency, `flex` for cheaper but
+ * slower). To keep the product generic for future providers, we surface it in the
+ * UI as **Fast** / **Slow** rather than the provider-specific wire values.
+ *
+ * Mapping (the only place this translation should live):
+ * - Fast → `priority` (low latency, higher cost)
+ * - Slow → `flex`     (lower cost, higher latency)
+ * - Auto → no override (falls back to the provider/global default)
+ */
+
+import { type ServiceTier } from "@/common/config/schemas/providersConfig";
+import type { MuxProviderOptions } from "@/common/types/providerOptions";
+import { getModelProvider } from "./models";
+
+/** Wire value for the user-facing "Fast" speed. */
+export const SERVICE_TIER_FAST: ServiceTier = "priority";
+/** Wire value for the user-facing "Slow" speed. */
+export const SERVICE_TIER_SLOW: ServiceTier = "flex";
+
+/** Generic, provider-agnostic speed buckets used for UI state and styling. */
+export type ServiceTierSpeed = "fast" | "slow" | "default";
+
+/** Collapse a concrete service tier (or absence of one) into a UI speed bucket. */
+export function getServiceTierSpeed(tier: ServiceTier | null | undefined): ServiceTierSpeed {
+  if (tier === SERVICE_TIER_FAST) return "fast";
+  if (tier === SERVICE_TIER_SLOW) return "slow";
+  // "auto" / "default" / null / undefined all render as the neutral (grey) state.
+  return "default";
+}
+
+/** Human-readable label for a speed bucket. */
+export function getServiceTierSpeedLabel(speed: ServiceTierSpeed): string {
+  switch (speed) {
+    case "fast":
+      return "Fast";
+    case "slow":
+      return "Slow";
+    case "default":
+      return "Auto";
+  }
+}
+
+/**
+ * Slash-command keys that map to a one-shot service tier (e.g. `/fast`, `/slow`).
+ * Kept as a const map so the parser, suggestions, and workflow-collision guards
+ * stay in sync from a single source.
+ */
+export const SERVICE_TIER_COMMAND_KEYS = ["fast", "slow"] as const;
+export type ServiceTierCommandKey = (typeof SERVICE_TIER_COMMAND_KEYS)[number];
+
+/** Resolve a slash-command key into its service tier, or null when it isn't one. */
+export function getServiceTierForCommandKey(key: string): ServiceTier | null {
+  if (key === "fast") return SERVICE_TIER_FAST;
+  if (key === "slow") return SERVICE_TIER_SLOW;
+  return null;
+}
+
+/**
+ * Whether a model honors a chat-level service-tier override.
+ *
+ * Today only OpenAI (GPT-class) models support `service_tier`, so we gate on the
+ * provider. This is intentionally a single helper so the UI affordance, the send
+ * path, and future providers all share one definition of "supported".
+ */
+export function supportsServiceTier(modelString: string): boolean {
+  return getModelProvider(modelString) === "openai";
+}
+
+/**
+ * Merge a service-tier override into provider options for a given model.
+ *
+ * Returns the options unchanged when there is no override or the model can't use
+ * service tiers, so a stale override never leaks onto an unsupported request.
+ * Centralized here so every send path (interactive hook, non-React storage path,
+ * and one-shot `/fast` `/slow`) applies the override identically.
+ */
+export function withServiceTierOverride(
+  providerOptions: MuxProviderOptions,
+  serviceTier: ServiceTier | null | undefined,
+  modelString: string
+): MuxProviderOptions {
+  if (!serviceTier || !supportsServiceTier(modelString)) {
+    return providerOptions;
+  }
+  return {
+    ...providerOptions,
+    openai: { ...providerOptions.openai, serviceTier },
+  };
+}

From 8c7fdf93cb86167a44d2d565355211b07813712a Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 5 Jun 2026 16:46:09 -0500
Subject: [PATCH 2/5] fix: restrict service tier to direct/passthrough OpenAI
 routes

Addresses Codex P2: non-passthrough gateway-routed OpenAI models (e.g.
openrouter:openai/gpt-5) canonicalize to 'openai', but the backend drops
providerOptions.openai.serviceTier on those routes. supportsServiceTier now
returns true only for direct openai:* models and for OpenAI models routed
through a passthrough gateway (e.g. mux-gateway), so the UI never advertises
a no-op Fast/Slow override.
---
 src/common/utils/ai/serviceTier.test.ts | 15 +++++++++++++-
 src/common/utils/ai/serviceTier.ts      | 26 ++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts
index 2ded4c0e6a..dbdeeea289 100644
--- a/src/common/utils/ai/serviceTier.test.ts
+++ b/src/common/utils/ai/serviceTier.test.ts
@@ -55,11 +55,24 @@ describe("serviceTier helpers", () => {
   });
 
   describe("supportsServiceTier", () => {
-    it("is supported only for OpenAI models today", () => {
+    it("is supported for direct OpenAI models", () => {
       expect(supportsServiceTier(OPENAI_MODEL)).toBe(true);
       expect(supportsServiceTier(ANTHROPIC_MODEL)).toBe(false);
       expect(supportsServiceTier("google:gemini-3.1-pro-preview")).toBe(false);
     });
+
+    it("is NOT supported for non-passthrough gateway-routed OpenAI models", () => {
+      // openrouter:openai/gpt-5 canonicalizes to "openai", but openrouter is a
+      // non-passthrough gateway, so the backend drops serviceTier — a silent no-op.
+      expect(supportsServiceTier("openrouter:openai/gpt-5")).toBe(false);
+      // github-copilot is another non-passthrough gateway (canonical github-copilot).
+      expect(supportsServiceTier("github-copilot:gpt-5.5")).toBe(false);
+    });
+
+    it("is supported for passthrough gateway-routed OpenAI models", () => {
+      // mux-gateway is a passthrough gateway: it forwards openai provider options.
+      expect(supportsServiceTier("mux-gateway:openai/gpt-4o")).toBe(true);
+    });
   });
 
   describe("withServiceTierOverride", () => {
diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts
index 56a0785364..61713e6a4a 100644
--- a/src/common/utils/ai/serviceTier.ts
+++ b/src/common/utils/ai/serviceTier.ts
@@ -14,7 +14,8 @@
 
 import { type ServiceTier } from "@/common/config/schemas/providersConfig";
 import type { MuxProviderOptions } from "@/common/types/providerOptions";
-import { getModelProvider } from "./models";
+import { PROVIDER_DEFINITIONS } from "@/common/constants/providers";
+import { getExplicitGatewayPrefix, getModelProvider } from "./models";
 
 /** Wire value for the user-facing "Fast" speed. */
 export const SERVICE_TIER_FAST: ServiceTier = "priority";
@@ -62,12 +63,27 @@ export function getServiceTierForCommandKey(key: string): ServiceTier | null {
 /**
  * Whether a model honors a chat-level service-tier override.
  *
- * Today only OpenAI (GPT-class) models support `service_tier`, so we gate on the
- * provider. This is intentionally a single helper so the UI affordance, the send
- * path, and future providers all share one definition of "supported".
+ * Today only OpenAI (GPT-class) models support `service_tier`. Critically, the
+ * backend only forwards `providerOptions.openai.serviceTier` when the request is
+ * routed either directly to OpenAI or through a *passthrough* gateway. Non-passthrough
+ * gateways (e.g. openrouter, github-copilot) drop the field, so a model like
+ * `openrouter:openai/gpt-5` — which canonicalizes to `openai` — would silently ignore
+ * the tier. We mirror that routing here so the UI never advertises a no-op override.
+ *
+ * This is intentionally a single helper so the UI affordance, the send path, and
+ * future providers all share one definition of "supported".
  */
 export function supportsServiceTier(modelString: string): boolean {
-  return getModelProvider(modelString) === "openai";
+  if (getModelProvider(modelString) !== "openai") {
+    return false;
+  }
+  const gatewayPrefix = getExplicitGatewayPrefix(modelString);
+  if (gatewayPrefix) {
+    // Only passthrough gateways forward OpenAI provider options to the request.
+    const def = PROVIDER_DEFINITIONS[gatewayPrefix];
+    return def != null && "passthrough" in def && def.passthrough === true;
+  }
+  return true;
 }
 
 /**

From 615aa4883dfeec7885bcab67c64572fa0672a6eb Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 5 Jun 2026 16:53:15 -0500
Subject: [PATCH 3/5] fix: carry creation-time service tier into new workspace

Addresses Codex P2: syncCreationPreferences() copied model/agent/thinking
from the project scope to the new workspace but not the service-tier override,
so a Fast/Slow choice made during creation reverted to Auto after the workspace
opened. Now copies getServiceTierKey(projectScope) -> getServiceTierKey(workspace),
mirroring thinking level. Extends the creation success test to assert the sync.
---
 .../features/ChatInput/useCreationWorkspace.test.tsx |  8 ++++++++
 .../features/ChatInput/useCreationWorkspace.ts       | 12 ++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/src/browser/features/ChatInput/useCreationWorkspace.test.tsx b/src/browser/features/ChatInput/useCreationWorkspace.test.tsx
index a9c414eb6a..644b155ad1 100644
--- a/src/browser/features/ChatInput/useCreationWorkspace.test.tsx
+++ b/src/browser/features/ChatInput/useCreationWorkspace.test.tsx
@@ -15,6 +15,7 @@ import {
   getPendingScopeId,
   getPendingWorkspaceSendErrorKey,
   getProjectScopeId,
+  getServiceTierKey,
   getThinkingLevelKey,
 } from "@/common/constants/storage";
 import type { WorkspaceChatMessage } from "@/common/orpc/types";
@@ -709,6 +710,8 @@ describe("useCreationWorkspace", () => {
     persistedPreferences[getAgentIdKey(getProjectScopeId(TEST_PROJECT_PATH))] = "plan";
     // Set model preference for the project scope (read by getSendOptionsFromStorage)
     persistedPreferences[getModelKey(getProjectScopeId(TEST_PROJECT_PATH))] = "gpt-4";
+    // Chat-specific service-tier override chosen during creation must follow the workspace.
+    persistedPreferences[getServiceTierKey(getProjectScopeId(TEST_PROJECT_PATH))] = "priority";
 
     draftSettingsState = createDraftSettingsHarness({
       selectedRuntime: { mode: "ssh", host: "example.com" },
@@ -769,6 +772,11 @@ describe("useCreationWorkspace", () => {
     // Thinking is workspace-scoped, but this test doesn't set a project-scoped thinking preference.
     expect(updatePersistedStateCalls).toContainEqual([pendingInputKey, ""]);
     expect(updatePersistedStateCalls).toContainEqual([pendingImagesKey, undefined]);
+    // The creation-time service-tier override is carried into the new workspace scope.
+    expect(updatePersistedStateCalls).toContainEqual([
+      getServiceTierKey(TEST_WORKSPACE_ID),
+      "priority",
+    ]);
   });
 
   test("handleSend creates workspace and applies initial goal command without sending chat text", async () => {
diff --git a/src/browser/features/ChatInput/useCreationWorkspace.ts b/src/browser/features/ChatInput/useCreationWorkspace.ts
index 3c0c5e44ca..6345cf4537 100644
--- a/src/browser/features/ChatInput/useCreationWorkspace.ts
+++ b/src/browser/features/ChatInput/useCreationWorkspace.ts
@@ -10,6 +10,7 @@ import type {
 import type { RuntimeChoice } from "@/browser/utils/runtimeUi";
 import { buildRuntimeConfig, RUNTIME_MODE } from "@/common/types/runtime";
 import type { ThinkingLevel } from "@/common/types/thinking";
+import type { ServiceTier } from "@/common/config/schemas/providersConfig";
 import { useDraftWorkspaceSettings } from "@/browser/hooks/useDraftWorkspaceSettings";
 import { setWorkspaceModelWithOrigin } from "@/browser/utils/modelChange";
 import { readPersistedState, updatePersistedState } from "@/browser/hooks/usePersistedState";
@@ -22,6 +23,7 @@ import {
   getNotifyOnResponseAutoEnableKey,
   getNotifyOnResponseKey,
   getThinkingLevelKey,
+  getServiceTierKey,
   getWorkspaceAISettingsByAgentKey,
   getPendingScopeId,
   getDraftScopeId,
@@ -113,6 +115,16 @@ function syncCreationPreferences(projectPath: string, workspaceId: string): void
     updatePersistedState(getThinkingLevelKey(workspaceId), projectThinkingLevel);
   }
 
+  // Carry the chat-specific service-tier (Fast/Slow) override chosen during creation
+  // into the new workspace so the first and subsequent messages stay consistent.
+  const projectServiceTier = readPersistedState<ServiceTier | null>(
+    getServiceTierKey(projectScopeId),
+    null
+  );
+  if (projectServiceTier !== null) {
+    updatePersistedState(getServiceTierKey(workspaceId), projectServiceTier);
+  }
+
   if (projectModel) {
     const effectiveThinking: ThinkingLevel = projectThinkingLevel ?? "off";
 

From 6f75162084cace2a7e0c4e9ddc7858a53000eb94 Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 5 Jun 2026 18:02:18 -0500
Subject: [PATCH 4/5] refactor: drop /fast /slow one-shots, keep bolt
 service-tier control

The /fast and /slow slash commands were standalone service-tier-only
one-shots that could not compose with the existing /<model>[+thinking]
one-shot syntax. Remove them entirely (parser, suggestions, tips, the
ParsedCommand serviceTier field, the index.tsx one-shot guard/merge, and
the command-key helpers) while keeping the bolt UI control and the
per-chat service-tier override plumbing.
---
 src/browser/features/ChatInput/index.tsx      | 28 +------------------
 .../features/ChatInput/placeholderTips.ts     |  2 --
 .../utils/slashCommands/parser.test.ts        | 26 -----------------
 src/browser/utils/slashCommands/parser.ts     | 23 +++------------
 .../utils/slashCommands/suggestions.test.ts   | 14 ----------
 .../utils/slashCommands/suggestions.ts        | 21 --------------
 src/browser/utils/slashCommands/types.ts      |  3 --
 src/common/utils/ai/serviceTier.test.ts       | 19 -------------
 src/common/utils/ai/serviceTier.ts            | 21 ++------------
 9 files changed, 8 insertions(+), 149 deletions(-)

diff --git a/src/browser/features/ChatInput/index.tsx b/src/browser/features/ChatInput/index.tsx
index 47741bfc8a..7efd3908ab 100644
--- a/src/browser/features/ChatInput/index.tsx
+++ b/src/browser/features/ChatInput/index.tsx
@@ -40,8 +40,7 @@ import { usePolicy } from "@/browser/contexts/PolicyContext";
 import { useAPI } from "@/browser/contexts/API";
 import { useThinkingLevel } from "@/browser/hooks/useThinkingLevel";
 import { useExperimentValue } from "@/browser/hooks/useExperiments";
-import { normalizeSelectedModel, getModelName } from "@/common/utils/ai/models";
-import { supportsServiceTier, withServiceTierOverride } from "@/common/utils/ai/serviceTier";
+import { normalizeSelectedModel } from "@/common/utils/ai/models";
 import {
   useAdditionalSystemContextHydrated,
   useAdditionalSystemContextSnapshot,
@@ -2486,18 +2485,6 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
 
       const modelOverride = modelOneShot?.modelString;
 
-      // /fast and /slow one-shot tier override only applies to models that honor service
-      // tiers (OpenAI today). Block with a clear message (preserving the composer) rather
-      // than silently dropping the tier when the active model can't use it.
-      const tierOverride = modelOneShot?.serviceTier;
-      if (tierOverride && !supportsServiceTier(modelOverride ?? baseModel)) {
-        pushToast({
-          type: "error",
-          message: `Fast/Slow isn't supported by ${getModelName(modelOverride ?? baseModel)}`,
-        });
-        return;
-      }
-
       // Regular message (or /<model-alias> one-shot override) - send directly via API
       const messageTextForSend = modelOneShot?.message ?? skillInvocation?.userText ?? messageText;
       const skillMuxMetadata = skillInvocation
@@ -2676,24 +2663,11 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
             : undefined;
         const goalInterventionPolicy = overrides?.goalInterventionPolicy;
 
-        // One-shot /fast or /slow rides in providerOptions for this message only,
-        // layering over any persisted per-chat service tier from useSendMessageOptions.
-        const oneshotProviderOptions = tierOverride
-          ? {
-              providerOptions: withServiceTierOverride(
-                sendMessageOptions.providerOptions ?? {},
-                tierOverride,
-                modelOverride ?? baseModel
-              ),
-            }
-          : {};
-
         const sendOptions = {
           ...sendMessageOptions,
           ...compactionOptions,
           ...(modelOverride ? { model: modelOverride } : {}),
           ...(thinkingOverride ? { thinkingLevel: thinkingOverride } : {}),
-          ...oneshotProviderOptions,
           ...(modelOneShot ? { skipAiSettingsPersistence: true } : {}),
           ...(goalInterventionPolicy ? { goalInterventionPolicy } : {}),
           ...(overrides?.queueDispatchMode
diff --git a/src/browser/features/ChatInput/placeholderTips.ts b/src/browser/features/ChatInput/placeholderTips.ts
index a1b09e0a4f..83621780ca 100644
--- a/src/browser/features/ChatInput/placeholderTips.ts
+++ b/src/browser/features/ChatInput/placeholderTips.ts
@@ -51,8 +51,6 @@ export const PLACEHOLDER_TIPS: readonly string[] = [
   "Try /btw <question> to ask a side question without nudging the agent",
   "Try /haiku <msg> to send just this message on a different model",
   "Try /+high <msg> to crank up reasoning for this message only",
-  "Try /fast <msg> to send one message on a faster service tier",
-  "Try /slow <msg> to send one message on a cheaper service tier",
   "Try /compact to summarize the conversation when context gets tight",
   "Try /fork <start> to branch this chat into a new workspace",
   "Try /plan to view or edit the current plan inline",
diff --git a/src/browser/utils/slashCommands/parser.test.ts b/src/browser/utils/slashCommands/parser.test.ts
index 51a9aff68d..427673bc26 100644
--- a/src/browser/utils/slashCommands/parser.test.ts
+++ b/src/browser/utils/slashCommands/parser.test.ts
@@ -111,32 +111,6 @@ describe("commandParser", () => {
       expectParse("/sonnet  ", { type: "model-help" }); // whitespace only
     });
 
-    it("should parse /fast and /slow as service-tier one-shots", () => {
-      expectParse("/fast ship it", {
-        type: "model-oneshot",
-        serviceTier: "priority",
-        message: "ship it",
-      });
-      expectParse("/slow take your time", {
-        type: "model-oneshot",
-        serviceTier: "flex",
-        message: "take your time",
-      });
-    });
-
-    it("should preserve multiline messages for service-tier one-shots", () => {
-      expectParse("/fast first line\nsecond line", {
-        type: "model-oneshot",
-        serviceTier: "priority",
-        message: "first line\nsecond line",
-      });
-    });
-
-    it("should return model-help for /fast or /slow without a message", () => {
-      expectParse("/fast", { type: "model-help" });
-      expectParse("/slow   ", { type: "model-help" }); // whitespace only
-    });
-
     it("should return unknown-command for unknown aliases", () => {
       expectParse("/xyz do something", {
         type: "unknown-command",
diff --git a/src/browser/utils/slashCommands/parser.ts b/src/browser/utils/slashCommands/parser.ts
index dfa4d8e637..99f2492c04 100644
--- a/src/browser/utils/slashCommands/parser.ts
+++ b/src/browser/utils/slashCommands/parser.ts
@@ -7,8 +7,6 @@ import { SLASH_COMMAND_DEFINITION_MAP } from "./registry";
 import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels";
 import { normalizeModelInput } from "@/browser/utils/models/normalizeModelInput";
 import { parseThinkingInput, type ParsedThinkingInput } from "@/common/types/thinking";
-import { getServiceTierForCommandKey } from "@/common/utils/ai/serviceTier";
-import type { ServiceTier } from "@/common/config/schemas/providersConfig";
 
 /**
  * Parse a raw command string into a structured command
@@ -120,7 +118,7 @@ export function getSlashCommandDefinitions(): readonly SlashCommandDefinition[]
 }
 
 /**
- * Parse a oneshot command key into model / thinking / service-tier overrides.
+ * Parse a oneshot command key into model + thinking overrides.
  *
  * Supported forms:
  * - "haiku"        → model override only (existing behavior)
@@ -128,25 +126,12 @@ export function getSlashCommandDefinitions(): readonly SlashCommandDefinition[]
  * - "haiku+medium" → model + named thinking level
  * - "+0"           → thinking-only override (use current model)
  * - "+high"        → thinking-only override with named level
- * - "fast"/"slow"  → service-tier-only override (use current model)
- *
- * `/fast` and `/slow` deliberately reuse the model-oneshot path so message
- * extraction, the bare-key → help fallback, and the rendered command prefix all
- * behave exactly like `/<model>` one-shots.
  *
  * Returns null if the key doesn't match any valid oneshot pattern.
  */
-function parseOneshotCommandKey(key: string): {
-  modelString?: string;
-  thinkingLevel?: ParsedThinkingInput;
-  serviceTier?: ServiceTier;
-} | null {
-  // Service-tier one-shots (/fast, /slow) carry no model/thinking change.
-  const serviceTier = getServiceTierForCommandKey(key);
-  if (serviceTier) {
-    return { serviceTier };
-  }
-
+function parseOneshotCommandKey(
+  key: string
+): { modelString?: string; thinkingLevel?: ParsedThinkingInput } | null {
   const plusIndex = key.indexOf("+");
 
   if (plusIndex === -1) {
diff --git a/src/browser/utils/slashCommands/suggestions.test.ts b/src/browser/utils/slashCommands/suggestions.test.ts
index d74c1ab209..d20271d150 100644
--- a/src/browser/utils/slashCommands/suggestions.test.ts
+++ b/src/browser/utils/slashCommands/suggestions.test.ts
@@ -52,20 +52,6 @@ describe("getSlashCommandSuggestions", () => {
     expect(labels).toContain("/model");
   });
 
-  it("suggests /fast and /slow service-tier one-shots", () => {
-    const suggestions = getSlashCommandSuggestions("/");
-    const labels = suggestions.map((s) => s.display);
-
-    expect(labels).toContain("/fast");
-    expect(labels).toContain("/slow");
-  });
-
-  it("filters service-tier one-shots by prefix", () => {
-    const suggestions = getSlashCommandSuggestions("/fa");
-    expect(suggestions.map((s) => s.display)).toContain("/fast");
-    expect(suggestions.map((s) => s.display)).not.toContain("/slow");
-  });
-
   it("includes agent skills when provided in context", () => {
     const suggestions = getSlashCommandSuggestions("/", {
       agentSkills: [
diff --git a/src/browser/utils/slashCommands/suggestions.ts b/src/browser/utils/slashCommands/suggestions.ts
index 5b3931bc29..a195563571 100644
--- a/src/browser/utils/slashCommands/suggestions.ts
+++ b/src/browser/utils/slashCommands/suggestions.ts
@@ -5,7 +5,6 @@
 import { matchesNameBySegmentPrefix } from "@/browser/utils/suggestionMatching";
 import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels";
 import { formatModelDisplayName } from "@/common/utils/ai/modelDisplay";
-import { SERVICE_TIER_COMMAND_KEYS } from "@/common/utils/ai/serviceTier";
 import { getSlashCommandDefinitions } from "./parser";
 import { isSlashCommandVisible, SLASH_COMMAND_DEFINITION_MAP } from "./registry";
 import type {
@@ -89,7 +88,6 @@ function buildTopLevelSuggestions(
     .filter((workflow) => !SLASH_COMMAND_DEFINITION_MAP.has(workflow.name))
     .filter((workflow) => !skillNames.has(workflow.name))
     .filter((workflow) => !Object.hasOwn(MODEL_ABBREVIATIONS, workflow.name))
-    .filter((workflow) => !SERVICE_TIER_COMMAND_KEYS.includes(workflow.name as never))
     .map((workflow) => ({
       key: workflow.name,
       description: `${workflow.description} (${workflow.scope} workflow)`,
@@ -128,30 +126,11 @@ function buildTopLevelSuggestions(
     })
   );
 
-  // Service-tier one-shot suggestions (/fast, /slow). These reuse the model-oneshot
-  // send path; "Fast"/"Slow" wording keeps them provider-agnostic for future models.
-  const serviceTierDefinitions: SuggestionDefinition[] = [
-    { key: "fast", description: "Send one message on the Fast service tier (lower latency)" },
-    { key: "slow", description: "Send one message on the Slow service tier (lower cost)" },
-  ];
-
-  const serviceTierSuggestions = filterAndMapSuggestions(
-    serviceTierDefinitions,
-    partial,
-    (definition) => ({
-      id: `model-oneshot:${definition.key}`,
-      display: `/${definition.key}`,
-      description: definition.description,
-      replacement: `/${definition.key} `,
-    })
-  );
-
   return [
     ...commandSuggestions,
     ...skillSuggestions,
     ...workflowSuggestions,
     ...modelAliasSuggestions,
-    ...serviceTierSuggestions,
   ];
 }
 
diff --git a/src/browser/utils/slashCommands/types.ts b/src/browser/utils/slashCommands/types.ts
index 7a933eb102..6fc84f8476 100644
--- a/src/browser/utils/slashCommands/types.ts
+++ b/src/browser/utils/slashCommands/types.ts
@@ -13,7 +13,6 @@ import type { ExperimentId } from "@/common/constants/experiments";
 import type { AgentSkillDescriptor } from "@/common/types/agentSkill";
 import type { WorkflowDefinitionDescriptor } from "@/common/types/workflow";
 import type { ParsedThinkingInput } from "@/common/types/thinking";
-import type { ServiceTier } from "@/common/config/schemas/providersConfig";
 
 export type ParsedCommand =
   | { type: "model-set"; modelString: string }
@@ -23,8 +22,6 @@ export type ParsedCommand =
       modelString?: string;
       /** One-shot thinking level override — named (ThinkingLevel) or numeric index (resolved at send time against the model's policy). */
       thinkingLevel?: ParsedThinkingInput;
-      /** One-shot service-tier (Fast/Slow) override applied for this message only (e.g. "/fast", "/slow"). */
-      serviceTier?: ServiceTier;
       message: string;
     }
   | { type: "model-help" }
diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts
index dbdeeea289..ece6080ba6 100644
--- a/src/common/utils/ai/serviceTier.test.ts
+++ b/src/common/utils/ai/serviceTier.test.ts
@@ -1,7 +1,6 @@
 import { describe, it, expect } from "bun:test";
 import type { MuxProviderOptions } from "@/common/types/providerOptions";
 import {
-  getServiceTierForCommandKey,
   getServiceTierSpeed,
   getServiceTierSpeedLabel,
   SERVICE_TIER_FAST,
@@ -14,24 +13,6 @@ const OPENAI_MODEL = "openai:gpt-5.5";
 const ANTHROPIC_MODEL = "anthropic:claude-haiku-4-5";
 
 describe("serviceTier helpers", () => {
-  describe("getServiceTierForCommandKey", () => {
-    it("maps /fast and /slow to provider wire tiers", () => {
-      expect(getServiceTierForCommandKey("fast")).toBe(SERVICE_TIER_FAST);
-      expect(getServiceTierForCommandKey("slow")).toBe(SERVICE_TIER_SLOW);
-    });
-
-    it("uses OpenAI priority/flex as the Fast/Slow wire values", () => {
-      expect(SERVICE_TIER_FAST).toBe("priority");
-      expect(SERVICE_TIER_SLOW).toBe("flex");
-    });
-
-    it("returns null for non service-tier keys", () => {
-      expect(getServiceTierForCommandKey("haiku")).toBeNull();
-      expect(getServiceTierForCommandKey("compact")).toBeNull();
-      expect(getServiceTierForCommandKey("")).toBeNull();
-    });
-  });
-
   describe("getServiceTierSpeed", () => {
     it("collapses concrete tiers into UI speed buckets", () => {
       expect(getServiceTierSpeed("priority")).toBe("fast");
diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts
index 61713e6a4a..7608af8a0f 100644
--- a/src/common/utils/ai/serviceTier.ts
+++ b/src/common/utils/ai/serviceTier.ts
@@ -1,5 +1,5 @@
 /**
- * Service-tier helpers shared across the send path, slash commands, and UI.
+ * Service-tier helpers shared across the send path and UI.
  *
  * A "service tier" tells the provider how to schedule a request. OpenAI exposes
  * this as `service_tier` (e.g. `priority` for low latency, `flex` for cheaper but
@@ -45,21 +45,6 @@ export function getServiceTierSpeedLabel(speed: ServiceTierSpeed): string {
   }
 }
 
-/**
- * Slash-command keys that map to a one-shot service tier (e.g. `/fast`, `/slow`).
- * Kept as a const map so the parser, suggestions, and workflow-collision guards
- * stay in sync from a single source.
- */
-export const SERVICE_TIER_COMMAND_KEYS = ["fast", "slow"] as const;
-export type ServiceTierCommandKey = (typeof SERVICE_TIER_COMMAND_KEYS)[number];
-
-/** Resolve a slash-command key into its service tier, or null when it isn't one. */
-export function getServiceTierForCommandKey(key: string): ServiceTier | null {
-  if (key === "fast") return SERVICE_TIER_FAST;
-  if (key === "slow") return SERVICE_TIER_SLOW;
-  return null;
-}
-
 /**
  * Whether a model honors a chat-level service-tier override.
  *
@@ -91,8 +76,8 @@ export function supportsServiceTier(modelString: string): boolean {
  *
  * Returns the options unchanged when there is no override or the model can't use
  * service tiers, so a stale override never leaks onto an unsupported request.
- * Centralized here so every send path (interactive hook, non-React storage path,
- * and one-shot `/fast` `/slow`) applies the override identically.
+ * Centralized here so every send path (interactive hook and non-React storage path)
+ * applies the override identically.
  */
 export function withServiceTierOverride(
   providerOptions: MuxProviderOptions,

From c180454cbfe22f6ddd079fdd2816e088bfb33f9c Mon Sep 17 00:00:00 2001
From: Ammar <ammar+ai@ammar.io>
Date: Fri, 5 Jun 2026 18:12:23 -0500
Subject: [PATCH 5/5] fix: compose per-chat service tier with /<model>
 one-shots

Re-merge the persisted Fast/Slow tier against the effective (one-shot)
model at send time. useSendMessageOptions bakes the tier against the
saved model, so a /<model> one-shot to an OpenAI model previously dropped
a tier set while the saved model was non-OpenAI. withServiceTierOverride
is now authoritative: it attaches the tier for supported models and
strips any stale tier when the effective model can't honor it.
---
 src/browser/features/ChatInput/index.tsx | 18 ++++++++++++++++++
 src/common/utils/ai/serviceTier.test.ts  | 13 +++++++++++++
 src/common/utils/ai/serviceTier.ts       | 19 +++++++++++++------
 3 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/src/browser/features/ChatInput/index.tsx b/src/browser/features/ChatInput/index.tsx
index 7efd3908ab..ceb5ce86e2 100644
--- a/src/browser/features/ChatInput/index.tsx
+++ b/src/browser/features/ChatInput/index.tsx
@@ -39,8 +39,10 @@ import {
 import { usePolicy } from "@/browser/contexts/PolicyContext";
 import { useAPI } from "@/browser/contexts/API";
 import { useThinkingLevel } from "@/browser/hooks/useThinkingLevel";
+import { useServiceTier } from "@/browser/hooks/useServiceTier";
 import { useExperimentValue } from "@/browser/hooks/useExperiments";
 import { normalizeSelectedModel } from "@/common/utils/ai/models";
+import { withServiceTierOverride } from "@/common/utils/ai/serviceTier";
 import {
   useAdditionalSystemContextHydrated,
   useAdditionalSystemContextSnapshot,
@@ -719,6 +721,10 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
   const sendOptionsScopeId =
     variant === "workspace" ? props.workspaceId : getProjectScopeId(creationParentProjectPath);
   const sendMessageOptions = useSendMessageOptions(sendOptionsScopeId);
+  // The persisted per-chat service tier is also read here so a /<model> one-shot can
+  // re-merge it against the effective (overridden) model. useSendMessageOptions bakes the
+  // tier against the saved model, which may differ from a one-shot model override.
+  const [serviceTierOverride] = useServiceTier(sendOptionsScopeId);
   const additionalSystemContext = useAdditionalSystemContextSnapshot(
     variant === "workspace" ? props.workspaceId : ""
   );
@@ -2667,6 +2673,18 @@ const ChatInputInner: React.FC<ChatInputProps> = (props) => {
           ...sendMessageOptions,
           ...compactionOptions,
           ...(modelOverride ? { model: modelOverride } : {}),
+          // Re-merge the per-chat service tier against the one-shot model so Fast/Slow
+          // composes with /<model>[+thinking]: useSendMessageOptions baked the tier against
+          // the saved model, which may differ from the one-shot model in whether it supports tiers.
+          ...(modelOverride
+            ? {
+                providerOptions: withServiceTierOverride(
+                  sendMessageOptions.providerOptions ?? {},
+                  serviceTierOverride,
+                  modelOverride
+                ),
+              }
+            : {}),
           ...(thinkingOverride ? { thinkingLevel: thinkingOverride } : {}),
           ...(modelOneShot ? { skipAiSettingsPersistence: true } : {}),
           ...(goalInterventionPolicy ? { goalInterventionPolicy } : {}),
diff --git a/src/common/utils/ai/serviceTier.test.ts b/src/common/utils/ai/serviceTier.test.ts
index ece6080ba6..7ad5bfdafd 100644
--- a/src/common/utils/ai/serviceTier.test.ts
+++ b/src/common/utils/ai/serviceTier.test.ts
@@ -85,6 +85,19 @@ describe("serviceTier helpers", () => {
       expect(result.openai).toBeUndefined();
     });
 
+    it("strips a stale tier when re-merged against an unsupported model", () => {
+      // Simulates a /<model> one-shot switching from an OpenAI saved model (tier baked in)
+      // to a non-OpenAI model: the tier must not ride along on the Anthropic request.
+      const input: MuxProviderOptions = {
+        openai: { wireFormat: "responses", serviceTier: "priority" },
+      };
+      const result = withServiceTierOverride(input, SERVICE_TIER_FAST, ANTHROPIC_MODEL);
+      expect(result.openai?.serviceTier).toBeUndefined();
+      expect(result.openai?.wireFormat).toBe("responses");
+      // Input is left untouched.
+      expect(input.openai?.serviceTier).toBe("priority");
+    });
+
     it("does not mutate the input options", () => {
       const input: MuxProviderOptions = { openai: { wireFormat: "responses" } };
       withServiceTierOverride(input, SERVICE_TIER_FAST, OPENAI_MODEL);
diff --git a/src/common/utils/ai/serviceTier.ts b/src/common/utils/ai/serviceTier.ts
index 7608af8a0f..f98cee1c9d 100644
--- a/src/common/utils/ai/serviceTier.ts
+++ b/src/common/utils/ai/serviceTier.ts
@@ -72,12 +72,14 @@ export function supportsServiceTier(modelString: string): boolean {
 }
 
 /**
- * Merge a service-tier override into provider options for a given model.
+ * Reconcile a service-tier override against the *effective* model for a request.
  *
- * Returns the options unchanged when there is no override or the model can't use
- * service tiers, so a stale override never leaks onto an unsupported request.
- * Centralized here so every send path (interactive hook and non-React storage path)
- * applies the override identically.
+ * This is authoritative: it sets the tier when the model supports it and an override
+ * is present, and otherwise strips any previously-attached tier. The strip matters for
+ * composition with `/<model>` one-shots — a tier baked against the saved model must not
+ * linger when the one-shot switches to a model that can't honor it (and conversely, a
+ * tier dropped against a non-OpenAI saved model gets re-applied once the effective model
+ * is OpenAI). Centralized here so every send path applies the override identically.
  */
 export function withServiceTierOverride(
   providerOptions: MuxProviderOptions,
@@ -85,7 +87,12 @@ export function withServiceTierOverride(
   modelString: string
 ): MuxProviderOptions {
   if (!serviceTier || !supportsServiceTier(modelString)) {
-    return providerOptions;
+    // No override, or the model can't use service tiers: ensure no stale tier rides along.
+    if (providerOptions.openai?.serviceTier == null) {
+      return providerOptions;
+    }
+    const { serviceTier: _omit, ...openaiRest } = providerOptions.openai;
+    return { ...providerOptions, openai: openaiRest };
   }
   return {
     ...providerOptions,