diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx index cffe84a863..4898a63c46 100644 --- a/docs/hooks/tools.mdx +++ b/docs/hooks/tools.mdx @@ -600,15 +600,17 @@ If a value is too large for the environment, it may be omitted (not set). Mux al
-task (8) +task (10) | Env var | JSON path | Type | Description | | ---------------------------------- | ------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `MUX_TOOL_INPUT_AGENT_ID` | `agentId` | string | — | +| `MUX_TOOL_INPUT_MODEL` | `model` | string | Optional model override for the sub-agent, parsed with the same alias logic as the UI (an alias or a full 'provider:model' string). Omit this unless the user explicitly instructed a specific model — by default the sub-agent inherits the parent's model. Do not assume any particular model is available. | | `MUX_TOOL_INPUT_N` | `n` | number | Optional best-of count. Use n when several agents should try the same prompt independently. Mutually exclusive with variants; omit both for a single task. Only use grouped runs for sub-agents without interfering side effects, such as read-only agents like explore. | | `MUX_TOOL_INPUT_PROMPT` | `prompt` | string | — | | `MUX_TOOL_INPUT_RUN_IN_BACKGROUND` | `run_in_background` | boolean | — | | `MUX_TOOL_INPUT_SUBAGENT_TYPE` | `subagent_type` | string | — | +| `MUX_TOOL_INPUT_THINKING` | `thinking` | string | Optional thinking/reasoning-level override for the sub-agent. Accepts a level name (off, low, medium, high, xhigh, max) or a numeric index (resolved against the chosen model). Omit this unless the user explicitly instructed a specific thinking level — by default the sub-agent inherits the parent's thinking level. | | `MUX_TOOL_INPUT_TITLE` | `title` | string | — | | `MUX_TOOL_INPUT_VARIANTS_` | `variants[]` | string | Optional labels for sibling runs of the same prompt template. Use variants when the task should be repeated across labeled lanes such as issue numbers, commit windows, or frontend/backend/tests/docs review lanes. Mutually exclusive with n. When provided, Mux launches one sibling per label and substitutes ${variant} in the prompt. | | `MUX_TOOL_INPUT_VARIANTS_COUNT` | `variants.length` | number | Number of elements in variants (Optional labels for sibling runs of the same prompt template. Use variants when the task should be repeated across labeled lanes such as issue numbers, commit windows, or frontend/backend/tests/docs review lanes. Mutually exclusive with n. When provided, Mux launches one sibling per label and substitutes ${variant} in the prompt.) | diff --git a/src/browser/features/ChatInput/useCreationWorkspace.ts b/src/browser/features/ChatInput/useCreationWorkspace.ts index 3c0c5e44ca..12354cd4e8 100644 --- a/src/browser/features/ChatInput/useCreationWorkspace.ts +++ b/src/browser/features/ChatInput/useCreationWorkspace.ts @@ -53,7 +53,7 @@ import { getModelCapabilities, getModelCapabilitiesResolved, } from "@/common/utils/ai/modelCapabilities"; -import { normalizeModelInput } from "@/browser/utils/models/normalizeModelInput"; +import { normalizeModelInput } from "@/common/utils/ai/normalizeModelInput"; import { resolveDevcontainerSelection } from "@/browser/utils/devcontainerSelection"; import { getErrorMessage } from "@/common/utils/errors"; import { normalizeAgentId } from "@/common/utils/agentIds"; diff --git a/src/browser/utils/chatCommands.ts b/src/browser/utils/chatCommands.ts index 30d4784abb..5a1879ff39 100644 --- a/src/browser/utils/chatCommands.ts +++ b/src/browser/utils/chatCommands.ts @@ -53,7 +53,7 @@ import { } from "@/constants/slashCommands"; import { applyCompactionOverrides } from "@/browser/utils/messages/compactionOptions"; import { resolveCompactionModel } from "@/browser/utils/messages/compactionModelPreference"; -import { normalizeModelInput } from "@/browser/utils/models/normalizeModelInput"; +import { normalizeModelInput } from "@/common/utils/ai/normalizeModelInput"; import { getExplicitGatewayPrefix, normalizeToCanonical } from "@/common/utils/ai/models"; import type { QueueDispatchMode } from "@/browser/features/ChatInput/types"; import type { ChatAttachment } from "../features/ChatInput/ChatAttachments"; diff --git a/src/browser/utils/slashCommands/parser.ts b/src/browser/utils/slashCommands/parser.ts index 99f2492c04..8379abb01c 100644 --- a/src/browser/utils/slashCommands/parser.ts +++ b/src/browser/utils/slashCommands/parser.ts @@ -5,7 +5,7 @@ import type { ParsedCommand, SlashCommandDefinition } from "./types"; import { SLASH_COMMAND_DEFINITION_MAP } from "./registry"; import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels"; -import { normalizeModelInput } from "@/browser/utils/models/normalizeModelInput"; +import { normalizeModelInput } from "@/common/utils/ai/normalizeModelInput"; import { parseThinkingInput, type ParsedThinkingInput } from "@/common/types/thinking"; /** diff --git a/src/browser/utils/slashCommands/registry.ts b/src/browser/utils/slashCommands/registry.ts index 03c95adcbc..c3cfab2682 100644 --- a/src/browser/utils/slashCommands/registry.ts +++ b/src/browser/utils/slashCommands/registry.ts @@ -16,7 +16,7 @@ import { MODEL_ABBREVIATIONS } from "@/common/constants/knownModels"; import { SLASH_COMMAND_HINTS } from "@/common/constants/slashCommandHints"; import { assert } from "@/common/utils/assert"; import { isExperimentEnabled as readExperimentEnabled } from "@/browser/hooks/useExperiments"; -import { normalizeModelInput } from "@/browser/utils/models/normalizeModelInput"; +import { normalizeModelInput } from "@/common/utils/ai/normalizeModelInput"; import { parseGoalBudgetInputCents } from "@/common/utils/goals/budgetParser"; import { HEARTBEAT_MAX_INTERVAL_MS, HEARTBEAT_MIN_INTERVAL_MS } from "@/constants/heartbeat"; import { WORKSPACE_ONLY_COMMAND_KEYS } from "@/constants/slashCommands"; diff --git a/src/browser/utils/models/normalizeModelInput.test.ts b/src/common/utils/ai/normalizeModelInput.test.ts similarity index 100% rename from src/browser/utils/models/normalizeModelInput.test.ts rename to src/common/utils/ai/normalizeModelInput.test.ts diff --git a/src/browser/utils/models/normalizeModelInput.ts b/src/common/utils/ai/normalizeModelInput.ts similarity index 100% rename from src/browser/utils/models/normalizeModelInput.ts rename to src/common/utils/ai/normalizeModelInput.ts diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index 95c02dfab0..bcbcc9d046 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -184,6 +184,16 @@ const TaskAgentIdSchema = z.preprocess( const TaskToolBestOfCountSchema = z.number().int().min(1).max(20); +// Model/thinking overrides for the spawned sub-agent. Accepted as free-form strings +// so they can be parsed with the SAME logic as the UI (alias resolution for model; +// named levels OR numeric indices for thinking). A numeric thinking value may arrive +// as a JSON number, so coerce it to a string before parsing in the handler. +const TaskToolModelSchema = z.string().trim().min(1); +const TaskToolThinkingSchema = z.preprocess( + (value) => (typeof value === "number" ? String(value) : value), + z.string().trim().min(1) +); + const TaskToolVariantSchema = z.string().trim().min(1); const TaskToolVariantsSchema = z.array(TaskToolVariantSchema).min(1).max(20); @@ -260,6 +270,12 @@ const TaskToolAgentArgsSchema = z variants: TaskToolVariantsSchema.nullish().describe( `Optional labels for sibling runs of the same prompt template. Use variants when the task should be repeated across labeled lanes such as issue numbers, commit windows, or frontend/backend/tests/docs review lanes. Mutually exclusive with n. When provided, Mux launches one sibling per label and substitutes ${TASK_VARIANT_PLACEHOLDER} in the prompt.` ), + model: TaskToolModelSchema.nullish().describe( + "Optional model override for the sub-agent, parsed with the same alias logic as the UI (an alias or a full 'provider:model' string). Omit this unless the user explicitly instructed a specific model — by default the sub-agent inherits the parent's model. Do not assume any particular model is available." + ), + thinking: TaskToolThinkingSchema.nullish().describe( + "Optional thinking/reasoning-level override for the sub-agent. Accepts a level name (off, low, medium, high, xhigh, max) or a numeric index (resolved against the chosen model). Omit this unless the user explicitly instructed a specific thinking level — by default the sub-agent inherits the parent's thinking level." + ), }) .strict() .superRefine((args, ctx) => { diff --git a/src/node/acp/slashCommands.ts b/src/node/acp/slashCommands.ts index 58c60c1d93..5496992ab4 100644 --- a/src/node/acp/slashCommands.ts +++ b/src/node/acp/slashCommands.ts @@ -2,12 +2,7 @@ import assert from "node:assert/strict"; import type { AvailableCommand } from "@agentclientprotocol/sdk"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; import { SLASH_COMMAND_HINTS } from "@/common/constants/slashCommandHints"; -import { - getExplicitGatewayPrefix, - isValidModelFormat, - normalizeToCanonical, - resolveModelAlias, -} from "@/common/utils/ai/models"; +import { normalizeModelInput } from "@/common/utils/ai/normalizeModelInput"; import minimist from "minimist"; const CLEAR_COMMAND_NAME = "clear"; @@ -315,29 +310,9 @@ function parseSkillCommand( } function normalizeModelForCommand(modelInput: string): string | null { - const trimmed = modelInput.trim(); - if (trimmed.length === 0) { - return null; - } - - const resolved = resolveModelAlias(trimmed); - // Explicit gateway scoping is user intent — preserve it for the backend to honor. - const normalized = getExplicitGatewayPrefix(resolved) - ? resolved.trim() - : normalizeToCanonical(resolved).trim(); - - if (!isValidModelFormat(normalized)) { - return null; - } - - // Keep ACP slash commands aligned with the rest of model input handling by rejecting - // malformed provider::model strings that happen to satisfy the first-colon check. - const separatorIndex = normalized.indexOf(":"); - if (normalized.slice(separatorIndex + 1).startsWith(":")) { - return null; - } - - return normalized; + // Share the single model-input parser (alias resolution + gateway preservation + + // format validation) used by the UI and the task tool instead of duplicating it. + return normalizeModelInput(modelInput).model; } function parseMultilineCommand(rawInput: string): ParsedMultilineCommand { diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index 5fe2e7dcf4..03546fffc9 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -4402,15 +4402,17 @@ export const BUILTIN_SKILL_FILES: Record> = { "
", "", "
", - "task (8)", + "task (10)", "", "| Env var | JSON path | Type | Description |", "| ---------------------------------- | ------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |", "| `MUX_TOOL_INPUT_AGENT_ID` | `agentId` | string | — |", + "| `MUX_TOOL_INPUT_MODEL` | `model` | string | Optional model override for the sub-agent, parsed with the same alias logic as the UI (an alias or a full 'provider:model' string). Omit this unless the user explicitly instructed a specific model — by default the sub-agent inherits the parent's model. Do not assume any particular model is available. |", "| `MUX_TOOL_INPUT_N` | `n` | number | Optional best-of count. Use n when several agents should try the same prompt independently. Mutually exclusive with variants; omit both for a single task. Only use grouped runs for sub-agents without interfering side effects, such as read-only agents like explore. |", "| `MUX_TOOL_INPUT_PROMPT` | `prompt` | string | — |", "| `MUX_TOOL_INPUT_RUN_IN_BACKGROUND` | `run_in_background` | boolean | — |", "| `MUX_TOOL_INPUT_SUBAGENT_TYPE` | `subagent_type` | string | — |", + "| `MUX_TOOL_INPUT_THINKING` | `thinking` | string | Optional thinking/reasoning-level override for the sub-agent. Accepts a level name (off, low, medium, high, xhigh, max) or a numeric index (resolved against the chosen model). Omit this unless the user explicitly instructed a specific thinking level — by default the sub-agent inherits the parent's thinking level. |", "| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |", "| `MUX_TOOL_INPUT_VARIANTS_` | `variants[]` | string | Optional labels for sibling runs of the same prompt template. Use variants when the task should be repeated across labeled lanes such as issue numbers, commit windows, or frontend/backend/tests/docs review lanes. Mutually exclusive with n. When provided, Mux launches one sibling per label and substitutes ${variant} in the prompt. |", "| `MUX_TOOL_INPUT_VARIANTS_COUNT` | `variants.length` | number | Number of elements in variants (Optional labels for sibling runs of the same prompt template. Use variants when the task should be repeated across labeled lanes such as issue numbers, commit windows, or frontend/backend/tests/docs review lanes. Mutually exclusive with n. When provided, Mux launches one sibling per label and substitutes ${variant} in the prompt.) |", diff --git a/src/node/services/taskService.test.ts b/src/node/services/taskService.test.ts index 85e5f9fb9d..f3805a818d 100644 --- a/src/node/services/taskService.test.ts +++ b/src/node/services/taskService.test.ts @@ -1583,6 +1583,59 @@ describe("TaskService", () => { expect(childEntry?.taskThinkingLevel).toBe("xhigh"); }, 20_000); + test("resolves a numeric thinking override against the inherited model's policy", async () => { + const config = await createTestConfig(rootDir); + stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); + + const projectPath = await createTestProject(rootDir, "repo", { initGit: false }); + + const parentId = "1111111111"; + await saveWorkspaces( + config, + projectPath, + [ + { + path: projectPath, + id: parentId, + name: "parent", + createdAt: new Date().toISOString(), + runtimeConfig: { type: "local" }, + // opus-4-6 allows [off, low, medium, high, xhigh]; index 9 clamps to the highest (xhigh). + aiSettings: { model: "anthropic:claude-opus-4-6", thinkingLevel: "off" }, + }, + ], + testTaskSettings() + ); + + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { workspaceService }); + + const created = await createAgentTask(taskService, parentId, "run with numeric thinking", { + thinkingLevel: 9, + }); + expect(created.success).toBe(true); + if (!created.success) return; + + expect(sendMessage).toHaveBeenCalledWith( + created.data.taskId, + "run with numeric thinking", + { + model: "anthropic:claude-opus-4-6", + agentId: "explore", + thinkingLevel: "xhigh", + experiments: undefined, + }, + { agentInitiated: true } + ); + + const postCfg = config.loadConfigOrDefault(); + const childEntry = Array.from(postCfg.projects.values()) + .flatMap((p) => p.workspaces) + .find((w) => w.id === created.data.taskId); + expect(childEntry?.taskModelString).toBe("anthropic:claude-opus-4-6"); + expect(childEntry?.taskThinkingLevel).toBe("xhigh"); + }, 20_000); + test("agentAiDefaults outrank workspace aiSettingsByAgent for same agent", async () => { const config = await createTestConfig(rootDir); stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); diff --git a/src/node/services/taskService.ts b/src/node/services/taskService.ts index 4a71f0edb3..a71614fdfc 100644 --- a/src/node/services/taskService.ts +++ b/src/node/services/taskService.ts @@ -70,7 +70,7 @@ import { getWorkspaceProjectRepos } from "@/node/services/workspaceProjectRepos" import type { SessionUsageService } from "@/node/services/sessionUsageService"; import type { WorkspaceGoalService } from "@/node/services/workspaceGoalService"; import { getTotalCost, sumUsageHistory } from "@/common/utils/tokens/usageAggregator"; -import type { ThinkingLevel } from "@/common/types/thinking"; +import type { ParsedThinkingInput, ThinkingLevel } from "@/common/types/thinking"; import type { ErrorEvent, StreamEndEvent } from "@/common/types/stream"; import type { WorkflowRunStatus } from "@/common/types/workflow"; import { isDynamicToolPart, type DynamicToolPart } from "@/common/types/toolParts"; @@ -83,7 +83,7 @@ import { } from "@/common/utils/tools/toolDefinitions"; import { isPlanLikeInResolvedChain } from "@/common/utils/agentTools"; import { formatSendMessageError } from "@/node/services/utils/sendMessageError"; -import { enforceThinkingPolicy } from "@/common/utils/thinking/policy"; +import { enforceThinkingPolicy, resolveThinkingInput } from "@/common/utils/thinking/policy"; import { taskQueueDebug } from "@/node/services/taskQueueDebug"; import { readSubagentGitPatchArtifact } from "@/node/services/subagentGitPatchArtifacts"; import { @@ -136,7 +136,12 @@ export interface TaskCreateArgs { /** Human-readable title for the task (displayed in sidebar) */ title: string; modelString?: string; - thinkingLevel?: ThinkingLevel; + /** + * Explicit thinking override. Named levels apply directly; a numeric index is + * deferred (ParsedThinkingInput) and resolved against the chosen model's policy + * in resolveTaskAISettings, mirroring the UI's `/model+level` semantics. + */ + thinkingLevel?: ParsedThinkingInput; parentRuntimeAiSettings?: { modelString?: string; thinkingLevel?: ThinkingLevel }; /** Shared grouping metadata when one tool call spawns multiple sibling tasks. */ bestOf?: { @@ -970,7 +975,7 @@ export class TaskService { }; agentId: string; modelString?: string; - thinkingLevel?: ThinkingLevel; + thinkingLevel?: ParsedThinkingInput; parentRuntimeAiSettings?: { modelString?: string; thinkingLevel?: ThinkingLevel }; }): { taskModelString: string; @@ -993,8 +998,14 @@ export class TaskService { const canonicalModel = normalizeToCanonical(taskModelString).trim(); assert(canonicalModel.length > 0, "resolveTaskAISettings: resolved model must be non-empty"); + // Resolve an explicit override first so numeric thinking indices map into the + // chosen model's allowed levels (named levels pass through unchanged). + const overrideThinkingLevel = + params.thinkingLevel != null + ? resolveThinkingInput(params.thinkingLevel, canonicalModel) + : undefined; const requestedThinkingLevel: ThinkingLevel = - params.thinkingLevel ?? + overrideThinkingLevel ?? subagentDefault?.thinkingLevel ?? agentDefault?.thinkingLevel ?? parentRuntimeAiSettings?.thinkingLevel ?? diff --git a/src/node/services/tools/task.test.ts b/src/node/services/tools/task.test.ts index 0f749b2023..5157fd8389 100644 --- a/src/node/services/tools/task.test.ts +++ b/src/node/services/tools/task.test.ts @@ -129,6 +129,119 @@ describe("task tool", () => { }); }); + it("forwards a model alias and named thinking override to taskService.create", async () => { + using tempDir = new TestTempDir("test-task-tool-model-thinking-override"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const create = mock( + (_: { + modelString?: unknown; + thinkingLevel?: unknown; + parentRuntimeAiSettings?: { modelString?: unknown; thinkingLevel?: unknown }; + }) => Ok({ taskId: "child-task", kind: "agent" as const, status: "queued" as const }) + ); + const waitForAgentReport = mock(() => Promise.resolve({ reportMarkdown: "ignored" })); + const taskService = { create, waitForAgentReport } as unknown as TaskService; + + const tool = createTaskTool({ + ...baseConfig, + muxEnv: { MUX_MODEL_STRING: "openai:gpt-4o-mini", MUX_THINKING_LEVEL: "low" }, + taskService, + }); + + await Promise.resolve( + tool.execute!( + { + subagent_type: "explore", + prompt: "do it", + title: "Child task", + run_in_background: true, + // "sonnet" is an alias; the handler must resolve it like the UI does. + model: "sonnet", + thinking: "high", + }, + mockToolCallOptions + ) + ); + + expect(create).toHaveBeenCalledTimes(1); + const createArgs = create.mock.calls[0]?.[0]; + expect(createArgs?.modelString).toBe("anthropic:claude-sonnet-4-6"); + expect(createArgs?.thinkingLevel).toBe("high"); + // Parent runtime hint is still forwarded so unspecified fields keep inheriting. + expect(createArgs?.parentRuntimeAiSettings).toEqual({ + modelString: "openai:gpt-4o-mini", + thinkingLevel: "low", + }); + }); + + it("forwards a numeric thinking override as a deferred index", async () => { + using tempDir = new TestTempDir("test-task-tool-numeric-thinking"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const create = mock((_: { thinkingLevel?: unknown }) => + Ok({ taskId: "child-task", kind: "agent" as const, status: "queued" as const }) + ); + const waitForAgentReport = mock(() => Promise.resolve({ reportMarkdown: "ignored" })); + const taskService = { create, waitForAgentReport } as unknown as TaskService; + + const tool = createTaskTool({ ...baseConfig, taskService }); + + await Promise.resolve( + tool.execute!( + { + subagent_type: "explore", + prompt: "do it", + title: "Child task", + run_in_background: true, + // Numeric indices stay deferred (resolved against the model in taskService). + thinking: "2", + }, + mockToolCallOptions + ) + ); + + expect(create).toHaveBeenCalledTimes(1); + const createArgs = create.mock.calls[0]?.[0]; + expect(createArgs?.thinkingLevel).toBe(2); + }); + + it("rejects an invalid model override before spawning a task", async () => { + using tempDir = new TestTempDir("test-task-tool-invalid-model"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const create = mock(() => + Ok({ taskId: "child-task", kind: "agent" as const, status: "queued" as const }) + ); + const taskService = { create } as unknown as TaskService; + + const tool = createTaskTool({ ...baseConfig, taskService }); + + let caught: unknown = null; + try { + await Promise.resolve( + tool.execute!( + { + subagent_type: "explore", + prompt: "do it", + title: "Child task", + run_in_background: true, + model: "definitely-not-a-model", + }, + mockToolCallOptions + ) + ); + } catch (error: unknown) { + caught = error; + } + + expect(caught).toBeInstanceOf(Error); + if (caught instanceof Error) { + expect(caught.message).toMatch(/invalid model/i); + } + expect(create).not.toHaveBeenCalled(); + }); + it("spawns best-of-n background tasks with shared grouping metadata", async () => { using tempDir = new TestTempDir("test-task-tool-best-of-background"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); diff --git a/src/node/services/tools/task.ts b/src/node/services/tools/task.ts index 7da730fa63..4676ac6b31 100644 --- a/src/node/services/tools/task.ts +++ b/src/node/services/tools/task.ts @@ -17,7 +17,13 @@ import { ForegroundWaitBackgroundedError } from "@/node/services/taskService"; import { buildTaskGroupLaunches, type TaskGroupKind } from "@/common/utils/tools/taskGroups"; import { parseToolResult, requireTaskService, requireWorkspaceId } from "./toolUtils"; import { getErrorMessage } from "@/common/utils/errors"; -import { coerceThinkingLevel, type ThinkingLevel } from "@/common/types/thinking"; +import { + coerceThinkingLevel, + parseThinkingInput, + type ParsedThinkingInput, + type ThinkingLevel, +} from "@/common/types/thinking"; +import { normalizeModelInput } from "@/common/utils/ai/normalizeModelInput"; import { coerceNonEmptyString } from "@/node/services/taskUtils"; /** @@ -61,6 +67,43 @@ function buildParentRuntimeAiSettings( }; } +/** + * Parse the optional `model`/`thinking` overrides supplied on a task launch, + * reusing the exact parsing the UI uses (`normalizeModelInput` for model alias + * resolution; `parseThinkingInput` for named levels OR numeric indices). Numeric + * thinking indices stay deferred as a `ParsedThinkingInput` so they resolve + * against the sub-agent's chosen model in `resolveTaskAISettings`. Throws a + * descriptive error on invalid input so the model can correct the call. + */ +function parseTaskAiOverrides(args: { model?: string | null; thinking?: string | null }): { + modelString?: string; + thinkingLevel?: ParsedThinkingInput; +} { + const overrides: { modelString?: string; thinkingLevel?: ParsedThinkingInput } = {}; + + if (args.model != null) { + const normalized = normalizeModelInput(args.model); + if (normalized.model == null) { + throw new Error( + `task tool: invalid model "${args.model}". Provide a known alias or a "provider:model" string.` + ); + } + overrides.modelString = normalized.model; + } + + if (args.thinking != null) { + const parsed = parseThinkingInput(args.thinking); + if (parsed == null) { + throw new Error( + `task tool: invalid thinking "${args.thinking}". Use a level name (off, low, medium, high, xhigh, max) or a numeric index.` + ); + } + overrides.thinkingLevel = parsed; + } + + return overrides; +} + interface SpawnedTaskInfo { taskId: string; status: "queued" | "running"; @@ -283,14 +326,28 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { throw new Error("Interrupted"); } - const { agentId, subagent_type, prompt, title, run_in_background, n, variants } = - validatedArgs; + const { + agentId, + subagent_type, + prompt, + title, + run_in_background, + n, + variants, + model, + thinking, + } = validatedArgs; const requestedAgentId = typeof agentId === "string" && agentId.trim().length > 0 ? agentId : subagent_type; if (!requestedAgentId) { throw new Error("task tool input validation failed: expected agent task args"); } + // Explicit per-launch model/thinking overrides. Omitted by default so the + // sub-agent keeps inheriting the parent's live settings (see precedence in + // taskService.resolveTaskAISettings). + const aiOverrides = parseTaskAiOverrides({ model, thinking }); + const workspaceId = requireWorkspaceId(config, "task"); const taskService = requireTaskService(config, "task"); const taskGroupLaunches = buildTaskGroupLaunches({ prompt, n, variants }); @@ -325,6 +382,10 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { prompt: launch.prompt, title, experiments: config.experiments, + ...(aiOverrides.modelString != null ? { modelString: aiOverrides.modelString } : {}), + ...(aiOverrides.thinkingLevel != null + ? { thinkingLevel: aiOverrides.thinkingLevel } + : {}), ...(parentRuntimeAiSettings != null ? { parentRuntimeAiSettings } : {}), bestOf: taskGroupId != null