impulse-studio
diff --git a/‎bun.lock‎
Lines changed: 0 additions & 5 deletions b/‎bun.lock‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎packages/backend/convex/agents/agenticRunner.ts‎
Lines changed: 49 additions & 22 deletions b/‎packages/backend/convex/agents/agenticRunner.ts‎
Lines changed: 49 additions & 22 deletions
diff --git a/‎packages/backend/convex/agents/browser/agent.ts‎
Lines changed: 1 addition & 1 deletion b/‎packages/backend/convex/agents/browser/agent.ts‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎packages/backend/convex/agents/browser/runner.ts‎
Lines changed: 97 additions & 56 deletions b/‎packages/backend/convex/agents/browser/runner.ts‎
Lines changed: 97 additions & 56 deletions
diff --git a/‎packages/backend/convex/agents/general/runner.ts‎
Lines changed: 3 additions & 4 deletions b/‎packages/backend/convex/agents/general/runner.ts‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎packages/backend/convex/agents/models.ts‎
Lines changed: 9 additions & 9 deletions b/‎packages/backend/convex/agents/models.ts‎
Lines changed: 9 additions & 9 deletions
@@ -1,7 +1,7 @@
 import { generateText } from "ai";
 import { internal } from "../_generated/api";
 import { mistral, MANAGER_MODEL, roleToModel } from "./models";
-import { buildSkillset } from "./skills/index";
+import { buildSkillset, type DoneSignal } from "./skills/index";
 import { buildSystemPrompt } from "./prompts";
 import type { RunnerCtx, RunnerResult } from "./shared/types";
 
@@ -29,7 +29,10 @@ export async function runAgenticTask(
 	agentName: string,
 	continuationState?: ContinuationState,
 ): Promise<RunnerResult> {
-	const tools = buildSkillset(ctx, agentId, role);
+	// Shared signal: flipped by updateTaskStatus when agent marks task as terminal.
+	// Checked by stopWhen to end the ReAct loop immediately after completion.
+	const doneSignal: DoneSignal = { value: false };
+	const tools = buildSkillset(ctx, agentId, role, doneSignal);
 	const systemPrompt = buildSystemPrompt(role, task, agentName);
 	const modelId = roleToModel[role] ?? MANAGER_MODEL;
 	const startTime = Date.now();
@@ -58,6 +61,8 @@ export async function runAgenticTask(
 
 	// Shared generateText options (everything except prompt/messages)
 	const stopWhen = ({ steps }: { steps: unknown[] }) => {
+		// Agent marked its own task as done/review/failed/cancelled — stop immediately
+		if (doneSignal.value) return true;
 		if (steps.length + stepsAlreadyDone >= MAX_STEPS) return true;
 		return Date.now() - startTime > SOFT_BUDGET_MS;
 	};
@@ -75,7 +80,7 @@ export async function runAgenticTask(
 			content: string;
 		}> = [];
 
-		// 1. Reasoning (extended thinking from Magistral models)
+		// 1. Reasoning (extended thinking)
 		if (event.reasoningText) {
 			entries.push({ type: "reasoning", content: event.reasoningText });
 		}
@@ -132,25 +137,35 @@ export async function runAgenticTask(
 		// Use separate generateText calls to satisfy TypeScript's discriminated union
 		// (prompt and messages are mutually exclusive)
 		const result = continuationState
-			? await generateText({
-					model: mistral(modelId),
-					system: systemPrompt,
-					// eslint-disable-next-line @typescript-eslint/no-explicit-any
-					messages: [
-						{ role: "user" as const, content: taskPrompt },
-						// ResponseMessages from prior run — already correctly shaped from AI SDK
-						...(JSON.parse(continuationState.messages) as any[]),
-						{
-							role: "user" as const,
-							content:
-								"Continue working. You were interrupted by a time limit. Resume from where you stopped. Do NOT repeat work already done — review the tool results above and continue from the last step.",
-						},
-					],
-					tools,
-					stopWhen,
-					abortSignal: controller.signal,
-					onStepFinish,
-				})
+			? await (async () => {
+					const priorMessages = JSON.parse(continuationState.messages) as Array<{
+						role: string;
+						content?: unknown;
+					}>;
+					const lastRole = priorMessages[priorMessages.length - 1]?.role;
+					const continueUserMsg = {
+						role: "user" as const,
+						content:
+							"Continue working. You were interrupted by a time limit. Resume from where you stopped. Do NOT repeat work already done — review the tool results above and continue from the last step.",
+					};
+					if (lastRole === "tool") {
+						priorMessages.push({
+							role: "assistant" as const,
+							content: "[Resuming after time limit — continuing from last tool results.]",
+						});
+					}
+					priorMessages.push(continueUserMsg);
+					return await generateText({
+						model: mistral(modelId),
+						system: systemPrompt,
+						// eslint-disable-next-line @typescript-eslint/no-explicit-any
+						messages: [{ role: "user" as const, content: taskPrompt }, ...(priorMessages as any[])],
+						tools,
+						stopWhen,
+						abortSignal: controller.signal,
+						onStepFinish,
+					});
+				})()
 			: await generateText({
 					model: mistral(modelId),
 					system: systemPrompt,
@@ -161,6 +176,18 @@ export async function runAgenticTask(
 					onStepFinish,
 				});
 
+		// If agent self-reported completion via updateTaskStatus, treat as success
+		// regardless of step/time budget — the agent decided it's done.
+		if (doneSignal.value) {
+			const summary = result.text || "(task completed — agent set terminal status)";
+			await ctx.runMutation(internal.logs.mutations.append, {
+				agentId,
+				type: "status" as const,
+				content: `[${role}] Task completed (agent self-reported)`,
+			});
+			return { success: true, result: summary };
+		}
+
 		// Check if stopped by time budget (not natural completion)
 		const hitBudget = Date.now() - startTime > SOFT_BUDGET_MS;
 		const hitStepLimit = result.steps.length + stepsAlreadyDone >= MAX_STEPS;
 
@@ -10,7 +10,7 @@ import {
 
 export const browserAgent = new Agent(components.agent, {
 	name: "Browser",
-	languageModel: mistral(MANAGER_MODEL), // mistral-large-latest — has vision
+	languageModel: mistral(MANAGER_MODEL), // Claude Sonnet 4.6 — has vision
 	instructions: `You are a browser agent that navigates websites using Computer Use.
 You see screenshots of the desktop and decide what to click, type, or scroll.
 You complete web tasks: research, form filling, data extraction, testing.
 
@@ -1,75 +1,114 @@
 import { generateObject } from "ai";
-import { createAmazonBedrock } from "@ai-sdk/amazon-bedrock";
 import { z } from "zod";
 import { internal } from "../../_generated/api";
 import type { RunnerCtx, RunnerResult } from "../shared/types";
-import { MANAGER_MODEL } from "../models";
+import { mistral, MANAGER_MODEL } from "../models";
 
 type TaskRecord = { title: string; description?: string };
 
 const MAX_ITERATIONS = 200;
 const ACTION_DELAY_MS = 1000;
 
-// Structured action schema — the model picks one action per step
-const ActionSchema = z.discriminatedUnion("action", [
-	z.object({
-		action: z.literal("click"),
-		x: z.number().describe("X coordinate to click"),
-		y: z.number().describe("Y coordinate to click"),
-		button: z.enum(["left", "right"]).default("left").describe("Mouse button"),
-		reasoning: z.string().describe("Why you are clicking here"),
-	}),
-	z.object({
-		action: z.literal("double_click"),
-		x: z.number().describe("X coordinate to double-click"),
-		y: z.number().describe("Y coordinate to double-click"),
-		reasoning: z.string().describe("Why you are double-clicking here"),
-	}),
-	z.object({
-		action: z.literal("type"),
-		text: z.string().describe("Text to type"),
-		reasoning: z.string().describe("Why you are typing this"),
-	}),
-	z.object({
-		action: z.literal("key"),
-		key: z.string().describe("Key to press (e.g. Enter, Tab, Escape)"),
-		modifiers: z.array(z.string()).optional().describe("Modifier keys (e.g. ctrl, alt, shift)"),
-		reasoning: z.string().describe("Why you are pressing this key"),
-	}),
-	z.object({
-		action: z.literal("hotkey"),
-		keys: z.string().describe("Key combo (e.g. ctrl+c, ctrl+l, alt+tab)"),
-		reasoning: z.string().describe("Why you are pressing this hotkey"),
-	}),
-	z.object({
-		action: z.literal("scroll"),
-		x: z.number().describe("X coordinate for scroll position"),
-		y: z.number().describe("Y coordinate for scroll position"),
-		direction: z.enum(["up", "down"]).describe("Scroll direction"),
-		amount: z.number().optional().describe("Scroll amount (default 3)"),
-		reasoning: z.string().describe("Why you are scrolling"),
-	}),
-	z.object({
-		action: z.literal("wait"),
-		seconds: z.number().min(1).max(5).describe("Seconds to wait for page to load"),
-		reasoning: z.string().describe("Why you are waiting"),
-	}),
-	z.object({
-		action: z.literal("done"),
-		result: z.string().describe("Summary of what was accomplished"),
-	}),
-]);
-
-type Action = z.infer<typeof ActionSchema>;
+// Flat action schema — one object; only "action" is required, every other field is optional and applies to the actions named in its description.
+const ActionSchema = z.object({
+	action: z
+		.enum(["click", "double_click", "type", "key", "hotkey", "scroll", "wait", "done"])
+		.describe("The action to perform"),
+	reasoning: z.string().optional().describe("Why you are taking this action"),
+	x: z.number().optional().describe("X coordinate (click, double_click, scroll)"),
+	y: z.number().optional().describe("Y coordinate (click, double_click, scroll)"),
+	button: z.enum(["left", "right"]).optional().describe("Mouse button for click (default: left)"),
+	text: z.string().optional().describe("Text to type (for type action)"),
+	key: z.string().optional().describe("Key to press, e.g. Enter, Tab, Escape (for key action)"),
+	modifiers: z
+		.array(z.string())
+		.optional()
+		.describe("Modifier keys e.g. ctrl, alt, shift (for key action)"),
+	keys: z.string().optional().describe("Key combo e.g. ctrl+c, alt+tab (for hotkey action)"),
+	direction: z.enum(["up", "down"]).optional().describe("Scroll direction (for scroll action)"),
+	amount: z.number().optional().describe("Scroll amount, default 3 (for scroll action)"),
+	seconds: z.number().optional().describe("Seconds to wait 1-5 (for wait action)"), // the 1-5 range is only described, not validated here; toAction clamps it
+	result: z.string().optional().describe("Summary of what was accomplished (for done action)"),
+});
+
+type FlatAction = z.infer<typeof ActionSchema>;
+
+// Per-action shapes used after narrowing (executeAction/formatAction): the fields each action needs are required here, unlike in the flat schema.
+type Action =
+	| { action: "click"; x: number; y: number; button: string; reasoning?: string } // NOTE(review): button is typed as string although the schema only allows "left" | "right"
+	| { action: "double_click"; x: number; y: number; reasoning?: string }
+	| { action: "type"; text: string; reasoning?: string }
+	| { action: "key"; key: string; modifiers?: string[]; reasoning?: string }
+	| { action: "hotkey"; keys: string; reasoning?: string }
+	| {
+			action: "scroll";
+			x: number;
+			y: number;
+			direction: "up" | "down";
+			amount?: number;
+			reasoning?: string;
+	  }
+	| { action: "wait"; seconds: number; reasoning?: string }
+	| { action: "done"; result: string; reasoning?: string };
+// Narrows a flat schema result into the typed Action for its "action" value; any field the model omitted is replaced by a default.
+function toAction(raw: FlatAction): Action {
+	switch (raw.action) {
+		case "click": {
+			return {
+				action: "click",
+				x: raw.x ?? 0, // NOTE(review): an omitted x/y becomes 0, so the action targets (0, 0) — confirm this is preferred over rejecting it
+				y: raw.y ?? 0,
+				button: raw.button ?? "left",
+				reasoning: raw.reasoning,
+			};
+		}
+		case "double_click": {
+			return { action: "double_click", x: raw.x ?? 0, y: raw.y ?? 0, reasoning: raw.reasoning };
+		}
+		case "type": {
+			return { action: "type", text: raw.text ?? "", reasoning: raw.reasoning }; // omitted text becomes an empty string
+		}
+		case "key": {
+			return {
+				action: "key",
+				key: raw.key ?? "Enter", // an omitted key defaults to Enter
+				modifiers: raw.modifiers,
+				reasoning: raw.reasoning,
+			};
+		}
+		case "hotkey": {
+			return { action: "hotkey", keys: raw.keys ?? "", reasoning: raw.reasoning }; // omitted combo becomes an empty string
+		}
+		case "scroll": {
+			return {
+				action: "scroll",
+				x: raw.x ?? 0,
+				y: raw.y ?? 0,
+				direction: raw.direction ?? "down",
+				amount: raw.amount, // left undefined when omitted; no default is applied here
+				reasoning: raw.reasoning,
+			};
+		}
+		case "wait": {
+			return {
+				action: "wait",
+				seconds: Math.min(5, Math.max(1, raw.seconds ?? 2)), // defaults to 2, then clamped to the range [1, 5]
+				reasoning: raw.reasoning,
+			};
+		}
+		case "done": {
+			return { action: "done", result: raw.result ?? "Task completed.", reasoning: raw.reasoning };
+		}
+	}
+}
 
 // Run a Computer Use task: start desktop → vision loop → return result
 export async function runComputerUseTask(
 	ctx: RunnerCtx,
 	agentId: string,
 	task: TaskRecord,
 ): Promise<RunnerResult> {
-	const bedrock = createAmazonBedrock({ region: "us-west-2" });
-	const model = bedrock(MANAGER_MODEL);
+	const model = mistral(MANAGER_MODEL);
 
 	// 1. Ensure Computer Use environment is started (Xvfb + xfce4 + VNC)
 	await ctx.runAction(internal.sandbox.lifecycle.ensureComputerUseStarted, { agentId });
@@ -183,7 +222,7 @@ export async function runComputerUseTask(
 		);
 
 		// Ask the manager model (MANAGER_MODEL) to decide next action
-		const { object: nextAction, usage: stepUsage } = await generateObject({
+		const { object, usage: stepUsage } = await generateObject({
 			model,
 			schema: ActionSchema,
 			messages: [
@@ -228,6 +267,8 @@ Rules:
 			],
 		});
 
+		const nextAction = toAction(object);
+
 		// Log the action + usage
 		const actionDesc = formatAction(nextAction);
 		actionLog.push(actionDesc);
 
@@ -1,9 +1,8 @@
-import { createAmazonBedrock } from "@ai-sdk/amazon-bedrock";
 import { generateObject, generateText } from "ai";
 import { z } from "zod";
 import { internal } from "../../_generated/api";
 import type { RunnerCtx, RunnerResult } from "../shared/types";
-import { MANAGER_MODEL } from "../models";
+import { mistral, MANAGER_MODEL } from "../models";
 import { SANDBOX_WORK_DIR, SHARED_WORKSPACE, SHARED_OUTPUTS } from "../../sandbox/constants";
 
 const MAX_RETRIES_PER_STEP = 2;
@@ -28,7 +27,7 @@ export async function runGeneralTask(
 	role: string,
 ): Promise<RunnerResult> {
 	const startTime = Date.now();
-	const mistralClient = createAmazonBedrock({ region: "us-west-2" });
+	const mistralClient = mistral;
 
 	// ── Phase 1: Planning ──────────────────────────────────────
 	await ctx.runMutation(internal.logs.mutations.append, {
@@ -37,7 +36,7 @@ export async function runGeneralTask(
 		content: `[${role}] Planning steps for: ${task.title}`,
 	});
 
-	// Use mistral-large for structured output (magistral doesn't support generateObject)
+	// Use manager model for structured output
 	const { object: plan } = await generateObject({
 		model: mistralClient(MANAGER_MODEL),
 		schema: planSchema,
 
@@ -1,15 +1,15 @@
-import { createAmazonBedrock } from "@ai-sdk/amazon-bedrock";
+import { createGateway } from "ai";
 
-// Bedrock provider — uses AWS_BEARER_TOKEN_BEDROCK env var for auth
-export const mistral = createAmazonBedrock({
-	region: "us-west-2",
+const gateway = createGateway({
+	apiKey: process.env.AI_GATEWAY_API_KEY ?? "",
 });
 
-// ── Model ID constants (Bedrock Mistral model IDs) ──────────
-export const MANAGER_MODEL = "mistral.mistral-large-3-675b-instruct";
-export const CODER_MODEL = "mistral.devstral-2-123b";
-export const ROUTING_MODEL = "mistral.ministral-3-8b-instruct";
-export const REASONING_MODEL = "mistral.magistral-small-2509";
+export const mistral = gateway; // exported under the previous name; other modules in this diff still import { mistral }
+// Model IDs in "provider/model" form, passed to the gateway provider above.
+export const MANAGER_MODEL = "anthropic/claude-sonnet-4.6";
+export const CODER_MODEL = "anthropic/claude-sonnet-4.6";
+export const ROUTING_MODEL = "anthropic/claude-haiku-4.5";
+export const REASONING_MODEL = "anthropic/claude-sonnet-4.5";
 
 export const roleToModel: Record<string, string> = {
 	coder: CODER_MODEL,