Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/clear-hands-attend.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

Add unified variables support across act and agent with a single VariableValue type
5 changes: 3 additions & 2 deletions packages/core/lib/prompt.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { ChatMessage } from "./v3/llm/LLMClient.js";
import type { Variables } from "./v3/types/public/agent.js";

export function buildUserInstructionsString(
userProvidedInstructions?: string,
Expand Down Expand Up @@ -170,7 +171,7 @@ Return the element that matches the instruction if it exists. Otherwise, return
export function buildActPrompt(
action: string,
supportedActions: string[],
variables?: Record<string, string>,
variables?: Variables,
): string {
// Base instruction
let instruction = `Find the most relevant element to perform an action on given the following action: ${action}.
Expand Down Expand Up @@ -215,7 +216,7 @@ export function buildStepTwoPrompt(
originalUserAction: string,
previousAction: string,
supportedActions: string[],
variables?: Record<string, string>,
variables?: Variables,
): string {
// Base instruction
let instruction = `
Expand Down
30 changes: 29 additions & 1 deletion packages/core/lib/v3/agent/prompts/agentSystemPrompt.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { AgentToolMode } from "../../types/public/agent.js";
import type { AgentToolMode, Variables } from "../../types/public/agent.js";

export interface AgentSystemPromptOptions {
url: string;
Expand All @@ -9,6 +9,8 @@ export interface AgentSystemPromptOptions {
isBrowserbase?: boolean;
/** Tools to exclude from the system prompt */
excludeTools?: string[];
/** Variables available to the agent for use in act/type tools */
variables?: Variables;
}

/**
Expand Down Expand Up @@ -122,6 +124,7 @@ export function buildAgentSystemPrompt(
systemInstructions,
isBrowserbase = false,
excludeTools,
variables,
} = options;
const localeDate = new Date().toLocaleDateString();
const isoDate = new Date().toISOString();
Expand Down Expand Up @@ -203,6 +206,30 @@ export function buildAgentSystemPrompt(
? `<customInstructions>${cdata(systemInstructions)}</customInstructions>\n `
: "";

// Build variables section only if variables are provided
const hasVariables = variables && Object.keys(variables).length > 0;
const variableToolsNote = isHybridMode
? "Use %variableName% syntax in the type, fillFormVision, or act tool's value/text/action fields."
: "Use %variableName% syntax in the act or fillForm tool's value/action fields.";
const variablesSection = hasVariables
? `<variables>
<note>You have access to the following variables. Use %variableName% syntax to substitute variable values. This is especially important for sensitive data like passwords.</note>
<usage>${variableToolsNote}</usage>
<example>To type a password, use: type %password% into the password field</example>
${Object.entries(variables)
.map(([name, v]) => {
const description =
typeof v === "object" && v !== null && "value" in v
? v.description
: undefined;
return description
? `<variable name="${name}">${description}</variable>`
: `<variable name="${name}" />`;
Comment on lines +226 to +227
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol is xml still the best format for these in 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did a bit of research: it's still recommended for Claude, and GPT performs better. so I think this is the best model-agnostic solution

})
.join("\n ")}
</variables>`
: "";

return `<system>
<identity>You are a web automation assistant using browser automation tools to accomplish the user's goal.</identity>
${customInstructionsBlock}<task>
Expand Down Expand Up @@ -234,6 +261,7 @@ export function buildAgentSystemPrompt(
${commonStrategyItems}
</strategy>
${roadblocksSection}
${variablesSection}
<completion>
<note>When you complete the task, explain any information that was found that was relevant to the original task.</note>
<examples>
Expand Down
26 changes: 17 additions & 9 deletions packages/core/lib/v3/agent/tools/act.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,23 @@ import { tool } from "ai";
import { z } from "zod";
import type { V3 } from "../../v3.js";
import type { Action } from "../../types/public/methods.js";
import type { AgentModelConfig } from "../../types/public/agent.js";
import type { AgentModelConfig, Variables } from "../../types/public/agent.js";

export const actTool = (v3: V3, executionModel?: string | AgentModelConfig) =>
tool({
export const actTool = (
v3: V3,
executionModel?: string | AgentModelConfig,
variables?: Variables,
) => {
const hasVariables = variables && Object.keys(variables).length > 0;
const actionDescription = hasVariables
? `Describe what to click or type, e.g. "click the Login button" or "type %variableName% into the input". Available variables: ${Object.keys(variables).join(", ")}`
: 'Describe what to click or type, e.g. "click the Login button" or "type "John" into the first name input"';

return tool({
description:
"Perform an action on the page (click, type). Provide a short, specific phrase that mentions the element type.",
inputSchema: z.object({
action: z
.string()
.describe(
'Describe what to click or type, e.g. "click the Login button" or "type "John" into the first name input"',
),
action: z.string().describe(actionDescription),
}),
execute: async ({ action }) => {
try {
Expand All @@ -28,7 +33,9 @@ export const actTool = (v3: V3, executionModel?: string | AgentModelConfig) =>
},
},
});
const options = executionModel ? { model: executionModel } : undefined;
const options = executionModel
? { model: executionModel, variables }
: { variables };
const result = await v3.act(action, options);
const actions = (result.actions as Action[] | undefined) ?? [];
v3.recordAgentReplayStep({
Expand Down Expand Up @@ -57,3 +64,4 @@ export const actTool = (v3: V3, executionModel?: string | AgentModelConfig) =>
}
},
});
};
28 changes: 22 additions & 6 deletions packages/core/lib/v3/agent/tools/fillFormVision.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,24 @@ import type { Action } from "../../types/public/methods.js";
import type {
FillFormVisionToolResult,
ModelOutputContentItem,
Variables,
} from "../../types/public/agent.js";
import { processCoordinates } from "../utils/coordinateNormalization.js";
import { ensureXPath } from "../utils/xpath.js";
import { waitAndCaptureScreenshot } from "../utils/screenshotHandler.js";
import { substituteVariables } from "../utils/variables.js";

export const fillFormVisionTool = (v3: V3, provider?: string) =>
tool({
export const fillFormVisionTool = (
v3: V3,
provider?: string,
variables?: Variables,
) => {
const hasVariables = variables && Object.keys(variables).length > 0;
const valueDescription = hasVariables
? `Text to type into the target field. Use %variableName% to substitute a variable value. Available: ${Object.keys(variables).join(", ")}`
: "Text to type into the target field";

return tool({
description: `FORM FILL - SPECIALIZED MULTI-FIELD INPUT TOOL

CRITICAL: Use this for ANY form with 2+ input fields (text inputs, textareas, etc.)
Expand All @@ -38,7 +49,7 @@ MANDATORY USE CASES (always use fillFormVision for these):
.describe(
"Description of the typing action, e.g. 'type foo into the bar field'",
),
value: z.string().describe("Text to type into the target field"),
value: z.string().describe(valueDescription),
coordinates: z
.object({
x: z.number(),
Expand All @@ -53,7 +64,8 @@ MANDATORY USE CASES (always use fillFormVision for these):
try {
const page = await v3.context.awaitActivePage();

// Process coordinates for each field
// Process coordinates and substitute variables for each field
// Keep original values (with %tokens%) for logging/caching, substituted values for typing
const processedFields = fields.map((field) => {
const processed = processCoordinates(
field.coordinates.x,
Expand All @@ -63,6 +75,8 @@ MANDATORY USE CASES (always use fillFormVision for these):
);
return {
...field,
originalValue: field.value, // Keep original with %tokens% for cache
value: substituteVariables(field.value, variables),
coordinates: { x: processed.x, y: processed.y },
};
});
Expand All @@ -73,7 +87,7 @@ MANDATORY USE CASES (always use fillFormVision for these):
level: 1,
auxiliary: {
arguments: {
value: JSON.stringify({ fields, processedFields }),
value: JSON.stringify({ fields }), // Don't log substituted values
type: "object",
},
},
Expand All @@ -95,14 +109,15 @@ MANDATORY USE CASES (always use fillFormVision for these):
await page.type(field.value);

// Build Action with XPath for deterministic replay (only when caching)
// Use originalValue (with %tokens%) so cache stores references, not sensitive values
if (shouldCollectXpath) {
const normalizedXpath = ensureXPath(xpath);
if (normalizedXpath) {
actions.push({
selector: normalizedXpath,
description: field.action,
method: "type",
arguments: [field.value],
arguments: [field.originalValue],
});
}
}
Expand Down Expand Up @@ -169,3 +184,4 @@ MANDATORY USE CASES (always use fillFormVision for these):
};
},
});
};
18 changes: 13 additions & 5 deletions packages/core/lib/v3/agent/tools/fillform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@ import { tool } from "ai";
import { z } from "zod";
import type { V3 } from "../../v3.js";
import type { Action } from "../../types/public/methods.js";
import type { AgentModelConfig } from "../../types/public/agent.js";
import type { AgentModelConfig, Variables } from "../../types/public/agent.js";

export const fillFormTool = (
v3: V3,
executionModel?: string | AgentModelConfig,
) =>
tool({
variables?: Variables,
) => {
const hasVariables = variables && Object.keys(variables).length > 0;
const valueDescription = hasVariables
? `Text to type into the target. Use %variableName% to substitute a variable value. Available: ${Object.keys(variables).join(", ")}`
: "Text to type into the target";

return tool({
description: `📝 FORM FILL - MULTI-FIELD INPUT TOOL\nFor any form with 2+ inputs/textareas. Faster than individual typing.`,
inputSchema: z.object({
fields: z
Expand All @@ -19,7 +25,7 @@ export const fillFormTool = (
.describe(
'Description of typing action, e.g. "type foo into the email field"',
),
value: z.string().describe("Text to type into the target"),
value: z.string().describe(valueDescription),
}),
)
.min(1, "Provide at least one field to fill"),
Expand Down Expand Up @@ -48,7 +54,8 @@ export const fillFormTool = (
const completed = [] as unknown[];
const replayableActions: Action[] = [];
for (const res of observeResults) {
const actResult = await v3.act(res);
const actOptions = variables ? { variables } : undefined;
const actResult = await v3.act(res, actOptions);
completed.push(actResult);
if (Array.isArray(actResult.actions)) {
replayableActions.push(...(actResult.actions as Action[]));
Expand All @@ -67,3 +74,4 @@ export const fillFormTool = (
};
},
});
};
15 changes: 11 additions & 4 deletions packages/core/lib/v3/agent/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import type { LogLine } from "../../types/public/logs.js";
import type {
AgentToolMode,
AgentModelConfig,
Variables,
} from "../../types/public/agent.js";

export interface V3AgentToolOptions {
Expand All @@ -42,6 +43,11 @@ export interface V3AgentToolOptions {
* These tools will be filtered out after mode-based filtering.
*/
excludeTools?: string[];
/**
* Variables available to the agent for use in the act, type, fillForm, and fillFormVision tools.
* When provided, these tools list the variable names in their input descriptions and accept %variableName% tokens.
*/
variables?: Variables;
}

/**
Expand Down Expand Up @@ -83,23 +89,24 @@ export function createAgentTools(v3: V3, options?: V3AgentToolOptions) {
const mode = options?.mode ?? "dom";
const provider = options?.provider;
const excludeTools = options?.excludeTools;
const variables = options?.variables;

const allTools: ToolSet = {
act: actTool(v3, executionModel),
act: actTool(v3, executionModel, variables),
ariaTree: ariaTreeTool(v3),
click: clickTool(v3, provider),
clickAndHold: clickAndHoldTool(v3, provider),
dragAndDrop: dragAndDropTool(v3, provider),
extract: extractTool(v3, executionModel),
fillForm: fillFormTool(v3, executionModel),
fillFormVision: fillFormVisionTool(v3, provider),
fillForm: fillFormTool(v3, executionModel, variables),
fillFormVision: fillFormVisionTool(v3, provider, variables),
goto: gotoTool(v3),
keys: keysTool(v3),
navback: navBackTool(v3),
screenshot: screenshotTool(v3),
scroll: mode === "hybrid" ? scrollVisionTool(v3, provider) : scrollTool(v3),
think: thinkTool(),
type: typeTool(v3, provider),
type: typeTool(v3, provider, variables),
wait: waitTool(v3, mode),
};

Expand Down
21 changes: 16 additions & 5 deletions packages/core/lib/v3/agent/tools/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,20 @@ import type { Action } from "../../types/public/methods.js";
import type {
TypeToolResult,
ModelOutputContentItem,
Variables,
} from "../../types/public/agent.js";
import { processCoordinates } from "../utils/coordinateNormalization.js";
import { ensureXPath } from "../utils/xpath.js";
import { waitAndCaptureScreenshot } from "../utils/screenshotHandler.js";
import { substituteVariables } from "../utils/variables.js";

export const typeTool = (v3: V3, provider?: string) =>
tool({
export const typeTool = (v3: V3, provider?: string, variables?: Variables) => {
const hasVariables = variables && Object.keys(variables).length > 0;
const textDescription = hasVariables
? `The text to type into the element. Use %variableName% to substitute a variable value. Available: ${Object.keys(variables).join(", ")}`
: "The text to type into the element";

return tool({
description:
"Type text into an element using its coordinates. This will click the element and then type the text into it (this is the most reliable way to type into an element, always use this over act, unless the element is not visible in the screenshot, but shown in ariaTree)",
inputSchema: z.object({
Expand All @@ -20,7 +27,7 @@ export const typeTool = (v3: V3, provider?: string) =>
.describe(
"Describe the element to type into in a short, specific phrase that mentions the element type and a good visual description",
),
text: z.string().describe("The text to type into the element"),
text: z.string().describe(textDescription),
coordinates: z
.array(z.number())
.describe("The (x, y) coordinates to type into the element"),
Expand All @@ -39,6 +46,9 @@ export const typeTool = (v3: V3, provider?: string) =>
v3,
);

// Substitute any %variableName% tokens in the text
const actualText = substituteVariables(text, variables);

v3.logger({
category: "agent",
message: `Agent calling tool: type`,
Expand All @@ -57,7 +67,7 @@ export const typeTool = (v3: V3, provider?: string) =>
returnXpath: shouldCollectXpath,
});

await page.type(text);
await page.type(actualText);

const screenshotBase64 = await waitAndCaptureScreenshot(page);

Expand All @@ -83,7 +93,7 @@ export const typeTool = (v3: V3, provider?: string) =>
return {
success: true,
describe,
text,
text, // Return original text (with %variableName% tokens) to avoid exposing sensitive values to LLM
screenshotBase64,
};
} catch (error) {
Expand Down Expand Up @@ -128,3 +138,4 @@ export const typeTool = (v3: V3, provider?: string) =>
};
},
});
};
Loading
Loading