diff --git a/src/agents/openclaw-tools.ts b/src/agents/openclaw-tools.ts index cbd9b7b41402..d9e4525739a5 100644 --- a/src/agents/openclaw-tools.ts +++ b/src/agents/openclaw-tools.ts @@ -21,7 +21,12 @@ import { createSessionsSendTool } from "./tools/sessions-send-tool.js"; import { createSessionsSpawnTool } from "./tools/sessions-spawn-tool.js"; import { createSubagentsTool } from "./tools/subagents-tool.js"; import { createTtsTool } from "./tools/tts-tool.js"; -import { createWebFetchTool, createWebSearchTool } from "./tools/web-tools.js"; +import { + createFirecrawlScrapeTool, + createFirecrawlSearchTool, + createWebFetchTool, + createWebSearchTool, +} from "./tools/web-tools.js"; import { resolveWorkspaceRoot } from "./workspace-dir.js"; export function createOpenClawTools(options?: { @@ -107,6 +112,8 @@ export function createOpenClawTools(options?: { config: options?.config, sandboxed: options?.sandboxed, }); + const firecrawlSearchTool = createFirecrawlSearchTool({ config: options?.config }); + const firecrawlScrapeTool = createFirecrawlScrapeTool({ config: options?.config }); const messageTool = options?.disableMessageTool ? null : createMessageTool({ @@ -187,6 +194,8 @@ export function createOpenClawTools(options?: { }), ...(webSearchTool ? [webSearchTool] : []), ...(webFetchTool ? [webFetchTool] : []), + ...(firecrawlSearchTool ? [firecrawlSearchTool] : []), + ...(firecrawlScrapeTool ? [firecrawlScrapeTool] : []), ...(imageTool ? [imageTool] : []), ...(pdfTool ? 
[pdfTool] : []), ]; diff --git a/src/agents/tools/firecrawl-tools.test.ts b/src/agents/tools/firecrawl-tools.test.ts new file mode 100644 index 000000000000..0f5913e866b7 --- /dev/null +++ b/src/agents/tools/firecrawl-tools.test.ts @@ -0,0 +1,260 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { withFetchPreconnect } from "../../test-utils/fetch-mock.js"; +import { createFirecrawlScrapeTool, createFirecrawlSearchTool } from "./firecrawl-tools.js"; + +function installMockFetch(payload: unknown) { + const mockFetch = vi.fn((_input?: unknown, _init?: unknown) => + Promise.resolve({ + ok: true, + status: 200, + statusText: "OK", + json: () => Promise.resolve(payload), + } as Response), + ); + global.fetch = withFetchPreconnect(mockFetch); + return mockFetch; +} + +function configWithApiKey(apiKey: string) { + return { + config: { + tools: { + web: { + fetch: { + firecrawl: { apiKey }, + }, + }, + }, + }, + }; +} + +describe("firecrawl_search tool", () => { + const priorFetch = global.fetch; + + afterEach(() => { + vi.unstubAllEnvs(); + global.fetch = priorFetch; + }); + + it("returns null when no Firecrawl API key is present", () => { + vi.stubEnv("FIRECRAWL_API_KEY", ""); + const tool = createFirecrawlSearchTool({ config: {} }); + expect(tool).toBeNull(); + }); + + it("returns a tool when config API key is present", () => { + const tool = createFirecrawlSearchTool(configWithApiKey("fc-test-key")); + expect(tool).not.toBeNull(); + expect(tool?.name).toBe("firecrawl_search"); + }); + + it("returns a tool when FIRECRAWL_API_KEY env var is set", () => { + vi.stubEnv("FIRECRAWL_API_KEY", "fc-env-key"); + const tool = createFirecrawlSearchTool({ config: {} }); + expect(tool).not.toBeNull(); + expect(tool?.name).toBe("firecrawl_search"); + }); + + it("calls POST /v2/search with correct payload", async () => { + const mockFetch = installMockFetch({ + success: true, + data: [ + { + title: "Example", + url: "https://example.com", + description: "An 
example site", + }, + ], + }); + const tool = createFirecrawlSearchTool(configWithApiKey("fc-test-key")); + await tool?.execute?.("call-1", { query: "test query", limit: 3 }); + + expect(mockFetch).toHaveBeenCalledOnce(); + const [url, init] = mockFetch.mock.calls[0] as [string, RequestInit]; + expect(url).toBe("https://api.firecrawl.dev/v2/search"); + expect(init.method).toBe("POST"); + expect(init.headers).toMatchObject({ + Authorization: "Bearer fc-test-key", + "Content-Type": "application/json", + }); + const body = JSON.parse(init.body as string) as Record; + expect(body.query).toBe("test query"); + expect(body.limit).toBe(3); + }); + + it("uses default limit of 5", async () => { + const mockFetch = installMockFetch({ success: true, data: [] }); + const tool = createFirecrawlSearchTool(configWithApiKey("fc-test-key")); + await tool?.execute?.("call-1", { query: "test" }); + + const body = JSON.parse( + (mockFetch.mock.calls[0] as [string, RequestInit])[1].body as string, + ) as Record; + expect(body.limit).toBe(5); + }); + + it("clamps limit to 20", async () => { + const mockFetch = installMockFetch({ success: true, data: [] }); + const tool = createFirecrawlSearchTool(configWithApiKey("fc-test-key")); + await tool?.execute?.("call-1", { query: "test", limit: 50 }); + + const body = JSON.parse( + (mockFetch.mock.calls[0] as [string, RequestInit])[1].body as string, + ) as Record; + expect(body.limit).toBe(20); + }); + + it("wraps result descriptions but keeps URLs raw", async () => { + installMockFetch({ + success: true, + data: [ + { + title: "Test Title", + url: "https://example.com/page", + description: "Test description", + }, + ], + }); + const tool = createFirecrawlSearchTool(configWithApiKey("fc-test-key")); + const result = await tool?.execute?.("call-1", { query: "test" }); + const details = result?.details as { + results?: Array<{ title?: string; url?: string; description?: string }>; + externalContent?: { untrusted?: boolean; wrapped?: boolean }; + 
}; + + // URL should be raw for tool chaining + expect(details.results?.[0]?.url).toBe("https://example.com/page"); + // Title and description should be wrapped + expect(details.results?.[0]?.title).toMatch( + /<<>>/, + ); + expect(details.results?.[0]?.description).toMatch( + /<<>>/, + ); + expect(details.externalContent).toMatchObject({ + untrusted: true, + wrapped: true, + }); + }); + + it("handles v2 nested data.web format", async () => { + installMockFetch({ + success: true, + data: { + web: [ + { + title: "V2 Result", + url: "https://example.com/v2", + description: "From v2 API", + }, + ], + }, + }); + const tool = createFirecrawlSearchTool(configWithApiKey("fc-test-key")); + const result = await tool?.execute?.("call-1", { query: "test" }); + const details = result?.details as { + results?: Array<{ url?: string }>; + }; + expect(details.results).toHaveLength(1); + expect(details.results?.[0]?.url).toBe("https://example.com/v2"); + }); + + it("throws on API error", async () => { + const mockFetch = vi.fn(() => + Promise.resolve({ + ok: false, + status: 401, + statusText: "Unauthorized", + json: () => Promise.resolve({ success: false, error: "Invalid API key" }), + } as Response), + ); + global.fetch = withFetchPreconnect(mockFetch); + + const tool = createFirecrawlSearchTool(configWithApiKey("fc-bad-key")); + await expect(tool?.execute?.("call-1", { query: "test" })).rejects.toThrow( + /Firecrawl search failed \(401\)/, + ); + }); +}); + +describe("firecrawl_scrape tool", () => { + const priorFetch = global.fetch; + + afterEach(() => { + vi.unstubAllEnvs(); + global.fetch = priorFetch; + }); + + it("returns null when no Firecrawl API key is present", () => { + vi.stubEnv("FIRECRAWL_API_KEY", ""); + const tool = createFirecrawlScrapeTool({ config: {} }); + expect(tool).toBeNull(); + }); + + it("returns a tool when config API key is present", () => { + const tool = createFirecrawlScrapeTool(configWithApiKey("fc-test-key")); + expect(tool).not.toBeNull(); + 
expect(tool?.name).toBe("firecrawl_scrape"); + }); + + it("calls fetchFirecrawlContent via the Firecrawl scrape API", async () => { + const mockFetch = installMockFetch({ + success: true, + data: { + markdown: "# Hello World\n\nSome content here.", + metadata: { + title: "Hello World", + sourceURL: "https://example.com/hello", + statusCode: 200, + }, + }, + }); + + const tool = createFirecrawlScrapeTool(configWithApiKey("fc-test-key")); + const result = await tool?.execute?.("call-1", { url: "https://example.com/hello" }); + + expect(mockFetch).toHaveBeenCalledOnce(); + const [url, init] = mockFetch.mock.calls[0] as [string, RequestInit]; + expect(url).toContain("/v2/scrape"); + expect(init.headers).toMatchObject({ + Authorization: "Bearer fc-test-key", + }); + + const details = result?.details as { + url?: string; + title?: string; + text?: string; + truncated?: boolean; + externalContent?: { untrusted?: boolean; wrapped?: boolean }; + }; + expect(details.url).toBe("https://example.com/hello"); + expect(details.text).toContain("Hello World"); + expect(details.text).toMatch(/<<>>/); + expect(details.truncated).toBe(false); + expect(details.externalContent).toMatchObject({ + untrusted: true, + wrapped: true, + }); + }); + + it("truncates content when maxChars is specified", async () => { + const longContent = "x".repeat(1000); + installMockFetch({ + success: true, + data: { + markdown: longContent, + metadata: { title: "Long", statusCode: 200 }, + }, + }); + + const tool = createFirecrawlScrapeTool(configWithApiKey("fc-test-key")); + const result = await tool?.execute?.("call-1", { + url: "https://example.com", + maxChars: 200, + }); + + const details = result?.details as { truncated?: boolean }; + expect(details.truncated).toBe(true); + }); +}); diff --git a/src/agents/tools/firecrawl-tools.ts b/src/agents/tools/firecrawl-tools.ts new file mode 100644 index 000000000000..41ed8848285d --- /dev/null +++ b/src/agents/tools/firecrawl-tools.ts @@ -0,0 +1,199 @@ +import 
{ Type } from "@sinclair/typebox"; +import type { OpenClawConfig } from "../../config/config.js"; +import { wrapExternalContent, wrapWebContent } from "../../security/external-content.js"; +import type { AnyAgentTool } from "./common.js"; +import { jsonResult, readNumberParam, readStringParam } from "./common.js"; +import { + fetchFirecrawlContent, + resolveFetchConfig, + resolveFirecrawlApiKey, + resolveFirecrawlBaseUrl, + resolveFirecrawlConfig, + resolveFirecrawlMaxAgeMsOrDefault, + resolveFirecrawlOnlyMainContent, +} from "./web-fetch.js"; +import { DEFAULT_TIMEOUT_SECONDS, resolveTimeoutSeconds, withTimeout } from "./web-shared.js"; + +const DEFAULT_SEARCH_LIMIT = 5; +const MAX_SEARCH_LIMIT = 20; + +const FirecrawlSearchSchema = Type.Object({ + query: Type.String({ description: "Search query." }), + limit: Type.Optional( + Type.Number({ + description: "Maximum number of results to return (1-20, default 5).", + minimum: 1, + maximum: MAX_SEARCH_LIMIT, + }), + ), +}); + +const FirecrawlScrapeSchema = Type.Object({ + url: Type.String({ description: "HTTP or HTTPS URL to scrape." }), + maxChars: Type.Optional( + Type.Number({ + description: "Maximum characters to return (truncates when exceeded).", + minimum: 100, + }), + ), +}); + +function resolveSearchEndpoint(baseUrl: string): string { + const trimmed = baseUrl.trim(); + const base = trimmed || "https://api.firecrawl.dev"; + try { + const url = new URL(base); + // Append /v2/search to existing path to preserve reverse-proxy prefixes + url.pathname = url.pathname.replace(/\/$/, "") + "/v2/search"; + return url.toString(); + } catch { + return "https://api.firecrawl.dev/v2/search"; + } +} + +type FirecrawlSearchResult = { + title?: string; + url?: string; + description?: string; + markdown?: string; +}; + +type FirecrawlSearchResponse = { + success?: boolean; + /** v2: data is { web: [...], news?: [...], images?: [...] 
} */ + data?: { web?: FirecrawlSearchResult[] } | FirecrawlSearchResult[]; + error?: string; +}; + +export function createFirecrawlSearchTool(options?: { + config?: OpenClawConfig; +}): AnyAgentTool | null { + const fetch = resolveFetchConfig(options?.config); + const firecrawl = resolveFirecrawlConfig(fetch); + const apiKey = resolveFirecrawlApiKey(firecrawl); + if (!apiKey) { + return null; + } + const baseUrl = resolveFirecrawlBaseUrl(firecrawl); + const timeoutSeconds = resolveTimeoutSeconds(firecrawl?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS); + + return { + label: "Firecrawl Search", + name: "firecrawl_search", + description: + "Search the web using Firecrawl and return results with optional scraped content. Use for web research when you need search results with clean markdown.", + parameters: FirecrawlSearchSchema, + execute: async (_toolCallId, args) => { + const params = args as Record; + const query = readStringParam(params, "query", { required: true }); + const rawLimit = readNumberParam(params, "limit", { integer: true }); + const limit = Math.min(MAX_SEARCH_LIMIT, Math.max(1, rawLimit ?? DEFAULT_SEARCH_LIMIT)); + + const endpoint = resolveSearchEndpoint(baseUrl); + const res = await globalThis.fetch(endpoint, { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ query, limit }), + signal: withTimeout(undefined, timeoutSeconds * 1000), + }); + + const payload = (await res.json()) as FirecrawlSearchResponse; + + if (!res.ok || payload?.success === false) { + const detail = payload?.error ?? res.statusText; + throw new Error(`Firecrawl search failed (${res.status}): ${detail}`); + } + + // v2 returns { data: { web: [...] } }, v1 returned { data: [...] } + const rawData = payload?.data; + const items: FirecrawlSearchResult[] = Array.isArray(rawData) + ? rawData + : ((rawData as { web?: FirecrawlSearchResult[] })?.web ?? 
[]); + const results = items.map((item) => ({ + title: item.title + ? wrapExternalContent(item.title, { source: "web_search", includeWarning: false }) + : undefined, + url: item.url, // Keep raw for tool chaining + description: item.description + ? wrapExternalContent(item.description, { source: "web_search", includeWarning: false }) + : undefined, + markdown: item.markdown ? wrapWebContent(item.markdown, "web_fetch") : undefined, + })); + + return jsonResult({ + query, + results, + externalContent: { + untrusted: true, + source: "firecrawl_search", + wrapped: true, + }, + }); + }, + }; +} + +const DEFAULT_SCRAPE_MAX_CHARS = 50_000; + +export function createFirecrawlScrapeTool(options?: { + config?: OpenClawConfig; +}): AnyAgentTool | null { + const fetch = resolveFetchConfig(options?.config); + const firecrawl = resolveFirecrawlConfig(fetch); + const apiKey = resolveFirecrawlApiKey(firecrawl); + if (!apiKey) { + return null; + } + const baseUrl = resolveFirecrawlBaseUrl(firecrawl); + const onlyMainContent = resolveFirecrawlOnlyMainContent(firecrawl); + const maxAgeMs = resolveFirecrawlMaxAgeMsOrDefault(firecrawl); + const timeoutSeconds = resolveTimeoutSeconds(firecrawl?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS); + + return { + label: "Firecrawl Scrape", + name: "firecrawl_scrape", + description: + "Scrape a URL using Firecrawl and return clean markdown content. Use for extracting readable content from web pages, especially those requiring JS rendering.", + parameters: FirecrawlScrapeSchema, + execute: async (_toolCallId, args) => { + const params = args as Record; + const url = readStringParam(params, "url", { required: true }); + const rawMaxChars = readNumberParam(params, "maxChars", { integer: true }); + const maxChars = Math.max(100, rawMaxChars ?? 
DEFAULT_SCRAPE_MAX_CHARS); + + const result = await fetchFirecrawlContent({ + url, + extractMode: "markdown", + apiKey, + baseUrl, + onlyMainContent, + maxAgeMs, + proxy: "auto", + storeInCache: true, + timeoutSeconds, + }); + + const text = result.text.length > maxChars ? result.text.slice(0, maxChars) : result.text; + + return jsonResult({ + url, + finalUrl: result.finalUrl, + status: result.status, + title: result.title + ? wrapExternalContent(result.title, { source: "web_fetch", includeWarning: false }) + : undefined, + text: wrapWebContent(text, "web_fetch"), + truncated: result.text.length > maxChars, + externalContent: { + untrusted: true, + source: "firecrawl_scrape", + wrapped: true, + }, + warning: result.warning, + }); + }, + }; +} diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts index 4ac7a1d7bfdd..c2d45cb055c5 100644 --- a/src/agents/tools/web-fetch.ts +++ b/src/agents/tools/web-fetch.ts @@ -79,7 +79,7 @@ type FirecrawlFetchConfig = } | undefined; -function resolveFetchConfig(cfg?: OpenClawConfig): WebFetchConfig { +export function resolveFetchConfig(cfg?: OpenClawConfig): WebFetchConfig { const fetch = cfg?.tools?.web?.fetch; if (!fetch || typeof fetch !== "object") { return undefined; @@ -124,7 +124,7 @@ function resolveFetchMaxResponseBytes(fetch?: WebFetchConfig): number { return Math.min(FETCH_MAX_RESPONSE_BYTES_MAX, Math.max(FETCH_MAX_RESPONSE_BYTES_MIN, value)); } -function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig { +export function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig { if (!fetch || typeof fetch !== "object") { return undefined; } @@ -135,7 +135,7 @@ function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig { return firecrawl as FirecrawlFetchConfig; } -function resolveFirecrawlApiKey(firecrawl?: FirecrawlFetchConfig): string | undefined { +export function resolveFirecrawlApiKey(firecrawl?: FirecrawlFetchConfig): string | undefined { const 
fromConfig = firecrawl && "apiKey" in firecrawl && typeof firecrawl.apiKey === "string" ? normalizeSecretInput(firecrawl.apiKey) @@ -144,7 +144,7 @@ function resolveFirecrawlApiKey(firecrawl?: FirecrawlFetchConfig): string | unde return fromConfig || fromEnv || undefined; } -function resolveFirecrawlEnabled(params: { +export function resolveFirecrawlEnabled(params: { firecrawl?: FirecrawlFetchConfig; apiKey?: string; }): boolean { @@ -154,7 +154,7 @@ function resolveFirecrawlEnabled(params: { return Boolean(params.apiKey); } -function resolveFirecrawlBaseUrl(firecrawl?: FirecrawlFetchConfig): string { +export function resolveFirecrawlBaseUrl(firecrawl?: FirecrawlFetchConfig): string { const raw = firecrawl && "baseUrl" in firecrawl && typeof firecrawl.baseUrl === "string" ? firecrawl.baseUrl.trim() @@ -162,7 +162,7 @@ function resolveFirecrawlBaseUrl(firecrawl?: FirecrawlFetchConfig): string { return raw || DEFAULT_FIRECRAWL_BASE_URL; } -function resolveFirecrawlOnlyMainContent(firecrawl?: FirecrawlFetchConfig): boolean { +export function resolveFirecrawlOnlyMainContent(firecrawl?: FirecrawlFetchConfig): boolean { if (typeof firecrawl?.onlyMainContent === "boolean") { return firecrawl.onlyMainContent; } @@ -181,7 +181,7 @@ function resolveFirecrawlMaxAgeMs(firecrawl?: FirecrawlFetchConfig): number | un return parsed > 0 ? 
parsed : undefined; } -function resolveFirecrawlMaxAgeMsOrDefault(firecrawl?: FirecrawlFetchConfig): number { +export function resolveFirecrawlMaxAgeMsOrDefault(firecrawl?: FirecrawlFetchConfig): number { const resolved = resolveFirecrawlMaxAgeMs(firecrawl); if (typeof resolved === "number") { return resolved; diff --git a/src/agents/tools/web-tools.ts b/src/agents/tools/web-tools.ts index 3acaa4c763ff..ffac2860696d 100644 --- a/src/agents/tools/web-tools.ts +++ b/src/agents/tools/web-tools.ts @@ -1,2 +1,3 @@ +export { createFirecrawlScrapeTool, createFirecrawlSearchTool } from "./firecrawl-tools.js"; export { createWebFetchTool, extractReadableContent, fetchFirecrawlContent } from "./web-fetch.js"; export { createWebSearchTool } from "./web-search.js"; diff --git a/src/browser/bridge-server.ts b/src/browser/bridge-server.ts index c1d0c0822010..f3be65b92e9a 100644 --- a/src/browser/bridge-server.ts +++ b/src/browser/bridge-server.ts @@ -64,6 +64,8 @@ export async function startBrowserBridgeServer(params: { authPassword?: string; onEnsureAttachTarget?: (profile: ProfileContext["profile"]) => Promise; resolveSandboxNoVncToken?: (token: string) => ResolvedNoVncObserver | null; + firecrawlApiKey?: string; + firecrawlBaseUrl?: string; }): Promise { const host = params.host ?? 
"127.0.0.1"; if (!isLoopbackHost(host)) { @@ -111,6 +113,8 @@ export async function startBrowserBridgeServer(params: { const ctx = createBrowserRouteContext({ getState: () => state, onEnsureAttachTarget: params.onEnsureAttachTarget, + firecrawlApiKey: params.firecrawlApiKey, + firecrawlBaseUrl: params.firecrawlBaseUrl, }); registerBrowserRoutes(app as unknown as BrowserRouteRegistrar, ctx); diff --git a/src/browser/client.ts b/src/browser/client.ts index 5085825cb6e6..f2c0d2c257ab 100644 --- a/src/browser/client.ts +++ b/src/browser/client.ts @@ -19,6 +19,10 @@ export type BrowserStatus = { noSandbox?: boolean; executablePath?: string | null; attachOnly: boolean; + liveViewUrl?: string; + interactiveLiveViewUrl?: string; + /** Firecrawl session ID (for lifecycle management). */ + firecrawlSessionId?: string; }; export type ProfileStatus = { @@ -163,7 +167,7 @@ export async function browserCreateProfile( name: string; color?: string; cdpUrl?: string; - driver?: "openclaw" | "extension"; + driver?: "openclaw" | "extension" | "firecrawl"; }, ): Promise { return await fetchBrowserJson( diff --git a/src/browser/config.test.ts b/src/browser/config.test.ts index ec1c40cd66e0..d97e46ae8f89 100644 --- a/src/browser/config.test.ts +++ b/src/browser/config.test.ts @@ -243,6 +243,147 @@ describe("browser config", () => { expect(resolved.ssrfPolicy).toEqual({}); }); + describe("firecrawl profile resolution", () => { + it("resolves firecrawl profile with driver=firecrawl, empty cdpUrl, attachOnly=true", () => { + const resolved = resolveBrowserConfig({ + profiles: { + firecrawl: { driver: "firecrawl", color: "#FF4500" }, + }, + }); + const profile = resolveProfile(resolved, "firecrawl"); + expect(profile).not.toBeNull(); + expect(profile?.driver).toBe("firecrawl"); + expect(profile?.cdpPort).toBe(0); + expect(profile?.cdpUrl).toBe(""); + expect(profile?.cdpHost).toBe(""); + expect(profile?.cdpIsLoopback).toBe(false); + expect(profile?.attachOnly).toBe(true); + 
expect(profile?.color).toBe("#FF4500"); + expect(profile?.name).toBe("firecrawl"); + }); + + it("firecrawl profile always has attachOnly=true regardless of global config", () => { + const resolved = resolveBrowserConfig({ + attachOnly: false, + profiles: { + firecrawl: { driver: "firecrawl", color: "#00FF00" }, + }, + }); + const profile = resolveProfile(resolved, "firecrawl"); + expect(profile?.attachOnly).toBe(true); + }); + + it("firecrawl profile does not require cdpPort or cdpUrl", () => { + const resolved = resolveBrowserConfig({ + profiles: { + firecrawl: { driver: "firecrawl", color: "#0000FF" }, + }, + }); + // should not throw — firecrawl profiles bypass the cdpPort/cdpUrl requirement + const profile = resolveProfile(resolved, "firecrawl"); + expect(profile).not.toBeNull(); + }); + + it("firecrawl profile preserves custom color", () => { + const resolved = resolveBrowserConfig({ + profiles: { + firecrawl: { driver: "firecrawl", color: "#AABBCC" }, + }, + }); + const profile = resolveProfile(resolved, "firecrawl"); + expect(profile?.color).toBe("#AABBCC"); + }); + }); + + describe("firecrawl auto-enablement", () => { + it("auto-creates firecrawl profile when firecrawlApiKey is provided", () => { + const resolved = resolveBrowserConfig(undefined, undefined, { + firecrawlApiKey: "fc-test-key", + }); + expect(resolved.profiles.firecrawl).toBeDefined(); + expect(resolved.profiles.firecrawl?.driver).toBe("firecrawl"); + expect(resolved.profiles.firecrawl?.color).toBe("#FF4500"); + }); + + it("does not create firecrawl profile when no API key is provided", () => { + const resolved = resolveBrowserConfig(undefined); + expect(resolved.profiles.firecrawl).toBeUndefined(); + }); + + it("does not create firecrawl profile when API key is empty string", () => { + const resolved = resolveBrowserConfig(undefined, undefined, { + firecrawlApiKey: "", + }); + expect(resolved.profiles.firecrawl).toBeUndefined(); + }); + + it("does not overwrite an explicitly configured 
firecrawl profile", () => { + const resolved = resolveBrowserConfig( + { + profiles: { + firecrawl: { driver: "firecrawl", color: "#CUSTOM0" }, + }, + }, + undefined, + { firecrawlApiKey: "fc-test-key" }, + ); + // Should keep the user's custom color, not overwrite with #FF4500 + expect(resolved.profiles.firecrawl?.color).toBe("#CUSTOM0"); + }); + + it("sets firecrawl as default profile when auto-created", () => { + const resolved = resolveBrowserConfig(undefined, undefined, { + firecrawlApiKey: "fc-test-key", + }); + expect(resolved.defaultProfile).toBe("firecrawl"); + }); + + it("sets firecrawl as default when explicitly configured", () => { + const resolved = resolveBrowserConfig({ + profiles: { + firecrawl: { driver: "firecrawl", color: "#FF4500" }, + }, + }); + expect(resolved.defaultProfile).toBe("firecrawl"); + }); + + it("explicit defaultProfile overrides firecrawl auto-default", () => { + const resolved = resolveBrowserConfig({ defaultProfile: "openclaw" }, undefined, { + firecrawlApiKey: "fc-test-key", + }); + expect(resolved.defaultProfile).toBe("openclaw"); + // But firecrawl profile should still be created + expect(resolved.profiles.firecrawl).toBeDefined(); + }); + + it("auto-created firecrawl profile resolves correctly", () => { + const resolved = resolveBrowserConfig(undefined, undefined, { + firecrawlApiKey: "fc-test-key", + }); + const profile = resolveProfile(resolved, "firecrawl"); + expect(profile).not.toBeNull(); + expect(profile?.driver).toBe("firecrawl"); + expect(profile?.cdpPort).toBe(0); + expect(profile?.cdpUrl).toBe(""); + expect(profile?.attachOnly).toBe(true); + }); + + it("openclaw and chrome profiles still exist alongside auto-created firecrawl", () => { + const resolved = resolveBrowserConfig(undefined, undefined, { + firecrawlApiKey: "fc-test-key", + }); + const openclaw = resolveProfile(resolved, "openclaw"); + const chrome = resolveProfile(resolved, "chrome"); + const firecrawl = resolveProfile(resolved, "firecrawl"); + 
expect(openclaw).not.toBeNull(); + expect(chrome).not.toBeNull(); + expect(firecrawl).not.toBeNull(); + expect(openclaw?.driver).toBe("openclaw"); + expect(chrome?.driver).toBe("extension"); + expect(firecrawl?.driver).toBe("firecrawl"); + }); + }); + describe("default profile preference", () => { it("defaults to openclaw profile when defaultProfile is not configured", () => { const resolved = resolveBrowserConfig({ diff --git a/src/browser/config.ts b/src/browser/config.ts index 336049e8c69b..c58404e51e11 100644 --- a/src/browser/config.ts +++ b/src/browser/config.ts @@ -45,7 +45,7 @@ export type ResolvedBrowserProfile = { cdpHost: string; cdpIsLoopback: boolean; color: string; - driver: "openclaw" | "extension"; + driver: "openclaw" | "extension" | "firecrawl"; attachOnly: boolean; }; @@ -204,6 +204,7 @@ function ensureDefaultChromeExtensionProfile( export function resolveBrowserConfig( cfg: BrowserConfig | undefined, rootConfig?: OpenClawConfig, + opts?: { firecrawlApiKey?: string }, ): ResolvedBrowserConfig { const enabled = cfg?.enabled ?? DEFAULT_OPENCLAW_BROWSER_ENABLED; const evaluateEnabled = cfg?.evaluateEnabled ?? DEFAULT_BROWSER_EVALUATE_ENABLED; @@ -264,13 +265,21 @@ export function resolveBrowserConfig( ); const cdpProtocol = cdpInfo.parsed.protocol === "https:" ? "https" : "http"; + // Auto-create a "firecrawl" profile when an API key is available and no explicit one exists. + if (opts?.firecrawlApiKey && !profiles.firecrawl) { + profiles.firecrawl = { driver: "firecrawl", color: "#FF4500" }; + } + const defaultProfile = defaultProfileFromConfig ?? - (profiles[DEFAULT_BROWSER_DEFAULT_PROFILE_NAME] - ? DEFAULT_BROWSER_DEFAULT_PROFILE_NAME - : profiles[DEFAULT_OPENCLAW_BROWSER_PROFILE_NAME] - ? DEFAULT_OPENCLAW_BROWSER_PROFILE_NAME - : "chrome"); + // When a firecrawl profile exists (auto-created or explicit), prefer it as default. + (profiles.firecrawl + ? "firecrawl" + : profiles[DEFAULT_BROWSER_DEFAULT_PROFILE_NAME] + ? 
DEFAULT_BROWSER_DEFAULT_PROFILE_NAME + : profiles[DEFAULT_OPENCLAW_BROWSER_PROFILE_NAME] + ? DEFAULT_OPENCLAW_BROWSER_PROFILE_NAME + : "chrome"); const extraArgs = Array.isArray(cfg?.extraArgs) ? cfg.extraArgs.filter((a): a is string => typeof a === "string" && a.trim().length > 0) @@ -313,6 +322,19 @@ export function resolveProfile( return null; } + if (profile.driver === "firecrawl") { + return { + name: profileName, + cdpPort: 0, + cdpUrl: "", + cdpHost: "", + cdpIsLoopback: false, + color: profile.color, + driver: "firecrawl", + attachOnly: true, + }; + } + const rawProfileUrl = profile.cdpUrl?.trim() ?? ""; let cdpHost = resolved.cdpHost; let cdpPort = profile.cdpPort ?? 0; diff --git a/src/browser/control-service.ts b/src/browser/control-service.ts index 031bc5e00cd6..9b764ccc0f14 100644 --- a/src/browser/control-service.ts +++ b/src/browser/control-service.ts @@ -1,3 +1,8 @@ +import { + resolveFirecrawlApiKey, + resolveFirecrawlBaseUrl, + resolveFirecrawlConfig, +} from "../agents/tools/web-fetch.js"; import { loadConfig } from "../config/config.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { resolveBrowserConfig } from "./config.js"; @@ -14,9 +19,13 @@ export function getBrowserControlState(): BrowserServerState | null { } export function createBrowserControlContext() { + const cfg = loadConfig(); + const firecrawl = resolveFirecrawlConfig(cfg.tools?.web?.fetch); return createBrowserRouteContext({ getState: () => state, refreshConfigFromDisk: true, + firecrawlApiKey: resolveFirecrawlApiKey(firecrawl), + firecrawlBaseUrl: resolveFirecrawlBaseUrl(firecrawl), }); } @@ -26,7 +35,10 @@ export async function startBrowserControlServiceFromConfig(): Promise { + const baseParams = { + apiKey: "fc-test-key", + baseUrl: "https://api.firecrawl.dev", + }; + + beforeEach(() => { + vi.stubGlobal("fetch", vi.fn()); + }); + afterEach(() => { + vi.restoreAllMocks(); + }); + + describe("createFirecrawlBrowserSession", () => { + it("calls the v2 
endpoint with auth header", async () => { + const mockResponse = { + success: true, + id: "sess-123", + cdpUrl: "wss://connect.firecrawl.dev/sess-123", + liveViewUrl: "https://connect.firecrawl.dev/v/sess-123", + expiresAt: "2026-03-02T12:00:00Z", + }; + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => mockResponse, + }); + + const session = await createFirecrawlBrowserSession(baseParams); + + expect(fetch).toHaveBeenCalledWith( + "https://api.firecrawl.dev/v2/browser", + expect.objectContaining({ + method: "POST", + headers: { + Authorization: "Bearer fc-test-key", + "Content-Type": "application/json", + }, + }), + ); + expect(session.sessionId).toBe("sess-123"); + expect(session.cdpWebSocketUrl).toBe("wss://connect.firecrawl.dev/sess-123"); + expect(session.liveViewUrl).toBe("https://connect.firecrawl.dev/v/sess-123"); + expect(session.interactiveLiveViewUrl).toBe(""); + expect(session.expiresAt).toBe("2026-03-02T12:00:00Z"); + }); + + it("passes optional TTL and streaming params in request body", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + id: "sess-456", + cdpUrl: "wss://connect.firecrawl.dev/sess-456", + }), + }); + + await createFirecrawlBrowserSession({ + ...baseParams, + ttl: 600, + activityTtl: 120, + streamWebView: true, + }); + + const body = JSON.parse((fetch as ReturnType).mock.calls[0][1].body); + expect(body.ttl).toBe(600); + expect(body.activityTtl).toBe(120); + expect(body.streamWebView).toBe(true); + }); + + it("sends empty body when no optional params are provided", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + id: "sess-min", + cdpUrl: "wss://connect.firecrawl.dev/sess-min", + }), + }); + + await createFirecrawlBrowserSession(baseParams); + + const body = JSON.parse((fetch as ReturnType).mock.calls[0][1].body); + expect(body).toEqual({}); + }); + + it("strips trailing slash from baseUrl", async () => { + (fetch as 
ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + id: "sess-slash", + cdpUrl: "wss://connect.firecrawl.dev/sess-slash", + }), + }); + + await createFirecrawlBrowserSession({ + apiKey: "fc-key", + baseUrl: "https://api.firecrawl.dev/", + }); + + expect(fetch).toHaveBeenCalledWith("https://api.firecrawl.dev/v2/browser", expect.anything()); + }); + + it("works with custom baseUrl", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + id: "sess-custom", + cdpUrl: "wss://custom.host/sess-custom", + }), + }); + + await createFirecrawlBrowserSession({ + apiKey: "fc-key", + baseUrl: "https://custom.host:8080", + }); + + expect(fetch).toHaveBeenCalledWith("https://custom.host:8080/v2/browser", expect.anything()); + }); + + it("defaults liveViewUrl to empty string when missing from response", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + id: "sess-no-liveview", + cdpUrl: "wss://connect.firecrawl.dev/sess-no-liveview", + }), + }); + + const session = await createFirecrawlBrowserSession(baseParams); + expect(session.liveViewUrl).toBe(""); + }); + + it("leaves expiresAt undefined when missing from response", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + id: "sess-no-expiry", + cdpUrl: "wss://connect.firecrawl.dev/sess-no-expiry", + }), + }); + + const session = await createFirecrawlBrowserSession(baseParams); + expect(session.expiresAt).toBeUndefined(); + }); + + it("throws on HTTP 401 Unauthorized", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: false, + status: 401, + statusText: "Unauthorized", + text: async () => "Invalid API key", + }); + + await expect(createFirecrawlBrowserSession(baseParams)).rejects.toThrow( + "Firecrawl browser session create failed (HTTP 401): Invalid API key", + ); + }); + + it("throws on HTTP 429 rate limit", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: 
false, + status: 429, + statusText: "Too Many Requests", + text: async () => "Rate limit exceeded", + }); + + await expect(createFirecrawlBrowserSession(baseParams)).rejects.toThrow( + "Firecrawl browser session create failed (HTTP 429)", + ); + }); + + it("throws on HTTP 500 server error", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: false, + status: 500, + statusText: "Internal Server Error", + text: async () => "", + }); + + await expect(createFirecrawlBrowserSession(baseParams)).rejects.toThrow( + "Firecrawl browser session create failed (HTTP 500): Internal Server Error", + ); + }); + + it("falls back to statusText when response body read fails", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: false, + status: 503, + statusText: "Service Unavailable", + text: async () => { + throw new Error("body read failed"); + }, + }); + + await expect(createFirecrawlBrowserSession(baseParams)).rejects.toThrow( + "Firecrawl browser session create failed (HTTP 503): Service Unavailable", + ); + }); + + it("throws when response is missing id", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + success: true, + cdpUrl: "wss://connect.firecrawl.dev/xxx", + }), + }); + + await expect(createFirecrawlBrowserSession(baseParams)).rejects.toThrow( + "missing id or cdpUrl", + ); + }); + + it("throws when response is missing cdpUrl", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + success: true, + id: "sess-no-ws", + }), + }); + + await expect(createFirecrawlBrowserSession(baseParams)).rejects.toThrow( + "missing id or cdpUrl", + ); + }); + + it("throws when response is completely empty object", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({}), + }); + + await expect(createFirecrawlBrowserSession(baseParams)).rejects.toThrow( + "missing id or cdpUrl", + ); + }); + + it("propagates network errors from fetch", async () 
=> { + (fetch as ReturnType).mockRejectedValue(new Error("network failure")); + + await expect(createFirecrawlBrowserSession(baseParams)).rejects.toThrow("network failure"); + }); + + it("maps API response fields to internal names", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: true, + json: async () => ({ + success: true, + id: "api-id-field", + cdpUrl: "wss://api-cdp-field", + liveViewUrl: "https://readonly-view", + interactiveLiveViewUrl: "https://interactive-view", + expiresAt: "2026-12-31T00:00:00Z", + }), + }); + + const session = await createFirecrawlBrowserSession(baseParams); + expect(session.sessionId).toBe("api-id-field"); + expect(session.cdpWebSocketUrl).toBe("wss://api-cdp-field"); + expect(session.liveViewUrl).toBe("https://readonly-view"); + expect(session.interactiveLiveViewUrl).toBe("https://interactive-view"); + }); + }); + + describe("deleteFirecrawlBrowserSession", () => { + it("calls the v2 endpoint with DELETE method", async () => { + (fetch as ReturnType).mockResolvedValue({ ok: true }); + + await deleteFirecrawlBrowserSession({ + ...baseParams, + sessionId: "sess-789", + }); + + expect(fetch).toHaveBeenCalledWith( + "https://api.firecrawl.dev/v2/browser/sess-789", + expect.objectContaining({ + method: "DELETE", + headers: { Authorization: "Bearer fc-test-key" }, + }), + ); + }); + + it("URL-encodes session IDs with special characters", async () => { + (fetch as ReturnType).mockResolvedValue({ ok: true }); + + await deleteFirecrawlBrowserSession({ + ...baseParams, + sessionId: "sess/with+special chars", + }); + + expect(fetch).toHaveBeenCalledWith( + "https://api.firecrawl.dev/v2/browser/sess%2Fwith%2Bspecial%20chars", + expect.anything(), + ); + }); + + it("strips trailing slash from baseUrl", async () => { + (fetch as ReturnType).mockResolvedValue({ ok: true }); + + await deleteFirecrawlBrowserSession({ + apiKey: "fc-key", + baseUrl: "https://api.firecrawl.dev/", + sessionId: "sess-100", + }); + + 
expect(fetch).toHaveBeenCalledWith( + "https://api.firecrawl.dev/v2/browser/sess-100", + expect.anything(), + ); + }); + + it("throws on HTTP 404 Not Found", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: false, + status: 404, + statusText: "Not Found", + text: async () => "Session not found", + }); + + await expect( + deleteFirecrawlBrowserSession({ ...baseParams, sessionId: "sess-bad" }), + ).rejects.toThrow("Firecrawl browser session delete failed (HTTP 404): Session not found"); + }); + + it("falls back to statusText when body read fails on error", async () => { + (fetch as ReturnType).mockResolvedValue({ + ok: false, + status: 500, + statusText: "Internal Server Error", + text: async () => { + throw new Error("body read failed"); + }, + }); + + await expect( + deleteFirecrawlBrowserSession({ ...baseParams, sessionId: "sess-err" }), + ).rejects.toThrow( + "Firecrawl browser session delete failed (HTTP 500): Internal Server Error", + ); + }); + + it("propagates network errors from fetch", async () => { + (fetch as ReturnType).mockRejectedValue(new Error("connection refused")); + + await expect( + deleteFirecrawlBrowserSession({ ...baseParams, sessionId: "sess-net" }), + ).rejects.toThrow("connection refused"); + }); + }); + + describe("isFirecrawlSessionReachable", () => { + it("returns false for invalid URLs", async () => { + const result = await isFirecrawlSessionReachable("not-a-url", 500); + expect(result).toBe(false); + }); + + it("returns false for empty string URL", async () => { + const result = await isFirecrawlSessionReachable("", 500); + expect(result).toBe(false); + }); + }); +}); diff --git a/src/browser/firecrawl-browser.ts b/src/browser/firecrawl-browser.ts new file mode 100644 index 000000000000..694290b2c52f --- /dev/null +++ b/src/browser/firecrawl-browser.ts @@ -0,0 +1,149 @@ +/** + * Firecrawl cloud browser session lifecycle — pure fetch() API calls, no SDK. 
/** A created Firecrawl cloud browser session, with API response fields mapped to internal names. */
export type FirecrawlBrowserSession = {
  /** Session identifier (`id` in the API response). */
  sessionId: string;
  /** CDP WebSocket endpoint (`cdpUrl` in the API response). */
  cdpWebSocketUrl: string;
  /** Live view URL; empty string when the response omits it. */
  liveViewUrl: string;
  /** Interactive live view URL; empty string when the response omits it. */
  interactiveLiveViewUrl: string;
  /** Expiry value passed through from the response; undefined when omitted. */
  expiresAt?: string;
};

export type CreateFirecrawlBrowserSessionParams = {
  apiKey: string;
  baseUrl: string;
  /** Total session TTL in seconds (30–3600, default 300). */
  ttl?: number;
  /** Inactivity TTL in seconds (10–3600, default 120). */
  activityTtl?: number;
  /** Enable live web-view streaming. */
  streamWebView?: boolean;
};

export type DeleteFirecrawlBrowserSessionParams = {
  apiKey: string;
  baseUrl: string;
  sessionId: string;
};

/**
 * Create a Firecrawl cloud browser session via `POST {baseUrl}/v2/browser`.
 *
 * Only the optional parameters that are actually provided are sent in the JSON
 * body; with none provided the body is `{}`. The request is aborted after 30 seconds.
 *
 * @param params API key, base URL (one trailing slash is tolerated) and optional session settings.
 * @returns The session, with `id`/`cdpUrl` mapped to `sessionId`/`cdpWebSocketUrl`.
 * @throws Error when the HTTP status is not OK (message carries the status and the
 *   response text, or the status text if the body is empty or unreadable), or when
 *   the response lacks `id` or `cdpUrl`.
 */
export async function createFirecrawlBrowserSession(
  params: CreateFirecrawlBrowserSessionParams,
): Promise<FirecrawlBrowserSession> {
  const { apiKey, baseUrl, ttl, activityTtl, streamWebView } = params;
  const endpoint = `${baseUrl.replace(/\/$/, "")}/v2/browser`;
  const body: Record<string, unknown> = {};
  if (ttl !== undefined) {
    body.ttl = ttl;
  }
  if (activityTtl !== undefined) {
    body.activityTtl = activityTtl;
  }
  if (streamWebView !== undefined) {
    body.streamWebView = streamWebView;
  }

  const res = await fetch(endpoint, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify(body),
    signal: AbortSignal.timeout(30_000),
  });

  if (!res.ok) {
    // Reading the error body is best-effort; fall back to the status text.
    const text = await res.text().catch(() => "");
    throw new Error(
      `Firecrawl browser session create failed (HTTP ${res.status}): ${text || res.statusText}`,
    );
  }

  // A JSON `null` body is valid JSON, so the parsed value may be null.
  const data = (await res.json()) as {
    success?: boolean;
    id?: string;
    cdpUrl?: string;
    liveViewUrl?: string;
    interactiveLiveViewUrl?: string;
    expiresAt?: string;
  } | null;

  if (!data?.id || !data.cdpUrl) {
    throw new Error("Firecrawl browser session response missing id or cdpUrl");
  }

  return {
    sessionId: data.id,
    cdpWebSocketUrl: data.cdpUrl,
    liveViewUrl: data.liveViewUrl || "",
    interactiveLiveViewUrl: data.interactiveLiveViewUrl || "",
    expiresAt: data.expiresAt,
  };
}

/**
 * Delete a Firecrawl cloud browser session via `DELETE {baseUrl}/v2/browser/{sessionId}`.
 *
 * The session id is URL-encoded into the path. The request is aborted after 10 seconds.
 *
 * @param params API key, base URL (one trailing slash is tolerated) and the session id.
 * @throws Error when the HTTP status is not OK (message carries the status and the
 *   response text, or the status text if the body is empty or unreadable).
 */
export async function deleteFirecrawlBrowserSession(
  params: DeleteFirecrawlBrowserSessionParams,
): Promise<void> {
  const { apiKey, baseUrl, sessionId } = params;
  const endpoint = `${baseUrl.replace(/\/$/, "")}/v2/browser/${encodeURIComponent(sessionId)}`;

  const res = await fetch(endpoint, {
    method: "DELETE",
    headers: {
      Authorization: `Bearer ${apiKey}`,
    },
    signal: AbortSignal.timeout(10_000),
  });

  if (!res.ok) {
    // Reading the error body is best-effort; fall back to the status text.
    const text = await res.text().catch(() => "");
    throw new Error(
      `Firecrawl browser session delete failed (HTTP ${res.status}): ${text || res.statusText}`,
    );
  }
}

/**
 * Check if a Firecrawl CDP WebSocket URL is reachable via WSS handshake.
 *
 * Never rejects: resolves `true` once the socket opens (it is closed again
 * immediately) and `false` on a socket error, when opening throws
 * synchronously, or when `timeoutMs` elapses first.
 *
 * @param cdpWebSocketUrl CDP WebSocket URL to probe.
 * @param timeoutMs Overall deadline, also passed to the socket as its handshake timeout.
 */
export async function isFirecrawlSessionReachable(
  cdpWebSocketUrl: string,
  timeoutMs = 3000,
): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    // Several paths (timer, open, error, catch) may fire; only the first one settles.
    let resolved = false;
    const done = (result: boolean) => {
      if (resolved) {
        return;
      }
      resolved = true;
      resolve(result);
    };

    const timer = setTimeout(() => done(false), timeoutMs);

    try {
      const ws = openCdpWebSocket(cdpWebSocketUrl, { handshakeTimeoutMs: timeoutMs });
      ws.on("open", () => {
        clearTimeout(timer);
        // Closed even if the deadline already resolved false, so a late open does not linger.
        ws.close();
        done(true);
      });
      ws.on("error", () => {
        clearTimeout(timer);
        done(false);
      });
    } catch {
      clearTimeout(timer);
      done(false);
    }
  });
}
"openclaw" | "extension" | "firecrawl"; }; export type CreateProfileResult = { @@ -72,7 +72,8 @@ export function createBrowserProfilesService(ctx: BrowserRouteContext) { const createProfile = async (params: CreateProfileParams): Promise => { const name = params.name.trim(); const rawCdpUrl = params.cdpUrl?.trim() || undefined; - const driver = params.driver === "extension" ? "extension" : undefined; + const driver = + params.driver === "extension" || params.driver === "firecrawl" ? params.driver : undefined; if (!isValidProfileName(name)) { throw new Error("invalid profile name: use lowercase letters, numbers, and hyphens only"); @@ -95,7 +96,13 @@ export function createBrowserProfilesService(ctx: BrowserRouteContext) { params.color && HEX_COLOR_RE.test(params.color) ? params.color : allocateColor(usedColors); let profileConfig: BrowserProfileConfig; - if (rawCdpUrl) { + if (driver === "firecrawl") { + // Firecrawl profiles use cloud sessions, no local CDP port needed + profileConfig = { + driver: "firecrawl", + color: profileColor, + }; + } else if (rawCdpUrl) { const parsed = parseHttpUrl(rawCdpUrl, "browser.profiles.cdpUrl"); profileConfig = { cdpUrl: parsed.normalized, diff --git a/src/browser/resolved-config-refresh.ts b/src/browser/resolved-config-refresh.ts index 721049036d4d..5933b6e62f94 100644 --- a/src/browser/resolved-config-refresh.ts +++ b/src/browser/resolved-config-refresh.ts @@ -1,3 +1,4 @@ +import { resolveFirecrawlApiKey, resolveFirecrawlConfig } from "../agents/tools/web-fetch.js"; import { createConfigIO, loadConfig } from "../config/config.js"; import { resolveBrowserConfig, resolveProfile, type ResolvedBrowserProfile } from "./config.js"; import type { BrowserServerState } from "./server-context.types.js"; @@ -10,6 +11,12 @@ function applyResolvedConfig( for (const [name, runtime] of current.profiles) { const nextProfile = resolveProfile(freshResolved, name); if (nextProfile) { + // Preserve dynamic cdpUrl from active firecrawl sessions — 
resolveProfile + // always returns cdpUrl="" for firecrawl, but ensureBrowserAvailable sets it + // to the session's WSS URL at runtime. + if (nextProfile.driver === "firecrawl" && runtime.firecrawlSession) { + nextProfile.cdpUrl = runtime.firecrawlSession.cdpWebSocketUrl; + } runtime.profile = nextProfile; continue; } @@ -28,7 +35,9 @@ export function refreshResolvedBrowserConfigFromDisk(params: { return; } const cfg = params.mode === "fresh" ? createConfigIO().loadConfig() : loadConfig(); - const freshResolved = resolveBrowserConfig(cfg.browser, cfg); + const firecrawl = resolveFirecrawlConfig(cfg.tools?.web?.fetch); + const firecrawlApiKey = resolveFirecrawlApiKey(firecrawl); + const freshResolved = resolveBrowserConfig(cfg.browser, cfg, { firecrawlApiKey }); applyResolvedConfig(params.current, freshResolved); } diff --git a/src/browser/routes/agent.shared.ts b/src/browser/routes/agent.shared.ts index aee566965254..1d94818246ad 100644 --- a/src/browser/routes/agent.shared.ts +++ b/src/browser/routes/agent.shared.ts @@ -107,7 +107,7 @@ export async function withRouteTabContext( return await params.run({ profileCtx, tab, - cdpUrl: profileCtx.profile.cdpUrl, + cdpUrl: profileCtx.getCdpUrl(), }); } catch (err) { handleRouteError(params.ctx, params.res, err); diff --git a/src/browser/routes/agent.snapshot.ts b/src/browser/routes/agent.snapshot.ts index 7739caa051e3..28547fc73ff7 100644 --- a/src/browser/routes/agent.snapshot.ts +++ b/src/browser/routes/agent.snapshot.ts @@ -268,7 +268,7 @@ export function registerBrowserAgentSnapshotRoutes( Boolean(selectorValue) || Boolean(frameSelectorValue); const roleSnapshotArgs = { - cdpUrl: profileCtx.profile.cdpUrl, + cdpUrl: profileCtx.getCdpUrl(), targetId: tab.targetId, selector: selectorValue, frameSelector: frameSelectorValue, @@ -284,7 +284,7 @@ export function registerBrowserAgentSnapshotRoutes( ? 
await pw.snapshotRoleViaPlaywright(roleSnapshotArgs) : await pw .snapshotAiViaPlaywright({ - cdpUrl: profileCtx.profile.cdpUrl, + cdpUrl: profileCtx.getCdpUrl(), targetId: tab.targetId, ...(typeof resolvedMaxChars === "number" ? { maxChars: resolvedMaxChars } : {}), }) @@ -297,7 +297,7 @@ export function registerBrowserAgentSnapshotRoutes( }); if (labels) { const labeled = await pw.screenshotWithLabelsViaPlaywright({ - cdpUrl: profileCtx.profile.cdpUrl, + cdpUrl: profileCtx.getCdpUrl(), targetId: tab.targetId, refs: "refs" in snap ? snap.refs : {}, type: "png", @@ -347,7 +347,7 @@ export function registerBrowserAgentSnapshotRoutes( return null; } return await pw.snapshotAriaViaPlaywright({ - cdpUrl: profileCtx.profile.cdpUrl, + cdpUrl: profileCtx.getCdpUrl(), targetId: tab.targetId, limit, }); diff --git a/src/browser/routes/basic.ts b/src/browser/routes/basic.ts index 074e7ea285db..830db25e7860 100644 --- a/src/browser/routes/basic.ts +++ b/src/browser/routes/basic.ts @@ -68,6 +68,7 @@ export function registerBrowserBasicRoutes(app: BrowserRouteRegistrar, ctx: Brow detectError = String(err); } + const firecrawlSession = profileState?.firecrawlSession; res.json({ enabled: current.resolved.enabled, profile: profileCtx.profile.name, @@ -76,7 +77,7 @@ export function registerBrowserBasicRoutes(app: BrowserRouteRegistrar, ctx: Brow cdpHttp, pid: profileState?.running?.pid ?? null, cdpPort: profileCtx.profile.cdpPort, - cdpUrl: profileCtx.profile.cdpUrl, + cdpUrl: profileCtx.getCdpUrl(), chosenBrowser: profileState?.running?.exe.kind ?? null, detectedBrowser, detectedExecutablePath, @@ -87,6 +88,13 @@ export function registerBrowserBasicRoutes(app: BrowserRouteRegistrar, ctx: Brow noSandbox: current.resolved.noSandbox, executablePath: current.resolved.executablePath ?? null, attachOnly: profileCtx.profile.attachOnly, + ...(firecrawlSession + ? 
/**
 * Resolve the Firecrawl API key at call time rather than from a captured value.
 *
 * Precedence: trimmed `tools.web.fetch.firecrawl.apiKey` from the freshly loaded
 * config, then the trimmed `FIRECRAWL_API_KEY` environment variable, then
 * `opts.firecrawlApiKey`.
 */
function getFirecrawlApiKey(opts: ContextOptions): string | undefined {
  const webFetchConfig = loadConfig().tools?.web?.fetch;

  // The fetch section is probed defensively: it may be absent or lack a firecrawl entry.
  let firecrawlConfig;
  if (webFetchConfig && typeof webFetchConfig === "object" && "firecrawl" in webFetchConfig) {
    firecrawlConfig = webFetchConfig.firecrawl;
  }

  let configuredKey = "";
  if (
    firecrawlConfig &&
    typeof firecrawlConfig === "object" &&
    "apiKey" in firecrawlConfig &&
    typeof firecrawlConfig.apiKey === "string"
  ) {
    configuredKey = firecrawlConfig.apiKey.trim();
  }

  const envKey = (process.env.FIRECRAWL_API_KEY || "").trim();

  if (configuredKey) {
    return configuredKey;
  }
  if (envKey) {
    return envKey;
  }
  return opts.firecrawlApiKey;
}
await isFirecrawlSessionReachable(session.cdpWebSocketUrl, timeoutMs) + : false; + } const { httpTimeoutMs } = resolveTimeouts(timeoutMs); return await isChromeReachable(profile.cdpUrl, httpTimeoutMs); }; @@ -107,6 +142,44 @@ export function createProfileAvailability({ const attachOnly = profile.attachOnly; const isExtension = profile.driver === "extension"; const profileState = getProfileState(); + + // Firecrawl cloud browser: manage session lifecycle + if (profile.driver === "firecrawl") { + if (profileState.firecrawlSession) { + if (await isFirecrawlSessionReachable(profileState.firecrawlSession.cdpWebSocketUrl)) { + // Re-apply dynamic cdpUrl in case config refresh overwrote it + profileState.profile = { + ...profileState.profile, + cdpUrl: profileState.firecrawlSession.cdpWebSocketUrl, + }; + return; // existing session still alive + } + // Best-effort cleanup of the unreachable session to avoid leaking cloud resources + const staleSession = profileState.firecrawlSession; + profileState.firecrawlSession = null; + const cleanupKey = getFirecrawlApiKey(opts); + if (cleanupKey && staleSession) { + deleteFirecrawlBrowserSession({ + apiKey: cleanupKey, + baseUrl: opts.firecrawlBaseUrl || "https://api.firecrawl.dev", + sessionId: staleSession.sessionId, + }).catch(() => {}); + } + } + const apiKey = getFirecrawlApiKey(opts); + const baseUrl = opts.firecrawlBaseUrl || "https://api.firecrawl.dev"; + if (!apiKey) { + throw new Error( + "Firecrawl browser profile requires an API key. 
Set tools.web.fetch.firecrawl.apiKey or FIRECRAWL_API_KEY.", + ); + } + const session = await createFirecrawlBrowserSession({ apiKey, baseUrl }); + profileState.firecrawlSession = session; + // Update the runtime profile with the session's CDP WebSocket URL + profileState.profile = { ...profileState.profile, cdpUrl: session.cdpWebSocketUrl }; + return; + } + const httpReachable = await isHttpReachable(); if (isExtension && remoteCdp) { @@ -198,6 +271,23 @@ export function createProfileAvailability({ }; const stopRunningBrowser = async (): Promise<{ stopped: boolean }> => { + if (profile.driver === "firecrawl") { + const profileState = getProfileState(); + const session = profileState.firecrawlSession; + if (session) { + const apiKey = getFirecrawlApiKey(opts); + const baseUrl = opts.firecrawlBaseUrl || "https://api.firecrawl.dev"; + if (apiKey) { + await deleteFirecrawlBrowserSession({ + apiKey, + baseUrl, + sessionId: session.sessionId, + }).catch(() => {}); // best-effort cleanup + } + profileState.firecrawlSession = null; + } + return { stopped: Boolean(session) }; + } if (profile.driver === "extension") { const stopped = await stopChromeExtensionRelayServer({ cdpUrl: profile.cdpUrl, diff --git a/src/browser/server-context.firecrawl-availability.test.ts b/src/browser/server-context.firecrawl-availability.test.ts new file mode 100644 index 000000000000..1dc26eb3dfea --- /dev/null +++ b/src/browser/server-context.firecrawl-availability.test.ts @@ -0,0 +1,714 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +// Mock chrome.js (standard test harness for server-context tests) +vi.mock("./chrome.js", () => ({ + isChromeCdpReady: vi.fn(async () => true), + isChromeReachable: vi.fn(async () => true), + launchOpenClawChrome: vi.fn(async () => { + throw new Error("unexpected launch"); + }), + resolveOpenClawUserDataDir: vi.fn(() => "/tmp/openclaw-test"), + stopOpenClawChrome: vi.fn(async () => {}), +})); + +// Mock firecrawl-browser.js 
/**
 * Build a fresh server-state fixture for the firecrawl availability tests.
 *
 * The resolved config declares a single profile named "firecrawl" (also the
 * default profile) using the firecrawl driver; the runtime `profiles` map
 * starts empty. Every call returns new objects.
 */
function makeFirecrawlState(): BrowserServerState {
  const resolved: BrowserServerState["resolved"] = {
    enabled: true,
    controlPort: 18791,
    cdpProtocol: "http",
    cdpHost: "127.0.0.1",
    cdpIsLoopback: true,
    cdpPortRangeStart: 18800,
    cdpPortRangeEnd: 18810,
    evaluateEnabled: false,
    remoteCdpTimeoutMs: 1500,
    remoteCdpHandshakeTimeoutMs: 3000,
    extraArgs: [],
    color: "#FF4500",
    headless: true,
    noSandbox: false,
    attachOnly: false,
    ssrfPolicy: { allowPrivateNetwork: true },
    defaultProfile: "firecrawl",
    profiles: {
      firecrawl: { driver: "firecrawl", color: "#FF4500" },
    },
  };

  const state: BrowserServerState = {
    // oxlint-disable-next-line typescript/no-explicit-any
    server: null as any,
    port: 0,
    resolved,
    profiles: new Map(),
  };
  return state;
}
createMock.mockResolvedValue(firecrawlSession); + + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + firecrawlBaseUrl: "https://api.firecrawl.dev", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.ensureBrowserAvailable(); + + expect(createMock).toHaveBeenCalledWith({ + apiKey: "fc-test-key", + baseUrl: "https://api.firecrawl.dev", + }); + + const profileState = state.profiles.get("firecrawl"); + expect(profileState?.firecrawlSession).toEqual(firecrawlSession); + expect(profileState?.profile.cdpUrl).toBe("wss://connect.firecrawl.dev/sess-test-1"); + }); + + it("reuses existing session when reachable", async () => { + reachableMock.mockResolvedValue(true); + + const state = makeFirecrawlState(); + // Pre-seed the profile state with an existing session + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + // Simulate config-refresh clobbering cdpUrl back to "" + cdpUrl: "", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-existing", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-existing", + liveViewUrl: "https://connect.firecrawl.dev/v/sess-existing", + interactiveLiveViewUrl: "https://connect.firecrawl.dev/v/sess-existing", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.ensureBrowserAvailable(); + + expect(createMock).not.toHaveBeenCalled(); + expect(reachableMock).toHaveBeenCalledWith("wss://connect.firecrawl.dev/sess-existing"); + // cdpUrl should be re-applied from the session even though config refresh cleared it + const runtimeProfile = state.profiles.get("firecrawl")?.profile; + expect(runtimeProfile?.cdpUrl).toBe("wss://connect.firecrawl.dev/sess-existing"); + 
}); + + it("replaces stale session when not reachable", async () => { + reachableMock.mockResolvedValue(false); + createMock.mockResolvedValue({ + sessionId: "sess-new", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-new", + liveViewUrl: "https://connect.firecrawl.dev/v/sess-new", + interactiveLiveViewUrl: "https://connect.firecrawl.dev/v/sess-new", + }); + + const state = makeFirecrawlState(); + // Pre-seed with a stale session + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: "wss://connect.firecrawl.dev/sess-stale", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-stale", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-stale", + liveViewUrl: "https://connect.firecrawl.dev/v/sess-stale", + interactiveLiveViewUrl: "https://connect.firecrawl.dev/v/sess-stale", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.ensureBrowserAvailable(); + + expect(reachableMock).toHaveBeenCalled(); + expect(createMock).toHaveBeenCalled(); + + const profileState = state.profiles.get("firecrawl"); + expect(profileState?.firecrawlSession?.sessionId).toBe("sess-new"); + expect(profileState?.profile.cdpUrl).toBe("wss://connect.firecrawl.dev/sess-new"); + }); + + it("clears stale session before creating new one", async () => { + reachableMock.mockResolvedValue(false); + createMock.mockResolvedValue(firecrawlSession); + + const state = makeFirecrawlState(); + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: "", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-old", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-old", + 
liveViewUrl: "", + interactiveLiveViewUrl: "", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.ensureBrowserAvailable(); + + // Verify create was called after the stale session was cleared + expect(createMock).toHaveBeenCalledTimes(1); + }); + + it("throws when no API key is configured", async () => { + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + // no firecrawlApiKey + }); + const profile = ctx.forProfile("firecrawl"); + + await expect(profile.ensureBrowserAvailable()).rejects.toThrow( + /Firecrawl browser profile requires an API key/, + ); + expect(createMock).not.toHaveBeenCalled(); + }); + + it("propagates create session errors", async () => { + createMock.mockRejectedValue( + new Error("Firecrawl browser session create failed (HTTP 500): boom"), + ); + + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + await expect(profile.ensureBrowserAvailable()).rejects.toThrow( + "Firecrawl browser session create failed (HTTP 500): boom", + ); + }); + + it("uses default baseUrl when firecrawlBaseUrl is not provided", async () => { + createMock.mockResolvedValue(firecrawlSession); + + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + // no firecrawlBaseUrl — should default to https://api.firecrawl.dev + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.ensureBrowserAvailable(); + + expect(createMock).toHaveBeenCalledWith({ + apiKey: "fc-test-key", + baseUrl: "https://api.firecrawl.dev", + }); + }); + + it("uses custom baseUrl when firecrawlBaseUrl is provided", async () => { + createMock.mockResolvedValue(firecrawlSession); + + 
const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + firecrawlBaseUrl: "https://custom-firecrawl.example.com", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.ensureBrowserAvailable(); + + expect(createMock).toHaveBeenCalledWith({ + apiKey: "fc-test-key", + baseUrl: "https://custom-firecrawl.example.com", + }); + }); + + it("does not call launchOpenClawChrome for firecrawl profiles", async () => { + const { launchOpenClawChrome } = await import("./chrome.js"); + createMock.mockResolvedValue(firecrawlSession); + + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.ensureBrowserAvailable(); + + expect(launchOpenClawChrome).not.toHaveBeenCalled(); + }); + }); + + describe("isReachable / isHttpReachable", () => { + it("returns false when no firecrawl session exists", async () => { + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + const reachable = await profile.isReachable(); + expect(reachable).toBe(false); + expect(reachableMock).not.toHaveBeenCalled(); + }); + + it("delegates to isFirecrawlSessionReachable when session exists", async () => { + reachableMock.mockResolvedValue(true); + + const state = makeFirecrawlState(); + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: "wss://connect.firecrawl.dev/sess-r", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-r", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-r", + liveViewUrl: "", + interactiveLiveViewUrl: "", + }, + }); + + const ctx = 
createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + const result = await profile.isReachable(5000); + expect(result).toBe(true); + expect(reachableMock).toHaveBeenCalledWith("wss://connect.firecrawl.dev/sess-r", 5000); + }); + + it("isHttpReachable returns false when no session", async () => { + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + const reachable = await profile.isHttpReachable(); + expect(reachable).toBe(false); + }); + + it("isHttpReachable delegates to isFirecrawlSessionReachable when session exists", async () => { + reachableMock.mockResolvedValue(false); + + const state = makeFirecrawlState(); + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: "wss://connect.firecrawl.dev/sess-h", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-h", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-h", + liveViewUrl: "", + interactiveLiveViewUrl: "", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + const result = await profile.isHttpReachable(); + expect(result).toBe(false); + expect(reachableMock).toHaveBeenCalledWith( + "wss://connect.firecrawl.dev/sess-h", + // timeoutMs is passed through from isHttpReachable arg; undefined when called with no args + undefined, + ); + }); + + it("isHttpReachable passes explicit timeout to reachability check", async () => { + reachableMock.mockResolvedValue(true); + + const state = makeFirecrawlState(); + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: 
"wss://connect.firecrawl.dev/sess-ht", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-ht", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-ht", + liveViewUrl: "", + interactiveLiveViewUrl: "", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + const result = await profile.isHttpReachable(2000); + expect(result).toBe(true); + expect(reachableMock).toHaveBeenCalledWith("wss://connect.firecrawl.dev/sess-ht", 2000); + }); + + it("does not call isChromeReachable or isChromeCdpReady for firecrawl", async () => { + const { isChromeReachable, isChromeCdpReady } = await import("./chrome.js"); + + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.isReachable(); + await profile.isHttpReachable(); + + expect(isChromeReachable).not.toHaveBeenCalled(); + expect(isChromeCdpReady).not.toHaveBeenCalled(); + }); + }); + + describe("stopRunningBrowser", () => { + it("deletes firecrawl session when one exists", async () => { + const state = makeFirecrawlState(); + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: "wss://connect.firecrawl.dev/sess-stop", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-stop", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-stop", + liveViewUrl: "", + interactiveLiveViewUrl: "", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + firecrawlBaseUrl: "https://api.firecrawl.dev", + }); + const profile = ctx.forProfile("firecrawl"); + + const 
result = await profile.stopRunningBrowser(); + + expect(result.stopped).toBe(true); + expect(deleteMock).toHaveBeenCalledWith({ + apiKey: "fc-test-key", + baseUrl: "https://api.firecrawl.dev", + sessionId: "sess-stop", + }); + expect(state.profiles.get("firecrawl")?.firecrawlSession).toBeNull(); + }); + + it("returns stopped=false when no session exists", async () => { + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + const result = await profile.stopRunningBrowser(); + + expect(result.stopped).toBe(false); + expect(deleteMock).not.toHaveBeenCalled(); + }); + + it("clears session even when delete API call fails (best-effort)", async () => { + deleteMock.mockRejectedValue(new Error("delete failed")); + + const state = makeFirecrawlState(); + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: "wss://connect.firecrawl.dev/sess-fail", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-fail", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-fail", + liveViewUrl: "", + interactiveLiveViewUrl: "", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + // Should not throw — delete errors are caught + const result = await profile.stopRunningBrowser(); + expect(result.stopped).toBe(true); + expect(state.profiles.get("firecrawl")?.firecrawlSession).toBeNull(); + }); + + it("does not attempt delete when no API key is available", async () => { + const state = makeFirecrawlState(); + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: "", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + 
attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-nokey", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-nokey", + liveViewUrl: "", + interactiveLiveViewUrl: "", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + // no firecrawlApiKey + }); + const profile = ctx.forProfile("firecrawl"); + + const result = await profile.stopRunningBrowser(); + expect(result.stopped).toBe(true); + expect(deleteMock).not.toHaveBeenCalled(); + expect(state.profiles.get("firecrawl")?.firecrawlSession).toBeNull(); + }); + + it("uses default baseUrl for delete when firecrawlBaseUrl not configured", async () => { + const state = makeFirecrawlState(); + state.profiles.set("firecrawl", { + profile: { + name: "firecrawl", + cdpPort: 0, + cdpUrl: "", + cdpHost: "", + cdpIsLoopback: false, + color: "#FF4500", + driver: "firecrawl", + attachOnly: true, + }, + running: null, + firecrawlSession: { + sessionId: "sess-default-url", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-default-url", + liveViewUrl: "", + interactiveLiveViewUrl: "", + }, + }); + + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + // no firecrawlBaseUrl + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.stopRunningBrowser(); + + expect(deleteMock).toHaveBeenCalledWith({ + apiKey: "fc-test-key", + baseUrl: "https://api.firecrawl.dev", + sessionId: "sess-default-url", + }); + }); + + it("does not call stopOpenClawChrome for firecrawl profiles", async () => { + const { stopOpenClawChrome } = await import("./chrome.js"); + + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.stopRunningBrowser(); + + expect(stopOpenClawChrome).not.toHaveBeenCalled(); + }); + }); + + describe("full lifecycle", () => { + it("create → reuse → stop 
lifecycle", async () => { + // Step 1: Create session + createMock.mockResolvedValue(firecrawlSession); + + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + firecrawlBaseUrl: "https://api.firecrawl.dev", + }); + const profile = ctx.forProfile("firecrawl"); + + await profile.ensureBrowserAvailable(); + expect(createMock).toHaveBeenCalledTimes(1); + expect(state.profiles.get("firecrawl")?.firecrawlSession?.sessionId).toBe("sess-test-1"); + + // Step 2: Reuse existing session + reachableMock.mockResolvedValue(true); + await profile.ensureBrowserAvailable(); + expect(createMock).toHaveBeenCalledTimes(1); // no new create call + + // Step 3: Stop + const result = await profile.stopRunningBrowser(); + expect(result.stopped).toBe(true); + expect(deleteMock).toHaveBeenCalledWith({ + apiKey: "fc-test-key", + baseUrl: "https://api.firecrawl.dev", + sessionId: "sess-test-1", + }); + expect(state.profiles.get("firecrawl")?.firecrawlSession).toBeNull(); + }); + + it("create → expire → recreate lifecycle", async () => { + const session1 = { + ...firecrawlSession, + sessionId: "sess-1", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-1", + }; + const session2 = { + ...firecrawlSession, + sessionId: "sess-2", + cdpWebSocketUrl: "wss://connect.firecrawl.dev/sess-2", + }; + + createMock.mockResolvedValueOnce(session1).mockResolvedValueOnce(session2); + + const state = makeFirecrawlState(); + const ctx = createBrowserRouteContext({ + getState: () => state, + firecrawlApiKey: "fc-test-key", + }); + const profile = ctx.forProfile("firecrawl"); + + // Step 1: Create first session + await profile.ensureBrowserAvailable(); + expect(state.profiles.get("firecrawl")?.firecrawlSession?.sessionId).toBe("sess-1"); + expect(state.profiles.get("firecrawl")?.profile.cdpUrl).toBe( + "wss://connect.firecrawl.dev/sess-1", + ); + + // Step 2: Session expires (not reachable) + 
reachableMock.mockResolvedValue(false); + await profile.ensureBrowserAvailable(); + expect(createMock).toHaveBeenCalledTimes(2); + expect(state.profiles.get("firecrawl")?.firecrawlSession?.sessionId).toBe("sess-2"); + expect(state.profiles.get("firecrawl")?.profile.cdpUrl).toBe( + "wss://connect.firecrawl.dev/sess-2", + ); + }); + }); +}); diff --git a/src/browser/server-context.selection.ts b/src/browser/server-context.selection.ts index e1c78426eab6..7118982e2914 100644 --- a/src/browser/server-context.selection.ts +++ b/src/browser/server-context.selection.ts @@ -106,6 +106,8 @@ export function createProfileSelectionOps({ const focusTab = async (targetId: string): Promise => { const resolvedTargetId = await resolveTargetIdOrThrow(targetId); + // Use runtime cdpUrl (may be updated dynamically, e.g. firecrawl sessions) + const runtimeCdpUrl = getProfileState().profile.cdpUrl || profile.cdpUrl; if (!profile.cdpIsLoopback) { const mod = await getPwAiModule({ mode: "strict" }); @@ -113,7 +115,7 @@ export function createProfileSelectionOps({ ?.focusPageByTargetIdViaPlaywright; if (typeof focusPageByTargetIdViaPlaywright === "function") { await focusPageByTargetIdViaPlaywright({ - cdpUrl: profile.cdpUrl, + cdpUrl: runtimeCdpUrl, targetId: resolvedTargetId, }); const profileState = getProfileState(); @@ -122,13 +124,15 @@ export function createProfileSelectionOps({ } } - await fetchOk(appendCdpPath(profile.cdpUrl, `/json/activate/${resolvedTargetId}`)); + await fetchOk(appendCdpPath(runtimeCdpUrl, `/json/activate/${resolvedTargetId}`)); const profileState = getProfileState(); profileState.lastTargetId = resolvedTargetId; }; const closeTab = async (targetId: string): Promise => { const resolvedTargetId = await resolveTargetIdOrThrow(targetId); + // Use runtime cdpUrl (may be updated dynamically, e.g. 
firecrawl sessions) + const runtimeCdpUrl = getProfileState().profile.cdpUrl || profile.cdpUrl; // For remote profiles, use Playwright's persistent connection to close tabs if (!profile.cdpIsLoopback) { @@ -137,14 +141,14 @@ export function createProfileSelectionOps({ ?.closePageByTargetIdViaPlaywright; if (typeof closePageByTargetIdViaPlaywright === "function") { await closePageByTargetIdViaPlaywright({ - cdpUrl: profile.cdpUrl, + cdpUrl: runtimeCdpUrl, targetId: resolvedTargetId, }); return; } } - await fetchOk(appendCdpPath(profile.cdpUrl, `/json/close/${resolvedTargetId}`)); + await fetchOk(appendCdpPath(runtimeCdpUrl, `/json/close/${resolvedTargetId}`)); }; return { diff --git a/src/browser/server-context.tab-ops.ts b/src/browser/server-context.tab-ops.ts index cf026d658a7e..8ca49897aa04 100644 --- a/src/browser/server-context.tab-ops.ts +++ b/src/browser/server-context.tab-ops.ts @@ -59,12 +59,15 @@ export function createProfileTabOps({ getProfileState, }: TabOpsDeps): ProfileTabOps { const listTabs = async (): Promise => { + // Use runtime profile's cdpUrl (may be updated dynamically, e.g. firecrawl sessions) + const runtimeCdpUrl = getProfileState().profile.cdpUrl || profile.cdpUrl; + // For remote profiles, use Playwright's persistent connection to avoid ephemeral sessions if (!profile.cdpIsLoopback) { const mod = await getPwAiModule({ mode: "strict" }); const listPagesViaPlaywright = (mod as Partial | null)?.listPagesViaPlaywright; if (typeof listPagesViaPlaywright === "function") { - const pages = await listPagesViaPlaywright({ cdpUrl: profile.cdpUrl }); + const pages = await listPagesViaPlaywright({ cdpUrl: runtimeCdpUrl }); return pages.map((p) => ({ targetId: p.targetId, title: p.title, @@ -82,13 +85,13 @@ export function createProfileTabOps({ webSocketDebuggerUrl?: string; type?: string; }> - >(appendCdpPath(profile.cdpUrl, "/json/list")); + >(appendCdpPath(runtimeCdpUrl, "/json/list")); return raw .map((t) => ({ targetId: t.id ?? 
"", title: t.title ?? "", url: t.url ?? "", - wsUrl: normalizeWsUrl(t.webSocketDebuggerUrl, profile.cdpUrl), + wsUrl: normalizeWsUrl(t.webSocketDebuggerUrl, runtimeCdpUrl), type: t.type, })) .filter((t) => Boolean(t.targetId)); @@ -129,6 +132,8 @@ export function createProfileTabOps({ const openTab = async (url: string): Promise => { const ssrfPolicyOpts = withBrowserNavigationPolicy(state().resolved.ssrfPolicy); + // Use runtime profile's cdpUrl (may be updated dynamically, e.g. firecrawl sessions) + const runtimeCdpUrl = getProfileState().profile.cdpUrl || profile.cdpUrl; // For remote profiles, use Playwright's persistent connection to create tabs // This ensures the tab persists beyond a single request. @@ -137,7 +142,7 @@ export function createProfileTabOps({ const createPageViaPlaywright = (mod as Partial | null)?.createPageViaPlaywright; if (typeof createPageViaPlaywright === "function") { const page = await createPageViaPlaywright({ - cdpUrl: profile.cdpUrl, + cdpUrl: runtimeCdpUrl, url, ...ssrfPolicyOpts, }); @@ -154,7 +159,7 @@ export function createProfileTabOps({ } const createdViaCdp = await createTargetViaCdp({ - cdpUrl: profile.cdpUrl, + cdpUrl: runtimeCdpUrl, url, ...ssrfPolicyOpts, }) @@ -180,7 +185,7 @@ export function createProfileTabOps({ } const encoded = encodeURIComponent(url); - const endpointUrl = new URL(appendCdpPath(profile.cdpUrl, "/json/new")); + const endpointUrl = new URL(appendCdpPath(runtimeCdpUrl, "/json/new")); await assertBrowserNavigationAllowed({ url, ...ssrfPolicyOpts }); const endpoint = endpointUrl.search ? (() => { @@ -209,7 +214,7 @@ export function createProfileTabOps({ targetId: created.id, title: created.title ?? 
"", url: resolvedUrl, - wsUrl: normalizeWsUrl(created.webSocketDebuggerUrl, profile.cdpUrl), + wsUrl: normalizeWsUrl(created.webSocketDebuggerUrl, runtimeCdpUrl), type: created.type, }; }; diff --git a/src/browser/server-context.ts b/src/browser/server-context.ts index 29632c7b8a45..141c63c952c3 100644 --- a/src/browser/server-context.ts +++ b/src/browser/server-context.ts @@ -101,6 +101,7 @@ function createProfileContext( return { profile, + getCdpUrl: () => getProfileState().profile.cdpUrl || profile.cdpUrl, ensureBrowserAvailable, ensureTabAvailable, isHttpReachable, @@ -169,6 +170,16 @@ export function createBrowserRouteContext(opts: ContextOptions): BrowserRouteCon } catch { // Browser might not be responsive } + } else if (profileState?.firecrawlSession) { + // Firecrawl session is active (no RunningChrome process) + running = true; + try { + const ctx = createProfileContext(opts, profile); + const tabs = await ctx.listTabs(); + tabCount = tabs.filter((t) => t.type === "page").length; + } catch { + // Session might not be responsive + } } else { // Check if something is listening on the port try { @@ -187,7 +198,7 @@ export function createBrowserRouteContext(opts: ContextOptions): BrowserRouteCon result.push({ name, cdpPort: profile.cdpPort, - cdpUrl: profile.cdpUrl, + cdpUrl: profileState?.profile.cdpUrl || profile.cdpUrl, color: profile.color, running, tabCount, diff --git a/src/browser/server-context.types.ts b/src/browser/server-context.types.ts index b9dc634fe93a..4dd59e6fcf10 100644 --- a/src/browser/server-context.types.ts +++ b/src/browser/server-context.types.ts @@ -5,12 +5,25 @@ import type { ResolvedBrowserConfig, ResolvedBrowserProfile } from "./config.js" export type { BrowserTab }; +/** + * Firecrawl cloud browser session state. 
+ */ +export type FirecrawlSessionState = { + sessionId: string; + cdpWebSocketUrl: string; + liveViewUrl: string; + interactiveLiveViewUrl: string; + expiresAt?: string; +}; + /** * Runtime state for a single profile's Chrome instance. */ export type ProfileRuntimeState = { profile: ResolvedBrowserProfile; running: RunningChrome | null; + /** Active Firecrawl cloud browser session (firecrawl driver only). */ + firecrawlSession?: FirecrawlSessionState | null; /** Sticky tab selection when callers omit targetId (keeps snapshot+act consistent). */ lastTargetId?: string | null; }; @@ -45,6 +58,8 @@ export type BrowserRouteContext = { export type ProfileContext = { profile: ResolvedBrowserProfile; + /** Runtime CDP URL — reflects dynamic session URLs (e.g. firecrawl WSS). */ + getCdpUrl: () => string; } & BrowserProfileActions; export type ProfileStatus = { @@ -62,4 +77,8 @@ export type ContextOptions = { getState: () => BrowserServerState | null; onEnsureAttachTarget?: (profile: ResolvedBrowserProfile) => Promise; refreshConfigFromDisk?: boolean; + /** Firecrawl API key for cloud browser sessions (resolved from config or env). */ + firecrawlApiKey?: string; + /** Firecrawl base URL (default: https://api.firecrawl.dev). 
*/ + firecrawlBaseUrl?: string; }; diff --git a/src/browser/server.ts b/src/browser/server.ts index f6a269aee1ee..d16f124ed393 100644 --- a/src/browser/server.ts +++ b/src/browser/server.ts @@ -1,5 +1,10 @@ import type { Server } from "node:http"; import express from "express"; +import { + resolveFirecrawlApiKey, + resolveFirecrawlBaseUrl, + resolveFirecrawlConfig, +} from "../agents/tools/web-fetch.js"; import { loadConfig } from "../config/config.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; import { resolveBrowserConfig } from "./config.js"; @@ -24,7 +29,11 @@ export async function startBrowserControlServerFromConfig(): Promise state, refreshConfigFromDisk: true, + firecrawlApiKey, + firecrawlBaseUrl, }); registerBrowserRoutes(app as unknown as BrowserRouteRegistrar, ctx); diff --git a/src/commands/onboard-firecrawl.test.ts b/src/commands/onboard-firecrawl.test.ts new file mode 100644 index 000000000000..e7464405269c --- /dev/null +++ b/src/commands/onboard-firecrawl.test.ts @@ -0,0 +1,198 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { createWizardPrompter } from "../../test/helpers/wizard-prompter.js"; +import type { RuntimeEnv } from "../runtime.js"; +import type { WizardPrompter } from "../wizard/prompts.js"; +import { setupFirecrawl } from "./onboard-firecrawl.js"; + +// --------------------------------------------------------------------------- +// Mocks +// --------------------------------------------------------------------------- + +const openUrl = vi.hoisted(() => vi.fn(async () => true)); +const isRemoteEnvironment = vi.hoisted(() => vi.fn(() => false)); + +vi.mock("./onboard-helpers.js", () => ({ openUrl })); +vi.mock("./oauth-env.js", () => ({ isRemoteEnvironment })); + +const mockFetch = vi.hoisted(() => vi.fn()); + +beforeEach(() => { + vi.stubGlobal("fetch", mockFetch); + mockFetch.mockReset(); + openUrl.mockReset().mockResolvedValue(true); + 
isRemoteEnvironment.mockReset().mockReturnValue(false); +}); + +afterEach(() => { + vi.useRealTimers(); + vi.unstubAllGlobals(); + vi.stubGlobal("fetch", mockFetch); + delete process.env.FIRECRAWL_API_KEY; +}); + +function createRuntime(): RuntimeEnv { + return { log: vi.fn(), error: vi.fn(), exit: vi.fn() }; +} + +function useFastTimers() { + vi.useFakeTimers({ shouldAdvanceTime: true }); +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("setupFirecrawl", () => { + it("skips when API key already exists in config", async () => { + const cfg = { tools: { web: { fetch: { firecrawl: { apiKey: "fc-existing" } } } } }; + const prompter = createWizardPrompter(); + + const result = await setupFirecrawl(cfg, createRuntime(), prompter); + + expect(result).toBe(cfg); + }); + + it("skips when FIRECRAWL_API_KEY env var is set", async () => { + process.env.FIRECRAWL_API_KEY = "fc-from-env"; + const prompter = createWizardPrompter(); + + const result = await setupFirecrawl({}, createRuntime(), prompter); + + expect(result).toEqual({}); + }); + + it("returns config unchanged when user declines", async () => { + const prompter = createWizardPrompter({ confirm: vi.fn(async () => false) }); + + const result = await setupFirecrawl({}, createRuntime(), prompter); + + expect(result).toEqual({}); + }); + + it("stores key and enables tools via manual entry", async () => { + const prompter = createWizardPrompter({ + confirm: vi.fn(async () => true), + select: vi.fn(async () => "manual") as unknown as WizardPrompter["select"], + text: vi.fn(async () => "fc-test-key-123"), + }); + + const result = await setupFirecrawl({}, createRuntime(), prompter); + + expect(result.tools?.web?.fetch?.firecrawl).toEqual({ + enabled: true, + apiKey: "fc-test-key-123", + }); + expect(result.tools?.alsoAllow).toEqual( + expect.arrayContaining(["firecrawl_search", 
"firecrawl_scrape", "browser"]), + ); + }); + + it("deduplicates alsoAllow when tools already exist", async () => { + const existing = { + tools: { alsoAllow: ["firecrawl_search", "some_other_tool"] }, + }; + const prompter = createWizardPrompter({ + confirm: vi.fn(async () => true), + select: vi.fn(async () => "manual") as unknown as WizardPrompter["select"], + text: vi.fn(async () => "fc-dedup-key"), + }); + + const result = await setupFirecrawl(existing, createRuntime(), prompter); + + const counts = result.tools!.alsoAllow!.filter((t: string) => t === "firecrawl_search"); + expect(counts).toHaveLength(1); + expect(result.tools?.alsoAllow).toContain("some_other_tool"); + }); + + it("handles browser auth flow success", async () => { + useFastTimers(); + + const stopFn = vi.fn(); + const prompter = createWizardPrompter({ + confirm: vi.fn(async () => true), + select: vi.fn(async () => "browser") as unknown as WizardPrompter["select"], + progress: vi.fn(() => ({ update: vi.fn(), stop: stopFn })), + }); + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ apiKey: "fc-browser-key", teamName: "My Team" }), + }); + + const result = await setupFirecrawl({}, createRuntime(), prompter); + + expect(result.tools?.web?.fetch?.firecrawl?.apiKey).toBe("fc-browser-key"); + expect(result.tools?.alsoAllow).toEqual( + expect.arrayContaining(["firecrawl_search", "firecrawl_scrape", "browser"]), + ); + expect(openUrl).toHaveBeenCalledWith(expect.stringContaining("source=openclaw")); + expect(stopFn).toHaveBeenCalledWith(expect.stringContaining("Authenticated")); + }); + + it("includes source=openclaw in auth URL", async () => { + useFastTimers(); + + const prompter = createWizardPrompter({ + confirm: vi.fn(async () => true), + select: vi.fn(async () => "browser") as unknown as WizardPrompter["select"], + progress: vi.fn(() => ({ update: vi.fn(), stop: vi.fn() })), + }); + + mockFetch.mockResolvedValueOnce({ + ok: true, + json: async () => ({ apiKey: 
"fc-source-key" }), + }); + + await setupFirecrawl({}, createRuntime(), prompter); + + expect(openUrl).toHaveBeenCalledWith(expect.stringContaining("&source=openclaw#session_id=")); + }); + + it("handles browser auth timeout gracefully", async () => { + useFastTimers(); + + const stopFn = vi.fn(); + const note = vi.fn(async () => {}); + const prompter = createWizardPrompter({ + confirm: vi.fn(async () => true), + select: vi.fn(async () => "browser") as unknown as WizardPrompter["select"], + progress: vi.fn(() => ({ update: vi.fn(), stop: stopFn })), + note, + }); + + mockFetch.mockResolvedValue({ ok: true, json: async () => ({}) }); + + const realDateNow = Date.now; + let callCount = 0; + vi.spyOn(Date, "now").mockImplementation(() => { + return realDateNow() + (callCount++ > 0 ? 10 * 60 * 1_000 : 0); + }); + + const result = await setupFirecrawl({}, createRuntime(), prompter); + + expect(result).toEqual({}); + expect(stopFn).toHaveBeenCalledWith("Timed out waiting for login."); + }); + + it("preserves existing config keys when storing firecrawl key", async () => { + const existing = { + tools: { + web: { + search: { enabled: true }, + fetch: { enabled: true, maxChars: 10_000 }, + }, + }, + }; + const prompter = createWizardPrompter({ + confirm: vi.fn(async () => true), + select: vi.fn(async () => "manual") as unknown as WizardPrompter["select"], + text: vi.fn(async () => "fc-preserve-test"), + }); + + const result = await setupFirecrawl(existing, createRuntime(), prompter); + + expect(result.tools?.web?.search).toEqual({ enabled: true }); + expect(result.tools?.web?.fetch?.maxChars).toBe(10_000); + expect(result.tools?.web?.fetch?.firecrawl?.apiKey).toBe("fc-preserve-test"); + }); +}); diff --git a/src/commands/onboard-firecrawl.ts b/src/commands/onboard-firecrawl.ts new file mode 100644 index 000000000000..55d24f808a69 --- /dev/null +++ b/src/commands/onboard-firecrawl.ts @@ -0,0 +1,262 @@ +import crypto from "node:crypto"; +import type { OpenClawConfig } from 
"../config/config.js";
+import type { RuntimeEnv } from "../runtime.js";
+import type { WizardPrompter } from "../wizard/prompts.js";
+import { isRemoteEnvironment } from "./oauth-env.js";
+import { openUrl } from "./onboard-helpers.js";
+
+// ---------------------------------------------------------------------------
+// PKCE helpers (matches Firecrawl CLI auth flow)
+// ---------------------------------------------------------------------------
+
+function generateSessionId(): string {
+  return crypto.randomBytes(32).toString("hex");
+}
+
+function generateCodeVerifier(): string {
+  return crypto.randomBytes(32).toString("base64url");
+}
+
+function generateCodeChallenge(verifier: string): string {
+  const digest = crypto.createHash("sha256").update(verifier).digest();
+  return digest.toString("base64url");
+}
+
+// ---------------------------------------------------------------------------
+// Auth polling
+// ---------------------------------------------------------------------------
+
+const FIRECRAWL_AUTH_STATUS_URL = "https://firecrawl.dev/api/auth/cli/status";
+const FIRECRAWL_AUTH_URL_BASE = "https://firecrawl.dev/cli-auth";
+
+type FirecrawlAuthResult = {
+  apiKey: string;
+  teamName?: string;
+};
+
+async function pollFirecrawlAuthStatus(
+  sessionId: string,
+  codeVerifier: string,
+): Promise<FirecrawlAuthResult | null> {
+  const res = await fetch(FIRECRAWL_AUTH_STATUS_URL, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ session_id: sessionId, code_verifier: codeVerifier }),
+    signal: AbortSignal.timeout(10_000),
+  });
+  if (!res.ok) { // non-OK response: nothing usable yet, caller decides whether to retry
+    return null;
+  }
+  const data = (await res.json()) as { apiKey?: string; teamName?: string };
+  if (data.apiKey && !validateFirecrawlKey(data.apiKey)) { // validator returns undefined for a well-formed key
+    return { apiKey: data.apiKey, teamName: data.teamName };
+  }
+  return null;
+}
+
+const POLL_INTERVAL_MS = 2_000;
+const POLL_TIMEOUT_MS = 5 * 60 * 1_000; // 5 minutes
+
+async function waitForFirecrawlAuth(
+  sessionId: string,
+  codeVerifier:
string,
+  spin: { update: (msg: string) => void },
+): Promise<FirecrawlAuthResult | null> {
+  const deadline = Date.now() + POLL_TIMEOUT_MS;
+  let dots = 0;
+  while (Date.now() < deadline) {
+    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS)); // sleep first, then poll
+    dots = (dots + 1) % 4;
+    spin.update(`Waiting for browser login${".".repeat(dots)}`);
+    try {
+      const result = await pollFirecrawlAuthStatus(sessionId, codeVerifier);
+      if (result) {
+        return result;
+      }
+    } catch {
+      // Network blip — keep polling.
+    }
+  }
+  return null; // deadline passed without a usable key
+}
+
+// ---------------------------------------------------------------------------
+// Config helpers
+// ---------------------------------------------------------------------------
+
+function getExistingFirecrawlKey(cfg: OpenClawConfig): string | undefined {
+  const key = cfg.tools?.web?.fetch?.firecrawl?.apiKey;
+  if (key) {
+    return key;
+  }
+  return undefined;
+}
+
+const FIRECRAWL_TOOL_NAMES = ["firecrawl_search", "firecrawl_scrape", "browser"];
+
+function applyFirecrawlKey(cfg: OpenClawConfig, apiKey: string): OpenClawConfig {
+  // Merge firecrawl tools into the existing alsoAllow list (deduped)
+  const existing = cfg.tools?.alsoAllow ??
[];
+  const merged = [...new Set([...existing, ...FIRECRAWL_TOOL_NAMES])];
+
+  return {
+    ...cfg,
+    tools: {
+      ...cfg.tools,
+      alsoAllow: merged,
+      web: {
+        ...cfg.tools?.web,
+        fetch: {
+          ...cfg.tools?.web?.fetch,
+          firecrawl: {
+            ...cfg.tools?.web?.fetch?.firecrawl,
+            enabled: true,
+            apiKey,
+          },
+        },
+      },
+    },
+  };
+}
+
+function validateFirecrawlKey(value: string): string | undefined {
+  const trimmed = value.trim();
+  if (!trimmed) {
+    return "API key is required";
+  }
+  if (!trimmed.startsWith("fc-")) {
+    return 'Firecrawl API keys start with "fc-"';
+  }
+  return undefined; // undefined signals "valid"; a string is the error message
+}
+
+// ---------------------------------------------------------------------------
+// Main setup function
+// ---------------------------------------------------------------------------
+
+export async function setupFirecrawl(
+  cfg: OpenClawConfig,
+  runtime: RuntimeEnv,
+  prompter: WizardPrompter,
+): Promise<OpenClawConfig> {
+  // Check if already configured via config.
+  const existingKey = getExistingFirecrawlKey(cfg);
+  if (existingKey) {
+    await prompter.note("Firecrawl API key already configured.", "Firecrawl");
+    return cfg;
+  }
+
+  // Check if already configured via env var.
+  if (process.env.FIRECRAWL_API_KEY) {
+    await prompter.note(
+      "Firecrawl API key found in FIRECRAWL_API_KEY environment variable.",
+      "Firecrawl",
+    );
+    return cfg;
+  }
+
+  await prompter.note(
+    "Firecrawl adds web scraping, search, and browser automation.\nFree tier: 500 credits on signup, no credit card required.",
+    "Firecrawl (optional)",
+  );
+
+  const wantsSetup = await prompter.confirm({
+    message: "Set up Firecrawl web scraping, search and web browsing?",
+    initialValue: true,
+  });
+  if (!wantsSetup) {
+    return cfg;
+  }
+
+  const method = await prompter.select<"browser" | "manual">({
+    message: "How would you like to authenticate?",
+    options: [
+      { value: "browser", label: "Browser login", hint: "recommended — opens firecrawl.dev" },
+      { value: "manual", label: "Paste API key", hint: "if you already have one" },
+    ],
+    initialValue: "browser",
+  });
+
+  if (method === "manual") {
+    return handleManualEntry(cfg, prompter);
+  }
+
+  return handleBrowserAuth(cfg, runtime, prompter);
+}
+
+// ---------------------------------------------------------------------------
+// Manual API key entry
+// ---------------------------------------------------------------------------
+
+async function handleManualEntry(
+  cfg: OpenClawConfig,
+  prompter: WizardPrompter,
+): Promise<OpenClawConfig> {
+  const apiKey = await prompter.text({
+    message: "Firecrawl API key",
+    placeholder: "fc-...",
+    validate: validateFirecrawlKey,
+  });
+
+  const trimmed = apiKey.trim(); // store the key without surrounding whitespace
+  if (!trimmed) {
+    return cfg;
+  }
+
+  await prompter.note("Firecrawl API key saved.", "Firecrawl");
+  return applyFirecrawlKey(cfg, trimmed);
+}
+
+// ---------------------------------------------------------------------------
+// Browser OAuth flow (PKCE + polling)
+// ---------------------------------------------------------------------------
+
+async function handleBrowserAuth(
+  cfg: OpenClawConfig,
+  runtime: RuntimeEnv,
+  prompter: WizardPrompter,
+): Promise<OpenClawConfig> {
+  try {
+    const sessionId = generateSessionId();
+
const codeVerifier = generateCodeVerifier(); + const codeChallenge = generateCodeChallenge(codeVerifier); + + const authUrl = `${FIRECRAWL_AUTH_URL_BASE}?code_challenge=${codeChallenge}&source=openclaw#session_id=${sessionId}`; + + const isRemote = isRemoteEnvironment(); + if (isRemote) { + await prompter.note(`Open this URL in your browser to log in:\n\n${authUrl}`, "Firecrawl"); + } else { + const opened = await openUrl(authUrl); + if (!opened) { + await prompter.note( + `Could not open browser. Visit this URL to log in:\n\n${authUrl}`, + "Firecrawl", + ); + } + } + + const spin = prompter.progress("Waiting for browser login..."); + const result = await waitForFirecrawlAuth(sessionId, codeVerifier, spin); + + if (!result) { + spin.stop("Timed out waiting for login."); + await prompter.note( + "Authentication timed out.\nYou can set up Firecrawl later via `openclaw configure --section web`\nor set the FIRECRAWL_API_KEY environment variable.", + "Firecrawl", + ); + return cfg; + } + + const teamNote = result.teamName ? ` (team: ${result.teamName})` : ""; + spin.stop(`Authenticated with Firecrawl${teamNote}`); + return applyFirecrawlKey(cfg, result.apiKey); + } catch (err) { + runtime.log("Firecrawl auth error:", err instanceof Error ? err.message : String(err)); + await prompter.note( + "Something went wrong during Firecrawl setup.\nYou can set up Firecrawl later via `openclaw configure --section web`\nor set the FIRECRAWL_API_KEY environment variable.", + "Firecrawl", + ); + return cfg; + } +} diff --git a/src/commands/onboard-types.ts b/src/commands/onboard-types.ts index fee12d392bbf..6c5044fe66ae 100644 --- a/src/commands/onboard-types.ts +++ b/src/commands/onboard-types.ts @@ -153,6 +153,7 @@ export type OnboardOptions = { /** @deprecated Legacy alias for `skipChannels`. 
*/ skipProviders?: boolean; skipSkills?: boolean; + /** When true, the onboarding wizard skips the Firecrawl setup step. */ skipFirecrawl?: boolean; skipHealth?: boolean; skipUi?: boolean; nodeManager?: NodeManagerChoice; diff --git a/src/config/types.browser.ts b/src/config/types.browser.ts index 82a404037c41..40a0f00d78e3 100644 --- a/src/config/types.browser.ts +++ b/src/config/types.browser.ts @@ -4,7 +4,7 @@ export type BrowserProfileConfig = { /** CDP URL for this profile (use for remote Chrome). */ cdpUrl?: string; /** Profile driver (default: openclaw). */ - driver?: "openclaw" | "extension"; + driver?: "openclaw" | "extension" | "firecrawl"; /** If true, never launch a browser for this profile; only attach. Falls back to browser.attachOnly. */ attachOnly?: boolean; /** Profile color (hex). Auto-assigned at creation. */ diff --git a/src/config/zod-schema.agent-runtime.ts b/src/config/zod-schema.agent-runtime.ts index 3f1ddb61e716..ddb972a0715b 100644 --- a/src/config/zod-schema.agent-runtime.ts +++ b/src/config/zod-schema.agent-runtime.ts @@ -309,6 +309,18 @@ export const ToolsWebSearchSchema = z .strict() .optional(); +/** Optional, strict zod schema for the `firecrawl` object nested in the web-fetch tool config; every field is optional and unknown keys are rejected. */ export const ToolsWebFetchFirecrawlSchema = z + .object({ + enabled: z.boolean().optional(), + apiKey: z.string().optional(), + baseUrl: z.string().optional(), + onlyMainContent: z.boolean().optional(), + /* non-negative number */ maxAgeMs: z.number().nonnegative().optional(), + /* positive integer */ timeoutSeconds: z.number().int().positive().optional(), + }) + .strict() + .optional(); + export const ToolsWebFetchSchema = z .object({ enabled: z.boolean().optional(), @@ -318,6 +330,8 @@ export const ToolsWebFetchSchema = z cacheTtlMinutes: z.number().nonnegative().optional(), maxRedirects: z.number().int().nonnegative().optional(), userAgent: z.string().optional(), + readability: z.boolean().optional(), + firecrawl: ToolsWebFetchFirecrawlSchema, }) .strict() .optional(); diff --git a/src/wizard/onboarding.test.ts b/src/wizard/onboarding.test.ts index b4a5d6d44e30..5752e0780c2e 100644 --- a/src/wizard/onboarding.test.ts +++ b/src/wizard/onboarding.test.ts @@
-65,6 +65,7 @@ const setupInternalHooks = vi.hoisted(() => vi.fn(async (cfg) => cfg)); const setupChannels = vi.hoisted(() => vi.fn(async (cfg) => cfg)); const setupSkills = vi.hoisted(() => vi.fn(async (cfg) => cfg)); +/* Hoisted pass-through mock: resolves to the config it receives. */ const setupFirecrawl = vi.hoisted(() => vi.fn(async (cfg) => cfg)); const healthCommand = vi.hoisted(() => vi.fn(async () => {})); const ensureWorkspaceAndSessions = vi.hoisted(() => vi.fn(async () => {})); const writeConfigFile = vi.hoisted(() => vi.fn(async () => {})); @@ -96,6 +97,10 @@ vi.mock("../commands/onboard-skills.js", () => ({ setupSkills, })); +/* Replace the onboard-firecrawl module with the pass-through mock so the wizard test does not run the real Firecrawl setup. */ vi.mock("../commands/onboard-firecrawl.js", () => ({ + setupFirecrawl, +})); + vi.mock("../agents/auth-profiles.js", () => ({ ensureAuthProfileStore, })); diff --git a/src/wizard/onboarding.ts b/src/wizard/onboarding.ts index 49a6e292ed2e..5caad7a78396 100644 --- a/src/wizard/onboarding.ts +++ b/src/wizard/onboarding.ts @@ -451,6 +451,11 @@ export async function runOnboardingWizard( nextConfig = await setupSkills(nextConfig, workspaceDir, runtime, prompter); } + /* Firecrawl step: runs unless opts.skipFirecrawl is set; the module is imported dynamically only when the step runs. */ if (!opts.skipFirecrawl) { + const { setupFirecrawl } = await import("../commands/onboard-firecrawl.js"); + nextConfig = await setupFirecrawl(nextConfig, runtime, prompter); + } + // Setup hooks (session memory on /new) const { setupInternalHooks } = await import("../commands/onboard-hooks.js"); nextConfig = await setupInternalHooks(nextConfig, runtime, prompter);