diff --git a/app/_components/copy-page-override.tsx b/app/_components/copy-page-override.tsx index 7a880d10b..611d679db 100644 --- a/app/_components/copy-page-override.tsx +++ b/app/_components/copy-page-override.tsx @@ -10,6 +10,11 @@ const COPYING_TEXT = "Copying\u2026"; const COPY_FAILED_TEXT = "Failed to copy"; const DROPDOWN_IDENTIFIER = "Markdown for LLMs"; +// A toolkit reference page: /<locale>/resources/integrations/<category>/<slug>. +// Captures the slug so we can pull full markdown from the data route. +const TOOLKIT_PAGE_PATH = + /^\/[^/]+\/resources\/integrations\/[^/]+\/([^/]+)\/?$/; + const ICON_COPY = ``; const ICON_SPINNER = ``; @@ -84,15 +89,34 @@ export function CopyPageOverride() { const fetchAndCopyMarkdown = useCallback(async (): Promise => { try { - const response = await fetch(pathname, { - headers: { Accept: "text/markdown" }, - }); + let markdown: string | null = null; + + // Toolkit reference pages render per-tool detail client-only, so the edge + // HTML→markdown view would miss parameters/output/examples. Pull full + // markdown from the data route instead. If the slug isn't a generated + // toolkit (e.g. a static partner page) the route 404s and we fall back to + // the normal page fetch below. 
+ const toolkitSlug = pathname.match(TOOLKIT_PAGE_PATH)?.[1]; + if (toolkitSlug) { + const dataResponse = await fetch( + `/api/toolkit-data/${encodeURIComponent(toolkitSlug)}`, + { headers: { Accept: "text/markdown" } } + ); + if (dataResponse.ok) { + markdown = await dataResponse.text(); + } + } - if (!response.ok) { - throw new Error(`Failed to fetch markdown: ${response.status}`); + if (markdown === null) { + const response = await fetch(pathname, { + headers: { Accept: "text/markdown" }, + }); + if (!response.ok) { + throw new Error(`Failed to fetch markdown: ${response.status}`); + } + markdown = await response.text(); } - const markdown = await response.text(); await navigator.clipboard.writeText(markdown); return true; } catch { diff --git a/app/_components/scope-picker.tsx b/app/_components/scope-picker.tsx index ab031c40c..4e6e6535b 100644 --- a/app/_components/scope-picker.tsx +++ b/app/_components/scope-picker.tsx @@ -4,6 +4,7 @@ import { Button } from "@arcadeai/design-system"; import { Check, Copy, KeyRound, ShieldCheck, Wrench } from "lucide-react"; import posthog from "posthog-js"; import { useCallback, useEffect, useMemo, useState } from "react"; +import { loadToolkitDetail } from "./toolkit-docs/components/use-toolkit-detail"; const COPY_FEEDBACK_MS = 2000; @@ -47,20 +48,33 @@ type ScopePickerProps = { tools: Tool[]; selectedTools?: string[]; onSelectedToolsChange?: (selectedTools: string[]) => void; + /** Toolkit id — lets "Copy tools JSON" lazily fetch full per-tool detail. */ + toolkitId?: string; }; -function CopyButton({ text, label }: { text: string; label: string }) { +function CopyButton({ + text, + getText, + label, +}: { + text?: string; + // Build the text to copy on demand (e.g. lazily fetch full tool detail). 
+ getText?: () => Promise; + label: string; +}) { const [copied, setCopied] = useState(false); const handleCopy = useCallback(async () => { try { - await navigator.clipboard.writeText(text); + await navigator.clipboard.writeText( + getText ? await getText() : (text ?? "") + ); setCopied(true); setTimeout(() => setCopied(false), COPY_FEEDBACK_MS); } catch { // Ignore clipboard errors (e.g., permissions, unsupported browser). } - }, [text]); + }, [text, getText]); return ( ); } +function ToolSelectionToggle({ + tool, + isSelected, + onToggleSelection, + hasScopes, + hasSecrets, +}: { + tool: ToolSummary; + isSelected: boolean; + onToggleSelection?: (toolName: string) => void; + hasScopes: boolean; + hasSecrets: boolean; +}) { + return ( + + ); +} + function ToolDescriptionSection({ tool, showDescription, }: { - tool: ToolSectionProps["tool"]; + tool: ToolSummary; showDescription: boolean; }) { return ( @@ -193,7 +217,7 @@ function ToolParametersSection({ tool, showParameters, }: { - tool: ToolSectionProps["tool"]; + tool: ToolDefinition; showParameters: boolean; }) { return ( @@ -231,11 +255,11 @@ function ToolRequirementsSection({ hasSecrets, showSecrets, }: { - tool: ToolSectionProps["tool"]; + tool: ToolSummary; showAdvanced: boolean; onToggleAdvanced: () => void; scopes: string[]; - secretsInfo: ToolSectionProps["tool"]["secretsInfo"]; + secretsInfo: ToolSummary["secretsInfo"]; hasScopes: boolean; hasSecrets: boolean; showSecrets: boolean; @@ -331,7 +355,7 @@ function ToolScopesDetailsSection({ showAuth, scopes, }: { - tool: ToolSectionProps["tool"]; + tool: ToolSummary; showAdvanced: boolean; hasScopes: boolean; showAuth: boolean; @@ -374,7 +398,7 @@ function ToolOutputSection({ tool, showOutput, }: { - tool: ToolSectionProps["tool"]; + tool: ToolDefinition; showOutput: boolean; }) { return ( @@ -422,7 +446,7 @@ function ToolOutputSection({ ); } -function ToolExampleSection({ tool }: { tool: ToolSectionProps["tool"] }) { +function ToolExampleSection({ tool }: { 
tool: ToolDefinition }) { return tool.codeExample ? (
@@ -434,18 +458,49 @@ function ToolExampleSection({ tool }: { tool: ToolSectionProps["tool"] }) { ); } +function ToolDetailLoading() { + return ( +
+ + Loading details... +
+ ); +} + +function ToolDetailError({ onRetry }: { onRetry: () => void }) { + return ( +
+ Couldn't load tool details. + +
+ ); +} + /** * ToolSection * - * Renders a single tool section with parameters, scopes, secrets, output, and example. + * Renders a single tool. The header, metadata and description render from the + * lightweight summary (and ship in the initial HTML); the parameters, scopes, + * secrets, output and code example load on expand from + * `/api/toolkit-data/[toolkitId]` so the initial document stays small enough + * for Googlebot's 2 MB crawl limit. Sections targeted by the URL hash expand + * automatically. */ export function ToolSection({ tool, + toolkitId, isSelected = false, showSelection = false, onToggleSelection, + forceExpanded = false, }: ToolSectionProps) { + const [expanded, setExpanded] = useState(forceExpanded); const [showAdvanced, setShowAdvanced] = useState(false); + const [reloadToken, setReloadToken] = useState(0); + const sectionRef = useRef(null); + const anchorId = toToolAnchorId(tool.qualifiedName); const scopes = tool.auth?.scopes ?? []; const secretsInfo = tool.secretsInfo ?? []; @@ -453,6 +508,30 @@ export function ToolSection({ const hasSecrets = (tool.secrets?.length ?? 0) > 0 || (tool.secretsInfo?.length ?? 0) > 0; + const detail = useToolDetail( + toolkitId, + tool.qualifiedName, + expanded, + reloadToken + ); + const fullTool: ToolDefinition | null = + detail.status === "ready" ? detail.tool : null; + + // Expand when the URL hash targets this tool (deep-link or sidebar/row click). + useEffect(() => { + if (forceExpanded) { + setExpanded(true); + } + }, [forceExpanded]); + + // A deep-linked section is short while its detail loads, so the browser's + // initial scroll lands above its final position — re-scroll once it grows. + useEffect(() => { + if (forceExpanded && fullTool) { + sectionRef.current?.scrollIntoView(); + } + }, [forceExpanded, fullTool]); + const showDescription = shouldRenderDefaultSection( tool.documentationChunks, "description" @@ -475,38 +554,70 @@ export function ToolSection({
- - - - setShowAdvanced(!showAdvanced)} - scopes={scopes} - secretsInfo={secretsInfo} - showAdvanced={showAdvanced} - showSecrets={showSecrets} + expanded={expanded} + onToggleExpanded={() => setExpanded((value) => !value)} tool={tool} /> - - - + + {expanded && ( + <> + {showSelection && ( + + )} + + + + {detail.status === "loading" && } + {detail.status === "error" && ( + setReloadToken((token) => token + 1)} + /> + )} + {fullTool && ( + <> + + setShowAdvanced(!showAdvanced)} + scopes={scopes} + secretsInfo={secretsInfo} + showAdvanced={showAdvanced} + showSecrets={showSecrets} + tool={tool} + /> + + + +
+ +
+ + )} + + )}
); } diff --git a/app/_components/toolkit-docs/components/toolkit-page.tsx b/app/_components/toolkit-docs/components/toolkit-page.tsx index ad55c6413..c672c3cd6 100644 --- a/app/_components/toolkit-docs/components/toolkit-page.tsx +++ b/app/_components/toolkit-docs/components/toolkit-page.tsx @@ -1,11 +1,11 @@ "use client"; import { Badge, Button } from "@arcadeai/design-system"; -import { ArrowDown, ArrowUp, KeyRound } from "lucide-react"; +import { ArrowDown, ArrowUp } from "lucide-react"; +import dynamic from "next/dynamic"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import ReactMarkdown from "react-markdown"; -import ScopePicker from "../../scope-picker"; import ToolFooter from "../../tool-footer"; import { getPackageName, @@ -27,11 +27,12 @@ const TOC_OBSERVER_THRESHOLD_MID = 0.5; // Scroll padding for TOC item visibility const TOC_SCROLL_PADDING = 20; +import { rehypeNeutralizeEmails } from "../lib/neutralize-emails"; import type { - ToolDefinition, ToolkitCategory, ToolkitPageProps, ToolkitType, + ToolSummary, } from "../types"; import { AvailableToolsTable, toToolAnchorId } from "./available-tools-table"; import { @@ -41,9 +42,17 @@ import { sortChunksDeterministically, } from "./documentation-chunk-renderer"; import { PageActionsBar } from "./page-actions"; -import { ToolSection } from "./tool-section"; import { ToolkitHeader } from "./toolkit-header"; +// The per-tool detail area (scope picker + tool sections) renders client-only +// (ssr: false) so it stays out of the server HTML — the crawlable summary +// (Available Tools table + sidebar) is what ships server-side. Keeps large +// toolkit pages under Googlebot's 2 MB crawl limit. +const ToolkitToolDetail = dynamic( + () => import("./toolkit-tool-detail").then((m) => m.ToolkitToolDetail), + { ssr: false } +); + /** * Floating buttons to scroll to top/bottom of the page. * Only shows when user has scrolled past a threshold. 
@@ -261,7 +270,7 @@ function ToolsOnThisPage({ selectedTools, documentationChunks = [], }: { - tools: ToolDefinition[]; + tools: ToolSummary[]; selectedTools: Set; documentationChunks?: ReadonlyArray<{ header?: string }>; }) { @@ -472,22 +481,16 @@ function ToolsOnThisPage({ >
{filteredTools.map((tool) => { - const hasSecrets = - (tool.secretsInfo?.length ?? 0) > 0 || - (tool.secrets?.length ?? 0) > 0; const toolId = toToolAnchorId(tool.qualifiedName); return ( setItemRef(toolId, el)} title={tool.qualifiedName} > - {tool.qualifiedName} - {hasSecrets && ( - - )} + {tool.qualifiedName} ); })} @@ -522,27 +525,19 @@ export function ToolkitPage({ data }: ToolkitPageProps) { }; }, []); + // Track the URL hash so the targeted tool section can auto-expand its + // (lazily-loaded) detail on deep-link landing and on sidebar/table clicks. + const [activeHash, setActiveHash] = useState(""); + useEffect(() => { + const update = () => setActiveHash(window.location.hash.slice(1)); + update(); + window.addEventListener("hashchange", update); + return () => window.removeEventListener("hashchange", update); + }, []); + const tools = data.tools ?? []; const documentationChunks = data.documentationChunks ?? []; const [selectedTools, setSelectedTools] = useState>(new Set()); - const selectionTools = tools.map((tool) => { - const secrets = - (tool.secrets ?? []).length > 0 - ? (tool.secrets ?? []) - : (tool.secretsInfo ?? []).map((secret) => secret.name); - - return { - name: tool.name, - scopes: tool.auth?.scopes ?? [], - secrets, - // Full tool definition for enhanced copy functionality - qualifiedName: tool.qualifiedName, - fullyQualifiedName: tool.fullyQualifiedName, - description: tool.description, - parameters: tool.parameters, - output: tool.output, - }; - }); const shouldShowSelection = tools.length > 0; // Compute tool stats @@ -646,7 +641,9 @@ export function ToolkitPage({ data }: ToolkitPageProps) { {data.summary && (
- {data.summary} + + {data.summary} +
)} @@ -735,28 +732,15 @@ export function ToolkitPage({ data }: ToolkitPageProps) { position="after" /> - {shouldShowSelection && ( -
- -
- )} - - {tools.map((tool) => ( - - ))} +
; + shouldShowSelection: boolean; + activeHash: string; + onToggleSelection: (toolName: string) => void; + onScopeSelectionChange: (toolNames: string[]) => void; +}; + +/** + * The per-tool detail area (scope picker + tool sections). Loaded via + * `next/dynamic({ ssr: false })`, so none of it is server-rendered — the server + * HTML carries only the crawlable summary (Available Tools table + sidebar), + * which keeps large toolkit pages under Googlebot's 2 MB crawl limit. Per-tool + * detail is fetched on expand. + */ +export function ToolkitToolDetail({ + tools, + toolkitId, + selectedTools, + shouldShowSelection, + activeHash, + onToggleSelection, + onScopeSelectionChange, +}: ToolkitToolDetailProps) { + const selectionTools = tools.map((tool) => { + const secrets = + (tool.secrets ?? []).length > 0 + ? (tool.secrets ?? []) + : (tool.secretsInfo ?? []).map((secret) => secret.name); + return { + name: tool.name, + scopes: tool.auth?.scopes ?? [], + secrets, + qualifiedName: tool.qualifiedName, + fullyQualifiedName: tool.fullyQualifiedName, + description: tool.description, + }; + }); + + return ( + <> + {shouldShowSelection && ( +
+ +
+ )} + + {tools.map((tool) => ( + + ))} + + ); +} diff --git a/app/_components/toolkit-docs/components/use-toolkit-detail.ts b/app/_components/toolkit-docs/components/use-toolkit-detail.ts new file mode 100644 index 000000000..641dababe --- /dev/null +++ b/app/_components/toolkit-docs/components/use-toolkit-detail.ts @@ -0,0 +1,103 @@ +"use client"; + +import { useEffect, useState } from "react"; +import type { ToolDefinition, ToolkitData } from "../types"; + +/** + * Lazy per-tool detail loading. + * + * Toolkit reference pages ship only a lightweight per-tool summary in the + * initial HTML (see `toToolkitSummary`); the heavy fields — parameters, output + * schema and code example — are fetched on demand when a tool section expands, + * from the existing `/api/toolkit-data/[toolkitId]` route. This keeps the + * server-rendered document under Googlebot's 2 MB crawl limit. + * + * One fetch per toolkit per page (the full toolkit JSON), shared across every + * tool section via a module-level cache and keyed by `qualifiedName`. + */ +type DetailMap = Map; + +const detailCache = new Map>(); + +export function loadToolkitDetail(toolkitId: string): Promise { + const cached = detailCache.get(toolkitId); + if (cached) { + return cached; + } + + const promise = fetch(`/api/toolkit-data/${encodeURIComponent(toolkitId)}`) + .then((response) => { + if (!response.ok) { + throw new Error(`Failed to load toolkit detail (${response.status})`); + } + return response.json() as Promise; + }) + .then((data) => { + const map: DetailMap = new Map(); + for (const tool of data.tools ?? []) { + map.set(tool.qualifiedName, tool); + } + return map; + }) + .catch((error) => { + // Evict so a remounted/retried section can fetch again. 
+ detailCache.delete(toolkitId); + throw error; + }); + + detailCache.set(toolkitId, promise); + return promise; +} + +export type ToolDetailState = + | { status: "idle" } + | { status: "loading" } + | { status: "error" } + | { status: "ready"; tool: ToolDefinition }; + +export function useToolDetail( + toolkitId: string, + qualifiedName: string, + enabled: boolean, + reloadToken = 0 +): ToolDetailState { + // Start idle — nothing is loading until a section is actually enabled + // (expanded). Reporting "loading" while disabled would misrepresent the state. + const [state, setState] = useState({ status: "idle" }); + + useEffect(() => { + if (!enabled) { + setState({ status: "idle" }); + return; + } + + let active = true; + setState({ status: "loading" }); + + // A bumped reloadToken is a manual retry: drop any cached (failed) result so + // the load actually re-fetches. + if (reloadToken > 0) { + detailCache.delete(toolkitId); + } + + loadToolkitDetail(toolkitId) + .then((map) => { + if (!active) { + return; + } + const tool = map.get(qualifiedName); + setState(tool ? { status: "ready", tool } : { status: "error" }); + }) + .catch(() => { + if (active) { + setState({ status: "error" }); + } + }); + + return () => { + active = false; + }; + }, [toolkitId, qualifiedName, enabled, reloadToken]); + + return state; +} diff --git a/app/_components/toolkit-docs/lib/neutralize-emails.tsx b/app/_components/toolkit-docs/lib/neutralize-emails.tsx new file mode 100644 index 000000000..88b545d66 --- /dev/null +++ b/app/_components/toolkit-docs/lib/neutralize-emails.tsx @@ -0,0 +1,101 @@ +import { Fragment, type ReactNode } from "react"; + +/** + * Matches the email-like text runs that Cloudflare's Email Obfuscation (Scrape + * Shield) rewrites into a `/cdn-cgi/l/email-protection` link — which 404s for + * crawlers and shows up in Ahrefs as "links to broken page". Mirrors + * Cloudflare's own detection: a local part, `@`, then a dotted domain with a + * TLD. 
Covers example emails and `user:password@host.tld` connection strings. + */ +const EMAIL_RE = /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/g; + +/** Offsets of each `@` that sits inside an email-like run. */ +function atBreakOffsets(text: string): number[] { + const breaks: number[] = []; + for (const match of text.matchAll(EMAIL_RE)) { + const start = match.index ?? 0; + breaks.push(start + match[0].indexOf("@")); + } + return breaks; +} + +/** + * Render `text`, inserting a zero-width `<wbr>` immediately before the `@` of + * any email-like run. `<wbr>` is invisible and excluded from copied text, so the + * displayed and copied value is unchanged — but the email is no longer a + * contiguous text node, so Cloudflare's edge scanner won't obfuscate it. + * + * Use this for plain-text fields rendered server-side (e.g. a tool description). + */ +export function splitEmails(text: string): ReactNode { + const breaks = atBreakOffsets(text); + if (breaks.length === 0) { + return text; + } + + const nodes: ReactNode[] = []; + let cursor = 0; + for (const offset of breaks) { + nodes.push( + {text.slice(cursor, offset)} + ); + nodes.push(); + cursor = offset; + } + nodes.push({text.slice(cursor)}); + return nodes; +} + +/** Structural view over hast nodes — avoids depending on `unist-util-visit`. 
*/ +type WalkNode = { + type: string; + value?: string; + tagName?: string; + properties?: Record; + children?: WalkNode[]; +}; + +function neutralizeTextValue(value: string): WalkNode[] { + const breaks = atBreakOffsets(value); + if (breaks.length === 0) { + return [{ type: "text", value }]; + } + + const out: WalkNode[] = []; + let cursor = 0; + for (const offset of breaks) { + out.push({ type: "text", value: value.slice(cursor, offset) }); + out.push({ type: "element", tagName: "wbr", properties: {}, children: [] }); + cursor = offset; + } + out.push({ type: "text", value: value.slice(cursor) }); + return out; +} + +function walk(node: WalkNode): void { + if (!node.children) { + return; + } + const next: WalkNode[] = []; + for (const child of node.children) { + if (child.type === "text" && typeof child.value === "string") { + next.push(...neutralizeTextValue(child.value)); + } else { + walk(child); + next.push(child); + } + } + node.children = next; +} + +/** + * rehype plugin (for react-markdown) that applies the same `<wbr>` break to + * email-like text inside rendered markdown — e.g. a toolkit `summary` that + * contains a `mongodb+srv://user:pass@host.tld` connection string. + * + * Typed structurally against the hast tree (a `WalkNode`) to avoid a direct + * dependency on `@types/hast`, which pnpm only exposes transitively. + */ +export function rehypeNeutralizeEmails() { + return (tree: WalkNode): void => walk(tree); +} diff --git a/app/_components/toolkit-docs/types/index.ts b/app/_components/toolkit-docs/types/index.ts index 53e20fce9..f091219b6 100644 --- a/app/_components/toolkit-docs/types/index.ts +++ b/app/_components/toolkit-docs/types/index.ts @@ -244,6 +244,16 @@ export type ToolDefinition = { codeExample?: ToolCodeExample; }; +/** + * A tool with its heavy detail fields stripped — everything needed to render the + * Available Tools table, the sidebar, and a collapsed tool section. 
The detail + * (parameters/output/codeExample) is fetched on expand as a full ToolDefinition. + */ +export type ToolSummary = Omit< + ToolDefinition, + "parameters" | "output" | "codeExample" +>; + // ============================================================================ // Toolkit Metadata Types // ============================================================================ @@ -356,6 +366,14 @@ export type ToolkitData = { generatedAt?: string; }; +/** + * Toolkit data with each tool's heavy detail fields stripped. This is what the + * client `ToolkitPage` receives, keeping the initial HTML/Flight payload small. + */ +export type ToolkitSummary = Omit & { + tools: ToolSummary[]; +}; + // ============================================================================ // Component Props Types // ============================================================================ @@ -436,14 +454,18 @@ export type DynamicCodeBlockProps = { * Props for ToolSection component */ export type ToolSectionProps = { - /** Tool definition */ - tool: ToolDefinition; + /** Tool summary (heavy detail fetched lazily on expand) */ + tool: ToolSummary; + /** Toolkit id, used to lazily fetch this tool's detail */ + toolkitId: string; /** Whether the tool is selected in the selected tools panel */ isSelected?: boolean; /** Show selection checkbox */ showSelection?: boolean; /** Toggle selection handler */ onToggleSelection?: (toolName: string) => void; + /** Expand on mount and keep expanded (e.g. 
when the URL hash targets it) */ + forceExpanded?: boolean; }; /** @@ -495,6 +517,6 @@ export type AvailableToolsTableProps = { * Props for ToolkitPage component */ export type ToolkitPageProps = { - /** Complete toolkit data */ - data: ToolkitData; + /** Toolkit data with per-tool detail stripped (fetched lazily on expand) */ + data: ToolkitSummary; }; diff --git a/app/_lib/toolkit-data.ts b/app/_lib/toolkit-data.ts index 9d94655dc..f0299c74e 100644 --- a/app/_lib/toolkit-data.ts +++ b/app/_lib/toolkit-data.ts @@ -1,8 +1,40 @@ import { readdir, readFile } from "node:fs/promises"; import { join } from "node:path"; -import type { ToolkitData } from "@/app/_components/toolkit-docs/types"; +import type { + ToolkitData, + ToolkitSummary, + ToolSummary, +} from "@/app/_components/toolkit-docs/types"; import { getToolkitSlug, normalizeToolkitId } from "./toolkit-slug"; +/** + * Strip each tool's heavy fields (parameters, output, codeExample) so the + * client `ToolkitPage` ships only a lightweight summary in the initial HTML. + * The detail is fetched on expand from `/api/toolkit-data/[toolkitId]`. This is + * what keeps the largest reference pages under Googlebot's 2 MB crawl limit. + * + * The `ToolSummary` annotation keeps required fields in sync: a new required + * `ToolDefinition` field errors here; optional ones must be copied by hand. 
+ */ +export function toToolkitSummary(data: ToolkitData): ToolkitSummary { + return { + ...data, + tools: data.tools.map( + (tool): ToolSummary => ({ + name: tool.name, + qualifiedName: tool.qualifiedName, + fullyQualifiedName: tool.fullyQualifiedName, + description: tool.description, + auth: tool.auth, + secrets: tool.secrets, + secretsInfo: tool.secretsInfo, + metadata: tool.metadata, + documentationChunks: tool.documentationChunks, + }) + ), + }; +} + export type ToolkitIndexEntry = { id: string; label: string; diff --git a/app/_lib/toolkit-markdown.ts b/app/_lib/toolkit-markdown.ts new file mode 100644 index 000000000..1c4629c9a --- /dev/null +++ b/app/_lib/toolkit-markdown.ts @@ -0,0 +1,108 @@ +import type { + ToolDefinition, + ToolkitData, + ToolParameter, +} from "@/app/_components/toolkit-docs/types"; + +/** + * Serialize full toolkit data to markdown for the "copy page as markdown" / + * agent view. Toolkit reference pages render per-tool detail client-only (to + * stay under Googlebot's 2 MB HTML limit), so the edge HTML→markdown view would + * miss parameters/output/examples — this builds them straight from ToolkitData, + * independent of the rendered HTML. + */ +const JSON_INDENT = 2; + +/** Collapse newlines and escape pipes so a value is safe inside a table cell. */ +function cell(value: string | null | undefined): string { + return (value ?? "") + .replace(/\s*\n\s*/g, " ") + .replace(/\|/g, "\\|") + .trim(); +} + +function parameterRow(param: ToolParameter): string { + const type = param.enum ? `${param.type} (enum)` : param.type; + const required = param.required ? 
"Yes" : "No"; + return `| \`${param.name}\` | ${cell(type)} | ${required} | ${cell(param.description)} |`; +} + +function exampleBlock(tool: ToolDefinition): string | null { + const example = tool.codeExample; + if (!example?.parameters) { + return null; + } + const input: Record = {}; + for (const [name, param] of Object.entries(example.parameters)) { + input[name] = param.value; + } + return [ + "**Example input**", + "", + "```json", + JSON.stringify(input, null, JSON_INDENT), + "```", + ].join("\n"); +} + +function toolBlock(tool: ToolDefinition): string { + const blocks: string[] = [`### ${tool.qualifiedName}`]; + + if (tool.description) { + blocks.push(tool.description.trim()); + } + + const scopes = tool.auth?.scopes ?? []; + if (scopes.length > 0) { + blocks.push( + `**Required OAuth scopes:** ${scopes.map((s) => `\`${s}\``).join(", ")}` + ); + } + + const secrets = tool.secrets ?? []; + if (secrets.length > 0) { + blocks.push(`**Secrets:** ${secrets.map((s) => `\`${s}\``).join(", ")}`); + } + + if (tool.parameters && tool.parameters.length > 0) { + const rows = [ + "| Name | Type | Required | Description |", + "| --- | --- | --- | --- |", + ...tool.parameters.map(parameterRow), + ]; + blocks.push(`**Parameters**\n\n${rows.join("\n")}`); + } else { + blocks.push("_No parameters._"); + } + + if (tool.output) { + const desc = tool.output.description ? ` — ${tool.output.description}` : ""; + blocks.push(`**Output:** \`${tool.output.type}\`${desc}`); + } + + const example = exampleBlock(tool); + if (example) { + blocks.push(example); + } + + return blocks.join("\n\n"); +} + +export function toToolkitMarkdown(data: ToolkitData): string { + const blocks: string[] = [`# ${data.label || data.id}`]; + + if (data.description) { + blocks.push(data.description.trim()); + } + if (data.summary) { + blocks.push(data.summary.trim()); + } + + const tools = data.tools ?? 
[]; + blocks.push(`## Tools (${tools.length})`); + for (const tool of tools) { + blocks.push(toolBlock(tool)); + } + + return `${blocks.join("\n\n")}\n`; +} diff --git a/app/_lib/toolkit-static-params.ts b/app/_lib/toolkit-static-params.ts index fb7748228..1e07c5c33 100644 --- a/app/_lib/toolkit-static-params.ts +++ b/app/_lib/toolkit-static-params.ts @@ -42,7 +42,7 @@ const DESIGN_SYSTEM_TOOLKITS_FOR_ROUTES: ToolkitCatalogEntry[] = const loadDesignSystemToolkits = async (): Promise => DESIGN_SYSTEM_TOOLKITS_FOR_ROUTES; -function normalizeCategory( +export function normalizeCategory( value: string | null | undefined ): IntegrationCategory { if (!value) { @@ -54,6 +54,25 @@ function normalizeCategory( : "others"; } +/** + * The canonical docs path for a toolkit: `/en/resources/integrations/<category>/ + * <slug>`. Category comes from the toolkit's own data (its true, linked + * category) — NOT the URL it was reached through. The dynamic `[toolkitId]` + * route accepts any category segment, so a page reached at a wrong-category + * alias (e.g. `development/pagerduty-api` when its category is `customer-support`) + * must canonicalize to the one generated, index-linked page instead of + * orphaning itself. Mirrors the slug + category logic in `listToolkitRoutes`. 
+ */ +export function getToolkitCanonicalPath(toolkit: { + id: string; + category?: string | null; + docsLink?: string | null; +}): string { + const category = normalizeCategory(toolkit.category); + const slug = getToolkitSlug({ id: toolkit.id, docsLink: toolkit.docsLink }); + return `/en/resources/integrations/${category}/${slug}`; +} + const DEFAULT_DATA_DIR = join( process.cwd(), "toolkit-docs-generator", diff --git a/app/api/toolkit-data/[toolkitId]/route.ts b/app/api/toolkit-data/[toolkitId]/route.ts index 9842b5563..42f1bb912 100644 --- a/app/api/toolkit-data/[toolkitId]/route.ts +++ b/app/api/toolkit-data/[toolkitId]/route.ts @@ -1,13 +1,18 @@ import { NextResponse } from "next/server"; import { readToolkitData } from "@/app/_lib/toolkit-data"; +import { toToolkitMarkdown } from "@/app/_lib/toolkit-markdown"; -// Cache headers for toolkit data responses +// Cache headers for toolkit data responses. This route content-negotiates on +// Accept (JSON vs. text/markdown), so Vary: Accept is required — without it a +// shared cache/CDN could serve one representation for a request that asked for +// the other. const CACHE_HEADERS = { "Cache-Control": "public, max-age=3600, stale-while-revalidate=86400", + Vary: "Accept", }; export async function GET( - _request: Request, + request: Request, { params }: { params: Promise<{ toolkitId: string }> } ) { try { @@ -30,6 +35,18 @@ export async function GET( ); } + // Content-negotiate markdown: the toolkit page renders per-tool detail + // client-only, so the "copy as markdown" / agent view builds full markdown + // straight from the data here instead of from the slimmed HTML. + if ((request.headers.get("accept") ?? 
"").includes("text/markdown")) { + return new NextResponse(toToolkitMarkdown(data), { + headers: { + ...CACHE_HEADERS, + "Content-Type": "text/markdown; charset=utf-8", + }, + }); + } + return NextResponse.json(data, { headers: CACHE_HEADERS }); } catch (error) { // biome-ignore lint/suspicious/noConsole: Server-side error logging is appropriate diff --git a/app/en/get-started/agent-frameworks/ag2/use-arcade-tools/page.mdx b/app/en/get-started/agent-frameworks/ag2/use-arcade-tools/page.mdx index eb7fec54c..dea5ec02c 100644 --- a/app/en/get-started/agent-frameworks/ag2/use-arcade-tools/page.mdx +++ b/app/en/get-started/agent-frameworks/ag2/use-arcade-tools/page.mdx @@ -338,7 +338,7 @@ uv run main.py You should see the agent responding to your prompts, handling tool calls and authorization requests. Here are some example prompts you can try: - "Show my unread emails" -- "Send an email to someone@example.com about scheduling a demo" +- "Send an email to your teammate about scheduling a demo" - "Summarize my latest 3 emails" - "Send a message in the #general Slack channel" diff --git a/app/en/get-started/agent-frameworks/springai/page.mdx b/app/en/get-started/agent-frameworks/springai/page.mdx index 204e9547e..bd8c95731 100644 --- a/app/en/get-started/agent-frameworks/springai/page.mdx +++ b/app/en/get-started/agent-frameworks/springai/page.mdx @@ -122,7 +122,7 @@ Set these environment variables before running the application: ```bash export OPENAI_API_KEY=your-openai-api-key export ARCADE_API_KEY=your-arcade-api-key -export ARCADE_USER_ID=your-email@example.com +export ARCADE_USER_ID=your-user-id ``` The `ARCADE_USER_ID` is your app's identifier for the current user (often the email you signed up with). Arcade uses this to track authorizations per user. 
diff --git a/app/en/get-started/quickstarts/call-tool-agent/page.mdx b/app/en/get-started/quickstarts/call-tool-agent/page.mdx index 0c56a689c..119b6cd21 100644 --- a/app/en/get-started/quickstarts/call-tool-agent/page.mdx +++ b/app/en/get-started/quickstarts/call-tool-agent/page.mdx @@ -521,7 +521,7 @@ logger.info( Run your Java application, you should see output similar to: ```text - Success! Check your email at brian.demers@gmail.com + Success! Check your inbox You just chained 3 tools together: 1. Searched Google News for stories about MCP URL mode elicitation diff --git a/app/en/guides/audit-logs/page.mdx b/app/en/guides/audit-logs/page.mdx index 681e56721..d08e045cb 100644 --- a/app/en/guides/audit-logs/page.mdx +++ b/app/en/guides/audit-logs/page.mdx @@ -32,7 +32,7 @@ curl -s "https://cloud.arcade.dev/api/v1/orgs/{org_id}/audit_logs?action=AUDIT_A "timestamp": "2026-02-24T12:34:56.789Z", "action": "AUDIT_ACTION_CREATED", "source": "AUDIT_SOURCE_API", - "display_name": "jane@example.com", + "display_name": "Jane Doe", "organization_id": "550e8400-e29b-41d4-a716-446655440000", "principal_type": "ACCOUNT", "resource_type": "RESOURCE_TYPE_API_KEY", @@ -108,7 +108,7 @@ Requires a valid user identity (API key or bearer token). 
The authenticated prin "client_ip": "203.0.113.42", "action": "AUDIT_ACTION_CREATED", "source": "AUDIT_SOURCE_DASHBOARD", - "display_name": "jane@example.com", + "display_name": "Jane Doe", "customer_id": "f47ac10b-58cc-4372-a567-0e02b2c3d479", "organization_id": "550e8400-e29b-41d4-a716-446655440000", "project_id": "6ba7b810-9dad-11d1-80b4-00c04fd430c8", diff --git a/app/en/guides/contextual-access/build-your-own/page.mdx b/app/en/guides/contextual-access/build-your-own/page.mdx index b9a88f989..8c4cb21e6 100644 --- a/app/en/guides/contextual-access/build-your-own/page.mdx +++ b/app/en/guides/contextual-access/build-your-own/page.mdx @@ -162,7 +162,7 @@ For the full list of valid `service_domains` and `operations` values, see [Add T } } }, - "inputs": { "query": "from:boss@company.com" }, + "inputs": { "query": "from:your-manager" }, "context": { "user_id": "user_123" } } ``` diff --git a/app/en/resources/integrations/_lib/toolkit-docs-page.tsx b/app/en/resources/integrations/_lib/toolkit-docs-page.tsx index b5eef55cf..c0522c52a 100644 --- a/app/en/resources/integrations/_lib/toolkit-docs-page.tsx +++ b/app/en/resources/integrations/_lib/toolkit-docs-page.tsx @@ -1,9 +1,10 @@ import type { Metadata } from "next"; import { notFound } from "next/navigation"; import { ToolkitPage } from "@/app/_components/toolkit-docs"; -import { readToolkitData } from "@/app/_lib/toolkit-data"; -import { getToolkitSlug, normalizeToolkitId } from "@/app/_lib/toolkit-slug"; +import { readToolkitData, toToolkitSummary } from "@/app/_lib/toolkit-data"; +import { normalizeToolkitId } from "@/app/_lib/toolkit-slug"; import { + getToolkitCanonicalPath, getToolkitStaticParamsForCategory, type IntegrationCategory, } from "@/app/_lib/toolkit-static-params"; @@ -43,21 +44,30 @@ export function createToolkitDocsPage(category: IntegrationCategory) { return {}; } - // Canonicalize to the toolkit's preferred slug so any alias that resolves - // to the same content (e.g. a normalized id vs. 
its docsLink slug) points - // search engines at one URL. - const canonicalSlug = getToolkitSlug({ + // Canonicalize to the toolkit's own category + slug, not the URL it was + // reached through. The dynamic [toolkitId] route accepts any category, so a + // wrong-category alias (e.g. development/pagerduty-api for a customer-support + // toolkit) must point at the one generated, index-linked page. + const canonical = getToolkitCanonicalPath({ id: data.id, + category: data.metadata?.category, docsLink: data.metadata?.docsLink, }); - return { + const metadata: Metadata = { title: data.label || data.id, description: data.description || "Generated MCP server documentation.", - alternates: { - canonical: `/en/resources/integrations/${category}/${canonicalSlug}`, - }, + alternates: { canonical }, }; + + // Hidden toolkits stay reachable via the dynamic route (and render as + // non-clickable cards in the index), but must not be indexed — otherwise + // their self-canonical is flagged as an orphan with no incoming links. + if (data.metadata?.isHidden) { + metadata.robots = { index: false }; + } + + return metadata; }; const Page = async ({ params }: { params: Promise }) => { @@ -68,7 +78,9 @@ export function createToolkitDocsPage(category: IntegrationCategory) { notFound(); } - return ; + // Pass a summary (per-tool detail stripped) so the heavy fields never enter + // the initial Flight payload — detail is fetched on expand. See MARTECH-17. 
+ return ; }; return { generateMetadata, generateStaticParams, Page }; diff --git a/tests/integration-index-links.test.ts b/tests/integration-index-links.test.ts index e56828c5b..de50d8f9d 100644 --- a/tests/integration-index-links.test.ts +++ b/tests/integration-index-links.test.ts @@ -14,6 +14,7 @@ import { type ToolkitWithDocsLink, } from "@/app/_lib/toolkit-slug"; import { + getToolkitCanonicalPath, INTEGRATION_CATEGORIES, listToolkitRoutes, listValidIntegrationLinks, @@ -345,4 +346,47 @@ describe("toolkit page canonical hygiene", () => { } expect(offenders).toEqual([]); }); + + // MARTECH-17: the dynamic [toolkitId] route accepts ANY category segment, so a + // toolkit is reachable at wrong-category aliases (a docsLink/category mismatch + // produced development/pagerduty-api). generateMetadata canonicalizes every + // such page to getToolkitCanonicalPath(data) — the toolkit's own category + + // slug — which must be a real, index-linked route, or the alias self-canonicals + // into an orphan ("Canonical URL has no incoming internal links"). Derived over + // ALL data files, not just static routes. Hidden toolkits are noindex (excluded). 
+ test( + "every non-hidden toolkit canonicalizes to a linked route (no orphan canonicals)", + async () => { + const dataDir = join(ROOT, "toolkit-docs-generator", "data", "toolkits"); + const files = readdirSync(dataDir).filter( + (file) => file.endsWith(".json") && file !== "index.json" + ); + const orphans: string[] = []; + for (const file of files) { + const parsed = JSON.parse( + await readFile(join(dataDir, file), "utf-8") + ) as { + id?: string; + metadata?: { + category?: string; + docsLink?: string; + isHidden?: boolean; + }; + }; + if (!parsed.id || parsed.metadata?.isHidden) { + continue; + } + const canonical = getToolkitCanonicalPath({ + id: parsed.id, + category: parsed.metadata?.category, + docsLink: parsed.metadata?.docsLink, + }); + if (!validLinks.has(canonical)) { + orphans.push(`${file} → ${canonical}`); + } + } + expect(orphans).toEqual([]); + }, + TIMEOUT + ); }); diff --git a/tests/neutralize-emails.test.tsx b/tests/neutralize-emails.test.tsx new file mode 100644 index 000000000..47e7411f9 --- /dev/null +++ b/tests/neutralize-emails.test.tsx @@ -0,0 +1,85 @@ +import { renderToStaticMarkup } from "react-dom/server"; +import { describe, expect, test } from "vitest"; +import { + rehypeNeutralizeEmails, + splitEmails, +} from "@/app/_components/toolkit-docs/lib/neutralize-emails"; + +/** + * MARTECH-17: Cloudflare's Email Obfuscation rewrites any contiguous email-like + * text in server HTML into a `/cdn-cgi/l/email-protection` link, which 404s for + * crawlers (Ahrefs "links to broken page"). The neutralizer inserts a zero-width + * `` before the `@` so the rendered text node is no longer a contiguous + * match — while the visible/copied value is unchanged. 
+ */ +const EMAIL = /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/; + +describe("splitEmails", () => { + test("returns the original string untouched when it has no email", () => { + expect(splitEmails("just some text")).toBe("just some text"); + }); + + test("breaks an email so the rendered HTML has no contiguous match", () => { + const html = renderToStaticMarkup( + {splitEmails("ping jane.doe@example.com today")} + ); + expect(html).toContain(" is removed. + expect(html.replace(//g, "")).toContain("jane.doe@example.com"); + }); + + test("breaks user:password@host connection-string credentials too", () => { + const html = renderToStaticMarkup( + + {splitEmails("mongodb+srv://user:pass@cluster.mongodb.net/db")} + + ); + expect(html).not.toMatch(EMAIL); + }); +}); + +type HastNode = { + type: string; + value?: string; + tagName?: string; + properties?: Record; + children?: HastNode[]; +}; + +const collectText = (node: HastNode): string => + node.type === "text" + ? (node.value ?? "") + : (node.children ?? []).map(collectText).join(""); + +const hasContiguousEmail = (node: HastNode): boolean => + node.type === "text" + ? EMAIL.test(node.value ?? "") + : (node.children ?? []).some(hasContiguousEmail); + +describe("rehypeNeutralizeEmails", () => { + test("splits email text nodes and inserts a , losslessly", () => { + const tree: HastNode = { + type: "root", + children: [ + { + type: "element", + tagName: "p", + properties: {}, + children: [{ type: "text", value: "reach user@example.com now" }], + }, + ], + }; + + rehypeNeutralizeEmails()(tree); + + const paragraph = tree.children?.[0]; + expect(paragraph?.children?.some((child) => child.tagName === "wbr")).toBe( + true + ); + // No single text node still holds a full email... + expect(hasContiguousEmail(tree)).toBe(false); + // ...and the concatenated text is unchanged. 
+ expect(collectText(tree)).toBe("reach user@example.com now"); + }); +}); diff --git a/tests/page-size.test.ts b/tests/page-size.test.ts new file mode 100644 index 000000000..089c3da89 --- /dev/null +++ b/tests/page-size.test.ts @@ -0,0 +1,61 @@ +import { readdirSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { describe, expect, test } from "vitest"; +import type { ToolkitData } from "@/app/_components/toolkit-docs/types"; +import { toToolkitSummary } from "@/app/_lib/toolkit-data"; + +/** + * MARTECH-17: auto-generated toolkit reference pages exceeded Googlebot's 2 MB + * uncompressed-HTML crawl limit. The fix ships only a per-tool *summary* in the + * initial document (`toToolkitSummary`) and lazy-loads detail on expand. This + * guards that the summary — the dominant part of the initial Flight payload — + * stays small and never regains the heavy fields, without a full `next build`. + */ +const DATA_DIR = join( + process.cwd(), + "toolkit-docs-generator", + "data", + "toolkits" +); + +// Well under the 2 MB live limit: the rendered DOM + Flight framing add overhead +// on top of the serialized summary, so leave headroom. +const SUMMARY_BUDGET_BYTES = 1.5 * 1024 * 1024; + +// The heavy per-tool fields that must be fetched lazily, never in the summary. 
+const HEAVY_FIELDS = ["parameters", "output", "codeExample"] as const; + +const toolkitFiles = readdirSync(DATA_DIR).filter( + (file) => file.endsWith(".json") && file !== "index.json" +); + +describe("toolkit summary page-size budget", () => { + test("there are toolkit data files to check", () => { + expect(toolkitFiles.length).toBeGreaterThan(0); + }); + + test.each(toolkitFiles)( + "%s: summary stays under budget and strips heavy fields", + (file) => { + const data = JSON.parse( + readFileSync(join(DATA_DIR, file), "utf-8") + ) as ToolkitData; + const summary = toToolkitSummary(data); + + const bytes = Buffer.byteLength(JSON.stringify(summary), "utf-8"); + expect( + bytes, + `${file} summary is ${(bytes / 1024 / 1024).toFixed(2)} MB` + ).toBeLessThan(SUMMARY_BUDGET_BYTES); + + for (const tool of summary.tools) { + for (const field of HEAVY_FIELDS) { + expect( + field in tool, + `${file}: ${tool.qualifiedName} still carries "${field}"` + ).toBe(false); + } + } + } + ); +}); diff --git a/tests/toolkit-markdown.test.ts b/tests/toolkit-markdown.test.ts new file mode 100644 index 000000000..456358329 --- /dev/null +++ b/tests/toolkit-markdown.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, test } from "vitest"; +import type { ToolkitData } from "@/app/_components/toolkit-docs/types"; +import { toToolkitMarkdown } from "@/app/_lib/toolkit-markdown"; + +/** + * MARTECH-17 follow-up: toolkit pages render per-tool detail client-only, so the + * edge HTML→markdown "copy page" view lost parameters/output/examples. The data + * route now builds markdown straight from ToolkitData; this guards that the + * serializer emits that detail. 
+ */ +const fixture: ToolkitData = { + id: "Demo", + label: "Demo", + version: "1.0.0", + description: "A demo toolkit.", + metadata: { + category: "development", + iconUrl: "", + isBYOC: false, + isPro: false, + type: "arcade", + docsLink: "", + }, + auth: null, + customImports: [], + subPages: [], + tools: [ + { + name: "DoThing", + qualifiedName: "Demo.DoThing", + fullyQualifiedName: "Demo.DoThing@1.0.0", + description: "Does a thing.", + parameters: [ + { + name: "recipient", + type: "string", + required: true, + description: "Who to do the thing for", + enum: null, + }, + ], + auth: { providerId: "demo", providerType: "oauth2", scopes: ["scope.a"] }, + secrets: ["API_KEY"], + secretsInfo: [], + output: { type: "json", description: "The result" }, + documentationChunks: [], + codeExample: { + toolName: "Demo.DoThing", + parameters: { + recipient: { value: "someone", type: "string", required: true }, + }, + requiresAuth: true, + }, + }, + ], +}; + +describe("toToolkitMarkdown", () => { + const md = toToolkitMarkdown(fixture); + + test("includes the toolkit header and tool heading", () => { + expect(md).toContain("# Demo"); + expect(md).toContain("### Demo.DoThing"); + }); + + test("includes per-tool detail missing from the slimmed HTML", () => { + expect(md).toContain( + "| `recipient` | string | Yes | Who to do the thing for |" + ); + expect(md).toContain("**Output:** `json` — The result"); + expect(md).toContain("scope.a"); + expect(md).toContain("API_KEY"); + expect(md).toContain("Example input"); + }); +});