From 667638b048416b98cf5b2b09aae90a48647dc3d9 Mon Sep 17 00:00:00 2001 From: Juan Ibarlucea Date: Tue, 16 Jun 2026 17:25:29 -0300 Subject: [PATCH 1/5] fix(seo): shrink toolkit pages under 2 MB + clear residual docs Ahrefs errors (MARTECH-17) Clears the three remaining docs.arcade.dev Ahrefs Site Audit error classes. Page size > 2 MB (20 pages): toolkit reference pages serialized the entire ToolkitData into the initial payload and server-rendered every tool. Now the server HTML carries only a crawlable summary (Available Tools table + names-only sidebar); per-tool detail (parameters/output/codeExample) is stripped via toToolkitSummary and lazy-fetched on expand from the existing /api/toolkit-data/[toolkitId] route. Tool sections + the scope picker render client-side after mount. github-api 10.3 MB -> 1.40 MB; all 20 pages now < 2 MB. Cloudflare email obfuscation (~16 URLs): a `<wbr>`-based neutralizer keeps email/connection-string patterns out of contiguous server-HTML text (so Cloudflare cannot rewrite them into /cdn-cgi/l/email-protection 404s), applied to toolkit summaries + tool descriptions. 5 guide example emails reworded. Canonical orphans (2 URLs): toolkit pages canonicalize to their own category + slug (so a wrong-category alias like development/pagerduty-api points at the linked customer-support page), and hidden toolkits (notion) emit robots noindex. Tests: page-size budget + email-neutralizer unit tests; integration-index guard extended to assert no toolkit canonicalizes to an orphan. 
Co-Authored-By: Claude Opus 4.8 --- .../components/available-tools-table.tsx | 7 +- .../toolkit-docs/components/tool-section.tsx | 260 +++++++++++++----- .../toolkit-docs/components/toolkit-page.tsx | 73 +++-- .../components/use-toolkit-detail.ts | 103 +++++++ .../toolkit-docs/lib/neutralize-emails.tsx | 101 +++++++ app/_components/toolkit-docs/types/index.ts | 39 ++- app/_lib/toolkit-data.ts | 34 ++- app/_lib/toolkit-static-params.ts | 21 +- .../ag2/use-arcade-tools/page.mdx | 2 +- .../agent-frameworks/springai/page.mdx | 2 +- .../quickstarts/call-tool-agent/page.mdx | 2 +- app/en/guides/audit-logs/page.mdx | 4 +- .../contextual-access/build-your-own/page.mdx | 2 +- .../integrations/_lib/toolkit-docs-page.tsx | 34 ++- tests/integration-index-links.test.ts | 44 +++ tests/neutralize-emails.test.tsx | 85 ++++++ tests/page-size.test.ts | 61 ++++ 17 files changed, 752 insertions(+), 122 deletions(-) create mode 100644 app/_components/toolkit-docs/components/use-toolkit-detail.ts create mode 100644 app/_components/toolkit-docs/lib/neutralize-emails.tsx create mode 100644 tests/neutralize-emails.test.tsx create mode 100644 tests/page-size.test.ts diff --git a/app/_components/toolkit-docs/components/available-tools-table.tsx b/app/_components/toolkit-docs/components/available-tools-table.tsx index 9bc24623f..7b6eebf76 100644 --- a/app/_components/toolkit-docs/components/available-tools-table.tsx +++ b/app/_components/toolkit-docs/components/available-tools-table.tsx @@ -22,6 +22,7 @@ import { import { useEffect, useMemo, useRef, useState } from "react"; import { SCROLLING_CELL } from "../constants"; +import { splitEmails } from "../lib/neutralize-emails"; import type { AvailableToolsTableProps, BehaviorFlagKey, @@ -585,7 +586,11 @@ function AvailableToolsRow({ - {tool.description ?? "No description provided."} + + {tool.description + ? 
splitEmails(tool.description) + : "No description provided."} + diff --git a/app/_components/toolkit-docs/components/tool-section.tsx b/app/_components/toolkit-docs/components/tool-section.tsx index 0f22427b9..22fd06a14 100644 --- a/app/_components/toolkit-docs/components/tool-section.tsx +++ b/app/_components/toolkit-docs/components/tool-section.tsx @@ -1,9 +1,17 @@ "use client"; import { Button } from "@arcadeai/design-system"; -import { Check, Copy, KeyRound, ShieldCheck } from "lucide-react"; -import { useCallback, useState } from "react"; -import type { ToolSectionProps } from "../types"; +import { + Check, + ChevronDown, + Copy, + KeyRound, + Loader2, + ShieldCheck, +} from "lucide-react"; +import { useCallback, useEffect, useRef, useState } from "react"; +import { splitEmails } from "../lib/neutralize-emails"; +import type { ToolDefinition, ToolSectionProps, ToolSummary } from "../types"; import { toToolAnchorId } from "./available-tools-table"; import { DocumentationChunkRenderer, @@ -13,11 +21,12 @@ import { DynamicCodeBlock } from "./dynamic-code-block"; import { ParametersTable } from "./parameters-table"; import { ScopesDisplay } from "./scopes-display"; import { ToolMetadataSection } from "./tool-metadata-section"; +import { useToolDetail } from "./use-toolkit-detail"; const COPY_FEEDBACK_MS = 2000; const JSON_PRETTY_PRINT_INDENT = 2; -function CopyToolButton({ tool }: { tool: ToolSectionProps["tool"] }) { +function CopyToolButton({ tool }: { tool: ToolDefinition }) { const [copied, setCopied] = useState(false); const handleCopy = useCallback(async () => { @@ -95,7 +104,7 @@ function CopyScopesButton({ scopes }: { scopes: string[] }) { } export function shouldRenderDefaultSection( - chunks: ToolSectionProps["tool"]["documentationChunks"], + chunks: ToolSummary["documentationChunks"], location: "description" | "parameters" | "auth" | "secrets" | "output" ): boolean { return !hasChunksAt(chunks, location, "replace"); @@ -103,23 +112,17 @@ export function 
shouldRenderDefaultSection( function ToolHeaderSection({ tool, - showSelection, - isSelected, - onToggleSelection, - hasScopes, - hasSecrets, anchorId, + expanded, + onToggleExpanded, }: { - tool: ToolSectionProps["tool"]; - showSelection: boolean; - isSelected: boolean; - onToggleSelection?: (toolName: string) => void; - hasScopes: boolean; - hasSecrets: boolean; + tool: ToolSummary; anchorId: string; + expanded: boolean; + onToggleExpanded: () => void; }) { return ( -
+
-
- - {showSelection && ( - - )} -
+
); } +function ToolSelectionToggle({ + tool, + isSelected, + onToggleSelection, + hasScopes, + hasSecrets, +}: { + tool: ToolSummary; + isSelected: boolean; + onToggleSelection?: (toolName: string) => void; + hasScopes: boolean; + hasSecrets: boolean; +}) { + return ( + + ); +} + function ToolDescriptionSection({ tool, showDescription, }: { - tool: ToolSectionProps["tool"]; + tool: ToolSummary; showDescription: boolean; }) { return ( @@ -172,7 +197,9 @@ function ToolDescriptionSection({ /> {showDescription && (

- {tool.description ?? "No description provided."} + {tool.description + ? splitEmails(tool.description) + : "No description provided."}

)} void; scopes: string[]; - secretsInfo: ToolSectionProps["tool"]["secretsInfo"]; + secretsInfo: ToolSummary["secretsInfo"]; hasScopes: boolean; hasSecrets: boolean; showSecrets: boolean; @@ -331,7 +358,7 @@ function ToolScopesDetailsSection({ showAuth, scopes, }: { - tool: ToolSectionProps["tool"]; + tool: ToolSummary; showAdvanced: boolean; hasScopes: boolean; showAuth: boolean; @@ -374,7 +401,7 @@ function ToolOutputSection({ tool, showOutput, }: { - tool: ToolSectionProps["tool"]; + tool: ToolDefinition; showOutput: boolean; }) { return ( @@ -422,7 +449,7 @@ function ToolOutputSection({ ); } -function ToolExampleSection({ tool }: { tool: ToolSectionProps["tool"] }) { +function ToolExampleSection({ tool }: { tool: ToolDefinition }) { return tool.codeExample ? (
@@ -434,18 +461,49 @@ function ToolExampleSection({ tool }: { tool: ToolSectionProps["tool"] }) { ); } +function ToolDetailLoading() { + return ( +
+ + Loading details... +
+ ); +} + +function ToolDetailError({ onRetry }: { onRetry: () => void }) { + return ( +
+ Couldn't load tool details. + +
+ ); +} + /** * ToolSection * - * Renders a single tool section with parameters, scopes, secrets, output, and example. + * Renders a single tool. The header, metadata and description render from the + * lightweight summary (and ship in the initial HTML); the parameters, scopes, + * secrets, output and code example load on expand from + * `/api/toolkit-data/[toolkitId]` so the initial document stays small enough + * for Googlebot's 2 MB crawl limit. Sections targeted by the URL hash expand + * automatically. */ export function ToolSection({ tool, + toolkitId, isSelected = false, showSelection = false, onToggleSelection, + forceExpanded = false, }: ToolSectionProps) { + const [expanded, setExpanded] = useState(forceExpanded); const [showAdvanced, setShowAdvanced] = useState(false); + const [reloadToken, setReloadToken] = useState(0); + const sectionRef = useRef(null); + const anchorId = toToolAnchorId(tool.qualifiedName); const scopes = tool.auth?.scopes ?? []; const secretsInfo = tool.secretsInfo ?? []; @@ -453,6 +511,30 @@ export function ToolSection({ const hasSecrets = (tool.secrets?.length ?? 0) > 0 || (tool.secretsInfo?.length ?? 0) > 0; + const detail = useToolDetail( + toolkitId, + tool.qualifiedName, + expanded, + reloadToken + ); + const fullTool: ToolDefinition | null = + detail.status === "ready" ? { ...tool, ...detail.detail } : null; + + // Expand when the URL hash targets this tool (deep-link or sidebar/row click). + useEffect(() => { + if (forceExpanded) { + setExpanded(true); + } + }, [forceExpanded]); + + // A deep-linked section is short while its detail loads, so the browser's + // initial scroll lands above its final position — re-scroll once it grows. + useEffect(() => { + if (forceExpanded && fullTool) { + sectionRef.current?.scrollIntoView(); + } + }, [forceExpanded, fullTool]); + const showDescription = shouldRenderDefaultSection( tool.documentationChunks, "description" @@ -475,38 +557,70 @@ export function ToolSection({
- - - - setShowAdvanced(!showAdvanced)} - scopes={scopes} - secretsInfo={secretsInfo} - showAdvanced={showAdvanced} - showSecrets={showSecrets} + expanded={expanded} + onToggleExpanded={() => setExpanded((value) => !value)} tool={tool} /> - - - + + {expanded && ( + <> + {showSelection && ( + + )} + + + + {detail.status === "loading" && } + {detail.status === "error" && ( + setReloadToken((token) => token + 1)} + /> + )} + {fullTool && ( + <> + + setShowAdvanced(!showAdvanced)} + scopes={scopes} + secretsInfo={secretsInfo} + showAdvanced={showAdvanced} + showSecrets={showSecrets} + tool={tool} + /> + + + +
+ +
+ + )} + + )}
); } diff --git a/app/_components/toolkit-docs/components/toolkit-page.tsx b/app/_components/toolkit-docs/components/toolkit-page.tsx index ad55c6413..572d1932a 100644 --- a/app/_components/toolkit-docs/components/toolkit-page.tsx +++ b/app/_components/toolkit-docs/components/toolkit-page.tsx @@ -1,7 +1,7 @@ "use client"; import { Badge, Button } from "@arcadeai/design-system"; -import { ArrowDown, ArrowUp, KeyRound } from "lucide-react"; +import { ArrowDown, ArrowUp } from "lucide-react"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import ReactMarkdown from "react-markdown"; @@ -27,11 +27,12 @@ const TOC_OBSERVER_THRESHOLD_MID = 0.5; // Scroll padding for TOC item visibility const TOC_SCROLL_PADDING = 20; +import { rehypeNeutralizeEmails } from "../lib/neutralize-emails"; import type { - ToolDefinition, ToolkitCategory, ToolkitPageProps, ToolkitType, + ToolSummary, } from "../types"; import { AvailableToolsTable, toToolAnchorId } from "./available-tools-table"; import { @@ -261,7 +262,7 @@ function ToolsOnThisPage({ selectedTools, documentationChunks = [], }: { - tools: ToolDefinition[]; + tools: ToolSummary[]; selectedTools: Set; documentationChunks?: ReadonlyArray<{ header?: string }>; }) { @@ -472,22 +473,16 @@ function ToolsOnThisPage({ >
{filteredTools.map((tool) => { - const hasSecrets = - (tool.secretsInfo?.length ?? 0) > 0 || - (tool.secrets?.length ?? 0) > 0; const toolId = toToolAnchorId(tool.qualifiedName); return ( setItemRef(toolId, el)} title={tool.qualifiedName} > - {tool.qualifiedName} - {hasSecrets && ( - - )} + {tool.qualifiedName} ); })} @@ -522,6 +517,25 @@ export function ToolkitPage({ data }: ToolkitPageProps) { }; }, []); + // Track the URL hash so the targeted tool section can auto-expand its + // (lazily-loaded) detail on deep-link landing and on sidebar/table clicks. + const [activeHash, setActiveHash] = useState(""); + useEffect(() => { + const update = () => setActiveHash(window.location.hash.slice(1)); + update(); + window.addEventListener("hashchange", update); + return () => window.removeEventListener("hashchange", update); + }, []); + + // The per-tool sections render after mount (client-only). The crawlable + // summary in the server HTML is the Available Tools table + the sidebar; for + // large toolkits (e.g. github-api, 818 tools) server-rendering every section + // would blow past Googlebot's 2 MB uncompressed-HTML crawl limit. + const [sectionsMounted, setSectionsMounted] = useState(false); + useEffect(() => { + setSectionsMounted(true); + }, []); + const tools = data.tools ?? []; const documentationChunks = data.documentationChunks ?? []; const [selectedTools, setSelectedTools] = useState>(new Set()); @@ -535,12 +549,13 @@ export function ToolkitPage({ data }: ToolkitPageProps) { name: tool.name, scopes: tool.auth?.scopes ?? [], secrets, - // Full tool definition for enhanced copy functionality + // Detail (parameters/output) is lazy-loaded per tool and not in the + // summary, so the "copy selected tools" JSON uses ScopePicker's basic + // {name, scopes, secrets} format. Per-tool "Copy definition" (in an + // expanded section) still has full fidelity. 
qualifiedName: tool.qualifiedName, fullyQualifiedName: tool.fullyQualifiedName, description: tool.description, - parameters: tool.parameters, - output: tool.output, }; }); const shouldShowSelection = tools.length > 0; @@ -646,7 +661,9 @@ export function ToolkitPage({ data }: ToolkitPageProps) { {data.summary && (
- {data.summary} + + {data.summary} +
)} @@ -735,7 +752,10 @@ export function ToolkitPage({ data }: ToolkitPageProps) { position="after" /> - {shouldShowSelection && ( + {/* Client-only: the scope picker is an interactive widget (no crawlable + content) that renders a per-tool grid, so keeping it out of the server + HTML saves significant bytes on large toolkits. */} + {sectionsMounted && shouldShowSelection && (
)} - {tools.map((tool) => ( - - ))} + {sectionsMounted && + tools.map((tool) => ( + + ))}
; + +const detailCache = new Map>(); + +function loadToolkitDetail(toolkitId: string): Promise { + const cached = detailCache.get(toolkitId); + if (cached) { + return cached; + } + + const promise = fetch(`/api/toolkit-data/${encodeURIComponent(toolkitId)}`) + .then((response) => { + if (!response.ok) { + throw new Error(`Failed to load toolkit detail (${response.status})`); + } + return response.json() as Promise; + }) + .then((data) => { + const map: DetailMap = new Map(); + for (const tool of data.tools ?? []) { + map.set(tool.qualifiedName, { + parameters: tool.parameters, + output: tool.output, + codeExample: tool.codeExample, + }); + } + return map; + }) + .catch((error) => { + // Evict so a remounted/retried section can fetch again. + detailCache.delete(toolkitId); + throw error; + }); + + detailCache.set(toolkitId, promise); + return promise; +} + +export type ToolDetailState = + | { status: "loading" } + | { status: "error" } + | { status: "ready"; detail: ToolDetail }; + +export function useToolDetail( + toolkitId: string, + qualifiedName: string, + enabled: boolean, + reloadToken = 0 +): ToolDetailState { + const [state, setState] = useState({ status: "loading" }); + + useEffect(() => { + if (!enabled) { + return; + } + + let active = true; + setState({ status: "loading" }); + + // A bumped reloadToken is a manual retry: drop any cached (failed) result so + // the load actually re-fetches. + if (reloadToken > 0) { + detailCache.delete(toolkitId); + } + + loadToolkitDetail(toolkitId) + .then((map) => { + if (!active) { + return; + } + const detail = map.get(qualifiedName); + setState(detail ? 
{ status: "ready", detail } : { status: "error" }); + }) + .catch(() => { + if (active) { + setState({ status: "error" }); + } + }); + + return () => { + active = false; + }; + }, [toolkitId, qualifiedName, enabled, reloadToken]); + + return state; +} diff --git a/app/_components/toolkit-docs/lib/neutralize-emails.tsx b/app/_components/toolkit-docs/lib/neutralize-emails.tsx new file mode 100644 index 000000000..88b545d66 --- /dev/null +++ b/app/_components/toolkit-docs/lib/neutralize-emails.tsx @@ -0,0 +1,101 @@ +import { Fragment, type ReactNode } from "react"; + +/** + * Matches the email-like text runs that Cloudflare's Email Obfuscation (Scrape + * Shield) rewrites into a `/cdn-cgi/l/email-protection` link — which 404s for + * crawlers and shows up in Ahrefs as "links to broken page". Mirrors + * Cloudflare's own detection: a local part, `@`, then a dotted domain with a + * TLD. Covers example emails and `user:password@host.tld` connection strings. + */ +const EMAIL_RE = /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/g; + +/** Offsets of each `@` that sits inside an email-like run. */ +function atBreakOffsets(text: string): number[] { + const breaks: number[] = []; + for (const match of text.matchAll(EMAIL_RE)) { + const start = match.index ?? 0; + breaks.push(start + match[0].indexOf("@")); + } + return breaks; +} + +/** + * Render `text`, inserting a zero-width `` immediately before the `@` of + * any email-like run. `` is invisible and excluded from copied text, so the + * displayed and copied value is unchanged — but the email is no longer a + * contiguous text node, so Cloudflare's edge scanner won't obfuscate it. + * + * Use this for plain-text fields rendered server-side (e.g. a tool description). 
+ */ +export function splitEmails(text: string): ReactNode { + const breaks = atBreakOffsets(text); + if (breaks.length === 0) { + return text; + } + + const nodes: ReactNode[] = []; + let cursor = 0; + for (const offset of breaks) { + nodes.push( + {text.slice(cursor, offset)} + ); + nodes.push(); + cursor = offset; + } + nodes.push({text.slice(cursor)}); + return nodes; +} + +/** Structural view over hast nodes — avoids depending on `unist-util-visit`. */ +type WalkNode = { + type: string; + value?: string; + tagName?: string; + properties?: Record; + children?: WalkNode[]; +}; + +function neutralizeTextValue(value: string): WalkNode[] { + const breaks = atBreakOffsets(value); + if (breaks.length === 0) { + return [{ type: "text", value }]; + } + + const out: WalkNode[] = []; + let cursor = 0; + for (const offset of breaks) { + out.push({ type: "text", value: value.slice(cursor, offset) }); + out.push({ type: "element", tagName: "wbr", properties: {}, children: [] }); + cursor = offset; + } + out.push({ type: "text", value: value.slice(cursor) }); + return out; +} + +function walk(node: WalkNode): void { + if (!node.children) { + return; + } + const next: WalkNode[] = []; + for (const child of node.children) { + if (child.type === "text" && typeof child.value === "string") { + next.push(...neutralizeTextValue(child.value)); + } else { + walk(child); + next.push(child); + } + } + node.children = next; +} + +/** + * rehype plugin (for react-markdown) that applies the same `` break to + * email-like text inside rendered markdown — e.g. a toolkit `summary` that + * contains a `mongodb+srv://user:pass@host.tld` connection string. + * + * Typed structurally against the hast tree (a `WalkNode`) to avoid a direct + * dependency on `@types/hast`, which pnpm only exposes transitively. 
+ */ +export function rehypeNeutralizeEmails() { + return (tree: WalkNode): void => walk(tree); +} diff --git a/app/_components/toolkit-docs/types/index.ts b/app/_components/toolkit-docs/types/index.ts index 53e20fce9..193aaf7d3 100644 --- a/app/_components/toolkit-docs/types/index.ts +++ b/app/_components/toolkit-docs/types/index.ts @@ -244,6 +244,25 @@ export type ToolDefinition = { codeExample?: ToolCodeExample; }; +/** + * The heavy per-tool fields that are lazily fetched (not in the initial HTML). + * Kept out of the server-rendered payload to stay under Googlebot's 2 MB limit. + */ +export type ToolDetail = Pick< + ToolDefinition, + "parameters" | "output" | "codeExample" +>; + +/** + * A tool with its heavy detail fields stripped — everything needed to render the + * Available Tools table, the sidebar, and a collapsed tool section. The detail + * (parameters/output/codeExample) is fetched on expand via {@link ToolDetail}. + */ +export type ToolSummary = Omit< + ToolDefinition, + "parameters" | "output" | "codeExample" +>; + // ============================================================================ // Toolkit Metadata Types // ============================================================================ @@ -356,6 +375,14 @@ export type ToolkitData = { generatedAt?: string; }; +/** + * Toolkit data with each tool's heavy detail fields stripped. This is what the + * client `ToolkitPage` receives, keeping the initial HTML/Flight payload small. 
+ */ +export type ToolkitSummary = Omit & { + tools: ToolSummary[]; +}; + // ============================================================================ // Component Props Types // ============================================================================ @@ -436,14 +463,18 @@ export type DynamicCodeBlockProps = { * Props for ToolSection component */ export type ToolSectionProps = { - /** Tool definition */ - tool: ToolDefinition; + /** Tool summary (heavy detail fetched lazily on expand) */ + tool: ToolSummary; + /** Toolkit id, used to lazily fetch this tool's detail */ + toolkitId: string; /** Whether the tool is selected in the selected tools panel */ isSelected?: boolean; /** Show selection checkbox */ showSelection?: boolean; /** Toggle selection handler */ onToggleSelection?: (toolName: string) => void; + /** Expand on mount and keep expanded (e.g. when the URL hash targets it) */ + forceExpanded?: boolean; }; /** @@ -495,6 +526,6 @@ export type AvailableToolsTableProps = { * Props for ToolkitPage component */ export type ToolkitPageProps = { - /** Complete toolkit data */ - data: ToolkitData; + /** Toolkit data with per-tool detail stripped (fetched lazily on expand) */ + data: ToolkitSummary; }; diff --git a/app/_lib/toolkit-data.ts b/app/_lib/toolkit-data.ts index 9d94655dc..f0299c74e 100644 --- a/app/_lib/toolkit-data.ts +++ b/app/_lib/toolkit-data.ts @@ -1,8 +1,40 @@ import { readdir, readFile } from "node:fs/promises"; import { join } from "node:path"; -import type { ToolkitData } from "@/app/_components/toolkit-docs/types"; +import type { + ToolkitData, + ToolkitSummary, + ToolSummary, +} from "@/app/_components/toolkit-docs/types"; import { getToolkitSlug, normalizeToolkitId } from "./toolkit-slug"; +/** + * Strip each tool's heavy fields (parameters, output, codeExample) so the + * client `ToolkitPage` ships only a lightweight summary in the initial HTML. + * The detail is fetched on expand from `/api/toolkit-data/[toolkitId]`. 
This is + * what keeps the largest reference pages under Googlebot's 2 MB crawl limit. + * + * The `ToolSummary` return annotation keeps this in sync with the type: if a + * non-heavy field is added to `ToolDefinition`, TypeScript flags the omission. + */ +export function toToolkitSummary(data: ToolkitData): ToolkitSummary { + return { + ...data, + tools: data.tools.map( + (tool): ToolSummary => ({ + name: tool.name, + qualifiedName: tool.qualifiedName, + fullyQualifiedName: tool.fullyQualifiedName, + description: tool.description, + auth: tool.auth, + secrets: tool.secrets, + secretsInfo: tool.secretsInfo, + metadata: tool.metadata, + documentationChunks: tool.documentationChunks, + }) + ), + }; +} + export type ToolkitIndexEntry = { id: string; label: string; diff --git a/app/_lib/toolkit-static-params.ts b/app/_lib/toolkit-static-params.ts index fb7748228..1e07c5c33 100644 --- a/app/_lib/toolkit-static-params.ts +++ b/app/_lib/toolkit-static-params.ts @@ -42,7 +42,7 @@ const DESIGN_SYSTEM_TOOLKITS_FOR_ROUTES: ToolkitCatalogEntry[] = const loadDesignSystemToolkits = async (): Promise => DESIGN_SYSTEM_TOOLKITS_FOR_ROUTES; -function normalizeCategory( +export function normalizeCategory( value: string | null | undefined ): IntegrationCategory { if (!value) { @@ -54,6 +54,25 @@ function normalizeCategory( : "others"; } +/** + * The canonical docs path for a toolkit: `/en/resources/integrations// + * `. Category comes from the toolkit's own data (its true, linked + * category) — NOT the URL it was reached through. The dynamic `[toolkitId]` + * route accepts any category segment, so a page reached at a wrong-category + * alias (e.g. `development/pagerduty-api` when its category is `customer-support`) + * must canonicalize to the one generated, index-linked page instead of + * orphaning itself. Mirrors the slug + category logic in `listToolkitRoutes`. 
+ */ +export function getToolkitCanonicalPath(toolkit: { + id: string; + category?: string | null; + docsLink?: string | null; +}): string { + const category = normalizeCategory(toolkit.category); + const slug = getToolkitSlug({ id: toolkit.id, docsLink: toolkit.docsLink }); + return `/en/resources/integrations/${category}/${slug}`; +} + const DEFAULT_DATA_DIR = join( process.cwd(), "toolkit-docs-generator", diff --git a/app/en/get-started/agent-frameworks/ag2/use-arcade-tools/page.mdx b/app/en/get-started/agent-frameworks/ag2/use-arcade-tools/page.mdx index eb7fec54c..dea5ec02c 100644 --- a/app/en/get-started/agent-frameworks/ag2/use-arcade-tools/page.mdx +++ b/app/en/get-started/agent-frameworks/ag2/use-arcade-tools/page.mdx @@ -338,7 +338,7 @@ uv run main.py You should see the agent responding to your prompts, handling tool calls and authorization requests. Here are some example prompts you can try: - "Show my unread emails" -- "Send an email to someone@example.com about scheduling a demo" +- "Send an email to your teammate about scheduling a demo" - "Summarize my latest 3 emails" - "Send a message in the #general Slack channel" diff --git a/app/en/get-started/agent-frameworks/springai/page.mdx b/app/en/get-started/agent-frameworks/springai/page.mdx index 204e9547e..bd8c95731 100644 --- a/app/en/get-started/agent-frameworks/springai/page.mdx +++ b/app/en/get-started/agent-frameworks/springai/page.mdx @@ -122,7 +122,7 @@ Set these environment variables before running the application: ```bash export OPENAI_API_KEY=your-openai-api-key export ARCADE_API_KEY=your-arcade-api-key -export ARCADE_USER_ID=your-email@example.com +export ARCADE_USER_ID=your-user-id ``` The `ARCADE_USER_ID` is your app's identifier for the current user (often the email you signed up with). Arcade uses this to track authorizations per user. 
diff --git a/app/en/get-started/quickstarts/call-tool-agent/page.mdx b/app/en/get-started/quickstarts/call-tool-agent/page.mdx index 0c56a689c..119b6cd21 100644 --- a/app/en/get-started/quickstarts/call-tool-agent/page.mdx +++ b/app/en/get-started/quickstarts/call-tool-agent/page.mdx @@ -521,7 +521,7 @@ logger.info( Run your Java application, you should see output similar to: ```text - Success! Check your email at brian.demers@gmail.com + Success! Check your inbox You just chained 3 tools together: 1. Searched Google News for stories about MCP URL mode elicitation diff --git a/app/en/guides/audit-logs/page.mdx b/app/en/guides/audit-logs/page.mdx index 681e56721..d08e045cb 100644 --- a/app/en/guides/audit-logs/page.mdx +++ b/app/en/guides/audit-logs/page.mdx @@ -32,7 +32,7 @@ curl -s "https://cloud.arcade.dev/api/v1/orgs/{org_id}/audit_logs?action=AUDIT_A "timestamp": "2026-02-24T12:34:56.789Z", "action": "AUDIT_ACTION_CREATED", "source": "AUDIT_SOURCE_API", - "display_name": "jane@example.com", + "display_name": "Jane Doe", "organization_id": "550e8400-e29b-41d4-a716-446655440000", "principal_type": "ACCOUNT", "resource_type": "RESOURCE_TYPE_API_KEY", @@ -108,7 +108,7 @@ Requires a valid user identity (API key or bearer token). 
The authenticated prin "client_ip": "203.0.113.42", "action": "AUDIT_ACTION_CREATED", "source": "AUDIT_SOURCE_DASHBOARD", - "display_name": "jane@example.com", + "display_name": "Jane Doe", "customer_id": "f47ac10b-58cc-4372-a567-0e02b2c3d479", "organization_id": "550e8400-e29b-41d4-a716-446655440000", "project_id": "6ba7b810-9dad-11d1-80b4-00c04fd430c8", diff --git a/app/en/guides/contextual-access/build-your-own/page.mdx b/app/en/guides/contextual-access/build-your-own/page.mdx index b9a88f989..8c4cb21e6 100644 --- a/app/en/guides/contextual-access/build-your-own/page.mdx +++ b/app/en/guides/contextual-access/build-your-own/page.mdx @@ -162,7 +162,7 @@ For the full list of valid `service_domains` and `operations` values, see [Add T } } }, - "inputs": { "query": "from:boss@company.com" }, + "inputs": { "query": "from:your-manager" }, "context": { "user_id": "user_123" } } ``` diff --git a/app/en/resources/integrations/_lib/toolkit-docs-page.tsx b/app/en/resources/integrations/_lib/toolkit-docs-page.tsx index b5eef55cf..c0522c52a 100644 --- a/app/en/resources/integrations/_lib/toolkit-docs-page.tsx +++ b/app/en/resources/integrations/_lib/toolkit-docs-page.tsx @@ -1,9 +1,10 @@ import type { Metadata } from "next"; import { notFound } from "next/navigation"; import { ToolkitPage } from "@/app/_components/toolkit-docs"; -import { readToolkitData } from "@/app/_lib/toolkit-data"; -import { getToolkitSlug, normalizeToolkitId } from "@/app/_lib/toolkit-slug"; +import { readToolkitData, toToolkitSummary } from "@/app/_lib/toolkit-data"; +import { normalizeToolkitId } from "@/app/_lib/toolkit-slug"; import { + getToolkitCanonicalPath, getToolkitStaticParamsForCategory, type IntegrationCategory, } from "@/app/_lib/toolkit-static-params"; @@ -43,21 +44,30 @@ export function createToolkitDocsPage(category: IntegrationCategory) { return {}; } - // Canonicalize to the toolkit's preferred slug so any alias that resolves - // to the same content (e.g. a normalized id vs. 
its docsLink slug) points - // search engines at one URL. - const canonicalSlug = getToolkitSlug({ + // Canonicalize to the toolkit's own category + slug, not the URL it was + // reached through. The dynamic [toolkitId] route accepts any category, so a + // wrong-category alias (e.g. development/pagerduty-api for a customer-support + // toolkit) must point at the one generated, index-linked page. + const canonical = getToolkitCanonicalPath({ id: data.id, + category: data.metadata?.category, docsLink: data.metadata?.docsLink, }); - return { + const metadata: Metadata = { title: data.label || data.id, description: data.description || "Generated MCP server documentation.", - alternates: { - canonical: `/en/resources/integrations/${category}/${canonicalSlug}`, - }, + alternates: { canonical }, }; + + // Hidden toolkits stay reachable via the dynamic route (and render as + // non-clickable cards in the index), but must not be indexed — otherwise + // their self-canonical is flagged as an orphan with no incoming links. + if (data.metadata?.isHidden) { + metadata.robots = { index: false }; + } + + return metadata; }; const Page = async ({ params }: { params: Promise }) => { @@ -68,7 +78,9 @@ export function createToolkitDocsPage(category: IntegrationCategory) { notFound(); } - return ; + // Pass a summary (per-tool detail stripped) so the heavy fields never enter + // the initial Flight payload — detail is fetched on expand. See MARTECH-17. 
+ return ; }; return { generateMetadata, generateStaticParams, Page }; diff --git a/tests/integration-index-links.test.ts b/tests/integration-index-links.test.ts index e56828c5b..de50d8f9d 100644 --- a/tests/integration-index-links.test.ts +++ b/tests/integration-index-links.test.ts @@ -14,6 +14,7 @@ import { type ToolkitWithDocsLink, } from "@/app/_lib/toolkit-slug"; import { + getToolkitCanonicalPath, INTEGRATION_CATEGORIES, listToolkitRoutes, listValidIntegrationLinks, @@ -345,4 +346,47 @@ describe("toolkit page canonical hygiene", () => { } expect(offenders).toEqual([]); }); + + // MARTECH-17: the dynamic [toolkitId] route accepts ANY category segment, so a + // toolkit is reachable at wrong-category aliases (a docsLink/category mismatch + // produced development/pagerduty-api). generateMetadata canonicalizes every + // such page to getToolkitCanonicalPath(data) — the toolkit's own category + + // slug — which must be a real, index-linked route, or the alias self-canonicals + // into an orphan ("Canonical URL has no incoming internal links"). Derived over + // ALL data files, not just static routes. Hidden toolkits are noindex (excluded). 
+ test( "every non-hidden toolkit canonicalizes to a linked route (no orphan canonicals)", async () => { const dataDir = join(ROOT, "toolkit-docs-generator", "data", "toolkits"); const files = readdirSync(dataDir).filter( (file) => file.endsWith(".json") && file !== "index.json" ); const orphans: string[] = []; for (const file of files) { const parsed = JSON.parse( await readFile(join(dataDir, file), "utf-8") ) as { id?: string; metadata?: { category?: string; docsLink?: string; isHidden?: boolean; }; }; if (!parsed.id || parsed.metadata?.isHidden) { continue; } const canonical = getToolkitCanonicalPath({ id: parsed.id, category: parsed.metadata?.category, docsLink: parsed.metadata?.docsLink, }); if (!validLinks.has(canonical)) { orphans.push(`${file} → ${canonical}`); } } expect(orphans).toEqual([]); }, TIMEOUT ); }); diff --git a/tests/neutralize-emails.test.tsx b/tests/neutralize-emails.test.tsx new file mode 100644 index 000000000..47e7411f9 --- /dev/null +++ b/tests/neutralize-emails.test.tsx @@ -0,0 +1,85 @@ +import { renderToStaticMarkup } from "react-dom/server"; +import { describe, expect, test } from "vitest"; +import { + rehypeNeutralizeEmails, + splitEmails, +} from "@/app/_components/toolkit-docs/lib/neutralize-emails"; + +/** + * MARTECH-17: Cloudflare's Email Obfuscation rewrites any contiguous email-like + * text in server HTML into a `/cdn-cgi/l/email-protection` link, which 404s for + * crawlers (Ahrefs "links to broken page"). The neutralizer inserts a zero-width + * `<wbr>` before the `@` so the rendered text node is no longer a contiguous + * match — while the visible/copied value is unchanged. 
+ */ +const EMAIL = /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/; + +describe("splitEmails", () => { + test("returns the original string untouched when it has no email", () => { + expect(splitEmails("just some text")).toBe("just some text"); + }); + + test("breaks an email so the rendered HTML has no contiguous match", () => { + const html = renderToStaticMarkup( + {splitEmails("ping jane.doe@example.com today")} + ); + expect(html).toContain(" is removed. + expect(html.replace(//g, "")).toContain("jane.doe@example.com"); + }); + + test("breaks user:password@host connection-string credentials too", () => { + const html = renderToStaticMarkup( + + {splitEmails("mongodb+srv://user:pass@cluster.mongodb.net/db")} + + ); + expect(html).not.toMatch(EMAIL); + }); +}); + +type HastNode = { + type: string; + value?: string; + tagName?: string; + properties?: Record; + children?: HastNode[]; +}; + +const collectText = (node: HastNode): string => + node.type === "text" + ? (node.value ?? "") + : (node.children ?? []).map(collectText).join(""); + +const hasContiguousEmail = (node: HastNode): boolean => + node.type === "text" + ? EMAIL.test(node.value ?? "") + : (node.children ?? []).some(hasContiguousEmail); + +describe("rehypeNeutralizeEmails", () => { + test("splits email text nodes and inserts a , losslessly", () => { + const tree: HastNode = { + type: "root", + children: [ + { + type: "element", + tagName: "p", + properties: {}, + children: [{ type: "text", value: "reach user@example.com now" }], + }, + ], + }; + + rehypeNeutralizeEmails()(tree); + + const paragraph = tree.children?.[0]; + expect(paragraph?.children?.some((child) => child.tagName === "wbr")).toBe( + true + ); + // No single text node still holds a full email... + expect(hasContiguousEmail(tree)).toBe(false); + // ...and the concatenated text is unchanged. 
+ expect(collectText(tree)).toBe("reach user@example.com now"); + }); +}); diff --git a/tests/page-size.test.ts b/tests/page-size.test.ts new file mode 100644 index 000000000..089c3da89 --- /dev/null +++ b/tests/page-size.test.ts @@ -0,0 +1,61 @@ +import { readdirSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import { describe, expect, test } from "vitest"; +import type { ToolkitData } from "@/app/_components/toolkit-docs/types"; +import { toToolkitSummary } from "@/app/_lib/toolkit-data"; + +/** + * MARTECH-17: auto-generated toolkit reference pages exceeded Googlebot's 2 MB + * uncompressed-HTML crawl limit. The fix ships only a per-tool *summary* in the + * initial document (`toToolkitSummary`) and lazy-loads detail on expand. This + * guards that the summary — the dominant part of the initial Flight payload — + * stays small and never regains the heavy fields, without a full `next build`. + */ +const DATA_DIR = join( + process.cwd(), + "toolkit-docs-generator", + "data", + "toolkits" +); + +// Well under the 2 MB live limit: the rendered DOM + Flight framing add overhead +// on top of the serialized summary, so leave headroom. +const SUMMARY_BUDGET_BYTES = 1.5 * 1024 * 1024; + +// The heavy per-tool fields that must be fetched lazily, never in the summary. 
+const HEAVY_FIELDS = ["parameters", "output", "codeExample"] as const; + +const toolkitFiles = readdirSync(DATA_DIR).filter( + (file) => file.endsWith(".json") && file !== "index.json" +); + +describe("toolkit summary page-size budget", () => { + test("there are toolkit data files to check", () => { + expect(toolkitFiles.length).toBeGreaterThan(0); + }); + + test.each(toolkitFiles)( + "%s: summary stays under budget and strips heavy fields", + (file) => { + const data = JSON.parse( + readFileSync(join(DATA_DIR, file), "utf-8") + ) as ToolkitData; + const summary = toToolkitSummary(data); + + const bytes = Buffer.byteLength(JSON.stringify(summary), "utf-8"); + expect( + bytes, + `${file} summary is ${(bytes / 1024 / 1024).toFixed(2)} MB` + ).toBeLessThan(SUMMARY_BUDGET_BYTES); + + for (const tool of summary.tools) { + for (const field of HEAVY_FIELDS) { + expect( + field in tool, + `${file}: ${tool.qualifiedName} still carries "${field}"` + ).toBe(false); + } + } + } + ); +}); From d00c6e3c067b5d1ab614ba4f1dec5e32672ea2c0 Mon Sep 17 00:00:00 2001 From: Juan Ibarlucea Date: Tue, 16 Jun 2026 17:54:09 -0300 Subject: [PATCH 2/5] refactor(seo): simplify lazy toolkit detail (ponytail review) - Cache the full ToolDefinition the /api/toolkit-data response already returns instead of Pick-ing a ToolDetail subset and re-merging it with the summary on expand. Drops the ToolDetail type, the per-field map, and the spread merge; ToolSection just uses the fetched tool. - Drop splitEmails() on the per-tool description: that section renders client-only (gated by sectionsMounted + expanded), so it's never in the server HTML Cloudflare scans. The SSR Available Tools table still neutralizes. 
Co-Authored-By: Claude Opus 4.8 --- .../toolkit-docs/components/tool-section.tsx | 7 ++----- .../components/use-toolkit-detail.ts | 16 ++++++---------- app/_components/toolkit-docs/types/index.ts | 11 +---------- 3 files changed, 9 insertions(+), 25 deletions(-) diff --git a/app/_components/toolkit-docs/components/tool-section.tsx b/app/_components/toolkit-docs/components/tool-section.tsx index 22fd06a14..eda7aaa0c 100644 --- a/app/_components/toolkit-docs/components/tool-section.tsx +++ b/app/_components/toolkit-docs/components/tool-section.tsx @@ -10,7 +10,6 @@ import { ShieldCheck, } from "lucide-react"; import { useCallback, useEffect, useRef, useState } from "react"; -import { splitEmails } from "../lib/neutralize-emails"; import type { ToolDefinition, ToolSectionProps, ToolSummary } from "../types"; import { toToolAnchorId } from "./available-tools-table"; import { @@ -197,9 +196,7 @@ function ToolDescriptionSection({ /> {showDescription && (

- {tool.description - ? splitEmails(tool.description) - : "No description provided."} + {tool.description ?? "No description provided."}

)} { diff --git a/app/_components/toolkit-docs/components/use-toolkit-detail.ts b/app/_components/toolkit-docs/components/use-toolkit-detail.ts index 586c8cfea..18bea3479 100644 --- a/app/_components/toolkit-docs/components/use-toolkit-detail.ts +++ b/app/_components/toolkit-docs/components/use-toolkit-detail.ts @@ -1,7 +1,7 @@ "use client"; import { useEffect, useState } from "react"; -import type { ToolDetail, ToolkitData } from "../types"; +import type { ToolDefinition, ToolkitData } from "../types"; /** * Lazy per-tool detail loading. @@ -15,7 +15,7 @@ import type { ToolDetail, ToolkitData } from "../types"; * One fetch per toolkit per page (the full toolkit JSON), shared across every * tool section via a module-level cache and keyed by `qualifiedName`. */ -type DetailMap = Map; +type DetailMap = Map; const detailCache = new Map>(); @@ -35,11 +35,7 @@ function loadToolkitDetail(toolkitId: string): Promise { .then((data) => { const map: DetailMap = new Map(); for (const tool of data.tools ?? []) { - map.set(tool.qualifiedName, { - parameters: tool.parameters, - output: tool.output, - codeExample: tool.codeExample, - }); + map.set(tool.qualifiedName, tool); } return map; }) @@ -56,7 +52,7 @@ function loadToolkitDetail(toolkitId: string): Promise { export type ToolDetailState = | { status: "loading" } | { status: "error" } - | { status: "ready"; detail: ToolDetail }; + | { status: "ready"; tool: ToolDefinition }; export function useToolDetail( toolkitId: string, @@ -85,8 +81,8 @@ export function useToolDetail( if (!active) { return; } - const detail = map.get(qualifiedName); - setState(detail ? { status: "ready", detail } : { status: "error" }); + const tool = map.get(qualifiedName); + setState(tool ? 
{ status: "ready", tool } : { status: "error" }); }) .catch(() => { if (active) { diff --git a/app/_components/toolkit-docs/types/index.ts b/app/_components/toolkit-docs/types/index.ts index 193aaf7d3..f091219b6 100644 --- a/app/_components/toolkit-docs/types/index.ts +++ b/app/_components/toolkit-docs/types/index.ts @@ -244,19 +244,10 @@ export type ToolDefinition = { codeExample?: ToolCodeExample; }; -/** - * The heavy per-tool fields that are lazily fetched (not in the initial HTML). - * Kept out of the server-rendered payload to stay under Googlebot's 2 MB limit. - */ -export type ToolDetail = Pick< - ToolDefinition, - "parameters" | "output" | "codeExample" ->; - /** * A tool with its heavy detail fields stripped — everything needed to render the * Available Tools table, the sidebar, and a collapsed tool section. The detail - * (parameters/output/codeExample) is fetched on expand via {@link ToolDetail}. + * (parameters/output/codeExample) is fetched on expand as a full ToolDefinition. */ export type ToolSummary = Omit< ToolDefinition, From 2d495509c0ca14f9b1188bc0ee24941e0ed16293 Mon Sep 17 00:00:00 2001 From: Juan Ibarlucea Date: Wed, 17 Jun 2026 15:15:35 -0300 Subject: [PATCH 3/5] refactor(seo): address review on toolkit lazy-load (copy fidelity, dynamic, idle) Review feedback on PR #1023 (sdserranog): - ScopePicker "Copy tools JSON" lazily fetches full per-tool detail so the copied JSON keeps parameters/output and always uses the qualified tool name. Previously, dropping detail from the summary forced the basic fallback, which emitted the short name (breaking downstream tool configs). Unifies the copy buttons via an optional async getText. - Tool sections + scope picker render via next/dynamic({ ssr: false }) instead of a manual sectionsMounted flag. - useToolDetail starts "idle" rather than reporting "loading" while collapsed. 
Co-Authored-By: Claude Opus 4.8 --- app/_components/scope-picker.tsx | 118 ++++++++++++------ .../toolkit-docs/components/toolkit-page.tsx | 77 +++--------- .../components/toolkit-tool-detail.tsx | 79 ++++++++++++ .../components/use-toolkit-detail.ts | 8 +- 4 files changed, 187 insertions(+), 95 deletions(-) create mode 100644 app/_components/toolkit-docs/components/toolkit-tool-detail.tsx diff --git a/app/_components/scope-picker.tsx b/app/_components/scope-picker.tsx index ab031c40c..4e6e6535b 100644 --- a/app/_components/scope-picker.tsx +++ b/app/_components/scope-picker.tsx @@ -4,6 +4,7 @@ import { Button } from "@arcadeai/design-system"; import { Check, Copy, KeyRound, ShieldCheck, Wrench } from "lucide-react"; import posthog from "posthog-js"; import { useCallback, useEffect, useMemo, useState } from "react"; +import { loadToolkitDetail } from "./toolkit-docs/components/use-toolkit-detail"; const COPY_FEEDBACK_MS = 2000; @@ -47,20 +48,33 @@ type ScopePickerProps = { tools: Tool[]; selectedTools?: string[]; onSelectedToolsChange?: (selectedTools: string[]) => void; + /** Toolkit id — lets "Copy tools JSON" lazily fetch full per-tool detail. */ + toolkitId?: string; }; -function CopyButton({ text, label }: { text: string; label: string }) { +function CopyButton({ + text, + getText, + label, +}: { + text?: string; + // Build the text to copy on demand (e.g. lazily fetch full tool detail). + getText?: () => Promise; + label: string; +}) { const [copied, setCopied] = useState(false); const handleCopy = useCallback(async () => { try { - await navigator.clipboard.writeText(text); + await navigator.clipboard.writeText( + getText ? await getText() : (text ?? "") + ); setCopied(true); setTimeout(() => setCopied(false), COPY_FEEDBACK_MS); } catch { // Ignore clipboard errors (e.g., permissions, unsupported browser). } - }, [text]); + }, [text, getText]); return (