From c05f62e9548d219308abf45462fe2969a834eef1 Mon Sep 17 00:00:00 2001 From: Ross Date: Wed, 31 Dec 2025 11:47:11 -0500 Subject: [PATCH 1/5] Feat: add AI artifacts to build and copy to markdown button --- README.md | 9 + bun.lock | 8 + package.json | 4 +- scripts/fetchedAddressData.json | 2 +- scripts/generateAiArtifacts.ts | 387 +++++++++++++++++++++ src/components/DocMarkdownCopyButton.tsx | 178 ++++++++++ src/css/docCopyButton.module.css | 36 ++ src/theme/DocItem/Content/index.js | 31 ++ src/theme/DocItem/Layout/index.js | 77 +++- src/theme/DocItem/Layout/styles.module.css | 34 ++ 10 files changed, 760 insertions(+), 6 deletions(-) create mode 100644 scripts/generateAiArtifacts.ts create mode 100644 src/components/DocMarkdownCopyButton.tsx create mode 100644 src/css/docCopyButton.module.css create mode 100644 src/theme/DocItem/Content/index.js create mode 100644 src/theme/DocItem/Layout/styles.module.css diff --git a/README.md b/README.md index e491ec2b3..5c0ca6343 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,15 @@ This command generates static content into the `build` directory and can be serv bun run build ``` +## AI / agent exports + +The build also generates AI-friendly artifacts that will be served by the hosted site: + +- `build/llms.txt` (served as `/llms.txt`) +- `build/ai/manifest.json` (served as `/ai/manifest.json`) +- `build/ai/docs.jsonl` (served as `/ai/docs.jsonl`) +- `build/ai/raw/` (served as `/ai/raw/`) + ## Configure .env The docs site pulls data from on-chain smart contracts, so an API key is necessary. The default is an Alchemy API key so the easiest thing to do is get a free api key from them at https://www.alchemy.com/pricing. diff --git a/bun.lock b/bun.lock index 135a28eb8..9cdcefee7 100644 --- a/bun.lock +++ b/bun.lock @@ -27,6 +27,8 @@ "remark-math": "^6.0.0", "solc": "^0.8.31", "solidity-docgen": "^0.5.17", + "turndown": "^7.1.2", + "turndown-plugin-gfm": "^1.0.2", "viem": "^2.41.2", }, "devDependencies": { @@ -546,6 +548,8 @@ "@mermaid-js/parser": ["@mermaid-js/parser@0.6.2", "", { "dependencies": { "langium": "3.3.1" } }, "sha512-+PO02uGF6L6Cs0Bw8RpGhikVvMWEysfAyl27qTlroUB8jSWr1lL0Sf6zi78ZxlSnmgSY2AMMKVgghnN9jTtwkQ=="], + "@mixmark-io/domino": ["@mixmark-io/domino@2.2.0", "", {}, "sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw=="], + "@module-federation/error-codes": ["@module-federation/error-codes@0.21.4", "", {}, "sha512-ClpL5MereWNXh+EgDjz7w4RrC1JlisQTvXDa1gLxpviHafzNDfdViVmuhi9xXVuj+EYo8KU70Y999KHhk9424Q=="], "@module-federation/runtime": ["@module-federation/runtime@0.21.4", "", { "dependencies": { "@module-federation/error-codes": "0.21.4", "@module-federation/runtime-core": "0.21.4", "@module-federation/sdk": "0.21.4" } }, "sha512-wgvGqryurVEvkicufJmTG0ZehynCeNLklv8kIk5BLIsWYSddZAE+xe4xov1kgH5fIJQAoQNkRauFFjVNlHoAkA=="], @@ -2830,6 +2834,10 @@ "tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], + "turndown": ["turndown@7.2.2", "", { "dependencies": { "@mixmark-io/domino": "^2.2.0" } }, "sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ=="], + + "turndown-plugin-gfm": ["turndown-plugin-gfm@1.0.2", "", {}, "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="], + "type-fest": ["type-fest@0.6.0", "", {}, "sha512-q+MB8nYR1KDLrgr4G5yemftpMC7/QLqVndBmEEdqzmNj5dcFOO4Oo8qlwZE3ULT3+Zim1F8Kq4cBnikNhlCMlg=="], "type-is": ["type-is@1.6.18", 
"", { "dependencies": { "media-typer": "0.3.0", "mime-types": "~2.1.24" } }, "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g=="], diff --git a/package.json b/package.json index adf5237c5..96b4f478f 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,7 @@ "get-branch-name": "git rev-parse --abbrev-ref HEAD > .branch-name", "start": "bun run runAddressCheck && bun run get-branch-name && BRANCH_NAME=$(cat .branch-name) IS_DEV=true docusaurus start", "start-no-check": "bun run get-branch-name && BRANCH_NAME=$(cat .branch-name) IS_DEV=true docusaurus start", - "build": "docusaurus build", + "build": "docusaurus build && bun scripts/generateAiArtifacts.ts", "swizzle": "docusaurus swizzle", "deploy": "docusaurus deploy", "clear": "docusaurus clear", @@ -45,6 +45,8 @@ "remark-math": "^6.0.0", "solc": "^0.8.31", "solidity-docgen": "^0.5.17", + "turndown": "^7.1.2", + "turndown-plugin-gfm": "^1.0.2", "viem": "^2.41.2" }, "browserslist": { diff --git a/scripts/fetchedAddressData.json b/scripts/fetchedAddressData.json index 87a536075..dd268c53d 100644 --- a/scripts/fetchedAddressData.json +++ b/scripts/fetchedAddressData.json @@ -1,5 +1,5 @@ { - "timeLastChecked": 1766510970, + "timeLastChecked": 1767199169, "addressesData": { "v3ContractAddresses": { "topLevel": { diff --git a/scripts/generateAiArtifacts.ts b/scripts/generateAiArtifacts.ts new file mode 100644 index 000000000..98557655f --- /dev/null +++ b/scripts/generateAiArtifacts.ts @@ -0,0 +1,387 @@ +import fs from 'fs' +import path from 'path' +import crypto from 'crypto' + +type Heading = { + level: number + id?: string + text: string +} + +type DocRecordV1 = { + schemaVersion: 1 + url: string + route: string + title: string + headings: Heading[] + text: string + sha256: string + updatedAt: string + source: { + htmlPath: string + } +} + +type ManifestV1 = { + schemaVersion: 1 + generatedAt: string + siteOrigin: string + docCount: number + docsJsonlPath: string + rawDocsPath: string +} + +function ensureDir(dirPath: string) { + fs.mkdirSync(dirPath, { recursive: true }) +} + +function decodeHtmlEntities(input: string) { + const named: Record = { + amp: '&', + lt: '<', + gt: '>', + quot: '"', + apos: "'", + nbsp: ' ', + } + + return input + .replace(/&([a-zA-Z]+);/g, (match, name: string) => named[name] ?? match) + .replace(/&#x([0-9a-fA-F]+);/g, (_, hex: string) => + String.fromCodePoint(Number.parseInt(hex, 16)) + ) + .replace(/&#([0-9]+);/g, (_, num: string) => + String.fromCodePoint(Number.parseInt(num, 10)) + ) +} + +function stripTags(input: string) { + return input.replace(/<[^>]+>/g, '') +} + +function normalizeText(input: string) { + return input + .replace(/\r\n/g, '\n') + .replace(/\u200B|\u200C|\u200D|\uFEFF/g, '') + .replace(/[ \t]+\n/g, '\n') + .replace(/\n{3,}/g, '\n\n') + .replace(/[ \t]{2,}/g, ' ') + .trim() +} + +function extractAttr(tagAttrs: string, name: string) { + const re = new RegExp( + `${name}=(?:"([^"]+)"|'([^']+)'|([^\\s>]+))`, + 'i' + ) + const match = tagAttrs.match(re) + return match?.[1] ?? match?.[2] ?? 
match?.[3] +} + +function absoluteUrl(origin: string, href: string, pageUrl?: string) { + if (!href) return href + if (href.startsWith('http://') || href.startsWith('https://')) return href + if (href.startsWith('//')) return `https:${href}` + if (href.startsWith('/')) return `${origin}${href}` + if (href.startsWith('#') && pageUrl) return `${pageUrl}${href}` + return href +} + +function extractCanonicalUrl(html: string) { + const match = html.match(/]*rel=canonical[^>]*>/i) + if (!match) return undefined + const href = extractAttr(match[0], 'href') + return href +} + +function extractDocHtml(html: string) { + const start = html.indexOf('
') + if (start < 0) return undefined + + const divTagRe = /<\/?div\b[^>]*>/gi + divTagRe.lastIndex = start + + let depth = 0 + let contentStart = -1 + + let match: RegExpExecArray | null + while ((match = divTagRe.exec(html))) { + const tag = match[0] ?? '' + const isOpen = /^= 0) { + const contentEnd = match.index + return html.slice(contentStart, contentEnd) + } + } + + return undefined +} + +function extractHeadings(docHtml: string) { + const headings: Heading[] = [] + const re = /]*)>([\s\S]*?)<\/h\1>/gi + let match: RegExpExecArray | null + + while ((match = re.exec(docHtml))) { + const level = Number.parseInt(match[1] ?? '0', 10) + const attrs = match[2] ?? '' + const inner = (match[3] ?? '').replace( + /]*class=hash-link[^>]*>[\s\S]*?<\/a>/gi, + '' + ) + const text = normalizeText(decodeHtmlEntities(stripTags(inner))) + if (!text) continue + const id = extractAttr(attrs, 'id') + headings.push({ level, id, text }) + } + + return headings +} + +function extractTitle(html: string, docHtml?: string) { + if (docHtml) { + const h1 = docHtml.match(/]*>([\s\S]*?)<\/h1>/i) + if (h1?.[1]) return normalizeText(decodeHtmlEntities(stripTags(h1[1]))) + } + const ogTitle = html.match(/]*property=og:title[^>]*>/i) + if (ogTitle) { + const content = extractAttr(ogTitle[0], 'content') + if (content) return normalizeText(decodeHtmlEntities(content)) + } + const title = html.match(/]*>([\s\S]*?)<\/title>/i) + if (title?.[1]) return normalizeText(decodeHtmlEntities(stripTags(title[1]))) + return 'Untitled' +} + +function htmlToPlainText(docHtml: string, origin: string, pageUrl: string) { + let s = docHtml + + s = s.replace( + /]*class=hash-link[^>]*>[\s\S]*?<\/a>/gi, + '' + ) + + s = s.replace( + /]*>\s*]*)>([\s\S]*?)<\/code>\s*<\/pre>/gi, + (_, codeAttrs: string, codeInner: string) => { + const classAttr = extractAttr(codeAttrs ?? '', 'class') ?? '' + const langMatch = classAttr.match(/language-([a-zA-Z0-9_-]+)/) + const lang = langMatch?.[1] ?? '' + const code = normalizeText(decodeHtmlEntities(stripTags(codeInner))) + const fence = lang ? `\n\n\`\`\`${lang}\n${code}\n\`\`\`\n\n` : `\n\n\`\`\`\n${code}\n\`\`\`\n\n` + return fence + } + ) + + s = s.replace(/]*)>/gi, (_, attrs: string) => { + const alt = decodeHtmlEntities(extractAttr(attrs ?? '', 'alt') ?? 'image') + const src = extractAttr(attrs ?? '', 'src') ?? '' + const abs = absoluteUrl(origin, src, pageUrl) + return `\n\n![${alt}](${abs})\n\n` + }) + + s = s.replace(/]*)>([\s\S]*?)<\/h\1>/gi, (_, lvl: string, _attrs: string, inner: string) => { + const level = Number.parseInt(lvl, 10) + const text = normalizeText(decodeHtmlEntities(stripTags(inner))) + if (!text) return '' + const hashes = '#'.repeat(Math.min(Math.max(level, 1), 6)) + return `\n\n${hashes} ${text}\n\n` + }) + + s = s.replace(/]*)>([\s\S]*?)<\/a>/gi, (_, attrs: string, inner: string) => { + const text = normalizeText(decodeHtmlEntities(stripTags(inner))) + const href = extractAttr(attrs ?? '', 'href') ?? 
'' + if (!href) return text + const abs = absoluteUrl(origin, href, pageUrl) + if (!text) return abs + if (text === abs) return abs + return `${text} (${abs})` + }) + + s = s.replace(/]*>([\s\S]*?)<\/code>/gi, (_, inner: string) => { + const text = normalizeText(decodeHtmlEntities(stripTags(inner))) + if (!text) return '' + return `\`${text}\`` + }) + + s = s.replace(/<\/(p|div|section|article)>/gi, '\n\n') + s = s.replace(/<(p|div|section|article)[^>]*>/gi, '\n\n') + s = s.replace(//gi, '\n') + + s = s.replace(/]*>/gi, '\n- ') + s = s.replace(/<\/li>/gi, '\n') + + s = s.replace(/<\/tr>/gi, '\n') + s = s.replace(/<\/t[hd]>/gi, ' | ') + + s = decodeHtmlEntities(stripTags(s)) + return normalizeText(s) +} + +function sha256(input: string) { + return crypto.createHash('sha256').update(input).digest('hex') +} + +function walkFiles(dirPath: string) { + const results: string[] = [] + const stack = [dirPath] + + while (stack.length) { + const current = stack.pop() + if (!current) continue + const entries = fs.readdirSync(current, { withFileTypes: true }) + for (const entry of entries) { + const full = path.join(current, entry.name) + if (entry.isDirectory()) stack.push(full) + else if (entry.isFile()) results.push(full) + } + } + + return results +} + +function copyRawDocs(docsDir: string, rawOutDir: string) { + if (!fs.existsSync(docsDir)) return + const files = walkFiles(docsDir) + for (const src of files) { + const ext = path.extname(src).toLowerCase() + if (ext !== '.md' && ext !== '.mdx') continue + const rel = path.relative(docsDir, src) + const dest = path.join(rawOutDir, rel) + ensureDir(path.dirname(dest)) + fs.copyFileSync(src, dest) + } +} + +function main() { + const workspaceRoot = process.cwd() + const buildDir = path.join(workspaceRoot, 'build') + const outDir = path.join(buildDir, 'ai') + const rawOutDir = path.join(outDir, 'raw') + const docsDir = path.join(workspaceRoot, 'docs') + + if (!fs.existsSync(buildDir)) { + throw new Error( + `Missing ${buildDir}. Run \`bun run build\` before generating AI artifacts.` + ) + } + + const siteOrigin = (process.env.DOCS_URL ?? 'https://docs.yearn.fi').replace( + /\/+$/, + '' + ) + + fs.rmSync(outDir, { recursive: true, force: true }) + ensureDir(outDir) + ensureDir(rawOutDir) + + const htmlFiles = walkFiles(buildDir).filter((p) => { + if (!p.endsWith('index.html')) return false + const rel = path.relative(buildDir, p).replace(/\\/g, '/') + if (rel.startsWith('assets/') || rel.startsWith('fonts/')) return false + return true + }) + + const docsJsonlPath = path.join(outDir, 'docs.jsonl') + const docsJsonlStream = fs.createWriteStream(docsJsonlPath, { + encoding: 'utf-8', + }) + + let docCount = 0 + for (const htmlPath of htmlFiles) { + const html = fs.readFileSync(htmlPath, 'utf-8') + if (!html.includes('docs-doc-page')) continue + if (!html.includes('theme-doc-markdown')) continue + + const canonical = extractCanonicalUrl(html) + const pageUrl = + canonical && canonical.startsWith('http') + ? canonical + : canonical + ? absoluteUrl(siteOrigin, canonical) + : undefined + + const relHtmlPath = path.relative(buildDir, htmlPath).replace(/\\/g, '/') + + const route = + pageUrl && pageUrl.startsWith(siteOrigin) + ? 
pageUrl.slice(siteOrigin.length) || '/' + : (() => { + const rel = relHtmlPath.replace(/index\.html$/i, '') + return `/${rel}`.replace(/\\/g, '/').replace(/\/+$/, '') || '/' + })() + + const docHtml = extractDocHtml(html) + if (!docHtml) continue + + const title = extractTitle(html, docHtml) + const headings = extractHeadings(docHtml) + const absUrl = pageUrl ?? absoluteUrl(siteOrigin, route) + const text = htmlToPlainText(docHtml, siteOrigin, absUrl) + const stat = fs.statSync(htmlPath) + + const record: DocRecordV1 = { + schemaVersion: 1, + url: absUrl, + route, + title, + headings, + text, + sha256: sha256(text), + updatedAt: stat.mtime.toISOString(), + source: { htmlPath: relHtmlPath }, + } + + docsJsonlStream.write(`${JSON.stringify(record)}\n`) + docCount += 1 + } + + docsJsonlStream.end() + + copyRawDocs(docsDir, rawOutDir) + + const manifest: ManifestV1 = { + schemaVersion: 1, + generatedAt: new Date().toISOString(), + siteOrigin, + docCount, + docsJsonlPath: '/ai/docs.jsonl', + rawDocsPath: '/ai/raw/', + } + fs.writeFileSync( + path.join(outDir, 'manifest.json'), + `${JSON.stringify(manifest, null, 2)}\n` + ) + + fs.writeFileSync( + path.join(buildDir, 'llms.txt'), + [ + '# Yearn Docs (docs.yearn.fi)', + '', + 'AI-readable exports:', + '- Manifest: /ai/manifest.json', + '- Plaintext corpus (JSONL): /ai/docs.jsonl', + '- Raw docs sources (MD/MDX): /ai/raw/ (mirrors repository `docs/`)', + '', + 'Notes:', + '- Prefer citing canonical page URLs on https://docs.yearn.fi', + '- Use plaintext for retrieval; fall back to raw MDX/MD for exact formatting/quotes', + '', + ].join('\n') + ) + + // eslint-disable-next-line no-console + console.log(`Generated AI artifacts: ${docCount} docs -> ${path.relative(workspaceRoot, outDir)}`) +} + +main() diff --git a/src/components/DocMarkdownCopyButton.tsx b/src/components/DocMarkdownCopyButton.tsx new file mode 100644 index 000000000..5d7dc8408 --- /dev/null +++ b/src/components/DocMarkdownCopyButton.tsx @@ -0,0 +1,178 @@ +import React, { useCallback, useEffect, useRef, useState } from 'react' +import { Button } from '@site/src/components/shadcn/button/button' +import styles from '@site/src/css/docCopyButton.module.css' +import { Check, Copy, AlertTriangle } from 'lucide-react' + +const RESET_DELAY_MS = 2200 + +const cleanupDocContent = (root: HTMLElement) => { + const selectorsToRemove = [ + '.hash-link', + '.theme-code-block__copy-button', + '.theme-code-block__button', + 'button', + 'svg', + 'style', + 'script', + 'input', + 'textarea', + 'select', + ] + + root.querySelectorAll(selectorsToRemove.join(',')).forEach((node) => { + node.remove() + }) +} + +const getCodeLanguage = (codeEl: HTMLElement | null) => { + if (!codeEl) return '' + const direct = + codeEl.getAttribute('data-language') || + codeEl.parentElement?.getAttribute('data-language') + if (direct) return direct + const className = codeEl.getAttribute('class') || '' + const match = className.match(/language-([^\s]+)/) + return match ? 
match[1] : '' +} + +const toMarkdown = async (root: HTMLElement) => { + const [turndownModule, gfmModule] = await Promise.all([ + import('turndown'), + import('turndown-plugin-gfm'), + ]) + const TurndownService = + (turndownModule as any).default || (turndownModule as any) + const turndownService = new TurndownService({ + codeBlockStyle: 'fenced', + headingStyle: 'atx', + bulletListMarker: '-', + }) + const gfm = + (gfmModule as any).gfm || (gfmModule as any).default || (gfmModule as any) + if (gfm) { + turndownService.use(gfm) + } + + turndownService.addRule('fencedCodeBlockWithLanguage', { + filter: (node) => { + if (!(node instanceof HTMLElement)) return false + return ( + node.nodeName === 'PRE' && + node.firstElementChild?.nodeName === 'CODE' + ) + }, + replacement: (_content, node) => { + const pre = node as HTMLElement + const code = pre.querySelector('code') + const language = getCodeLanguage(code) + const text = code?.textContent || pre.textContent || '' + const trimmed = text.replace(/\n$/, '') + const fence = '```' + return `\n\n${fence}${language ? language : ''}\n${trimmed}\n${fence}\n\n` + }, + }) + + const markdown = turndownService.turndown(root) + return markdown.trim() +} + +const writeToClipboard = async (text: string) => { + if (navigator.clipboard && window.isSecureContext) { + await navigator.clipboard.writeText(text) + return + } + + const textarea = document.createElement('textarea') + textarea.value = text + textarea.setAttribute('readonly', '') + textarea.style.position = 'fixed' + textarea.style.left = '-9999px' + textarea.style.top = '0' + document.body.appendChild(textarea) + textarea.focus() + textarea.select() + const success = document.execCommand('copy') + textarea.remove() + if (!success) { + throw new Error('Clipboard copy failed') + } +} + +const DocMarkdownCopyButton = () => { + const [status, setStatus] = useState<'idle' | 'copying' | 'copied' | 'error'>( + 'idle' + ) + const timeoutRef = useRef(null) + + useEffect(() => { + return () => { + if (timeoutRef.current) { + window.clearTimeout(timeoutRef.current) + } + } + }, []) + + const resetStatus = () => { + if (timeoutRef.current) { + window.clearTimeout(timeoutRef.current) + } + timeoutRef.current = window.setTimeout(() => { + setStatus('idle') + }, RESET_DELAY_MS) + } + + const handleCopy = useCallback(async () => { + try { + setStatus('copying') + const docContent = document.querySelector( + '.theme-doc-markdown' + ) as HTMLElement | null + if (!docContent) { + throw new Error('Doc content not found') + } + const clone = docContent.cloneNode(true) as HTMLElement + cleanupDocContent(clone) + const markdown = await toMarkdown(clone) + await writeToClipboard(markdown) + setStatus('copied') + resetStatus() + } catch (error) { + console.error('Failed to copy markdown', error) + setStatus('error') + resetStatus() + } + }, []) + + const label = + status === 'copying' + ? 'Copying...' + : status === 'copied' + ? 'Copied markdown' + : status === 'error' + ? 'Copy failed' + : 'Copy page as Markdown' + + const Icon = + status === 'copied' + ? Check + : status === 'error' + ? AlertTriangle + : Copy + + return ( +
+ +
+ ) +} + +export default DocMarkdownCopyButton diff --git a/src/css/docCopyButton.module.css b/src/css/docCopyButton.module.css new file mode 100644 index 000000000..0e2c09eae --- /dev/null +++ b/src/css/docCopyButton.module.css @@ -0,0 +1,36 @@ +.copyToolbar { + display: flex; + justify-content: flex-start; + align-items: center; + margin: 0; + padding-left: var( + --doc-copy-padding-horizontal, + calc(var(--ifm-toc-padding-horizontal) * 2) + ); + padding-right: var( + --doc-copy-padding-horizontal, + calc(var(--ifm-toc-padding-horizontal) * 2) + ); +} + +.copyButton { + gap: 0.5rem; + max-width: none; + border: none; + background: transparent; + font-weight: 400; + padding: 0.25rem 0; + color: var(--ifm-font-color-base); + text-decoration: none; +} + +.copyButton.copyButton:hover { + background: transparent; + color: var(--ifm-color-primary); + text-decoration: none; +} + +.copyButton.copyButton:focus-visible { + box-shadow: none; + text-decoration: underline; +} diff --git a/src/theme/DocItem/Content/index.js b/src/theme/DocItem/Content/index.js new file mode 100644 index 000000000..14ab46956 --- /dev/null +++ b/src/theme/DocItem/Content/index.js @@ -0,0 +1,31 @@ +import React from 'react' +import clsx from 'clsx' +import { ThemeClassNames } from '@docusaurus/theme-common' +import { useDoc } from '@docusaurus/plugin-content-docs/client' +import Heading from '@theme/Heading' +import MDXContent from '@theme/MDXContent' + +const useSyntheticTitle = () => { + const { metadata, frontMatter, contentTitle } = useDoc() + const shouldRender = + !frontMatter.hide_title && typeof contentTitle === 'undefined' + if (!shouldRender) { + return null + } + return metadata.title +} + +export default function DocItemContent({ children }) { + const syntheticTitle = useSyntheticTitle() + + return ( +
+ {syntheticTitle && ( +
+ {syntheticTitle} +
+ )} + {children} +
+ ) +} diff --git a/src/theme/DocItem/Layout/index.js b/src/theme/DocItem/Layout/index.js index 5ae7953b8..f8ae7200e 100644 --- a/src/theme/DocItem/Layout/index.js +++ b/src/theme/DocItem/Layout/index.js @@ -1,16 +1,85 @@ import React from 'react' -import Layout from '@theme-original/DocItem/Layout' +import clsx from 'clsx' +import { useWindowSize } from '@docusaurus/theme-common' import { useDoc } from '@docusaurus/plugin-content-docs/client' +import DocItemPaginator from '@theme/DocItem/Paginator' +import DocVersionBanner from '@theme/DocVersionBanner' +import DocVersionBadge from '@theme/DocVersionBadge' +import DocItemFooter from '@theme/DocItem/Footer' +import DocItemTOCMobile from '@theme/DocItem/TOC/Mobile' +import DocItemTOCDesktop from '@theme/DocItem/TOC/Desktop' +import DocItemContent from '@theme/DocItem/Content' +import DocBreadcrumbs from '@theme/DocBreadcrumbs' +import ContentVisibility from '@theme/ContentVisibility' import { ContractDataProvider } from '@site/src/context/ContractDataContext' +import DocMarkdownCopyButton from '@site/src/components/DocMarkdownCopyButton' +import styles from './styles.module.css' -export default function LayoutWrapper(props) { +const useDocTOC = () => { + const { frontMatter, toc } = useDoc() + const windowSize = useWindowSize() + + const hidden = frontMatter.hide_table_of_contents + const canRender = !hidden && toc.length > 0 + + const mobile = canRender ? : undefined + + const desktop = + canRender && (windowSize === 'desktop' || windowSize === 'ssr') ? ( + + ) : undefined + + return { + hidden, + mobile, + desktop, + } +} + +const DocItemLayoutContent = ({ children }) => { + const docTOC = useDocTOC() + const { metadata } = useDoc() + + return ( +
+
+ + +
+
+ + + {docTOC.mobile} + {docTOC.mobile && ( +
+ +
+ )} + {children} + +
+ +
+
+ {docTOC.desktop && ( +
+ + {docTOC.desktop} +
+ )} +
+ ) +} + +export default function DocItemLayoutWrapper(props) { const { frontMatter } = useDoc() + const content = return frontMatter.rpcCalls ? ( - + {content} ) : ( - + content ) } diff --git a/src/theme/DocItem/Layout/styles.module.css b/src/theme/DocItem/Layout/styles.module.css new file mode 100644 index 000000000..bf14f3665 --- /dev/null +++ b/src/theme/DocItem/Layout/styles.module.css @@ -0,0 +1,34 @@ +.docItemContainer header + *, +.docItemContainer article > *:first-child { + margin-top: 0; +} + +@media (min-width: 997px) { + .docItemCol { + max-width: 75% !important; + } +} + +.docSidebar { + display: flex; + flex-direction: column; + gap: 0.75rem; +} + +@media (max-width: 996px) { + .docSidebar { + display: none; + } + + .mobileCopyButton { + margin-top: -0.5rem; + margin-bottom: 1rem; + --doc-copy-padding-horizontal: var(--ifm-toc-padding-horizontal); + } +} + +@media (min-width: 997px) { + .mobileCopyButton { + display: none; + } +} From 9d282b853fe9b25237d42e3aab8711d2d1e2aa07 Mon Sep 17 00:00:00 2001 From: Ross Date: Fri, 2 Jan 2026 10:40:47 -0500 Subject: [PATCH 2/5] Feat: update schema version and enhance AI document retrieval capabilities --- scripts/generateAiArtifacts.ts | 77 +++++++++++++++++++-------- skills/yearn-docs-site-query/SKILL.md | 69 ++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 22 deletions(-) create mode 100644 skills/yearn-docs-site-query/SKILL.md diff --git a/scripts/generateAiArtifacts.ts b/scripts/generateAiArtifacts.ts index 98557655f..098e69986 100644 --- a/scripts/generateAiArtifacts.ts +++ b/scripts/generateAiArtifacts.ts @@ -9,8 +9,9 @@ type Heading = { } type DocRecordV1 = { - schemaVersion: 1 + schemaVersion: 2 url: string + canonicalUrl?: string route: string title: string headings: Heading[] @@ -19,11 +20,12 @@ type DocRecordV1 = { updatedAt: string source: { htmlPath: string + rawPath?: string } } type ManifestV1 = { - schemaVersion: 1 + schemaVersion: 2 generatedAt: string siteOrigin: string docCount: number @@ -94,6 +96,21 @@ function extractCanonicalUrl(html: string) { return href } +function resolveSiteOrigin() { + const explicit = process.env.DOCS_URL?.trim() + if (explicit) return explicit.replace(/\/+$/, '') + + const vercel = process.env.VERCEL_URL?.trim() + if (vercel) { + const withProto = vercel.startsWith('http://') || vercel.startsWith('https://') + ? vercel + : `https://${vercel}` + return withProto.replace(/\/+$/, '') + } + + return 'https://docs.yearn.fi' +} + function extractDocHtml(html: string) { const start = html.indexOf('
') if (start < 0) return undefined @@ -263,6 +280,26 @@ function copyRawDocs(docsDir: string, rawOutDir: string) { } } +function findRawSourceRelativePath(docsDir: string, relHtmlPath: string) { + const rel = relHtmlPath.replace(/\\/g, '/') + + const candidates: string[] = [] + if (rel.endsWith('/index.html')) { + const dir = rel.slice(0, -'/index.html'.length) + candidates.push(`${dir}.md`, `${dir}.mdx`, `${dir}/index.md`, `${dir}/index.mdx`) + } else if (rel.endsWith('index.html')) { + const base = rel.slice(0, -'index.html'.length).replace(/\/+$/, '') + candidates.push(`${base}.md`, `${base}.mdx`, `${base}/index.md`, `${base}/index.mdx`) + } + + for (const candidate of candidates) { + const full = path.join(docsDir, candidate) + if (fs.existsSync(full) && fs.statSync(full).isFile()) return candidate + } + + return undefined +} + function main() { const workspaceRoot = process.cwd() const buildDir = path.join(workspaceRoot, 'build') @@ -276,10 +313,7 @@ function main() { ) } - const siteOrigin = (process.env.DOCS_URL ?? 'https://docs.yearn.fi').replace( - /\/+$/, - '' - ) + const siteOrigin = resolveSiteOrigin() fs.rmSync(outDir, { recursive: true, force: true }) ensureDir(outDir) @@ -303,19 +337,13 @@ function main() { if (!html.includes('docs-doc-page')) continue if (!html.includes('theme-doc-markdown')) continue - const canonical = extractCanonicalUrl(html) - const pageUrl = - canonical && canonical.startsWith('http') - ? canonical - : canonical - ? absoluteUrl(siteOrigin, canonical) - : undefined + const canonicalUrl = extractCanonicalUrl(html) const relHtmlPath = path.relative(buildDir, htmlPath).replace(/\\/g, '/') const route = - pageUrl && pageUrl.startsWith(siteOrigin) - ? pageUrl.slice(siteOrigin.length) || '/' + canonicalUrl && canonicalUrl.startsWith(siteOrigin) + ? canonicalUrl.slice(siteOrigin.length) || '/' : (() => { const rel = relHtmlPath.replace(/index\.html$/i, '') return `/${rel}`.replace(/\\/g, '/').replace(/\/+$/, '') || '/' @@ -326,20 +354,23 @@ function main() { const title = extractTitle(html, docHtml) const headings = extractHeadings(docHtml) - const absUrl = pageUrl ?? absoluteUrl(siteOrigin, route) - const text = htmlToPlainText(docHtml, siteOrigin, absUrl) + const url = absoluteUrl(siteOrigin, route) + const rawRel = findRawSourceRelativePath(docsDir, relHtmlPath) + const rawPath = rawRel ? `/ai/raw/${rawRel}` : undefined + const text = htmlToPlainText(docHtml, siteOrigin, url) const stat = fs.statSync(htmlPath) const record: DocRecordV1 = { - schemaVersion: 1, - url: absUrl, + schemaVersion: 2, + url, + canonicalUrl: canonicalUrl && canonicalUrl.startsWith('http') ? 
canonicalUrl : undefined, route, title, headings, text, sha256: sha256(text), updatedAt: stat.mtime.toISOString(), - source: { htmlPath: relHtmlPath }, + source: { htmlPath: relHtmlPath, rawPath }, } docsJsonlStream.write(`${JSON.stringify(record)}\n`) @@ -351,7 +382,7 @@ function main() { copyRawDocs(docsDir, rawOutDir) const manifest: ManifestV1 = { - schemaVersion: 1, + schemaVersion: 2, generatedAt: new Date().toISOString(), siteOrigin, docCount, @@ -366,7 +397,9 @@ function main() { fs.writeFileSync( path.join(buildDir, 'llms.txt'), [ - '# Yearn Docs (docs.yearn.fi)', + '# Yearn Docs', + '', + `Site: ${siteOrigin}`, '', 'AI-readable exports:', '- Manifest: /ai/manifest.json', diff --git a/skills/yearn-docs-site-query/SKILL.md b/skills/yearn-docs-site-query/SKILL.md new file mode 100644 index 000000000..e9de1bcc2 --- /dev/null +++ b/skills/yearn-docs-site-query/SKILL.md @@ -0,0 +1,69 @@ +--- +name: yearn-docs-site-query +description: Query and cite Yearn documentation via the hosted docs site's AI exports (llms.txt, /ai/manifest.json, /ai/docs.jsonl, and optional /ai/raw/). Use when answering questions from docs.yearn.fi (or a Vercel preview) without cloning the repo, and when you need reliable retrieval + citations from the published docs. +--- + +# Yearn Docs: hosted retrieval + +## Inputs + +- `BASE`: The docs site origin to query (e.g. a Vercel preview or `https://docs.yearn.fi`). + +Example: + +```text +BASE=https://yearn-docs-git-feat-copy-page-content-to-markdown-yearn.vercel.app +``` + +## Endpoints (relative to `BASE`) + +- `GET /llms.txt` — human/agent pointer file. +- `GET /ai/manifest.json` — machine-readable pointers (paths, counts, origin). +- `GET /ai/docs.jsonl` — plaintext corpus (JSON Lines: 1 JSON object per doc page). +- `GET /ai/raw/...` — optional raw `.md`/`.mdx` source mirror (if present in corpus records). + +## Retrieval workflow + +1. Fetch `GET {BASE}/ai/manifest.json` and `GET {BASE}/llms.txt`. +2. Download `GET {BASE}/ai/docs.jsonl` and parse it as JSONL (stream line-by-line). +3. Rank candidate records for the user query: + - Tokenize the query and score matches. + - Boost matches in `title` and `headings[].text` over matches in `text`. +4. Select the top records (e.g. 3–8), then select the best sections: + - Split `text` by headings and keep the most relevant chunks. +5. Answer using only the selected chunks (don’t hallucinate fields not present in the docs). + +## Record shape (docs.jsonl) + +Expect at least: + +- `title`: page title +- `headings`: array of `{ level, id?, text }` +- `text`: extracted plaintext (may include fenced code blocks) +- `route`: the path on the site host (e.g. `/developers/addresses`) +- `url`: canonical URL for the page (often on `https://docs.yearn.fi/...`) +- `source.htmlPath`: where it came from in the static build + +Optional/newer: + +- `canonicalUrl` +- `source.rawPath`: path under `/ai/raw/...` for exact `.md`/`.mdx` + +## Citing and linking + +- Prefer citing the canonical URL in the record: + - Use `canonicalUrl` if present, else `url`. +- If `url` points to the wrong origin for the environment you’re using, cite `BASE + route`. + +## Exact-source fallback (for precise quotes/formatting) + +When you need exact formatting, tables, or the original MDX: + +1. If `source.rawPath` exists: `GET {BASE}{source.rawPath}` and quote from that. +2. Otherwise fetch the rendered page: `GET {BASE}{route}` and extract the relevant section from HTML. + +## Caching + +- Cache `GET {BASE}/ai/docs.jsonl` per session. 
+- If you can, revalidate with `ETag`/`If-None-Match` on subsequent runs. + From e989e59c163510f86dc026ccf62c21247adee2fc Mon Sep 17 00:00:00 2001 From: Ross Date: Fri, 2 Jan 2026 10:41:51 -0500 Subject: [PATCH 3/5] update docs base URL in skill --- skills/yearn-docs-site-query/SKILL.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/skills/yearn-docs-site-query/SKILL.md b/skills/yearn-docs-site-query/SKILL.md index e9de1bcc2..e3d82085f 100644 --- a/skills/yearn-docs-site-query/SKILL.md +++ b/skills/yearn-docs-site-query/SKILL.md @@ -12,7 +12,7 @@ description: Query and cite Yearn documentation via the hosted docs site's AI ex Example: ```text -BASE=https://yearn-docs-git-feat-copy-page-content-to-markdown-yearn.vercel.app +BASE=https://docs.yearn.fi ``` ## Endpoints (relative to `BASE`) @@ -66,4 +66,3 @@ When you need exact formatting, tables, or the original MDX: - Cache `GET {BASE}/ai/docs.jsonl` per session. - If you can, revalidate with `ETag`/`If-None-Match` on subsequent runs. - From cde68ce2a2412235cdef3af74ecd3ac09bef1a99 Mon Sep 17 00:00:00 2001 From: Ross Date: Fri, 2 Jan 2026 12:32:13 -0500 Subject: [PATCH 4/5] improve skill --- skills/yearn-docs-site-query/SKILL.md | 54 +- .../scripts/yearn_docs_query.py | 526 ++++++++++++++++++ 2 files changed, 579 insertions(+), 1 deletion(-) create mode 100644 skills/yearn-docs-site-query/scripts/yearn_docs_query.py diff --git a/skills/yearn-docs-site-query/SKILL.md b/skills/yearn-docs-site-query/SKILL.md index e3d82085f..09695d34c 100644 --- a/skills/yearn-docs-site-query/SKILL.md +++ b/skills/yearn-docs-site-query/SKILL.md @@ -7,7 +7,7 @@ description: Query and cite Yearn documentation via the hosted docs site's AI ex ## Inputs -- `BASE`: The docs site origin to query (e.g. a Vercel preview or `https://docs.yearn.fi`). +- `BASE` (optional): The docs site origin to query (e.g. a Vercel preview). Defaults to `https://docs.yearn.fi`. Example: @@ -15,6 +15,58 @@ Example: BASE=https://docs.yearn.fi ``` +Tip: if you use a bash one-liner like `BASE=... python3 ... --base "$BASE" ...`, `$BASE` expands before the assignment takes effect. Prefer either `--base 'https://...'` directly, or `export BASE=...` first, or omit `--base` and rely on the default. + +## One-approval workflow (recommended) + +To avoid repeated network approvals in restricted environments, use the bundled helper. 
It auto-checks for updates (conditional request) and only downloads `docs.jsonl` when it has changed, so you typically need only **one approval per invocation**: + +```bash +python3 skills/yearn-docs-site-query/scripts/yearn_docs_query.py --base "$BASE" search "yCHAD multisig signers" +``` + +If you want absolutely no network calls, force offline mode (no network at all): + +```bash +python3 skills/yearn-docs-site-query/scripts/yearn_docs_query.py --base "$BASE" --offline search "veYFI gauge" +``` + +To fetch a page by route: + +```bash +python3 skills/yearn-docs-site-query/scripts/yearn_docs_query.py --base "$BASE" get /developers/security/multisig +``` + +To clear cached files for that `BASE`: + +```bash +python3 skills/yearn-docs-site-query/scripts/yearn_docs_query.py --base "$BASE" cleanup +``` + +To see whether your cached data is stale (age/etag/manifest info): + +```bash +python3 skills/yearn-docs-site-query/scripts/yearn_docs_query.py --base "$BASE" status +``` + +To check the remote for updates and refresh the cache if needed (will require network approval in restricted environments): + +```bash +python3 skills/yearn-docs-site-query/scripts/yearn_docs_query.py --base "$BASE" --check-updates status +``` + +To skip update checks and use cached data (if present): + +```bash +python3 skills/yearn-docs-site-query/scripts/yearn_docs_query.py --base "$BASE" --no-auto-update search "multisig signers" +``` + +To force a full re-download (even if unchanged): + +```bash +python3 skills/yearn-docs-site-query/scripts/yearn_docs_query.py --base "$BASE" --refresh status +``` + ## Endpoints (relative to `BASE`) - `GET /llms.txt` — human/agent pointer file. diff --git a/skills/yearn-docs-site-query/scripts/yearn_docs_query.py b/skills/yearn-docs-site-query/scripts/yearn_docs_query.py new file mode 100644 index 000000000..f288c9cbe --- /dev/null +++ b/skills/yearn-docs-site-query/scripts/yearn_docs_query.py @@ -0,0 +1,526 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import pathlib +import re +import sys +import time +import urllib.error +import urllib.request +from dataclasses import dataclass +from typing import Any, Dict, Iterable, List, Optional, Tuple + + +def _normalize_base(base: str) -> str: + base = base.strip() + if not base: + raise ValueError("BASE is empty") + if not base.startswith("http://") and not base.startswith("https://"): + base = "https://" + base + return base.rstrip("/") + + +def _sha256_hex(s: str) -> str: + return hashlib.sha256(s.encode("utf-8")).hexdigest() + + +def _default_cache_dir() -> pathlib.Path: + # Prefer XDG cache. + xdg = os.environ.get("XDG_CACHE_HOME") + if xdg: + return pathlib.Path(xdg) / "yearn-docs-site-query" + + home = os.path.expanduser("~") + if home and home != "~": + return pathlib.Path(home) / ".cache" / "yearn-docs-site-query" + + # Fallback: local relative cache. 
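+    # (resolves relative to the current working directory, used only when neither XDG_CACHE_HOME nor a usable HOME is available)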
+ return pathlib.Path(".yearn-docs-cache") + + +def _read_text(path: pathlib.Path) -> Optional[str]: + try: + return path.read_text("utf-8") + except FileNotFoundError: + return None + + +def _write_text(path: pathlib.Path, text: str) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + + +def _read_json(path: pathlib.Path) -> Optional[Dict[str, Any]]: + raw = _read_text(path) + if raw is None: + return None + return json.loads(raw) + + +def _write_json(path: pathlib.Path, obj: Dict[str, Any]) -> None: + _write_text(path, json.dumps(obj, indent=2, sort_keys=True) + "\n") + + +def _http_get( + url: str, + *, + headers: Optional[Dict[str, str]] = None, + timeout_s: int = 30, +) -> Tuple[int, Dict[str, str], bytes]: + req = urllib.request.Request(url, headers=headers or {}, method="GET") + try: + with urllib.request.urlopen(req, timeout=timeout_s) as resp: + status = getattr(resp, "status", 200) + resp_headers = {k.lower(): v for k, v in resp.headers.items()} + body = resp.read() + return status, resp_headers, body + except urllib.error.HTTPError as e: + status = e.code + resp_headers = {k.lower(): v for k, v in e.headers.items()} if e.headers else {} + body = e.read() if hasattr(e, "read") else b"" + return status, resp_headers, body + + +def _save_bytes(path: pathlib.Path, data: bytes) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_bytes(data) + + +def _load_bytes(path: pathlib.Path) -> Optional[bytes]: + try: + return path.read_bytes() + except FileNotFoundError: + return None + + +def _tokenize(query: str) -> List[str]: + query = query.lower().strip() + if not query: + return [] + return [t for t in re.split(r"[^\w]+", query) if t] + + +@dataclass +class SearchHit: + score: int + title: str + url: str + route: str + snippet: str + + +def _score_record(record: Dict[str, Any], tokens: List[str]) -> Tuple[int, str]: + title = (record.get("title") or "").lower() + headings = " ".join((h.get("text") or "") for h in (record.get("headings") or [])).lower() + text = (record.get("text") or "").lower() + + score = 0 + coverage = 0 + for tok in tokens: + in_title = title.count(tok) + in_headings = headings.count(tok) + in_text = text.count(tok) + + if in_title or in_headings or in_text: + coverage += 1 + + score += 10 * in_title + score += 6 * in_headings + score += 1 * in_text + + # Prefer documents that match more distinct query tokens, and penalize partial matches. 
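+    # Net effect: score = 10*title_hits + 6*heading_hits + 1*text_hits + 200*matched_query_tokens - 300*missing_query_tokens.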
+ score += 200 * coverage + missing = max(0, len(tokens) - coverage) + score -= 300 * missing + + snippet = "" + if tokens: + tok = tokens[0] + idx = text.find(tok) + if idx >= 0: + start = max(0, idx - 120) + end = min(len(text), idx + 240) + snippet = (record.get("text") or "")[start:end].replace("\n", " ").strip() + return score, snippet + + +def _iter_jsonl(path: pathlib.Path) -> Iterable[Dict[str, Any]]: + with path.open("r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if not line: + continue + yield json.loads(line) + + +def _fetch_corpus( + *, + base: str, + cache_dir: pathlib.Path, + refresh: bool, + offline: bool, + check_updates: bool, + auto_update: bool, +) -> Tuple[pathlib.Path, Dict[str, Any]]: + base_key = _sha256_hex(base)[:12] + root = cache_dir / base_key + root.mkdir(parents=True, exist_ok=True) + + manifest_path = root / "manifest.json" + docs_path = root / "docs.jsonl" + meta_path = root / "meta.json" + + meta = _read_json(meta_path) or {} + + if offline: + manifest = _read_json(manifest_path) + if manifest is None: + raise SystemExit(f"Offline mode: missing cached manifest at {manifest_path}") + if not docs_path.exists(): + raise SystemExit(f"Offline mode: missing cached corpus at {docs_path}") + return docs_path, manifest + + # Cache-first: avoid network if we already have a corpus and aren't asked to check/refresh. + cached_manifest = _read_json(manifest_path) + if ( + not refresh + and not check_updates + and not auto_update + and cached_manifest is not None + and docs_path.exists() + ): + return docs_path, cached_manifest + + # Determine the docs.jsonl URL without necessarily fetching the manifest. + docs_rel = (cached_manifest or {}).get("docsJsonlPath") or meta.get("docsRel") or "/ai/docs.jsonl" + if not str(docs_rel).startswith("/"): + docs_rel = "/" + str(docs_rel) + docs_url = f"{base}{docs_rel}" + + # Auto-update: do a conditional GET against docs.jsonl (ETag / Last-Modified) and download only if changed. + if auto_update and not refresh and docs_path.exists(): + headers: Dict[str, str] = {} + if meta.get("etag"): + headers["If-None-Match"] = meta["etag"] + if meta.get("lastModified"): + headers["If-Modified-Since"] = meta["lastModified"] + + if headers: + status, resp_headers, body = _http_get(docs_url, headers=headers, timeout_s=60) + if status == 304: + meta["checkedAt"] = int(time.time()) + _write_json(meta_path, meta) + return docs_path, cached_manifest or {} + if status == 200: + _save_bytes(docs_path, body) + meta["etag"] = resp_headers.get("etag", meta.get("etag")) + meta["lastModified"] = resp_headers.get("last-modified", meta.get("lastModified")) + meta["fetchedAt"] = int(time.time()) + meta["checkedAt"] = meta["fetchedAt"] + meta["docsUrl"] = docs_url + meta["docsRel"] = docs_rel + _write_json(meta_path, meta) + return docs_path, cached_manifest or {} + + # If the update check fails, fall back to cached data instead of failing the whole query. 
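+            # Reached when the conditional GET returns anything other than 200/304; warn and keep serving the cached corpus.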
+ if cached_manifest is not None and docs_path.exists(): + print(f"warning: update check failed ({status}); using cached corpus", file=sys.stderr) + meta["checkedAt"] = int(time.time()) + _write_json(meta_path, meta) + return docs_path, cached_manifest + + manifest_url = f"{base}/ai/manifest.json" + manifest: Dict[str, Any] + if check_updates or refresh or cached_manifest is None: + status, _headers, body = _http_get(manifest_url, timeout_s=30) + if status != 200: + raise SystemExit(f"Failed to fetch manifest ({status}): {manifest_url}") + _save_bytes(manifest_path, body) + manifest = json.loads(body.decode("utf-8")) + meta["manifestGeneratedAt"] = manifest.get("generatedAt") + meta["manifestDocCount"] = manifest.get("docCount") + meta["manifestUrl"] = manifest_url + else: + manifest = cached_manifest + + docs_rel = manifest.get("docsJsonlPath") or docs_rel or "/ai/docs.jsonl" + if not str(docs_rel).startswith("/"): + docs_rel = "/" + str(docs_rel) + docs_url = f"{base}{docs_rel}" + meta["docsUrl"] = docs_url + meta["docsRel"] = docs_rel + + if not refresh and docs_path.exists(): + etag = meta.get("etag") + if etag: + status2, headers2, body2 = _http_get( + docs_url, + headers={"If-None-Match": etag}, + timeout_s=60, + ) + if status2 == 304: + meta["checkedAt"] = int(time.time()) + _write_json(meta_path, meta) + return docs_path, manifest + if status2 == 200: + _save_bytes(docs_path, body2) + meta["etag"] = headers2.get("etag", etag) + meta["lastModified"] = headers2.get("last-modified", meta.get("lastModified")) + meta["fetchedAt"] = int(time.time()) + meta["checkedAt"] = meta["fetchedAt"] + _write_json(meta_path, meta) + return docs_path, manifest + raise SystemExit(f"Failed to refresh corpus ({status2}): {docs_url}") + + # No etag: keep cached unless forced. + return docs_path, manifest + + status3, headers3, body3 = _http_get(docs_url, timeout_s=120) + if status3 != 200: + raise SystemExit(f"Failed to fetch corpus ({status3}): {docs_url}") + _save_bytes(docs_path, body3) + meta["etag"] = headers3.get("etag") + meta["lastModified"] = headers3.get("last-modified") + meta["fetchedAt"] = int(time.time()) + meta["checkedAt"] = meta["fetchedAt"] + _write_json(meta_path, meta) + return docs_path, manifest + + +def _cmd_search(args: argparse.Namespace) -> int: + base = _normalize_base(args.base) + cache_dir = pathlib.Path(args.cache_dir) if args.cache_dir else _default_cache_dir() + + docs_path, _manifest = _fetch_corpus( + base=base, + cache_dir=cache_dir, + refresh=args.refresh, + offline=args.offline, + check_updates=args.check_updates, + auto_update=args.auto_update, + ) + + tokens = _tokenize(args.query) + if not tokens: + print("Empty query.") + return 2 + + hits: List[SearchHit] = [] + for record in _iter_jsonl(docs_path): + score, snippet = _score_record(record, tokens) + if score <= 0: + continue + hits.append( + SearchHit( + score=score, + title=record.get("title") or "", + url=record.get("url") or "", + route=record.get("route") or "", + snippet=snippet, + ) + ) + + hits.sort(key=lambda h: h.score, reverse=True) + hits = hits[: args.max_results] + + if args.format == "json": + print( + json.dumps( + [h.__dict__ for h in hits], + indent=2, + ) + ) + return 0 + + for i, h in enumerate(hits, 1): + print(f"{i}. 
{h.title}") + print(f" url: {h.url}") + print(f" route: {h.route}") + print(f" score: {h.score}") + if h.snippet: + print(f" snippet: {h.snippet}") + return 0 + + +def _cmd_get(args: argparse.Namespace) -> int: + base = _normalize_base(args.base) + cache_dir = pathlib.Path(args.cache_dir) if args.cache_dir else _default_cache_dir() + + docs_path, _manifest = _fetch_corpus( + base=base, + cache_dir=cache_dir, + refresh=args.refresh, + offline=args.offline, + check_updates=args.check_updates, + auto_update=args.auto_update, + ) + + target_route = args.route.strip() + if not target_route.startswith("/"): + target_route = "/" + target_route + + for record in _iter_jsonl(docs_path): + if (record.get("route") or "") == target_route: + if args.format == "json": + print(json.dumps(record, indent=2)) + else: + print(record.get("text") or "") + return 0 + + print(f"Not found: {target_route}") + return 1 + + +def _cmd_cleanup(args: argparse.Namespace) -> int: + base = _normalize_base(args.base) + cache_dir = pathlib.Path(args.cache_dir) if args.cache_dir else _default_cache_dir() + base_key = _sha256_hex(base)[:12] + root = cache_dir / base_key + if root.exists(): + for p in sorted(root.rglob("*"), reverse=True): + if p.is_file(): + p.unlink() + elif p.is_dir(): + try: + p.rmdir() + except OSError: + pass + try: + root.rmdir() + except OSError: + pass + print(f"Removed cache: {root}") + else: + print(f"No cache found: {root}") + return 0 + + +def _cmd_status(args: argparse.Namespace) -> int: + base = _normalize_base(args.base) + cache_dir = pathlib.Path(args.cache_dir) if args.cache_dir else _default_cache_dir() + base_key = _sha256_hex(base)[:12] + root = cache_dir / base_key + manifest_path = root / "manifest.json" + docs_path = root / "docs.jsonl" + meta_path = root / "meta.json" + + meta = _read_json(meta_path) or {} + manifest = _read_json(manifest_path) or {} + + def fmt_age(ts: Optional[int]) -> str: + if not ts: + return "unknown" + age = int(time.time()) - int(ts) + if age < 60: + return f"{age}s" + if age < 3600: + return f"{age//60}m" + if age < 86400: + return f"{age//3600}h" + return f"{age//86400}d" + + exists = root.exists() and docs_path.exists() and manifest_path.exists() + print(f"base: {base}") + print(f"cacheDir: {root}") + print(f"cached: {exists}") + if exists: + print(f"etag: {meta.get('etag') or ''}") + print(f"fetchedAt: {meta.get('fetchedAt') or ''} (age {fmt_age(meta.get('fetchedAt'))})") + if meta.get("checkedAt"): + print(f"checkedAt: {meta.get('checkedAt')} (age {fmt_age(meta.get('checkedAt'))})") + if manifest.get("generatedAt"): + print(f"manifest.generatedAt: {manifest.get('generatedAt')}") + if manifest.get("docCount") is not None: + print(f"manifest.docCount: {manifest.get('docCount')}") + + if args.check_updates: + # Touch the network once and update cache if needed. + _fetch_corpus( + base=base, + cache_dir=cache_dir, + refresh=args.refresh, + offline=False, + check_updates=True, + auto_update=False, + ) + print("updateCheck: done") + return 0 + + +def main(argv: List[str]) -> int: + parser = argparse.ArgumentParser( + prog="yearn_docs_query", + description="Fetch and query Yearn hosted docs AI corpus (docs.jsonl) with local caching.", + ) + parser.add_argument( + "--base", + default=os.environ.get("BASE", "https://docs.yearn.fi"), + help="Docs site origin (e.g. https://docs.yearn.fi or a Vercel preview). 
Defaults to https://docs.yearn.fi (or BASE env var if set).", + ) + parser.add_argument( + "--cache-dir", + default="", + help="Override cache directory (default: XDG cache or ~/.cache/yearn-docs-site-query).", + ) + parser.add_argument( + "--offline", + action="store_true", + help="Do not make network requests; require cached files to exist.", + ) + parser.add_argument( + "--refresh", + action="store_true", + help="Force refresh of cached corpus (ignore existing cache / etag).", + ) + parser.add_argument( + "--check-updates", + action="store_true", + help="Revalidate cached corpus against the remote (may download if changed).", + ) + parser.add_argument( + "--auto-update", + action="store_true", + default=True, + help="Check remote docs.jsonl via conditional request and download only if changed (default: on).", + ) + parser.add_argument( + "--no-auto-update", + dest="auto_update", + action="store_false", + help="Do not check the remote; use cached corpus if present (unless --refresh/--check-updates).", + ) + + sub = parser.add_subparsers(dest="cmd", required=True) + + p_search = sub.add_parser("search", help="Search the corpus and print best matches.") + p_search.add_argument("query", help="Search query.") + p_search.add_argument("--max-results", type=int, default=8) + p_search.add_argument("--format", choices=["text", "json"], default="text") + p_search.set_defaults(func=_cmd_search) + + p_get = sub.add_parser("get", help="Get a page by route and print its text (or full JSON).") + p_get.add_argument("route", help="Route path, e.g. /developers/security/multisig") + p_get.add_argument("--format", choices=["text", "json"], default="text") + p_get.set_defaults(func=_cmd_get) + + p_cleanup = sub.add_parser("cleanup", help="Delete cached corpus for this BASE.") + p_cleanup.set_defaults(func=_cmd_cleanup) + + p_status = sub.add_parser("status", help="Show cache status; optionally check for updates.") + p_status.set_defaults(func=_cmd_status) + + args = parser.parse_args(argv) + # If the user explicitly passed an empty --base (common shell one-liner footgun), + # fall back to the BASE env var. + if not (args.base or "").strip(): + args.base = os.environ.get("BASE", "https://docs.yearn.fi") + + return int(args.func(args)) + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) From 90965305aae394a5c2ac7068a014ec69cf74537f Mon Sep 17 00:00:00 2001 From: Ross Date: Fri, 2 Jan 2026 14:35:21 -0500 Subject: [PATCH 5/5] chore: move skill to public as it is for outside users to use to access these docs --- {skills => static/skills}/yearn-docs-site-query/SKILL.md | 0 .../skills}/yearn-docs-site-query/scripts/yearn_docs_query.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {skills => static/skills}/yearn-docs-site-query/SKILL.md (100%) rename {skills => static/skills}/yearn-docs-site-query/scripts/yearn_docs_query.py (100%) diff --git a/skills/yearn-docs-site-query/SKILL.md b/static/skills/yearn-docs-site-query/SKILL.md similarity index 100% rename from skills/yearn-docs-site-query/SKILL.md rename to static/skills/yearn-docs-site-query/SKILL.md diff --git a/skills/yearn-docs-site-query/scripts/yearn_docs_query.py b/static/skills/yearn-docs-site-query/scripts/yearn_docs_query.py similarity index 100% rename from skills/yearn-docs-site-query/scripts/yearn_docs_query.py rename to static/skills/yearn-docs-site-query/scripts/yearn_docs_query.py