|
| 1 | +/** |
| 2 | + * Build-time pre-rendering script. |
| 3 | + * |
| 4 | + * Runs after `vite build` to generate static HTML for each route. |
| 5 | + * This makes doc pages crawlable by search engines without SSR. |
| 6 | + * |
| 7 | + * Usage: bun apps/web/scripts/prerender.ts |
| 8 | + */ |
| 9 | + |
| 10 | +import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; |
| 11 | +import { dirname, resolve } from "node:path"; |
| 12 | +import { type DocPage, docs } from "../src/data/docs"; |
| 13 | +import { getAllPaths, getSeoMeta } from "../src/data/seo"; |
| 14 | + |
// Absolute path of the build output directory, resolved relative to this script.
const DIST = resolve(import.meta.dir, "../dist");
// Public origin prepended to every route when building absolute URLs.
const SITE_URL = "https://ooxml.dev";

// The built index.html is loaded once and reused as the template for every page.
const template = readFileSync(resolve(DIST, "index.html"), { encoding: "utf-8" });
| 20 | + |
| 21 | +// --- Content block → HTML converters --- |
| 22 | + |
/**
 * Escapes the characters that are significant in HTML text and in
 * double-quoted attribute values.
 *
 * The ampersand is replaced first so that the entities produced by the later
 * replacements are not escaped a second time.
 *
 * @param str - Raw text to embed in HTML.
 * @returns The text with `&`, `<`, `>` and `"` replaced by character references.
 */
function escapeHtml(str: string): string {
  return str
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
| 30 | + |
/**
 * Converts the inline Markdown subset used in doc text to HTML.
 *
 * Supported syntax is `[text](url)` links and backtick code spans. Everything
 * else is treated as plain text and HTML-escaped, so literal markup such as
 * `<w:p>` or `&` in prose cannot leak into the page as real tags.
 *
 * The text is tokenised in a single pass so that each piece is escaped exactly
 * once and the output of one rule is never re-processed by the other (a link
 * written inside a code span stays literal).
 *
 * @param text - Inline Markdown source.
 * @returns An HTML fragment.
 */
function inlineMarkdownToHtml(text: string): string {
  const tokenPattern = /`([^`]+)`|\[([^\]]+)\]\(([^)]+)\)/g;
  let html = "";
  let cursor = 0;

  for (const match of text.matchAll(tokenPattern)) {
    const start = match.index ?? 0;
    const [whole, code, linkText, url] = match;

    // Plain text between the previous token and this one.
    html += escapeHtml(text.slice(cursor, start));

    if (code !== undefined) {
      html += `<code>${escapeHtml(code)}</code>`;
    } else {
      // Link text cannot contain "]", so the recursive call can only ever
      // find code spans inside it, never a nested link.
      html += `<a href="${escapeHtml(url ?? "")}">${inlineMarkdownToHtml(linkText ?? "")}</a>`;
    }
    cursor = start + whole.length;
  }

  // Trailing plain text after the last token (or the whole input if none matched).
  return html + escapeHtml(text.slice(cursor));
}
| 39 | + |
/**
 * Renders a single doc content block as an HTML fragment.
 *
 * Headings, code and preview blocks are escaped verbatim; paragraphs, notes
 * and table cells go through the inline Markdown converter. Block types that
 * are not handled here produce an empty string.
 *
 * @param block - One entry of a doc page's `content` array.
 * @returns The HTML for the block, or `""` for an unhandled type.
 */
function contentBlockToHtml(block: DocPage["content"][number]): string {
  if (block.type === "heading") {
    const tag = `h${block.level}`;
    return `<${tag}>${escapeHtml(block.text)}</${tag}>`;
  }

  if (block.type === "paragraph") {
    return `<p>${inlineMarkdownToHtml(block.text)}</p>`;
  }

  if (block.type === "code") {
    return `<pre><code>${escapeHtml(block.code)}</code></pre>`;
  }

  if (block.type === "preview") {
    // Previews are emitted as their XML source.
    return `<pre><code>${escapeHtml(block.xml)}</code></pre>`;
  }

  if (block.type === "note") {
    const title = `<strong>${escapeHtml(block.title)}</strong>`;
    const appSuffix = block.app ? ` <em>(${escapeHtml(block.app)})</em>` : "";
    const body = `<p>${inlineMarkdownToHtml(block.text)}</p>`;
    return `<div>${title}${appSuffix}${body}</div>`;
  }

  if (block.type === "table") {
    const headCells = block.headers
      .map((header) => `<th>${inlineMarkdownToHtml(header)}</th>`)
      .join("");
    const bodyRows = block.rows
      .map((row) => {
        const cells = row.map((cell) => `<td>${inlineMarkdownToHtml(cell)}</td>`).join("");
        return `<tr>${cells}</tr>`;
      })
      .join("");
    return `<table><thead><tr>${headCells}</tr></thead><tbody>${bodyRows}</tbody></table>`;
  }

  return "";
}
| 60 | + |
/**
 * Renders a whole doc page as an `<article>` element.
 *
 * Output order: optional badge, title, optional description, then every
 * content block, one fragment per line.
 *
 * @param page - The doc page to render.
 * @returns The article HTML, with fragments joined by newlines.
 */
function docPageToHtml(page: DocPage): string {
  const header = [
    `<article>`,
    ...(page.badge ? [`<span>${escapeHtml(page.badge)}</span>`] : []),
    `<h1>${escapeHtml(page.title)}</h1>`,
    ...(page.description ? [`<p>${escapeHtml(page.description)}</p>`] : []),
  ];

  // Empty fragments from unhandled block types are kept, exactly as rendered.
  const body: string[] = [];
  for (const block of page.content) {
    body.push(contentBlockToHtml(block));
  }

  return [...header, ...body, `</article>`].join("\n");
}
| 77 | + |
| 78 | +// --- Static HTML for non-doc pages --- |
| 79 | + |
/** Returns the static fallback markup for the home page ("/"). */
function homePageHtml(): string {
  const lines = [
    "<main>",
    "<h1>ooxml.dev</h1>",
    "<p>The OOXML spec, explained by people who actually implemented it.</p>",
    "<p>Interactive examples, real-world gotchas, live previews, and AI-powered search.</p>",
    '<a href="/docs">Browse Reference</a>',
    "</main>",
  ];
  return lines.join("\n");
}
| 88 | + |
/** Returns the static fallback markup for the MCP page ("/mcp"). */
function mcpPageHtml(): string {
  const lines = [
    "<main>",
    "<h1>Search the ECMA-376 spec with AI</h1>",
    "<p>Connect your MCP-compatible client to search 18,000+ specification chunks using natural language queries.</p>",
    "<h2>Available Tools</h2>",
    "<ul>",
    "<li><strong>search_ecma_spec</strong> — Semantic search across the specification.</li>",
    "<li><strong>get_section</strong> — Retrieve a specific section by ID.</li>",
    "<li><strong>list_parts</strong> — Browse the specification structure.</li>",
    "</ul>",
    "<h2>What is MCP?</h2>",
    "<p>The Model Context Protocol (MCP) is an open standard that lets AI assistants connect to external data sources and tools.</p>",
    "</main>",
  ];
  return lines.join("\n");
}
| 103 | + |
/** Returns the static fallback markup for the spec explorer page ("/spec"). */
function specPageHtml(): string {
  const heading = "<h1>ECMA-376 Spec Explorer</h1>";
  const blurb =
    "<p>Search and browse the ECMA-376 Office Open XML specification with semantic search and PDF viewer.</p>";
  return ["<main>", heading, blurb, "</main>"].join("\n");
}
| 110 | + |
| 111 | +// --- Meta tags and JSON-LD --- |
| 112 | + |
/**
 * Serializes a JSON-LD object into an inline `<script>` tag.
 *
 * "<" never has to appear literally in JSON, so it is emitted as `\u003c`.
 * This keeps a value containing `</script>` or `<!--` from terminating or
 * confusing the surrounding script element; JSON parsers decode it back to "<".
 */
function jsonLdScriptTag(data: Record<string, unknown>): string {
  const json = JSON.stringify(data).replace(/</g, "\\u003c");
  return `<script type="application/ld+json">${json}</script>`;
}

/**
 * Builds the `<head>` tags for a route: title, description, canonical link,
 * Open Graph and Twitter card metadata, plus JSON-LD structured data for
 * article pages and for the home page.
 *
 * Tags are joined with a newline separator and the `<title>` tag is always
 * first; `renderPage` relies on both when it splits the result.
 *
 * @param path - Site-relative route, e.g. `/` or `/docs/paragraphs`.
 * @returns The head tags as one newline-separated string.
 */
function buildHead(path: string): string {
  const seo = getSeoMeta(path);
  const url = `${SITE_URL}${path}`;

  // Every value that lands inside markup is escaped, including the URL and
  // page type, so an unusual route or SEO value cannot break out of an attribute.
  const title = escapeHtml(seo.title);
  const description = escapeHtml(seo.description);
  const attrUrl = escapeHtml(url);

  const meta = [
    `<title>${title}</title>`,
    `<meta name="description" content="${description}"/>`,
    `<link rel="canonical" href="${attrUrl}"/>`,
    `<meta property="og:title" content="${title}"/>`,
    `<meta property="og:description" content="${description}"/>`,
    `<meta property="og:url" content="${attrUrl}"/>`,
    `<meta property="og:type" content="${escapeHtml(seo.type)}"/>`,
    `<meta property="og:site_name" content="ooxml.dev"/>`,
    `<meta name="twitter:card" content="summary"/>`,
    `<meta name="twitter:title" content="${title}"/>`,
    `<meta name="twitter:description" content="${description}"/>`,
  ];

  // JSON-LD structured data
  if (seo.type === "article") {
    meta.push(
      jsonLdScriptTag({
        "@context": "https://schema.org",
        "@type": "TechArticle",
        // Keep only the part of the title before the first " | " or " — " separator.
        headline: seo.title.split(" | ")[0].split(" — ")[0],
        description: seo.description,
        url,
        author: { "@type": "Organization", name: "SuperDoc", url: "https://superdoc.dev" },
        publisher: { "@type": "Organization", name: "ooxml.dev" },
        about: {
          "@type": "Thing",
          name: "Office Open XML",
          sameAs: "https://en.wikipedia.org/wiki/Office_Open_XML",
        },
      }),
    );
  } else if (path === "/") {
    meta.push(
      jsonLdScriptTag({
        "@context": "https://schema.org",
        "@type": "WebSite",
        name: "ooxml.dev",
        url: SITE_URL,
        description: seo.description,
        potentialAction: {
          "@type": "SearchAction",
          target: { "@type": "EntryPoint", urlTemplate: `${SITE_URL}/spec?q={search_term}` },
          "query-input": "required name=search_term",
        },
      }),
    );
  }

  return meta.join("\n ");
}
| 166 | + |
| 167 | +// --- Generate HTML for a given path --- |
| 168 | + |
/**
 * Selects the pre-rendered body markup for a route.
 *
 * The three fixed pages have hand-written markup; every other route is looked
 * up in `docs`, where `/docs` maps to the `index` entry.
 *
 * @param path - Site-relative route.
 * @returns The body HTML, or `""` when no doc page matches.
 */
function getContentHtml(path: string): string {
  switch (path) {
    case "/":
      return homePageHtml();
    case "/mcp":
      return mcpPageHtml();
    case "/spec":
      return specPageHtml();
  }

  // Doc pages
  const slug = path === "/docs" ? "index" : path.replace("/docs/", "");
  const page = docs[slug];
  return page ? docPageToHtml(page) : "";
}
| 181 | + |
/**
 * Produces the final HTML document for a route by filling the built
 * `index.html` template.
 *
 * Three substitutions are made: the `<title>` element is replaced, the other
 * head tags are inserted before `</head>`, and the page content is placed
 * inside the empty `<div id="root">`.
 *
 * Each replacement is supplied through a function rather than a string. With
 * a string, `String.prototype.replace` expands sequences such as `$&`, `$'`
 * and `$$`, which would corrupt titles, descriptions or code samples that
 * contain a dollar sign.
 *
 * @param path - Site-relative route to render.
 * @returns The complete HTML document for that route.
 */
function renderPage(path: string): string {
  // The first line returned by buildHead is the <title> tag; the rest are meta tags.
  const [titleTag = "", ...otherTags] = buildHead(path).split("\n");
  const content = getContentHtml(path);
  const remainingMeta = otherTags.join("\n ");

  return template
    .replace(/<title>[^<]*<\/title>/, () => titleTag)
    .replace("</head>", () => ` ${remainingMeta}\n </head>`)
    .replace('<div id="root"></div>', () => `<div id="root">${content}</div>`);
}
| 203 | + |
| 204 | +// --- Sitemap generation --- |
| 205 | + |
/**
 * Builds the sitemap XML document for the given routes.
 *
 * The home page gets priority 1.0 and a weekly change frequency; routes under
 * `/docs/` get 0.8 and everything else 0.7, both monthly.
 *
 * @param paths - Site-relative routes to list.
 * @returns The sitemap XML as a string.
 */
function generateSitemap(paths: string[]): string {
  const entries: string[] = [];

  for (const path of paths) {
    const isHome = path === "/";

    let priority = "0.7";
    if (isHome) {
      priority = "1.0";
    } else if (path.startsWith("/docs/")) {
      priority = "0.8";
    }
    const changefreq = isHome ? "weekly" : "monthly";

    entries.push(
      [
        " <url>",
        ` <loc>${SITE_URL}${path}</loc>`,
        ` <changefreq>${changefreq}</changefreq>`,
        ` <priority>${priority}</priority>`,
        " </url>",
      ].join("\n"),
    );
  }

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    entries.join("\n"),
    "</urlset>",
  ].join("\n");
}
| 222 | + |
| 223 | +// --- Main --- |
| 224 | + |
const paths = getAllPaths();
let count = 0;

// Render every route and write it to <route>/index.html inside DIST.
for (const path of paths) {
  const html = renderPage(path);

  // "/" overwrites the built index.html; any other route gets its own directory.
  const relativeFile = path === "/" ? "index.html" : `${path.slice(1)}/index.html`;
  const filePath = resolve(DIST, relativeFile);

  mkdirSync(dirname(filePath), { recursive: true });
  writeFileSync(filePath, html);
  count += 1;
  console.log(` ✓ ${path}`);
}

// The sitemap lists the same routes that were just rendered.
writeFileSync(resolve(DIST, "sitemap.xml"), generateSitemap(paths));
console.log(` ✓ /sitemap.xml`);

console.log(`\nPre-rendered ${count} pages + sitemap.`);
0 commit comments