Skip to content

Commit 9b59825

Browse files
committed
feat: add SEO pre-rendering, meta tags, sitemap, and robots.txt
Build-time pre-rendering generates static HTML for all 10 routes so search engines can index doc pages without executing JavaScript. Each page gets a unique title, meta description, canonical URL, Open Graph tags, Twitter card, and JSON-LD structured data. Sitemap.xml and robots.txt are generated/served automatically.
1 parent 7692938 commit 9b59825

9 files changed

Lines changed: 357 additions & 1 deletion

File tree

apps/web/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"type": "module",
66
"scripts": {
77
"dev": "vite",
8-
"build": "tsc && vite build",
8+
"build": "tsc && vite build && bun scripts/prerender.ts",
99
"preview": "vite preview",
1010
"typecheck": "tsc --noEmit",
1111
"deploy": "bun run build && wrangler pages deploy dist --project-name=ooxml-dev"

apps/web/public/robots.txt

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
User-agent: *
2+
Allow: /
3+
4+
Sitemap: https://ooxml.dev/sitemap.xml
5+
6+
# Block AI training crawlers
7+
User-agent: GPTBot
8+
Disallow: /
9+
10+
User-agent: CCBot
11+
Disallow: /
12+
13+
User-agent: Google-Extended
14+
Disallow: /
15+
16+
User-agent: Bytespider
17+
Disallow: /
18+
19+
User-agent: ClaudeBot
20+
Disallow: /
21+
22+
User-agent: Amazonbot
23+
Disallow: /
24+
25+
User-agent: Applebot-Extended
26+
Disallow: /
27+
28+
User-agent: meta-externalagent
29+
Disallow: /

apps/web/scripts/prerender.ts

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
/**
2+
* Build-time pre-rendering script.
3+
*
4+
* Runs after `vite build` to generate static HTML for each route.
5+
* This makes doc pages crawlable by search engines without SSR.
6+
*
7+
* Usage: bun apps/web/scripts/prerender.ts
8+
*/
9+
10+
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
11+
import { dirname, resolve } from "node:path";
12+
import { type DocPage, docs } from "../src/data/docs";
13+
import { getAllPaths, getSeoMeta } from "../src/data/seo";
14+
15+
// Absolute path of the Vite build output, resolved relative to this script's directory.
const DIST = resolve(import.meta.dir, "..", "dist");

// Origin used when building canonical URLs, Open Graph URLs and sitemap entries.
const SITE_URL = "https://ooxml.dev";

// The built index.html is loaded once and used as the template for every page.
const template = readFileSync(resolve(DIST, "index.html"), { encoding: "utf-8" });
20+
21+
// --- Content block → HTML converters ---
22+
23+
/**
 * Escape the characters that are significant in HTML text and in
 * double-quoted attribute values: `&`, `<`, `>` and `"`.
 *
 * `&` is handled first so that the entities produced for the other
 * characters are not escaped a second time.
 *
 * @param str Raw text to embed in HTML.
 * @returns The text with the four characters replaced by entities.
 */
function escapeHtml(str: string): string {
	return str
		.replace(/&/g, "&amp;")
		.replace(/</g, "&lt;")
		.replace(/>/g, "&gt;")
		.replace(/"/g, "&quot;");
}
30+
31+
/**
 * Convert the inline markdown subset used in doc content to HTML.
 *
 * Two constructs are recognised: `[text](url)` links and backtick code
 * spans (a code span inside link text becomes `<code>` inside the anchor).
 * All remaining text is HTML-escaped, so characters such as `<` or `&` in
 * ordinary prose cannot produce stray markup.
 *
 * The input is tokenised in a single pass; generated HTML is never scanned
 * again, so nothing is escaped twice.
 *
 * @param text Source text containing optional inline markdown.
 * @returns An HTML fragment.
 */
function inlineMarkdownToHtml(text: string): string {
	const codeSpan = /`([^`]+)`/g;
	const linkOrCode = /\[([^\]]+)\]\(([^)]+)\)|`([^`]+)`/g;

	// Escapes the text between matches and delegates each match to `toHtml`.
	const render = (
		segment: string,
		pattern: RegExp,
		toHtml: (match: RegExpMatchArray) => string,
	): string => {
		let html = "";
		let cursor = 0;
		for (const match of segment.matchAll(pattern)) {
			const start = match.index ?? 0;
			html += escapeHtml(segment.slice(cursor, start)) + toHtml(match);
			cursor = start + match[0].length;
		}
		return html + escapeHtml(segment.slice(cursor));
	};

	const code = (source: string): string => `<code>${escapeHtml(source)}</code>`;

	return render(text, linkOrCode, (match) => {
		const [, linkText, url, inlineCode] = match;
		if (inlineCode !== undefined) return code(inlineCode);
		const label = render(linkText ?? "", codeSpan, (inner) => code(inner[1] ?? ""));
		return `<a href="${escapeHtml(url ?? "")}">${label}</a>`;
	});
}
39+
40+
/**
 * Render one doc content block as a static HTML fragment.
 *
 * Heading, code and preview text is escaped verbatim; paragraph, note and
 * table text goes through the inline markdown converter. Block types not
 * handled here produce an empty string.
 */
function contentBlockToHtml(block: DocPage["content"][number]): string {
	switch (block.type) {
		case "heading": {
			const tag = `h${block.level}`;
			const text = escapeHtml(block.text);
			return `<${tag}>${text}</${tag}>`;
		}
		case "paragraph": {
			const body = inlineMarkdownToHtml(block.text);
			return `<p>${body}</p>`;
		}
		case "code": {
			const source = escapeHtml(block.code);
			return `<pre><code>${source}</code></pre>`;
		}
		case "preview": {
			// Previews are represented by their XML source.
			const source = escapeHtml(block.xml);
			return `<pre><code>${source}</code></pre>`;
		}
		case "note": {
			const title = `<strong>${escapeHtml(block.title)}</strong>`;
			const app = block.app ? ` <em>(${escapeHtml(block.app)})</em>` : "";
			const body = `<p>${inlineMarkdownToHtml(block.text)}</p>`;
			return `<div>${title}${app}${body}</div>`;
		}
		case "table": {
			const headerCells = block.headers
				.map((header) => `<th>${inlineMarkdownToHtml(header)}</th>`)
				.join("");
			const bodyRows = block.rows
				.map((row) => {
					const cells = row.map((cell) => `<td>${inlineMarkdownToHtml(cell)}</td>`).join("");
					return `<tr>${cells}</tr>`;
				})
				.join("");
			return `<table><thead><tr>${headerCells}</tr></thead><tbody>${bodyRows}</tbody></table>`;
		}
		default:
			return "";
	}
}
60+
61+
/**
 * Render a whole doc page as an `<article>`: optional badge, title,
 * optional description, then every content block. Parts are joined with
 * newlines.
 */
function docPageToHtml(page: DocPage): string {
	const badge = page.badge ? [`<span>${escapeHtml(page.badge)}</span>`] : [];
	const description = page.description ? [`<p>${escapeHtml(page.description)}</p>`] : [];
	const blocks: string[] = [];
	for (const block of page.content) {
		blocks.push(contentBlockToHtml(block));
	}

	return [
		`<article>`,
		...badge,
		`<h1>${escapeHtml(page.title)}</h1>`,
		...description,
		...blocks,
		`</article>`,
	].join("\n");
}
77+
78+
// --- Static HTML for non-doc pages ---
79+
80+
/**
 * Static HTML used as the pre-rendered body of the home page: a `<main>`
 * element with the site heading, two tagline paragraphs and a link to /docs.
 */
function homePageHtml(): string {
81+
return `<main>
82+
<h1>ooxml.dev</h1>
83+
<p>The OOXML spec, explained by people who actually implemented it.</p>
84+
<p>Interactive examples, real-world gotchas, live previews, and AI-powered search.</p>
85+
<a href="/docs">Browse Reference</a>
86+
</main>`;
87+
}
88+
89+
/**
 * Static HTML used as the pre-rendered body of the /mcp page: a heading,
 * an intro paragraph, the list of the three MCP tools and a short
 * "What is MCP?" section.
 */
function mcpPageHtml(): string {
90+
return `<main>
91+
<h1>Search the ECMA-376 spec with AI</h1>
92+
<p>Connect your MCP-compatible client to search 18,000+ specification chunks using natural language queries.</p>
93+
<h2>Available Tools</h2>
94+
<ul>
95+
<li><strong>search_ecma_spec</strong> — Semantic search across the specification.</li>
96+
<li><strong>get_section</strong> — Retrieve a specific section by ID.</li>
97+
<li><strong>list_parts</strong> — Browse the specification structure.</li>
98+
</ul>
99+
<h2>What is MCP?</h2>
100+
<p>The Model Context Protocol (MCP) is an open standard that lets AI assistants connect to external data sources and tools.</p>
101+
</main>`;
102+
}
103+
104+
/**
 * Static HTML used as the pre-rendered body of the /spec page: a heading
 * and a one-sentence description of the spec explorer.
 */
function specPageHtml(): string {
105+
return `<main>
106+
<h1>ECMA-376 Spec Explorer</h1>
107+
<p>Search and browse the ECMA-376 Office Open XML specification with semantic search and PDF viewer.</p>
108+
</main>`;
109+
}
110+
111+
// --- Meta tags and JSON-LD ---
112+
113+
/**
 * Build the `<head>` tags for a route: title, description, canonical link,
 * Open Graph and Twitter card tags, plus JSON-LD structured data for
 * article pages (TechArticle) and for the home page (WebSite).
 *
 * The tags are returned as one string with a tag per line. The `<title>`
 * tag is always the first line.
 *
 * @param path Route path beginning with "/", e.g. "/docs/paragraphs".
 * @returns The head tags joined by newlines.
 */
function buildHead(path: string): string {
	const seo = getSeoMeta(path);
	const url = `${SITE_URL}${path}`;

	// Escape once and reuse; every value below lands in text or a quoted attribute.
	const title = escapeHtml(seo.title);
	const description = escapeHtml(seo.description);
	const href = escapeHtml(url);

	const meta = [
		`<title>${title}</title>`,
		`<meta name="description" content="${description}"/>`,
		`<link rel="canonical" href="${href}"/>`,
		`<meta property="og:title" content="${title}"/>`,
		`<meta property="og:description" content="${description}"/>`,
		`<meta property="og:url" content="${href}"/>`,
		`<meta property="og:type" content="${seo.type}"/>`,
		`<meta property="og:site_name" content="ooxml.dev"/>`,
		`<meta name="twitter:card" content="summary"/>`,
		`<meta name="twitter:title" content="${title}"/>`,
		`<meta name="twitter:description" content="${description}"/>`,
	];

	// JSON-LD structured data
	let jsonLd: Record<string, unknown> | undefined;
	if (seo.type === "article") {
		jsonLd = {
			"@context": "https://schema.org",
			"@type": "TechArticle",
			// Keep only the part of the title before the " | " and " — " separators.
			headline: seo.title.split(" | ")[0].split(" — ")[0],
			description: seo.description,
			url,
			author: { "@type": "Organization", name: "SuperDoc", url: "https://superdoc.dev" },
			publisher: { "@type": "Organization", name: "ooxml.dev" },
			about: {
				"@type": "Thing",
				name: "Office Open XML",
				sameAs: "https://en.wikipedia.org/wiki/Office_Open_XML",
			},
		};
	} else if (path === "/") {
		jsonLd = {
			"@context": "https://schema.org",
			"@type": "WebSite",
			name: "ooxml.dev",
			url: SITE_URL,
			description: seo.description,
			potentialAction: {
				"@type": "SearchAction",
				target: { "@type": "EntryPoint", urlTemplate: `${SITE_URL}/spec?q={search_term}` },
				"query-input": "required name=search_term",
			},
		};
	}

	if (jsonLd) {
		// JSON.stringify does not escape "<", so a value containing "</script>"
		// would close the element early. "\u003c" is the same character to a
		// JSON parser but is inert to the HTML parser.
		const payload = JSON.stringify(jsonLd).replace(/</g, "\\u003c");
		meta.push(`<script type="application/ld+json">${payload}</script>`);
	}

	return meta.join("\n ");
}
166+
167+
// --- Generate HTML for a given path ---
168+
169+
/**
 * Pick the static body HTML for a route.
 *
 * "/", "/mcp" and "/spec" have dedicated generators. Any other path is
 * treated as a doc route: "/docs" maps to the "index" entry and
 * "/docs/<slug>" to the entry named <slug>. Unknown routes yield "".
 */
function getContentHtml(path: string): string {
	switch (path) {
		case "/":
			return homePageHtml();
		case "/mcp":
			return mcpPageHtml();
		case "/spec":
			return specPageHtml();
	}

	// Doc pages
	let slug = "index";
	if (path !== "/docs") {
		slug = path.replace("/docs/", "");
	}

	const page = docs[slug];
	return page ? docPageToHtml(page) : "";
}
181+
182+
/**
 * Produce the full static HTML document for a route from the built
 * index.html template.
 *
 * Three substitutions are made, in order: the template's `<title>` is
 * replaced by the route's title tag, the remaining head tags are inserted
 * before `</head>`, and the route's body HTML is placed inside the empty
 * `<div id="root">`.
 *
 * Every replacement is supplied through a function. With a plain string,
 * `String.prototype.replace` expands sequences such as `$&`, `$1`, `` $` ``
 * and `$'`, which would corrupt titles or page content containing a dollar
 * sign.
 *
 * @param path Route path beginning with "/".
 * @returns The complete HTML document for that route.
 */
function renderPage(path: string): string {
	// The first line of the head block is the <title> tag.
	const [titleTag = "", ...otherTags] = buildHead(path).split("\n");
	const remainingMeta = otherTags.join("\n ");
	const content = getContentHtml(path);

	return template
		.replace(/<title>[^<]*<\/title>/, () => titleTag)
		.replace("</head>", () => ` ${remainingMeta}\n </head>`)
		.replace('<div id="root"></div>', () => `<div id="root">${content}</div>`);
}
203+
204+
// --- Sitemap generation ---
205+
206+
/**
 * Build the sitemap.xml document for the given route paths.
 *
 * Each path becomes one `<url>` entry whose `<loc>` is SITE_URL followed by
 * the path. Paths are inserted as-is, without XML escaping.
 */
function generateSitemap(paths: string[]): string {
207+
const urls = paths.map((path) => {
208+
// Home page ranks highest, "/docs/..." pages next, every other page lowest.
const priority = path === "/" ? "1.0" : path.startsWith("/docs/") ? "0.8" : "0.7";
209+
// Only the home page is advertised as changing weekly.
const changefreq = path === "/" ? "weekly" : "monthly";
210+
return ` <url>
211+
<loc>${SITE_URL}${path}</loc>
212+
<changefreq>${changefreq}</changefreq>
213+
<priority>${priority}</priority>
214+
</url>`;
215+
});
216+
217+
return `<?xml version="1.0" encoding="UTF-8"?>
218+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
219+
${urls.join("\n")}
220+
</urlset>`;
221+
}
222+
223+
// --- Main ---
224+
225+
const paths = getAllPaths();
let count = 0;

for (const path of paths) {
	const html = renderPage(path);

	// "/" overwrites dist/index.html; every other route gets <route>/index.html.
	const relativeFile = path === "/" ? "index.html" : `${path.slice(1)}/index.html`;
	const filePath = resolve(DIST, relativeFile);

	mkdirSync(dirname(filePath), { recursive: true });
	writeFileSync(filePath, html);
	count += 1;
	console.log(` ✓ ${path}`);
}

// Generate sitemap
writeFileSync(resolve(DIST, "sitemap.xml"), generateSitemap(paths));
console.log(` ✓ /sitemap.xml`);

console.log(`\nPre-rendered ${count} pages + sitemap.`);

apps/web/src/data/seo.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { docs } from "./docs";
2+
3+
/** SEO metadata for a single route. */
export interface SeoMeta {
	/** Full page title. */
	title: string;
	/** Text for the description meta tags. */
	description: string;
	/** Page kind: "website" or "article". */
	type: "website" | "article";
}
8+
9+
/** Hand-written SEO metadata for routes that are not generated from a doc entry, keyed by path. */
const staticPages: Record<string, SeoMeta> = {
	// Landing page
	"/": {
		title: "ooxml.dev — The Implementer's Guide to OOXML (ECMA-376)",
		description: "Interactive OOXML reference with live previews, implementation notes, and real-world gotchas. Built by the SuperDoc team.",
		type: "website",
	},
	// MCP server page
	"/mcp": {
		title: "ECMA-376 MCP Server — Search the OOXML Spec with AI | ooxml.dev",
		description: "Connect your AI assistant to search 18,000+ OOXML specification chunks. Works with Claude Code, Cursor, and any MCP-compatible client.",
		type: "website",
	},
	// Spec explorer
	"/spec": {
		title: "ECMA-376 Spec Explorer | ooxml.dev",
		description: "Search and browse the ECMA-376 Office Open XML specification with semantic search and PDF viewer.",
		type: "website",
	},
	// Docs landing page, the only static entry typed as an article
	"/docs": {
		title: "OOXML Reference — Getting Started | ooxml.dev",
		description: "Learn the basics of OOXML (Office Open XML) and how to use this interactive reference.",
		type: "article",
	},
};
35+
36+
/**
 * Look up SEO metadata for a route.
 *
 * Resolution order:
 *  1. an exact match in `staticPages`;
 *  2. a doc entry whose slug is the path with "/docs/" removed;
 *  3. the home page metadata as a fallback.
 *
 * Doc titles have the form "<title>[ (<badge>)] — <subtitle> | ooxml.dev",
 * where the subtitle is the page description or "OOXML Reference". The
 * " — " separator keeps the title readable and matches the static titles.
 *
 * @param path Route path beginning with "/".
 * @returns Metadata for the route, or the home page metadata if unknown.
 */
export function getSeoMeta(path: string): SeoMeta {
	const staticMeta = staticPages[path];
	if (staticMeta) {
		return staticMeta;
	}

	const slug = path.replace("/docs/", "");
	const page = docs[slug];
	if (!page) {
		return staticPages["/"];
	}

	const badge = page.badge ? ` (${page.badge})` : "";
	const subtitle = page.description || "OOXML Reference";
	return {
		title: `${page.title}${badge} — ${subtitle} | ooxml.dev`,
		description:
			page.description || `${page.title} — interactive OOXML reference with live previews.`,
		type: "article",
	};
}
55+
56+
/**
 * List every route to pre-render: the four fixed routes followed by one
 * "/docs/<slug>" route per doc entry. The "index" entry is omitted because
 * "/docs" already covers it.
 */
export function getAllPaths(): string[] {
	const docPaths = Object.keys(docs)
		.filter((slug) => slug !== "index")
		.map((slug) => `/docs/${slug}`);

	return ["/", "/mcp", "/spec", "/docs", ...docPaths];
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import { useEffect } from "react";
2+
3+
/**
 * React hook that assigns `title` to `document.title` from an effect;
 * the effect's only dependency is `title`.
 */
export function useDocumentTitle(title: string) {
	useEffect(
		function syncDocumentTitle() {
			document.title = title;
		},
		[title],
	);
}

apps/web/src/pages/Home.tsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import { Link } from "react-router-dom";
22
import { Footer } from "../components/Footer";
33
import { Navbar } from "../components/Navbar";
4+
import { getSeoMeta } from "../data/seo";
5+
import { useDocumentTitle } from "../hooks/useDocumentTitle";
46

57
export function Home() {
8+
useDocumentTitle(getSeoMeta("/").title);
69
return (
710
<div className="min-h-screen bg-[var(--color-bg-primary)]">
811
<Navbar maxWidth />

apps/web/src/pages/Mcp.tsx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import { useState } from "react";
22
import { Link } from "react-router-dom";
33
import { Navbar } from "../components/Navbar";
4+
import { getSeoMeta } from "../data/seo";
5+
import { useDocumentTitle } from "../hooks/useDocumentTitle";
46

57
const MCP_ENDPOINT = `${import.meta.env.VITE_API_URL}/mcp`;
68
const CLAUDE_COMMAND = `claude mcp add --transport http ecma-spec ${MCP_ENDPOINT}`;
@@ -31,6 +33,7 @@ const EXAMPLE_QUERIES = [
3133
type TabId = "claude" | "cursor" | "other";
3234

3335
export function Mcp() {
36+
useDocumentTitle(getSeoMeta("/mcp").title);
3437
const [copiedEndpoint, setCopiedEndpoint] = useState(false);
3538
const [copiedCommand, setCopiedCommand] = useState(false);
3639
const [activeTab, setActiveTab] = useState<TabId>("claude");

0 commit comments

Comments
 (0)