Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions docusaurus.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ import { themes as prismThemes } from "prism-react-renderer";
import type { Config } from "@docusaurus/types";
import type * as Preset from "@docusaurus/preset-classic";
// eslint-disable-next-line @typescript-eslint/no-require-imports
const { CURRENT_VERSION, ACTIVE_VERSIONS, LEGACY_VERSIONS } = require("./scripts/lib/version-policy.js") as {
CURRENT_VERSION: string;
ACTIVE_VERSIONS: string[];
LEGACY_VERSIONS: string[];
};
const { CURRENT_VERSION, ACTIVE_VERSIONS, LEGACY_VERSIONS, BUILT_VERSIONS } =
require("./scripts/lib/version-policy.js") as {
CURRENT_VERSION: string;
ACTIVE_VERSIONS: string[];
LEGACY_VERSIONS: string[];
BUILT_VERSIONS: string[];
};

// This runs in Node.js - Don't use client-side code here (browser APIs, JSX...)

Expand All @@ -27,6 +29,10 @@ const legacyVersionsAsNoIndex: Record<string, { noIndex: true }> = Object.fromEn
LEGACY_VERSIONS.map((v) => [v, { noIndex: true }])
);

// Allowlist of doc versions that remain in the sitemap. It holds only the current
// version: every other built version canonicalizes to it, so those versions are
// turned into sitemap ignore patterns further down in this config.
const sitemapVersions: string[] = [CURRENT_VERSION];

const config: Config = {
title: "RavenDB Documentation",
tagline: "High-performance NoSQL database that just works.",
Expand Down Expand Up @@ -115,7 +121,7 @@ const config: Config = {
lastmod: "date",
changefreq: null,
priority: null,
ignorePatterns: LEGACY_VERSIONS.map((v) => `/${v}/**`),
ignorePatterns: BUILT_VERSIONS.filter((v) => !sitemapVersions.includes(v)).map((v) => `/${v}/**`),
},
googleTagManager: {
containerId: "GTM-TDH4JWF2",
Expand Down
8 changes: 4 additions & 4 deletions scripts/split-sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import path from "node:path";
import { fileURLToPath } from "node:url";
import { splitSitemap, type SplitSucceeded } from "../src/lib/split-sitemap/lib/split.js";
import { LEGACY_VERSIONS } from "./lib/version-policy.js";
import { CURRENT_VERSION } from "./lib/version-policy.js";

const BASE_URL = "https://docs.ravendb.net";

Expand All @@ -25,20 +25,20 @@ const __dirname = path.dirname(__filename);

const buildDir = process.argv[2] ?? path.join(__dirname, "..", "build");

const result = splitSitemap({ buildDir, legacyVersions: LEGACY_VERSIONS, baseUrl: BASE_URL });
const result = splitSitemap({ buildDir, currentVersion: CURRENT_VERSION, baseUrl: BASE_URL });

if (result.skipped) {
console.log(`[split-sitemap] skipped: ${result.reason}`);
process.exit(0);
} else {
// Discriminated-union narrowing on boolean literals requires strictNullChecks, which
// this project's tsconfig does not enable. process.exit() above makes the cast safe.
const { files, includedUrls, skippedLegacyUrls } = result as SplitSucceeded;
const { files, includedUrls, skippedVersionUrls } = result as SplitSucceeded;
for (const { name, urls } of files) {
console.log(`[split-sitemap] ${name}: ${urls} URLs`);
}
console.log(
`[split-sitemap] split into ${files.length} sub-sitemaps ` +
`(${includedUrls} URLs included, ${skippedLegacyUrls} legacy URLs excluded)`
`(${includedUrls} URLs included, ${skippedVersionUrls} non-current-version URLs excluded)`
);
}
33 changes: 22 additions & 11 deletions src/lib/split-sitemap/__tests__/split.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import path from "node:path";
import { splitSitemap } from "../lib/split.js";

const BASE_URL = "https://docs.ravendb.net";
const LEGACY = ["4.2", "5.4"];
const CURRENT = "7.2";

function urlBlock(loc: string): string {
return `<url><loc>${loc}</loc><changefreq>weekly</changefreq></url>`;
Expand All @@ -33,12 +33,12 @@ function withTempBuildDir(body: (dir: string) => void): void {

test("splitSitemap skips when sitemap.xml is absent", () => {
withTempBuildDir((dir) => {
const result = splitSitemap({ buildDir: dir, legacyVersions: LEGACY, baseUrl: BASE_URL });
const result = splitSitemap({ buildDir: dir, currentVersion: CURRENT, baseUrl: BASE_URL });
assert.equal(result.skipped, true);
});
});

test("splitSitemap groups URLs by section and version", () => {
test("splitSitemap groups URLs by section and keeps only the current doc version", () => {
withTempBuildDir((dir) => {
const urls = [
`${BASE_URL}/7.2/foo`,
Expand All @@ -51,15 +51,15 @@ test("splitSitemap groups URLs by section and version", () => {
`${BASE_URL}/search`,
];
fs.writeFileSync(path.join(dir, "sitemap.xml"), buildSitemap(urls));
const result = splitSitemap({ buildDir: dir, legacyVersions: LEGACY, baseUrl: BASE_URL });
const result = splitSitemap({ buildDir: dir, currentVersion: CURRENT, baseUrl: BASE_URL });
assert.equal(result.skipped, false);
if (result.skipped) {
return;
}
const names = result.files.map((f) => f.name).sort();
// 6.2 is not the current version, so it gets no sitemap file.
assert.deepEqual(names, [
"sitemap-cloud.xml",
"sitemap-docs-6.2.xml",
"sitemap-docs-7.2.xml",
"sitemap-guides.xml",
"sitemap-misc.xml",
Expand All @@ -72,26 +72,37 @@ test("splitSitemap groups URLs by section and version", () => {
});
});

test("splitSitemap excludes legacy-version URLs", () => {
test("splitSitemap excludes every non-current doc version (active and legacy)", () => {
withTempBuildDir((dir) => {
const urls = [`${BASE_URL}/7.2/ok`, `${BASE_URL}/4.2/legacy`, `${BASE_URL}/5.4/also-legacy`];
const urls = [
`${BASE_URL}/7.2/ok`,
`${BASE_URL}/7.1/active`, // active, non-current → canonical points at 7.2
`${BASE_URL}/6.2/active`, // active, non-current → canonical points at 7.2
`${BASE_URL}/4.2/legacy`,
`${BASE_URL}/5.4/also-legacy`,
];
fs.writeFileSync(path.join(dir, "sitemap.xml"), buildSitemap(urls));
const result = splitSitemap({ buildDir: dir, legacyVersions: LEGACY, baseUrl: BASE_URL });
const result = splitSitemap({ buildDir: dir, currentVersion: CURRENT, baseUrl: BASE_URL });
assert.equal(result.skipped, false);
if (result.skipped) {
return;
}
assert.equal(result.includedUrls, 1);
assert.equal(result.skippedLegacyUrls, 2);
assert.ok(!result.files.some((f) => f.name.includes("4.2") || f.name.includes("5.4")));
assert.equal(result.skippedVersionUrls, 4);
assert.ok(
!result.files.some(
(f) =>
f.name.includes("7.1") || f.name.includes("6.2") || f.name.includes("4.2") || f.name.includes("5.4")
)
);
});
});

test("splitSitemap replaces sitemap.xml with a sitemapindex referencing each sub-file", () => {
withTempBuildDir((dir) => {
const urls = [`${BASE_URL}/7.2/foo`, `${BASE_URL}/cloud/x`];
fs.writeFileSync(path.join(dir, "sitemap.xml"), buildSitemap(urls));
splitSitemap({ buildDir: dir, legacyVersions: LEGACY, baseUrl: BASE_URL });
splitSitemap({ buildDir: dir, currentVersion: CURRENT, baseUrl: BASE_URL });
const indexXml = fs.readFileSync(path.join(dir, "sitemap.xml"), "utf8");
assert.match(indexXml, /<sitemapindex/);
assert.match(indexXml, /sitemap-docs-7\.2\.xml/);
Expand Down
29 changes: 15 additions & 14 deletions src/lib/split-sitemap/lib/split.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
* /cloud/* → sitemap-cloud.xml
* /guides/* → sitemap-guides.xml
* /samples/* → sitemap-samples.xml
* /X.Y/* → sitemap-docs-X.Y.xml (if X.Y isn't legacy)
* /X.Y/* → sitemap-docs-X.Y.xml (current version only)
* other → sitemap-misc.xml (search, root pages, etc.)
*
* Legacy versions are excluded entirely (they're also disallowed in robots.txt).
* Only the current (canonical) version's docs belong in the sitemap. Pages of every
* other version set their canonical URL to the current version's page, and legacy
* versions are additionally disallowed in robots.txt.
*
* Invoked from scripts/split-sitemap.ts as a post-`docusaurus build` step —
* NOT a Docusaurus plugin, because @docusaurus/plugin-sitemap writes
Expand All @@ -26,8 +27,8 @@ import path from "node:path";
export interface SplitOptions {
/** Absolute path to the output build directory (where sitemap.xml lives). */
buildDir: string;
/** Legacy versions — excluded from the sitemap index. */
legacyVersions: readonly string[];
/** Current (canonical) version — the only doc version kept in the sitemap. */
currentVersion: string;
/** Site origin for sitemapindex <loc> entries. No trailing slash. */
baseUrl: string;
}
Expand All @@ -41,7 +42,7 @@ export interface SplitSucceeded {
skipped: false;
files: { name: string; urls: number }[];
includedUrls: number;
skippedLegacyUrls: number;
skippedVersionUrls: number;
}

const SECTION_MAP: Record<string, string> = {
Expand All @@ -58,7 +59,7 @@ const URLSET_OPEN =
'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" ' +
'xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">';

function getSitemapFile(loc: string, legacySet: Set<string>, baseUrl: string): string | null {
function getSitemapFile(loc: string, currentVersion: string, baseUrl: string): string | null {
const prefix = `${baseUrl}/`;
const urlPath = loc.startsWith(prefix) ? loc.slice(prefix.length) : loc.replace(/^\//, "");
const firstSegment = urlPath.split("/")[0];
Expand All @@ -67,7 +68,8 @@ function getSitemapFile(loc: string, legacySet: Set<string>, baseUrl: string): s
return SECTION_MAP[firstSegment];
}
if (/^\d+\.\d+$/.test(firstSegment)) {
if (legacySet.has(firstSegment)) {
// Allowlist: keep only the current (canonical) version; drop all others.
if (firstSegment !== currentVersion) {
return null;
}
return `sitemap-docs-${firstSegment}.xml`;
Expand All @@ -76,7 +78,7 @@ function getSitemapFile(loc: string, legacySet: Set<string>, baseUrl: string): s
}

export function splitSitemap(options: SplitOptions): SplitResult | SplitSucceeded {
const { buildDir, legacyVersions, baseUrl } = options;
const { buildDir, currentVersion, baseUrl } = options;
const sitemapPath = path.join(buildDir, "sitemap.xml");

if (!fs.existsSync(sitemapPath)) {
Expand All @@ -89,18 +91,17 @@ export function splitSitemap(options: SplitOptions): SplitResult | SplitSucceede
return { skipped: true, reason: "sitemap.xml contains no URLs" };
}

const legacySet = new Set(legacyVersions);
const groups: Record<string, string[]> = {};
let skippedLegacy = 0;
let skippedVersions = 0;

for (const block of urlBlocks) {
const locMatch = block.match(/<loc>(.*?)<\/loc>/);
if (!locMatch) {
continue;
}
const file = getSitemapFile(locMatch[1], legacySet, baseUrl);
const file = getSitemapFile(locMatch[1], currentVersion, baseUrl);
if (!file) {
skippedLegacy++;
skippedVersions++;
continue;
}
(groups[file] ??= []).push(block);
Expand Down Expand Up @@ -131,7 +132,7 @@ export function splitSitemap(options: SplitOptions): SplitResult | SplitSucceede
return {
skipped: false,
files: result,
includedUrls: urlBlocks.length - skippedLegacy,
skippedLegacyUrls: skippedLegacy,
includedUrls: urlBlocks.length - skippedVersions,
skippedVersionUrls: skippedVersions,
};
}
Loading