Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/per-collection-sitemaps.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"emdash": minor
---

Per-collection sitemaps with sitemap index and lastmod

`/sitemap.xml` now serves a `<sitemapindex>` with one child sitemap per SEO-enabled collection. Each collection's sitemap is served at `/sitemap-{collection}.xml`, with `<lastmod>` on both the index entries and the individual URLs. Entry URLs are built from the collection's `url_pattern`, falling back to `/{collection}/{slug}` when no pattern is configured.
7 changes: 6 additions & 1 deletion packages/core/src/api/handlers/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ export {
} from "./schema.js";

// SEO handlers
export { handleSitemapData, type SitemapContentEntry, type SitemapDataResponse } from "./seo.js";
export {
handleSitemapData,
type SitemapCollectionData,
type SitemapContentEntry,
type SitemapDataResponse,
} from "./seo.js";

// Plugin handlers
export {
Expand Down
69 changes: 48 additions & 21 deletions packages/core/src/api/handlers/seo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,48 +12,65 @@ import type { ApiResult } from "../types.js";

/** Raw content data for sitemap generation — the route builds the actual URLs */
export interface SitemapContentEntry {
/** Collection slug (e.g., "post", "page") */
collection: string;
/** Content slug or ID */
identifier: string;
/** Content ID (ULID) */
id: string;
/** Content slug, or null when the entry has no slug */
slug: string | null;
/** ISO date of last modification */
updatedAt: string;
}

export interface SitemapDataResponse {
/** Per-collection sitemap data with entries and URL pattern */
export interface SitemapCollectionData {
/** Collection slug (e.g., "post", "page") */
collection: string;
/** URL pattern with {slug} placeholder, or null for default /{collection}/{slug} */
urlPattern: string | null;
/** Most recent updated_at across all entries (for sitemap index lastmod) */
lastmod: string;
/** Individual content entries */
entries: SitemapContentEntry[];
}

export interface SitemapDataResponse {
collections: SitemapCollectionData[];
}

/** Maximum entries per sitemap (per spec) */
const SITEMAP_MAX_ENTRIES = 50_000;

/**
* Collect all published, indexable content across SEO-enabled collections
* for sitemap generation.
* for sitemap generation, grouped by collection.
*
* Only includes content from collections with `has_seo = 1`.
* Excludes content with `seo_no_index = 1` in the `_emdash_seo` table.
*
* Returns raw data (collection + identifier + date). The caller (route)
* is responsible for building absolute URLs — this handler does NOT
* Returns raw data grouped per collection. The caller (route) is
* responsible for building absolute URLs — this handler does NOT
* assume a URL structure.
*/
export async function handleSitemapData(
db: Kysely<Database>,
/** When set, only return data for this collection. */
collectionSlug?: string,
): Promise<ApiResult<SitemapDataResponse>> {
try {
// Find all SEO-enabled collections
const collections = await db
// Find SEO-enabled collections (optionally filtered)
let query = db
.selectFrom("_emdash_collections")
.select(["slug"])
.where("has_seo", "=", 1)
.execute();
.select(["slug", "url_pattern"])
.where("has_seo", "=", 1);

if (collectionSlug) {
query = query.where("slug", "=", collectionSlug);
}

const entries: SitemapContentEntry[] = [];
const collections = await query.execute();

for (const col of collections) {
if (entries.length >= SITEMAP_MAX_ENTRIES) break;
const result: SitemapCollectionData[] = [];

for (const col of collections) {
// Validate the slug before using it as a table name identifier.
// Should always pass (slugs are validated on creation), but
// guards against corrupted DB data.
Expand All @@ -65,7 +82,6 @@ export async function handleSitemapData(
}

const tableName = `ec_${col.slug}`;
const remaining = SITEMAP_MAX_ENTRIES - entries.length;

// Query published, non-deleted content.
// LEFT JOIN _emdash_seo to check noindex flag.
Expand All @@ -87,24 +103,35 @@ export async function handleSitemapData(
AND c.deleted_at IS NULL
AND (s.seo_no_index IS NULL OR s.seo_no_index = 0)
ORDER BY c.updated_at DESC
LIMIT ${remaining}
LIMIT ${SITEMAP_MAX_ENTRIES}
`.execute(db);

if (rows.rows.length === 0) continue;

const entries: SitemapContentEntry[] = [];
for (const row of rows.rows) {
entries.push({
collection: col.slug,
identifier: row.slug || row.id,
id: row.id,
slug: row.slug,
updatedAt: row.updated_at,
});
Comment thread
jdevalk marked this conversation as resolved.
}

result.push({
collection: col.slug,
urlPattern: col.url_pattern,
// Rows are ordered by updated_at DESC, so first row is the latest
lastmod: rows.rows[0].updated_at,
entries,
});
} catch (err) {
// Table missing or query error — skip this collection
console.warn(`[SITEMAP] Failed to query collection "${col.slug}":`, err);
continue;
}
}

return { success: true, data: { entries } };
return { success: true, data: { collections: result } };
} catch (error) {
console.error("[SITEMAP_ERROR]", error);
return {
Expand Down
5 changes: 5 additions & 0 deletions packages/core/src/astro/integration/routes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,11 @@ export function injectCoreRoutes(injectRoute: InjectRoute): void {
entrypoint: resolveRoute("sitemap.xml.ts"),
});

injectRoute({
pattern: "/sitemap-[collection].xml",
entrypoint: resolveRoute("sitemap-[collection].xml.ts"),
});

injectRoute({
pattern: "/robots.txt",
entrypoint: resolveRoute("robots.txt.ts"),
Expand Down
4 changes: 3 additions & 1 deletion packages/core/src/astro/middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ function setBaselineSecurityHeaders(response: Response): void {

/** Public routes that require the runtime (sitemap, robots.txt, etc.) */
const PUBLIC_RUNTIME_ROUTES = new Set(["/sitemap.xml", "/robots.txt"]);
const SITEMAP_COLLECTION_RE = /^\/sitemap-[a-z][a-z0-9_]*\.xml$/;

export const onRequest = defineMiddleware(async (context, next) => {
const { request, locals, cookies } = context;
Expand All @@ -185,7 +186,8 @@ export const onRequest = defineMiddleware(async (context, next) => {
// Process /_emdash routes and public routes with an active session
// (logged-in editors need the runtime for toolbar/visual editing on public pages)
const isEmDashRoute = url.pathname.startsWith("/_emdash");
const isPublicRuntimeRoute = PUBLIC_RUNTIME_ROUTES.has(url.pathname);
const isPublicRuntimeRoute =
PUBLIC_RUNTIME_ROUTES.has(url.pathname) || SITEMAP_COLLECTION_RE.test(url.pathname);
Comment thread
jdevalk marked this conversation as resolved.

// Check for edit mode cookie - editors viewing public pages need the runtime
// so auth middleware can verify their session for visual editing
Expand Down
104 changes: 104 additions & 0 deletions packages/core/src/astro/routes/sitemap-[collection].xml.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* Per-collection sitemap endpoint
*
* GET /sitemap-{collection}.xml - Sitemap for a single content collection.
*
* Uses the collection's url_pattern to build URLs. Falls back to
* /{collection}/{slug} when no pattern is configured.
*/

import type { APIRoute } from "astro";

import { handleSitemapData } from "#api/handlers/seo.js";
import { getSiteSettingsWithDb } from "#settings/index.js";

// Astro route option: opt this endpoint out of static prerendering so the
// sitemap is generated on demand for each request.
export const prerender = false;

// Matches a single trailing slash; used to normalise the site URL before
// entry paths are appended to it.
const TRAILING_SLASH_RE = /\/$/;
// Global patterns for the five characters that escapeXml() replaces with
// XML entities.
const AMP_RE = /&/g;
const LT_RE = /</g;
const GT_RE = />/g;
const QUOT_RE = /"/g;
const APOS_RE = /'/g;
// Tokens substituted in a collection's URL pattern: {slug} receives the
// entry's slug (or its id when the slug is empty), {id} receives the entry id.
const SLUG_PLACEHOLDER = "{slug}";
const ID_PLACEHOLDER = "{id}";

/**
 * Serve the XML sitemap for a single content collection.
 *
 * The collection slug comes from the `[collection]` route parameter. The site
 * origin is taken from the site settings (`settings.url`), falling back to the
 * request origin, with any trailing slash removed.
 *
 * Responses:
 * - 200 with a `<urlset>` holding one `<url>` (`<loc>` + `<lastmod>`) per
 *   entry returned by `handleSitemapData`, cacheable for one hour;
 * - 404 when no collection data comes back for the requested slug;
 * - 500 when the runtime database or the route parameter is missing, when the
 *   handler reports failure, or when an unexpected error is thrown.
 */
export const GET: APIRoute = async ({ params, locals, url }) => {
	const { emdash } = locals;
	const collectionSlug = params.collection;

	if (!emdash?.db || !collectionSlug) {
		return new Response("<!-- EmDash not configured -->", {
			status: 500,
			headers: { "Content-Type": "application/xml" },
		});
	}

	try {
		const settings = await getSiteSettingsWithDb(emdash.db);
		const siteUrl = (settings.url || url.origin).replace(TRAILING_SLASH_RE, "");

		const result = await handleSitemapData(emdash.db, collectionSlug);

		if (!result.success || !result.data) {
			return new Response("<!-- Failed to generate sitemap -->", {
				status: 500,
				headers: { "Content-Type": "application/xml" },
			});
		}

		// The handler was filtered to one slug, so at most one collection is
		// expected; an empty list means there is nothing to serve.
		const col = result.data.collections[0];
		if (!col) {
			return new Response("<!-- Collection not found or empty -->", {
				status: 404,
				headers: { "Content-Type": "application/xml" },
			});
		}

		const lines: string[] = [
			'<?xml version="1.0" encoding="UTF-8"?>',
			'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
		];

		for (const entry of col.entries) {
			// `||` (not `??`) on purpose: an empty-string slug also falls back to the id.
			const slug = entry.slug || entry.id;
			const path = buildEntryPath(col.collection, col.urlPattern, slug, entry.id);
			const loc = `${siteUrl}${path}`;

			lines.push(" <url>");
			lines.push(` <loc>${escapeXml(loc)}</loc>`);
			lines.push(` <lastmod>${escapeXml(entry.updatedAt)}</lastmod>`);
			lines.push(" </url>");
		}

		lines.push("</urlset>");

		return new Response(lines.join("\n"), {
			status: 200,
			headers: {
				"Content-Type": "application/xml; charset=utf-8",
				"Cache-Control": "public, max-age=3600",
			},
		});
	} catch (error) {
		// Log instead of silently swallowing, so a failing sitemap can be diagnosed.
		console.error("[SITEMAP_ERROR]", error);
		return new Response("<!-- Internal error generating sitemap -->", {
			status: 500,
			headers: { "Content-Type": "application/xml" },
		});
	}
};

/**
 * Build the site-relative path for one sitemap entry.
 *
 * @param collection - Collection slug, used only for the default path.
 * @param urlPattern - Pattern containing `{slug}` and/or `{id}` placeholders,
 *   or null to use the default `/{collection}/{slug}`.
 * @param slug - Value substituted for `{slug}` (URI-encoded here).
 * @param id - Value substituted for `{id}` (URI-encoded here).
 * @returns A path that always starts with "/".
 */
function buildEntryPath(
	collection: string,
	urlPattern: string | null,
	slug: string,
	id: string,
): string {
	if (!urlPattern) {
		return `/${encodeURIComponent(collection)}/${encodeURIComponent(slug)}`;
	}

	// split/join substitutes every occurrence of a placeholder, whereas
	// String#replace with a string pattern stops after the first match.
	const path = urlPattern
		.split(SLUG_PLACEHOLDER)
		.join(encodeURIComponent(slug))
		.split(ID_PLACEHOLDER)
		.join(encodeURIComponent(id));

	// Guard against patterns stored without a leading slash, which would
	// otherwise be glued directly onto the host name.
	return path.startsWith("/") ? path : `/${path}`;
}

/**
 * Replace the five XML-reserved characters (& < > " ') in `str` with their
 * predefined entities.
 *
 * The ampersand is processed first so the "&" introduced by the later
 * substitutions is not escaped a second time.
 */
function escapeXml(str: string): string {
	const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
		[AMP_RE, "&amp;"],
		[LT_RE, "&lt;"],
		[GT_RE, "&gt;"],
		[QUOT_RE, "&quot;"],
		[APOS_RE, "&apos;"],
	];

	let escaped = str;
	for (const [pattern, entity] of substitutions) {
		escaped = escaped.replace(pattern, entity);
	}
	return escaped;
}
35 changes: 13 additions & 22 deletions packages/core/src/astro/routes/sitemap.xml.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
/**
* Sitemap XML endpoint
* Sitemap index endpoint
*
* GET /sitemap.xml - Auto-generated sitemap from published content
* GET /sitemap.xml - Sitemap index listing one sitemap per collection.
*
* Includes all published, non-noindex content across all collections.
* The site URL is read from site settings or the request URL origin.
*
* Default URL pattern: /{collection}/{slug-or-id}. Users can override
* by creating their own /sitemap.xml route in their Astro project.
* Each collection with published, indexable content gets its own
* child sitemap at /sitemap-{collection}.xml. The index includes
* a <lastmod> per child derived from the most recently updated entry.
*/

import type { APIRoute } from "astro";
Expand Down Expand Up @@ -35,7 +33,6 @@ export const GET: APIRoute = async ({ locals, url }) => {
}

try {
// Determine site URL from settings or request origin
const settings = await getSiteSettingsWithDb(emdash.db);
const siteUrl = (settings.url || url.origin).replace(TRAILING_SLASH_RE, "");

Expand All @@ -48,28 +45,22 @@ export const GET: APIRoute = async ({ locals, url }) => {
});
}

const entries = result.data.entries;
const { collections } = result.data;

// Build XML
const lines: string[] = [
'<?xml version="1.0" encoding="UTF-8"?>',
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
'<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
];

for (const entry of entries) {
// Default URL pattern: /{collection}/{identifier}
// Encode path segments to handle slugs with spaces/unicode/reserved chars
const loc = `${siteUrl}/${encodeURIComponent(entry.collection)}/${encodeURIComponent(entry.identifier)}`;

lines.push(" <url>");
for (const col of collections) {
const loc = `${siteUrl}/sitemap-${encodeURIComponent(col.collection)}.xml`;
lines.push(" <sitemap>");
lines.push(` <loc>${escapeXml(loc)}</loc>`);
lines.push(` <lastmod>${escapeXml(entry.updatedAt)}</lastmod>`);
lines.push(" <changefreq>weekly</changefreq>");
lines.push(" <priority>0.7</priority>");
lines.push(" </url>");
lines.push(` <lastmod>${escapeXml(col.lastmod)}</lastmod>`);
lines.push(" </sitemap>");
}

lines.push("</urlset>");
lines.push("</sitemapindex>");

return new Response(lines.join("\n"), {
status: 200,
Expand Down
Loading
Loading