From 69f2852a7f2e8e74492528461ac4c4678ce787b6 Mon Sep 17 00:00:00 2001
From: Xeek <6032840+x3ek@users.noreply.github.com>
Date: Fri, 3 Apr 2026 08:58:36 -0500
Subject: [PATCH 1/4] feat(seo): add sitemap.xml and robots.txt generation
 (#48, #60)

Add /sitemap.xml listing the homepage, the post index, all published posts, and public pages; the homepage and dated posts also carry lastmod. Add /robots.txt that allows all crawlers, disallows the admin/auth/health/webhooks paths, and emits a Sitemap directive when site.url is configured. Extract a get_all_pages() helper into the content service and refactor feed.py to use the shared get_all_posts().

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/squishmark/main.py             |   3 +-
 src/squishmark/routers/feed.py     |  21 +-
 src/squishmark/routers/seo.py      | 112 +++++++++++
 src/squishmark/services/content.py |  32 ++-
 tests/test_seo.py                  | 299 +++++++++++++++++++++++++++++
 5 files changed, 446 insertions(+), 21 deletions(-)
 create mode 100644 src/squishmark/routers/seo.py
 create mode 100644 tests/test_seo.py

diff --git a/src/squishmark/main.py b/src/squishmark/main.py
index 2fb1edf..b757d56 100644
--- a/src/squishmark/main.py
+++ b/src/squishmark/main.py
@@ -14,7 +14,7 @@
 from squishmark.config import get_settings
 from squishmark.models.content import Config
 from squishmark.models.db import close_db, get_db_session, init_db
-from squishmark.routers import admin, auth, feed, pages, posts, webhooks
+from squishmark.routers import admin, auth, feed, pages, posts, seo, webhooks
 from squishmark.services.analytics import AnalyticsService
 from squishmark.services.github import get_github_service, shutdown_github_service
 from squishmark.services.markdown import get_markdown_service
@@ -296,6 +296,7 @@ async def livereload_ws(websocket: WebSocket) -> None:
     app.include_router(admin.router)
     app.include_router(webhooks.router)
     app.include_router(feed.router)
+    app.include_router(seo.router)
     app.include_router(posts.router)
     app.include_router(pages.router)  # Catch-all for static pages, must be last
 
diff --git a/src/squishmark/routers/feed.py b/src/squishmark/routers/feed.py
index 79d7792..1f28c7e 100644
--- a/src/squishmark/routers/feed.py
+++ b/src/squishmark/routers/feed.py
@@ -8,6 +8,7 @@
 
 from squishmark.models.content import Config, Post
 from squishmark.services.cache import get_cache
+from squishmark.services.content import get_all_posts
 from squishmark.services.github import get_github_service
 from squishmark.services.markdown import get_markdown_service
 
@@ -91,24 +92,8 @@ async def atom_feed() -> Response:
     config = Config.from_dict(config_data)
     markdown_service = get_markdown_service(config)
 
-    # Fetch all published posts
-    post_files = await github_service.list_directory("posts")
-    posts: list[Post] = []
-    for path in post_files:
-        if not path.endswith(".md"):
-            continue
-        file = await github_service.get_file(path)
-        if file is None:
-            continue
-        post = markdown_service.parse_post(path, file.content)
-        if not post.draft:
-            posts.append(post)
-
-    # Newest first
-    posts.sort(key=lambda p: (p.date is not None, p.date), reverse=True)
-
-    # Limit to 20 most recent
-    posts = posts[:20]
+    posts = await get_all_posts(github_service, markdown_service)
+    posts = posts[:20]  # Limit to 20 most recent
 
     xml_bytes = _build_atom_feed(config, posts)
     await cache.set(FEED_CACHE_KEY, xml_bytes)
diff --git a/src/squishmark/routers/seo.py b/src/squishmark/routers/seo.py
new file mode 100644
index 0000000..cd2e1f8
--- /dev/null
+++ b/src/squishmark/routers/seo.py
@@ -0,0 +1,112 @@
+"""SEO routes: sitemap.xml and robots.txt."""
+
+from xml.etree.ElementTree import Element, SubElement, tostring
+
+from fastapi import APIRouter
+from fastapi.responses import Response
+
+from squishmark.models.content import Config, Page, Post
+from squishmark.services.cache import get_cache
+from squishmark.services.content import get_all_pages, get_all_posts
+from squishmark.services.github import get_github_service
+from squishmark.services.markdown import get_markdown_service
+
+router = APIRouter(tags=["seo"])
+
+SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9"
+SITEMAP_CACHE_KEY = "seo:sitemap"
+ROBOTS_CACHE_KEY = "seo:robots"
+
+
+def _build_sitemap(config: Config, posts: list[Post], pages: list[Page]) -> bytes:
+    """Build a sitemap.xml from config, posts, and pages."""
+    site_url = config.site.url.rstrip("/") if config.site.url else ""
+
+    urlset = Element("urlset", xmlns=SITEMAP_NS)
+
+    # Homepage
+    url_el = SubElement(urlset, "url")
+    SubElement(url_el, "loc").text = f"{site_url}/"
+    if posts and posts[0].date:
+        SubElement(url_el, "lastmod").text = posts[0].date.isoformat()
+
+    # Post index
+    url_el = SubElement(urlset, "url")
+    SubElement(url_el, "loc").text = f"{site_url}/posts"
+
+    # Individual posts
+    for post in posts:
+        url_el = SubElement(urlset, "url")
+        SubElement(url_el, "loc").text = f"{site_url}{post.url}"
+        if post.date:
+            SubElement(url_el, "lastmod").text = post.date.isoformat()
+
+    # Public pages only (not unlisted or hidden)
+    for page in pages:
+        if page.visibility != "public":
+            continue
+        url_el = SubElement(urlset, "url")
+        SubElement(url_el, "loc").text = f"{site_url}{page.url}"
+
+    return b'<?xml version="1.0" encoding="utf-8"?>\n' + tostring(urlset, encoding="unicode").encode("utf-8")
+
+
+def _build_robots_txt(config: Config) -> str:
+    """Build robots.txt content."""
+    site_url = config.site.url.rstrip("/") if config.site.url else ""
+
+    lines = [
+        "User-agent: *",
+        "Allow: /",
+        "",
+        "Disallow: /admin/*",
+        "Disallow: /auth/*",
+        "Disallow: /health",
+        "Disallow: /webhooks/*",
+    ]
+
+    if site_url:
+        lines.append("")
+        lines.append(f"Sitemap: {site_url}/sitemap.xml")
+
+    return "\n".join(lines) + "\n"
+
+
+@router.get("/sitemap.xml")
+async def sitemap_xml() -> Response:
+    """Serve the XML sitemap."""
+    cache = get_cache()
+
+    cached = await cache.get(SITEMAP_CACHE_KEY)
+    if cached is not None:
+        return Response(content=cached, media_type="application/xml; charset=utf-8")
+
+    github_service = get_github_service()
+    config_data = await github_service.get_config()
+    config = Config.from_dict(config_data)
+    markdown_service = get_markdown_service(config)
+
+    posts = await get_all_posts(github_service, markdown_service)
+    pages = await get_all_pages(github_service, markdown_service)
+
+    xml_bytes = _build_sitemap(config, posts, pages)
+    await cache.set(SITEMAP_CACHE_KEY, xml_bytes)
+    return Response(content=xml_bytes, media_type="application/xml; charset=utf-8")
+
+
+@router.get("/robots.txt")
+async def robots_txt() -> Response:
+    """Serve robots.txt."""
+    cache = get_cache()
+
+    cached = await cache.get(ROBOTS_CACHE_KEY)
+    if cached is not None:
+        return Response(content=cached, media_type="text/plain; charset=utf-8")
+
+    github_service = get_github_service()
+    config_data = await github_service.get_config()
+    config = Config.from_dict(config_data)
+
+    content = _build_robots_txt(config)
+    await cache.set(ROBOTS_CACHE_KEY, content)
+    return Response(content=content, media_type="text/plain; charset=utf-8")
diff --git a/src/squishmark/services/content.py b/src/squishmark/services/content.py
index c0bf9f2..8264509 100644
--- a/src/squishmark/services/content.py
+++ b/src/squishmark/services/content.py
@@ -1,6 +1,6 @@
-"""Shared content helpers for fetching and filtering posts."""
+"""Shared content helpers for fetching and filtering posts and pages."""
 
-from squishmark.models.content import Post, SiteConfig
+from squishmark.models.content import Page, Post, SiteConfig
 from squishmark.services.github import GitHubService
 from squishmark.services.markdown import MarkdownService
 
@@ -51,3 +51,31 @@ def get_featured_posts(posts: list[Post], site_config: SiteConfig) -> list[Post]
         ),
     )
     return featured[: site_config.featured_max]
+
+
+async def get_all_pages(
+    github_service: GitHubService,
+    markdown_service: MarkdownService,
+    include_hidden: bool = False,
+) -> list[Page]:
+    """Fetch and parse all pages from the content repository."""
+    page_files = await github_service.list_directory("pages")
+
+    pages: list[Page] = []
+    for path in page_files:
+        if not path.endswith(".md"):
+            continue
+
+        file = await github_service.get_file(path)
+        if file is None:
+            continue
+
+        page = markdown_service.parse_page(path, file.content)
+
+        # Skip hidden pages unless requested
+        if page.visibility == "hidden" and not include_hidden:
+            continue
+
+        pages.append(page)
+
+    return pages
diff --git a/tests/test_seo.py b/tests/test_seo.py
new file mode 100644
index 0000000..ddc950d
--- /dev/null
+++ b/tests/test_seo.py
@@ -0,0 +1,299 @@
+"""Tests for SEO routes: sitemap.xml and robots.txt."""
+
+import datetime
+from unittest.mock import AsyncMock, MagicMock, patch
+from xml.etree.ElementTree import fromstring
+
+import pytest
+
+from squishmark.models.content import Config, Page, Post
+from squishmark.routers.seo import _build_robots_txt, _build_sitemap
+
+SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9"
+
+
+def _ns(tag: str) -> str:
+    """Prefix a tag with the sitemap namespace."""
+    return f"{{{SITEMAP_NS}}}{tag}"
+
+
+@pytest.fixture
+def sample_config() -> Config:
+    return Config.from_dict(
+        {
+            "site": {
+                "title": "Test Blog",
+                "description": "A test blog",
+                "author": "Test Author",
+                "url": "https://example.com",
+            },
+        }
+    )
+
+
+@pytest.fixture
+def sample_config_no_url() -> Config:
+    return Config.from_dict(
+        {
+            "site": {
+                "title": "Test Blog",
+            },
+        }
+    )
+
+
+@pytest.fixture
+def sample_posts() -> list[Post]:
+    return [
+        Post(
+            slug="post-one",
+            title="Post One",
+            date=datetime.date(2026, 2, 15),
+            html="<p>Content one</p>",
+        ),
+        Post(
+            slug="post-two",
+            title="Post Two",
+            date=datetime.date(2026, 2, 10),
+            html="<p>Content two</p>",
+        ),
+    ]
+
+
+@pytest.fixture
+def sample_pages() -> list[Page]:
+    return [
+        Page(slug="about", title="About", visibility="public"),
+        Page(slug="secret", title="Secret", visibility="unlisted"),
+        Page(slug="hidden-page", title="Hidden", visibility="hidden"),
+    ]
+
+
+class TestBuildSitemap:
+    def test_valid_xml_structure(self, sample_config, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+
+        assert xml_bytes.startswith(b'<?xml version="1.0" encoding="utf-8"?>')
+        root = fromstring(xml_bytes)
+        assert root.tag == _ns("urlset")
+
+    def test_homepage_entry(self, sample_config, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        urls = root.findall(_ns("url"))
+
+        # First URL should be homepage
+        loc = urls[0].find(_ns("loc")).text
+        assert loc == "https://example.com/"
+
+    def test_homepage_lastmod_from_newest_post(self, sample_config, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        homepage = root.findall(_ns("url"))[0]
+        lastmod = homepage.find(_ns("lastmod")).text
+        assert lastmod == "2026-02-15"
+
+    def test_post_index_entry(self, sample_config, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        locs = [u.find(_ns("loc")).text for u in root.findall(_ns("url"))]
+        assert "https://example.com/posts" in locs
+
+    def test_posts_included_with_lastmod(self, sample_config, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        urls = root.findall(_ns("url"))
+
+        post_urls = {u.find(_ns("loc")).text: u for u in urls}
+
+        post_one = post_urls["https://example.com/posts/post-one"]
+        assert post_one.find(_ns("lastmod")).text == "2026-02-15"
+
+        post_two = post_urls["https://example.com/posts/post-two"]
+        assert post_two.find(_ns("lastmod")).text == "2026-02-10"
+
+    def test_public_pages_included(self, sample_config, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        locs = [u.find(_ns("loc")).text for u in root.findall(_ns("url"))]
+        assert "https://example.com/about" in locs
+
+    def test_unlisted_pages_excluded(self, sample_config, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        locs = [u.find(_ns("loc")).text for u in root.findall(_ns("url"))]
+        assert "https://example.com/secret" not in locs
+
+    def test_hidden_pages_excluded(self, sample_config, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        locs = [u.find(_ns("loc")).text for u in root.findall(_ns("url"))]
+        assert "https://example.com/hidden-page" not in locs
+
+    def test_empty_content(self, sample_config):
+        xml_bytes = _build_sitemap(sample_config, [], [])
+        root = fromstring(xml_bytes)
+        urls = root.findall(_ns("url"))
+        # Only homepage and post index
+        assert len(urls) == 2
+        locs = [u.find(_ns("loc")).text for u in urls]
+        assert "https://example.com/" in locs
+        assert "https://example.com/posts" in locs
+
+    def test_no_site_url_uses_relative_paths(self, sample_config_no_url, sample_posts, sample_pages):
+        xml_bytes = _build_sitemap(sample_config_no_url, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        locs = [u.find(_ns("loc")).text for u in root.findall(_ns("url"))]
+        assert "/" in locs
+        assert "/posts" in locs
+        assert "/posts/post-one" in locs
+
+    def test_post_without_date_has_no_lastmod(self, sample_config):
+        post = Post(slug="no-date", title="No Date", html="<p>Hi</p>")
+        xml_bytes = _build_sitemap(sample_config, [post], [])
+        root = fromstring(xml_bytes)
+        urls = root.findall(_ns("url"))
+        post_url = [u for u in urls if u.find(_ns("loc")).text == "https://example.com/posts/no-date"][0]
+        assert post_url.find(_ns("lastmod")) is None
+
+    def test_no_priority_or_changefreq(self, sample_config, sample_posts, sample_pages):
+        """Sitemap should not include priority or changefreq elements."""
+        xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
+        root = fromstring(xml_bytes)
+        for url in root.findall(_ns("url")):
+            assert url.find(_ns("priority")) is None
+            assert url.find(_ns("changefreq")) is None
+
+
+class TestBuildRobotsTxt:
+    def test_allows_all_crawlers(self, sample_config):
+        content = _build_robots_txt(sample_config)
+        assert "User-agent: *" in content
+        assert "Allow: /" in content
+
+    def test_disallow_admin_paths(self, sample_config):
+        content = _build_robots_txt(sample_config)
+        assert "Disallow: /admin/*" in content
+        assert "Disallow: /auth/*" in content
+        assert "Disallow: /health" in content
+        assert "Disallow: /webhooks/*" in content
+
+    def test_sitemap_directive_with_url(self, sample_config):
+        content = _build_robots_txt(sample_config)
+        assert "Sitemap: https://example.com/sitemap.xml" in content
+
+    def test_no_sitemap_without_url(self, sample_config_no_url):
+        content = _build_robots_txt(sample_config_no_url)
+        assert "Sitemap" not in content
+
+    def test_no_static_disallow(self, sample_config):
+        """Static files should not be blocked."""
+        content = _build_robots_txt(sample_config)
+        assert "/static" not in content
+
+
+class TestSitemapEndpoint:
+    @pytest.mark.asyncio
+    async def test_returns_xml_content_type(self):
+        mock_github = AsyncMock()
+        mock_github.get_config.return_value = {"site": {"title": "Test"}}
+        mock_github.list_directory.return_value = []
+
+        with (
+            patch("squishmark.routers.seo.get_github_service", return_value=mock_github),
+            patch("squishmark.routers.seo.get_cache") as mock_cache_fn,
+        ):
+            mock_cache = AsyncMock()
+            mock_cache.get.return_value = None
+            mock_cache_fn.return_value = mock_cache
+
+            from squishmark.routers.seo import sitemap_xml
+
+            response = await sitemap_xml()
+
+        assert "application/xml" in response.media_type
+
+    @pytest.mark.asyncio
+    async def test_cached_response_returned(self):
+        cached_xml = b'<?xml version="1.0"?><urlset>cached</urlset>'
+
+        with patch("squishmark.routers.seo.get_cache") as mock_cache_fn:
+            mock_cache = AsyncMock()
+            mock_cache.get.return_value = cached_xml
+            mock_cache_fn.return_value = mock_cache
+
+            from squishmark.routers.seo import sitemap_xml
+
+            response = await sitemap_xml()
+
+        assert response.body == cached_xml
+
+    @pytest.mark.asyncio
+    async def test_drafts_excluded(self):
+        mock_github = AsyncMock()
+        mock_github.get_config.return_value = {
+            "site": {"title": "Test", "url": "https://example.com"},
+        }
+        mock_github.list_directory.side_effect = [
+            # posts directory
+            ["posts/2026-01-01-published.md", "posts/2026-01-02-draft.md"],
+            # pages directory
+            [],
+        ]
+        mock_github.get_file.side_effect = [
+            MagicMock(content="---\ntitle: Published\ndate: 2026-01-01\n---\nContent."),
+            MagicMock(content="---\ntitle: Draft\ndate: 2026-01-02\ndraft: true\n---\nDraft."),
+        ]
+
+        with (
+            patch("squishmark.routers.seo.get_github_service", return_value=mock_github),
+            patch("squishmark.routers.seo.get_cache") as mock_cache_fn,
+        ):
+            mock_cache = AsyncMock()
+            mock_cache.get.return_value = None
+            mock_cache_fn.return_value = mock_cache
+
+            from squishmark.routers.seo import sitemap_xml
+
+            response = await sitemap_xml()
+
+        root = fromstring(response.body)
+        locs = [u.find(_ns("loc")).text for u in root.findall(_ns("url"))]
+        assert "https://example.com/posts/published" in locs
+        assert "https://example.com/posts/draft" not in locs
+
+
+class TestRobotsEndpoint:
+    @pytest.mark.asyncio
+    async def test_returns_text_content_type(self):
+        mock_github = AsyncMock()
+        mock_github.get_config.return_value = {"site": {"title": "Test"}}
+
+        with (
+            patch("squishmark.routers.seo.get_github_service", return_value=mock_github),
+            patch("squishmark.routers.seo.get_cache") as mock_cache_fn,
+        ):
+            mock_cache = AsyncMock()
+            mock_cache.get.return_value = None
+            mock_cache_fn.return_value = mock_cache
+
+            from squishmark.routers.seo import robots_txt
+
+            response = await robots_txt()
+
+        assert "text/plain" in response.media_type
+
+    @pytest.mark.asyncio
+    async def test_cached_response_returned(self):
+        cached_txt = "User-agent: *\nAllow: /\n"
+
+        with patch("squishmark.routers.seo.get_cache") as mock_cache_fn:
+            mock_cache = AsyncMock()
+            mock_cache.get.return_value = cached_txt
+            mock_cache_fn.return_value = mock_cache
+
+            from squishmark.routers.seo import robots_txt
+
+            response = await robots_txt()
+
+        assert response.body.decode() == cached_txt

From 6807bbc535c3652f98d5b3d9661cc9e582b571d6 Mon Sep 17 00:00:00 2001
From: Xeek <6032840+x3ek@users.noreply.github.com>
Date: Fri, 3 Apr 2026 09:12:25 -0500
Subject: [PATCH 2/4] fix(seo): add lastmod to post index entry in sitemap

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/squishmark/routers/seo.py | 2 ++
 tests/test_seo.py             | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/squishmark/routers/seo.py b/src/squishmark/routers/seo.py
index cd2e1f8..a0ff7b1 100644
--- a/src/squishmark/routers/seo.py
+++ b/src/squishmark/routers/seo.py
@@ -33,6 +33,8 @@ def _build_sitemap(config: Config, posts: list[Post], pages: list[Page]) -> byte
     # Post index
     url_el = SubElement(urlset, "url")
     SubElement(url_el, "loc").text = f"{site_url}/posts"
+    if posts and posts[0].date:
+        SubElement(url_el, "lastmod").text = posts[0].date.isoformat()
 
     # Individual posts
     for post in posts:
diff --git a/tests/test_seo.py b/tests/test_seo.py
index ddc950d..a615844 100644
--- a/tests/test_seo.py
+++ b/tests/test_seo.py
@@ -96,8 +96,9 @@ def test_homepage_lastmod_from_newest_post(self, sample_config, sample_posts, sa
     def test_post_index_entry(self, sample_config, sample_posts, sample_pages):
         xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
         root = fromstring(xml_bytes)
-        locs = [u.find(_ns("loc")).text for u in root.findall(_ns("url"))]
-        assert "https://example.com/posts" in locs
+        urls = root.findall(_ns("url"))
+        post_index = [u for u in urls if u.find(_ns("loc")).text == "https://example.com/posts"][0]
+        assert post_index.find(_ns("lastmod")).text == "2026-02-15"
 
     def test_posts_included_with_lastmod(self, sample_config, sample_posts, sample_pages):
         xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)

From 1c5d43b8b850d9911ac8a301d150c598e3fb4853 Mon Sep 17 00:00:00 2001
From: Xeek <6032840+x3ek@users.noreply.github.com>
Date: Fri, 3 Apr 2026 09:20:14 -0500
Subject: [PATCH 3/4] feat(seo): add date field to Page model and lastmod to
 page sitemap entries

Pages can now have an optional date in frontmatter. When present, it appears as lastmod in the sitemap. Pages without dates simply omit lastmod, which is valid per the sitemap spec.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/squishmark/models/content.py    |  1 +
 src/squishmark/routers/seo.py       |  2 ++
 src/squishmark/services/markdown.py |  1 +
 tests/test_seo.py                   | 17 +++++++++++++----
 4 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/squishmark/models/content.py b/src/squishmark/models/content.py
index c54099b..9caf07a 100644
--- a/src/squishmark/models/content.py
+++ b/src/squishmark/models/content.py
@@ -68,6 +68,7 @@ class Page(BaseModel):
 
     slug: str
     title: str
+    date: datetime.date | None = None
     description: str = ""
     content: str = ""  # Raw markdown
     html: str = ""  # Rendered HTML
diff --git a/src/squishmark/routers/seo.py b/src/squishmark/routers/seo.py
index a0ff7b1..d6dcf6e 100644
--- a/src/squishmark/routers/seo.py
+++ b/src/squishmark/routers/seo.py
@@ -49,6 +49,8 @@ def _build_sitemap(config: Config, posts: list[Post], pages: list[Page]) -> byte
             continue
         url_el = SubElement(urlset, "url")
         SubElement(url_el, "loc").text = f"{site_url}{page.url}"
+        if page.date:
+            SubElement(url_el, "lastmod").text = page.date.isoformat()
 
     return b'<?xml version="1.0" encoding="utf-8"?>\n' + tostring(urlset, encoding="unicode").encode("utf-8")
 
diff --git a/src/squishmark/services/markdown.py b/src/squishmark/services/markdown.py
index 0bc939c..b1affbb 100644
--- a/src/squishmark/services/markdown.py
+++ b/src/squishmark/services/markdown.py
@@ -219,6 +219,7 @@ def parse_page(self, path: str, content: str) -> Page:
         return Page(
             slug=slug,
             title=frontmatter.title,
+            date=frontmatter.date,
             description=description,
             content=markdown_content,
             html=html,
diff --git a/tests/test_seo.py b/tests/test_seo.py
index a615844..5eaca1c 100644
--- a/tests/test_seo.py
+++ b/tests/test_seo.py
@@ -63,7 +63,7 @@ def sample_posts() -> list[Post]:
 @pytest.fixture
 def sample_pages() -> list[Page]:
     return [
-        Page(slug="about", title="About", visibility="public"),
+        Page(slug="about", title="About", visibility="public", date=datetime.date(2026, 1, 20)),
         Page(slug="secret", title="Secret", visibility="unlisted"),
         Page(slug="hidden-page", title="Hidden", visibility="hidden"),
     ]
@@ -113,11 +113,20 @@ def test_posts_included_with_lastmod(self, sample_config, sample_posts, sample_p
         post_two = post_urls["https://example.com/posts/post-two"]
         assert post_two.find(_ns("lastmod")).text == "2026-02-10"
 
-    def test_public_pages_included(self, sample_config, sample_posts, sample_pages):
+    def test_public_pages_included_with_lastmod(self, sample_config, sample_posts, sample_pages):
         xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)
         root = fromstring(xml_bytes)
-        locs = [u.find(_ns("loc")).text for u in root.findall(_ns("url"))]
-        assert "https://example.com/about" in locs
+        urls = root.findall(_ns("url"))
+        about = [u for u in urls if u.find(_ns("loc")).text == "https://example.com/about"][0]
+        assert about.find(_ns("lastmod")).text == "2026-01-20"
+
+    def test_page_without_date_has_no_lastmod(self, sample_config, sample_posts):
+        page = Page(slug="no-date", title="No Date", visibility="public")
+        xml_bytes = _build_sitemap(sample_config, sample_posts, [page])
+        root = fromstring(xml_bytes)
+        urls = root.findall(_ns("url"))
+        page_url = [u for u in urls if u.find(_ns("loc")).text == "https://example.com/no-date"][0]
+        assert page_url.find(_ns("lastmod")) is None
 
     def test_unlisted_pages_excluded(self, sample_config, sample_posts, sample_pages):
         xml_bytes = _build_sitemap(sample_config, sample_posts, sample_pages)

From 856a865ad942c99b025770f5f138f620eb5b88ce Mon Sep 17 00:00:00 2001
From: Xeek <6032840+x3ek@users.noreply.github.com>
Date: Fri, 3 Apr 2026 09:29:36 -0500
Subject: [PATCH 4/4] refactor(seo): extract _add_url helper to reduce
 duplication in sitemap builder

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/squishmark/routers/seo.py | 38 ++++++++++++++---------------------
 1 file changed, 15 insertions(+), 23 deletions(-)

diff --git a/src/squishmark/routers/seo.py b/src/squishmark/routers/seo.py
index d6dcf6e..a54e13b 100644
--- a/src/squishmark/routers/seo.py
+++ b/src/squishmark/routers/seo.py
@@ -1,5 +1,6 @@
 """SEO routes: sitemap.xml and robots.txt."""
 
+import datetime
 from xml.etree.ElementTree import Element, SubElement, tostring
 
 from fastapi import APIRouter
@@ -18,39 +19,30 @@
 ROBOTS_CACHE_KEY = "seo:robots"
 
 
+def _add_url(urlset: Element, loc: str, lastmod: datetime.date | None = None) -> None:
+    """Append a <url> entry to the sitemap urlset."""
+    url_el = SubElement(urlset, "url")
+    SubElement(url_el, "loc").text = loc
+    if lastmod:
+        SubElement(url_el, "lastmod").text = lastmod.isoformat()
+
+
 def _build_sitemap(config: Config, posts: list[Post], pages: list[Page]) -> bytes:
     """Build a sitemap.xml from config, posts, and pages."""
     site_url = config.site.url.rstrip("/") if config.site.url else ""
+    newest_post_date = posts[0].date if posts else None
 
     urlset = Element("urlset", xmlns=SITEMAP_NS)
 
-    # Homepage
-    url_el = SubElement(urlset, "url")
-    SubElement(url_el, "loc").text = f"{site_url}/"
-    if posts and posts[0].date:
-        SubElement(url_el, "lastmod").text = posts[0].date.isoformat()
-
-    # Post index
-    url_el = SubElement(urlset, "url")
-    SubElement(url_el, "loc").text = f"{site_url}/posts"
-    if posts and posts[0].date:
-        SubElement(url_el, "lastmod").text = posts[0].date.isoformat()
+    _add_url(urlset, f"{site_url}/", newest_post_date)
+    _add_url(urlset, f"{site_url}/posts", newest_post_date)
 
-    # Individual posts
     for post in posts:
-        url_el = SubElement(urlset, "url")
-        SubElement(url_el, "loc").text = f"{site_url}{post.url}"
-        if post.date:
-            SubElement(url_el, "lastmod").text = post.date.isoformat()
+        _add_url(urlset, f"{site_url}{post.url}", post.date)
 
-    # Public pages only (not unlisted or hidden)
     for page in pages:
-        if page.visibility != "public":
-            continue
-        url_el = SubElement(urlset, "url")
-        SubElement(url_el, "loc").text = f"{site_url}{page.url}"
-        if page.date:
-            SubElement(url_el, "lastmod").text = page.date.isoformat()
+        if page.visibility == "public":
+            _add_url(urlset, f"{site_url}{page.url}", page.date)
 
     return b'<?xml version="1.0" encoding="utf-8"?>\n' + tostring(urlset, encoding="unicode").encode("utf-8")