From 6f61f334173e0687fcb77851fcb9d3800a2147f1 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Thu, 4 Dec 2025 11:54:54 -0600 Subject: [PATCH 01/37] release: 0.10.1 --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index 61fb31ca..cb2314f0 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.10.0" +__version__ = "0.10.1b0" From a13d675aac54aee5734b64aa64b43355cd5ebc51 Mon Sep 17 00:00:00 2001 From: autobump Date: Thu, 4 Dec 2025 17:56:19 +0000 Subject: [PATCH 02/37] =?UTF-8?q?Bump=20version:=200.10.1b0=20=E2=86=92=20?= =?UTF-8?q?0.11.0.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index cb2314f0..994be33c 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.10.1b0" +__version__ = "0.11.0.dev0" From dd2b37fe5626fc2ff5f8a3711d842c375c363e5d Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Thu, 4 Dec 2025 12:48:53 -0600 Subject: [PATCH 03/37] fix: improve cache invalidation for feeds and templates - Add get_templates_mtime() to track all template file changes including includes/extends - Update feeds.py cache keys to include: - All post metadata via post.to_dict() instead of just content - Template modification times to detect template changes - Update post_template.py cache keys to include template mtime - Fixes issue where feeds don't update when: - Post metadata changes (title, date, slug, published, description) - Published status flips from scheduled to published - Template files are modified (main or included templates) - New posts are added to feeds This eliminates the need for manual cache busting when making common changes. 
--- markata/plugins/feeds.py | 51 ++++++++++++++++---------------- markata/plugins/post_template.py | 21 +++++++++++++ 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 6e04763c..d8c6c236 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -440,6 +440,24 @@ def pre_render(markata: Markata) -> None: markata.feeds = Feeds(markata) +def get_templates_mtime(markata): + """Get latest mtime from all template directories. + + This tracks changes to any template file including includes, extends, and imports. + """ + max_mtime = 0 + for template_dir in markata.jinja_env.template_paths: + template_path = Path(template_dir) + if template_path.exists(): + for path in template_path.rglob('*'): + if path.is_file(): + try: + max_mtime = max(max_mtime, path.stat().st_mtime) + except (OSError, FileNotFoundError): + continue + return max_mtime + + @lru_cache() def get_template(markata, template): try: @@ -502,6 +520,9 @@ def create_page( template = get_template(markata, feed.config.template) partial_template = get_template(markata, feed.config.partial_template) canonical_url = f"{markata.config.url}/{feed.config.slug}/" + + # Get templates mtime to bust cache when any template changes + templates_mtime = get_templates_mtime(markata) key = markata.make_hash( "feeds", @@ -511,8 +532,9 @@ def create_page( markata.config.url, markata.config.description, feed.config.title, - feed.map("content"), + feed.map("str(post.to_dict())"), # Track all post metadata, not just content canonical_url, + str(templates_mtime), # Track template file changes # datetime.datetime.today(), # markata.config, ) @@ -630,32 +652,11 @@ def create_card( if template is None: template = markata.config.get("feeds_config", {}).get("card_template", None) - # Get template modification time if template exists - template_mtime = 0 - if template: - template_path = None - # Check user template paths first - for path in 
markata.jinja_env.template_paths: - potential_path = Path(path) / template - if potential_path.exists(): - template_path = potential_path - break - - # Check package templates if not found in user paths - if not template_path: - import importlib - - package_template = ( - importlib.resources.files("markata") / "templates" / template - ) - if package_template.exists(): - template_path = package_template - - if template_path: - template_mtime = template_path.stat().st_mtime + # Get templates mtime to bust cache when any template changes + templates_mtime = get_templates_mtime(markata) key = markata.make_hash( - "feeds", template, str(post), post.content, str(template_mtime) + "feeds", template, str(post.to_dict()), str(templates_mtime) ) card = markata.precache.get(key) diff --git a/markata/plugins/post_template.py b/markata/plugins/post_template.py index 5a037861..656eb123 100644 --- a/markata/plugins/post_template.py +++ b/markata/plugins/post_template.py @@ -401,12 +401,33 @@ def get_template(markata, template): return template +def get_templates_mtime(markata): + """Get latest mtime from all template directories. + + This tracks changes to any template file including includes, extends, and imports. + """ + max_mtime = 0 + for template_dir in markata.jinja_env.template_paths: + template_path = Path(template_dir) + if template_path.exists(): + for path in template_path.rglob('*'): + if path.is_file(): + try: + max_mtime = max(max_mtime, path.stat().st_mtime) + except (OSError, FileNotFoundError): + continue + return max_mtime + + def render_article(markata, cache, article): """Render an article using cached templates.""" + templates_mtime = get_templates_mtime(markata) + key = markata.make_hash( "post_template", __version__, article.key, + str(templates_mtime), # Track template file changes ) html = markata.precache.get(key) From 9ea63c10482c3b103fe974156ac84932f951260c Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Thu, 4 Dec 2025 12:53:39 -0600 Subject: [PATCH 04/37] fix: add cache invalidation for redirects and jinja_md templates - redirects.py: Include template mtime in cache key to detect template changes - jinja_md.py: Include post metadata and markata version in cache key Previously: - Changing redirect template wouldn't regenerate redirect files - Changing post metadata in jinja templates wouldn't invalidate cache - Changes to markata context wouldn't trigger re-render Now cache properly invalidates when: - Redirect template file is modified - Post metadata changes that affects jinja rendering - Markata version changes (API changes) --- markata/plugins/jinja_md.py | 10 +++++++++- markata/plugins/redirects.py | 20 ++++++++++++-------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/markata/plugins/jinja_md.py b/markata/plugins/jinja_md.py index 5e9112a2..4574d325 100644 --- a/markata/plugins/jinja_md.py +++ b/markata/plugins/jinja_md.py @@ -217,7 +217,15 @@ def pre_render(markata: "Markata") -> None: for post in markata.filter("jinja==True"): if post.get("jinja", True) and not ignore_spec.match_file(post["path"]): try: - key = markata.make_hash("jina_md", "pre_render", post.content) + # Include post metadata and markata version in cache key + # since these affect the rendered output + key = markata.make_hash( + "jinja_md", + "pre_render", + post.content, + str(post.to_dict()), # Include all post metadata + __version__, # Include markata version + ) content_from_cache = markata.precache.get(key) if content_from_cache is None and post.content is not None: post.content = jinja_env.from_string(post.content).render( diff --git a/markata/plugins/redirects.py b/markata/plugins/redirects.py index 87e3ed82..8c3c6f7d 100644 --- a/markata/plugins/redirects.py +++ b/markata/plugins/redirects.py @@ -160,14 +160,6 @@ def save(markata: "Markata") -> None: else: raw_redirects = [] - key = markata.make_hash("redirects", "raw_redirects", raw_redirects) - with 
markata.cache as cache: - cache.get(key) - if cache.get(key) == "done": - return - - cache.set(key, "done", expire=markata.config.default_cache_expire) - redirects = [ Redirect(original=s[0], new=s[1], markata=markata) for r in raw_redirects @@ -178,6 +170,18 @@ def save(markata: "Markata") -> None: template_file = Path(str(markata.config.get("redirect_template"))) else: template_file = DEFAULT_REDIRECT_TEMPLATE + + # Get template mtime to bust cache when template changes + template_mtime = template_file.stat().st_mtime if template_file.exists() else 0 + + key = markata.make_hash("redirects", "raw_redirects", raw_redirects, str(template_mtime)) + with markata.cache as cache: + cache.get(key) + if cache.get(key) == "done": + return + + cache.set(key, "done", expire=markata.config.default_cache_expire) + template = Template(template_file.read_text()) for redirect in redirects: From a253987161d742ab8ccf34bc7fa5ba67740cfd90 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Thu, 4 Dec 2025 15:35:23 -0600 Subject: [PATCH 05/37] fix: standardize cache keys with version tracking Improve cache key consistency across plugins: - heading_link.py: Replace expensive Path(__file__).read_text() with __version__ - render_markdown.py: Add __version__, backend, and extensions to cache key - auto_description.py: Add __version__ to cache key - seo.py: Add __version__ to cache key Previously: - Changing markdown backend/extensions wouldn't invalidate cache - Version changes wouldn't bust cache for descriptions and SEO Now cache properly invalidates when: - Markata version changes (API/behavior changes) - Markdown backend or extensions change - Plugin code changes (via version bump) This eliminates unnecessary cache hits with stale data and improves performance by removing file I/O from cache key generation. 
--- markata/plugins/auto_description.py | 2 ++ markata/plugins/heading_link.py | 3 ++- markata/plugins/render_markdown.py | 10 +++++++++- markata/plugins/seo.py | 1 + 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/markata/plugins/auto_description.py b/markata/plugins/auto_description.py index 5046e507..c1dc598e 100644 --- a/markata/plugins/auto_description.py +++ b/markata/plugins/auto_description.py @@ -93,6 +93,7 @@ from bs4 import MarkupResemblesLocatorWarning +from markata import __version__ from markata.hookspec import hook_impl warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) @@ -151,6 +152,7 @@ def set_description( article.content, plugin_text, config, + __version__, ) description_from_cache = markata.cache.get(key) diff --git a/markata/plugins/heading_link.py b/markata/plugins/heading_link.py index b2a3f2cc..1388815e 100644 --- a/markata/plugins/heading_link.py +++ b/markata/plugins/heading_link.py @@ -74,6 +74,7 @@ from bs4 import BeautifulSoup +from markata import __version__ from markata import Markata from markata.hookspec import hook_impl @@ -93,7 +94,7 @@ def post_render(markata: Markata) -> None: key = markata.make_hash( "heading_link", "post_render", - Path(__file__).read_text(), + __version__, article.content, article.html, ) diff --git a/markata/plugins/render_markdown.py b/markata/plugins/render_markdown.py index cfe8a21f..c56c3457 100644 --- a/markata/plugins/render_markdown.py +++ b/markata/plugins/render_markdown.py @@ -104,6 +104,7 @@ import pydantic +from markata import __version__ from markata.hookspec import hook_impl from markata.hookspec import register_attr from markata.plugins.md_it_highlight_code import highlight_code @@ -288,7 +289,14 @@ def render_article_parallel(markata, config, cache, article): article.html = "" return article, "" - key = markata.make_hash("render_markdown", "render", content) + key = markata.make_hash( + "render_markdown", + "render", + content, + __version__, + 
markata.config.render_markdown.backend.value, + str(markata.config.render_markdown.extensions), + ) html_from_cache = markata.precache.get(key) if html_from_cache is not None: diff --git a/markata/plugins/seo.py b/markata/plugins/seo.py index 0064f904..162ecee0 100644 --- a/markata/plugins/seo.py +++ b/markata/plugins/seo.py @@ -174,6 +174,7 @@ def render(markata: Markata) -> None: twitter_card, article.metadata["title"], str(config_seo), + __version__, ) html_from_cache = markata.precache.get(key) From 0dfa7df56c09cedfd35299dd532b83c6fee9f064 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Fri, 5 Dec 2025 13:31:22 -0600 Subject: [PATCH 06/37] ruff fix --- markata/plugins/feeds.py | 4 ++-- markata/plugins/heading_link.py | 3 +-- markata/plugins/jinja_md.py | 4 ++-- markata/plugins/post_template.py | 4 ++-- markata/plugins/redirects.py | 4 ++-- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index d8c6c236..2df748e6 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -442,7 +442,7 @@ def pre_render(markata: Markata) -> None: def get_templates_mtime(markata): """Get latest mtime from all template directories. - + This tracks changes to any template file including includes, extends, and imports. 
""" max_mtime = 0 @@ -520,7 +520,7 @@ def create_page( template = get_template(markata, feed.config.template) partial_template = get_template(markata, feed.config.partial_template) canonical_url = f"{markata.config.url}/{feed.config.slug}/" - + # Get templates mtime to bust cache when any template changes templates_mtime = get_templates_mtime(markata) diff --git a/markata/plugins/heading_link.py b/markata/plugins/heading_link.py index 1388815e..7fe32a35 100644 --- a/markata/plugins/heading_link.py +++ b/markata/plugins/heading_link.py @@ -69,13 +69,12 @@ """ import re -from pathlib import Path from typing import TYPE_CHECKING from bs4 import BeautifulSoup -from markata import __version__ from markata import Markata +from markata import __version__ from markata.hookspec import hook_impl if TYPE_CHECKING: diff --git a/markata/plugins/jinja_md.py b/markata/plugins/jinja_md.py index 4574d325..6ed29b77 100644 --- a/markata/plugins/jinja_md.py +++ b/markata/plugins/jinja_md.py @@ -220,8 +220,8 @@ def pre_render(markata: "Markata") -> None: # Include post metadata and markata version in cache key # since these affect the rendered output key = markata.make_hash( - "jinja_md", - "pre_render", + "jinja_md", + "pre_render", post.content, str(post.to_dict()), # Include all post metadata __version__, # Include markata version diff --git a/markata/plugins/post_template.py b/markata/plugins/post_template.py index 656eb123..e4915b15 100644 --- a/markata/plugins/post_template.py +++ b/markata/plugins/post_template.py @@ -403,7 +403,7 @@ def get_template(markata, template): def get_templates_mtime(markata): """Get latest mtime from all template directories. - + This tracks changes to any template file including includes, extends, and imports. 
""" max_mtime = 0 @@ -422,7 +422,7 @@ def get_templates_mtime(markata): def render_article(markata, cache, article): """Render an article using cached templates.""" templates_mtime = get_templates_mtime(markata) - + key = markata.make_hash( "post_template", __version__, diff --git a/markata/plugins/redirects.py b/markata/plugins/redirects.py index 8c3c6f7d..033d4457 100644 --- a/markata/plugins/redirects.py +++ b/markata/plugins/redirects.py @@ -170,10 +170,10 @@ def save(markata: "Markata") -> None: template_file = Path(str(markata.config.get("redirect_template"))) else: template_file = DEFAULT_REDIRECT_TEMPLATE - + # Get template mtime to bust cache when template changes template_mtime = template_file.stat().st_mtime if template_file.exists() else 0 - + key = markata.make_hash("redirects", "raw_redirects", raw_redirects, str(template_mtime)) with markata.cache as cache: cache.get(key) From e3686e6b30655b3c80e9762bba6fb7d59a22803c Mon Sep 17 00:00:00 2001 From: autobump Date: Fri, 5 Dec 2025 19:32:32 +0000 Subject: [PATCH 07/37] =?UTF-8?q?Bump=20version:=200.11.0.dev0=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index 994be33c..494f8975 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev0" +__version__ = "0.11.0.dev1" From 5ee2f1bd393a7606b1a47148a5cb20c14bc5295a Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Fri, 5 Dec 2025 13:43:27 -0600 Subject: [PATCH 08/37] Fix AttributeError: 'Environment' object has no attribute 'template_paths' - Add get_template_paths() helper function to extract paths from Jinja2 Environment - Update get_templates_mtime() in post_template.py and feeds.py to use helper - Jinja2 Environment stores paths in loader.searchpath, not as direct attribute --- markata/plugins/feeds.py | 19 ++++++++++++++++++- markata/plugins/post_template.py | 19 ++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 2df748e6..6ce51368 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -440,13 +440,30 @@ def pre_render(markata: Markata) -> None: markata.feeds = Feeds(markata) +def get_template_paths(env): + """Extract template paths from Jinja2 Environment's loader.""" + from jinja2 import ChoiceLoader, FileSystemLoader + + paths = [] + loader = env.loader + + if isinstance(loader, ChoiceLoader): + for sub_loader in loader.loaders: + if isinstance(sub_loader, FileSystemLoader): + paths.extend(sub_loader.searchpath) + elif isinstance(loader, FileSystemLoader): + paths.extend(loader.searchpath) + + return paths + + def get_templates_mtime(markata): """Get latest mtime from all template directories. This tracks changes to any template file including includes, extends, and imports. 
""" max_mtime = 0 - for template_dir in markata.jinja_env.template_paths: + for template_dir in get_template_paths(markata.jinja_env): template_path = Path(template_dir) if template_path.exists(): for path in template_path.rglob('*'): diff --git a/markata/plugins/post_template.py b/markata/plugins/post_template.py index e4915b15..0836a76d 100644 --- a/markata/plugins/post_template.py +++ b/markata/plugins/post_template.py @@ -401,13 +401,30 @@ def get_template(markata, template): return template +def get_template_paths(env): + """Extract template paths from Jinja2 Environment's loader.""" + from jinja2 import ChoiceLoader, FileSystemLoader + + paths = [] + loader = env.loader + + if isinstance(loader, ChoiceLoader): + for sub_loader in loader.loaders: + if isinstance(sub_loader, FileSystemLoader): + paths.extend(sub_loader.searchpath) + elif isinstance(loader, FileSystemLoader): + paths.extend(loader.searchpath) + + return paths + + def get_templates_mtime(markata): """Get latest mtime from all template directories. This tracks changes to any template file including includes, extends, and imports. """ max_mtime = 0 - for template_dir in markata.jinja_env.template_paths: + for template_dir in get_template_paths(markata.jinja_env): template_path = Path(template_dir) if template_path.exists(): for path in template_path.rglob('*'): From 5f1742ff02795c7dd62861473d5dcec96a261133 Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Fri, 5 Dec 2025 14:10:14 -0600 Subject: [PATCH 09/37] Refactor: Centralize template utilities in jinja_env plugin - Move get_template_paths(), get_templates_mtime(), and get_template() to jinja_env.py - Remove duplicate implementations from post_template.py and feeds.py - Update plugins to use centralized functions from jinja_env - Rename local wrappers to _get_cached_template() to avoid conflicts - Simplifies template handling and reduces code duplication (net -99 lines) This makes it easier for any plugin to work with templates without reimplementing the same utilities. --- markata/plugins/feeds.py | 71 ++++++---------------------- markata/plugins/jinja_env.py | 80 ++++++++++++++++++++++++++++++++ markata/plugins/post_template.py | 50 ++++---------------- 3 files changed, 102 insertions(+), 99 deletions(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 6ce51368..41d6e6de 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -216,6 +216,9 @@ from markata.errors import DeprecationWarning from markata.hookspec import hook_impl from markata.hookspec import register_attr +from markata.plugins.jinja_env import get_template +from markata.plugins.jinja_env import get_template_paths +from markata.plugins.jinja_env import get_templates_mtime if TYPE_CHECKING: from frontmatter import Post @@ -440,58 +443,10 @@ def pre_render(markata: Markata) -> None: markata.feeds = Feeds(markata) -def get_template_paths(env): - """Extract template paths from Jinja2 Environment's loader.""" - from jinja2 import ChoiceLoader, FileSystemLoader - - paths = [] - loader = env.loader - - if isinstance(loader, ChoiceLoader): - for sub_loader in loader.loaders: - if isinstance(sub_loader, FileSystemLoader): - paths.extend(sub_loader.searchpath) - elif isinstance(loader, FileSystemLoader): - paths.extend(loader.searchpath) - - return paths - - -def get_templates_mtime(markata): - """Get latest mtime from all template directories. 
- - This tracks changes to any template file including includes, extends, and imports. - """ - max_mtime = 0 - for template_dir in get_template_paths(markata.jinja_env): - template_path = Path(template_dir) - if template_path.exists(): - for path in template_path.rglob('*'): - if path.is_file(): - try: - max_mtime = max(max_mtime, path.stat().st_mtime) - except (OSError, FileNotFoundError): - continue - return max_mtime - - @lru_cache() -def get_template(markata, template): - try: - return markata.jinja_env.get_template(template) - except jinja2.TemplateNotFound: - # try to load it as a file - ... - - try: - return Template(Path(template).read_text(), undefined=SilentUndefined) - except FileNotFoundError: - # default to load it as a string - ... - except OSError: # thrown by File name too long - # default to load it as a string - ... - return Template(template, undefined=SilentUndefined) +def _get_cached_template(markata, template): + """Get a template with caching, using the centralized get_template function.""" + return get_template(markata.jinja_env, template) @hook_impl @@ -512,7 +467,7 @@ def save(markata: Markata) -> None: if not home.exists() and archive.exists(): shutil.copy(str(archive), str(home)) - xsl_template = get_template(markata, feed.config.xsl_template) + xsl_template = _get_cached_template(markata, feed.config.xsl_template) xsl = xsl_template.render( markata=markata, __version__=__version__, @@ -534,12 +489,12 @@ def create_page( create an html unorderd list of posts. 
""" - template = get_template(markata, feed.config.template) - partial_template = get_template(markata, feed.config.partial_template) + template = _get_cached_template(markata, feed.config.template) + partial_template = _get_cached_template(markata, feed.config.partial_template) canonical_url = f"{markata.config.url}/{feed.config.slug}/" # Get templates mtime to bust cache when any template changes - templates_mtime = get_templates_mtime(markata) + templates_mtime = get_templates_mtime(markata.jinja_env) key = markata.make_hash( "feeds", @@ -613,7 +568,7 @@ def create_page( if feed_rss_from_cache is None: from_cache = False - rss_template = get_template(markata, feed.config.rss_template) + rss_template = _get_cached_template(markata, feed.config.rss_template) feed_rss = rss_template.render(markata=markata, feed=feed) cache.set(feed_rss_key, feed_rss) else: @@ -621,7 +576,7 @@ def create_page( if feed_sitemap_from_cache is None: from_cache = False - sitemap_template = get_template(markata, feed.config.sitemap_template) + sitemap_template = _get_cached_template(markata, feed.config.sitemap_template) feed_sitemap = sitemap_template.render(markata=markata, feed=feed) cache.set(feed_sitemap_key, feed_sitemap) else: @@ -670,7 +625,7 @@ def create_card( template = markata.config.get("feeds_config", {}).get("card_template", None) # Get templates mtime to bust cache when any template changes - templates_mtime = get_templates_mtime(markata) + templates_mtime = get_templates_mtime(markata.jinja_env) key = markata.make_hash( "feeds", template, str(post.to_dict()), str(templates_mtime) diff --git a/markata/plugins/jinja_env.py b/markata/plugins/jinja_env.py index 30b4cf3f..d0f24bd0 100644 --- a/markata/plugins/jinja_env.py +++ b/markata/plugins/jinja_env.py @@ -211,3 +211,83 @@ def configure(markata: Markata) -> None: # Register the environment on the config's private attribute markata.jinja_env = env + + +def get_template_paths(env: Environment) -> list[str]: + """Extract 
template paths from Jinja2 Environment's loader. + + Args: + env: Jinja2 Environment instance + + Returns: + List of template directory paths from all FileSystemLoaders + """ + paths = [] + loader = env.loader + + if isinstance(loader, ChoiceLoader): + for sub_loader in loader.loaders: + if isinstance(sub_loader, FileSystemLoader): + paths.extend(sub_loader.searchpath) + elif isinstance(loader, FileSystemLoader): + paths.extend(loader.searchpath) + + return paths + + +def get_templates_mtime(env: Environment) -> float: + """Get latest mtime from all template directories. + + This tracks changes to any template file including includes, extends, and imports. + + Args: + env: Jinja2 Environment instance + + Returns: + Maximum modification time across all template files, or 0 if none found + """ + max_mtime = 0 + for template_dir in get_template_paths(env): + template_path = Path(template_dir) + if template_path.exists(): + for path in template_path.rglob('*'): + if path.is_file(): + try: + max_mtime = max(max_mtime, path.stat().st_mtime) + except (OSError, FileNotFoundError): + continue + return max_mtime + + +def get_template(env: Environment, template: str) -> jinja2.Template: + """Get a template with fallback handling. + + Tries to load the template in the following order: + 1. From the Jinja2 environment (template loader) + 2. As a file path (if the string is a valid file path) + 3. As a string template (direct template compilation) + + Args: + env: Jinja2 Environment instance + template: Template name, file path, or template string + + Returns: + Compiled Jinja2 Template object + """ + # Try to load from environment first + try: + return env.get_template(template) + except jinja2.TemplateNotFound: + pass + + # Try to load as a file + try: + template_content = Path(template).read_text() + return env.from_string(template_content) + except FileNotFoundError: + pass + except OSError: # File name too long, etc. 
+ pass + + # Fall back to treating it as a string template + return env.from_string(template) diff --git a/markata/plugins/post_template.py b/markata/plugins/post_template.py index 0836a76d..1800937d 100644 --- a/markata/plugins/post_template.py +++ b/markata/plugins/post_template.py @@ -241,6 +241,9 @@ from markata import __version__ from markata.hookspec import hook_impl +from markata.plugins.jinja_env import get_template +from markata.plugins.jinja_env import get_template_paths +from markata.plugins.jinja_env import get_templates_mtime if TYPE_CHECKING: from markata import Markata @@ -389,56 +392,21 @@ def dynamic_templates_in_templates_dir(cls, value): _template_cache = {} -def get_template(markata, template): +def _get_cached_template(markata, template): """Get a template from the cache or compile it.""" cache_key = str(template) if cache_key in _template_cache: return _template_cache[cache_key] if isinstance(template, str): - template = markata.jinja_env.get_template(template) + template = get_template(markata.jinja_env, template) _template_cache[cache_key] = template return template -def get_template_paths(env): - """Extract template paths from Jinja2 Environment's loader.""" - from jinja2 import ChoiceLoader, FileSystemLoader - - paths = [] - loader = env.loader - - if isinstance(loader, ChoiceLoader): - for sub_loader in loader.loaders: - if isinstance(sub_loader, FileSystemLoader): - paths.extend(sub_loader.searchpath) - elif isinstance(loader, FileSystemLoader): - paths.extend(loader.searchpath) - - return paths - - -def get_templates_mtime(markata): - """Get latest mtime from all template directories. - - This tracks changes to any template file including includes, extends, and imports. 
- """ - max_mtime = 0 - for template_dir in get_template_paths(markata.jinja_env): - template_path = Path(template_dir) - if template_path.exists(): - for path in template_path.rglob('*'): - if path.is_file(): - try: - max_mtime = max(max_mtime, path.stat().st_mtime) - except (OSError, FileNotFoundError): - continue - return max_mtime - - def render_article(markata, cache, article): """Render an article using cached templates.""" - templates_mtime = get_templates_mtime(markata) + templates_mtime = get_templates_mtime(markata.jinja_env) key = markata.make_hash( "post_template", @@ -452,12 +420,12 @@ def render_article(markata, cache, article): return html if isinstance(article.template, str): - template = get_template(markata, article.template) + template = _get_cached_template(markata, article.template) html = render_template(markata, article, template) if isinstance(article.template, dict): html = { - slug: render_template(markata, article, get_template(markata, template)) + slug: render_template(markata, article, _get_cached_template(markata, template)) for slug, template in article.template.items() } cache.set(key, html, expire=markata.config.default_cache_expire) @@ -496,7 +464,7 @@ def save(markata: "Markata") -> None: if t.endswith("css") or t.endswith("js") or t.endswith("xsl") ] for template in linked_templates: - template = get_template(markata, template) + template = _get_cached_template(markata, template) css = template.render(markata=markata, __version__=__version__) Path(markata.config.output_dir / Path(template.filename).name).write_text(css) From 2db2396feb42670530f7118108392588cd9a8868 Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Fri, 5 Dec 2025 14:16:52 -0600 Subject: [PATCH 10/37] Simplify: Add built-in caching to get_template, remove plugin wrappers - Add @lru_cache to get_template() in jinja_env.py - Remove _get_cached_template() wrappers from post_template.py and feeds.py - Plugins now call get_template(markata.jinja_env, template) directly - Eliminates code duplication: -30 lines Now plugins just need one simple call - no need to implement their own caching wrappers. The centralized function handles everything. --- markata/plugins/feeds.py | 16 +++++----------- markata/plugins/jinja_env.py | 6 +++++- markata/plugins/post_template.py | 21 +++------------------ 3 files changed, 13 insertions(+), 30 deletions(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 41d6e6de..7c87e117 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -443,12 +443,6 @@ def pre_render(markata: Markata) -> None: markata.feeds = Feeds(markata) -@lru_cache() -def _get_cached_template(markata, template): - """Get a template with caching, using the centralized get_template function.""" - return get_template(markata.jinja_env, template) - - @hook_impl def save(markata: Markata) -> None: """ @@ -467,7 +461,7 @@ def save(markata: Markata) -> None: if not home.exists() and archive.exists(): shutil.copy(str(archive), str(home)) - xsl_template = _get_cached_template(markata, feed.config.xsl_template) + xsl_template = get_template(markata.jinja_env, feed.config.xsl_template) xsl = xsl_template.render( markata=markata, __version__=__version__, @@ -489,8 +483,8 @@ def create_page( create an html unorderd list of posts. 
""" - template = _get_cached_template(markata, feed.config.template) - partial_template = _get_cached_template(markata, feed.config.partial_template) + template = get_template(markata.jinja_env, feed.config.template) + partial_template = get_template(markata.jinja_env, feed.config.partial_template) canonical_url = f"{markata.config.url}/{feed.config.slug}/" # Get templates mtime to bust cache when any template changes @@ -568,7 +562,7 @@ def create_page( if feed_rss_from_cache is None: from_cache = False - rss_template = _get_cached_template(markata, feed.config.rss_template) + rss_template = get_template(markata.jinja_env, feed.config.rss_template) feed_rss = rss_template.render(markata=markata, feed=feed) cache.set(feed_rss_key, feed_rss) else: @@ -576,7 +570,7 @@ def create_page( if feed_sitemap_from_cache is None: from_cache = False - sitemap_template = _get_cached_template(markata, feed.config.sitemap_template) + sitemap_template = get_template(markata.jinja_env, feed.config.sitemap_template) feed_sitemap = sitemap_template.render(markata=markata, feed=feed) cache.set(feed_sitemap_key, feed_sitemap) else: diff --git a/markata/plugins/jinja_env.py b/markata/plugins/jinja_env.py index d0f24bd0..a930b919 100644 --- a/markata/plugins/jinja_env.py +++ b/markata/plugins/jinja_env.py @@ -61,6 +61,7 @@ def render_template(markata, content): - Silent undefined behavior means undefined variables render as empty strings """ +from functools import lru_cache from pathlib import Path from typing import List @@ -259,14 +260,17 @@ def get_templates_mtime(env: Environment) -> float: return max_mtime +@lru_cache(maxsize=128) def get_template(env: Environment, template: str) -> jinja2.Template: - """Get a template with fallback handling. + """Get a template with fallback handling and caching. Tries to load the template in the following order: 1. From the Jinja2 environment (template loader) 2. As a file path (if the string is a valid file path) 3. 
As a string template (direct template compilation) + Templates are cached after loading for performance. + Args: env: Jinja2 Environment instance template: Template name, file path, or template string diff --git a/markata/plugins/post_template.py b/markata/plugins/post_template.py index 1800937d..6a2da5c6 100644 --- a/markata/plugins/post_template.py +++ b/markata/plugins/post_template.py @@ -389,21 +389,6 @@ def dynamic_templates_in_templates_dir(cls, value): return templates_dir -_template_cache = {} - - -def _get_cached_template(markata, template): - """Get a template from the cache or compile it.""" - cache_key = str(template) - if cache_key in _template_cache: - return _template_cache[cache_key] - - if isinstance(template, str): - template = get_template(markata.jinja_env, template) - _template_cache[cache_key] = template - return template - - def render_article(markata, cache, article): """Render an article using cached templates.""" templates_mtime = get_templates_mtime(markata.jinja_env) @@ -420,12 +405,12 @@ def render_article(markata, cache, article): return html if isinstance(article.template, str): - template = _get_cached_template(markata, article.template) + template = get_template(markata.jinja_env, article.template) html = render_template(markata, article, template) if isinstance(article.template, dict): html = { - slug: render_template(markata, article, _get_cached_template(markata, template)) + slug: render_template(markata, article, get_template(markata.jinja_env, template)) for slug, template in article.template.items() } cache.set(key, html, expire=markata.config.default_cache_expire) @@ -464,7 +449,7 @@ def save(markata: "Markata") -> None: if t.endswith("css") or t.endswith("js") or t.endswith("xsl") ] for template in linked_templates: - template = _get_cached_template(markata, template) + template = get_template(markata.jinja_env, template) css = template.render(markata=markata, __version__=__version__) Path(markata.config.output_dir / 
Path(template.filename).name).write_text(css) From 47856e6ab2da574a17a7bf33e09e0b046dd23c13 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Fri, 5 Dec 2025 14:22:26 -0600 Subject: [PATCH 11/37] docs: update CHANGELOG for 0.11.0 release - Document cache invalidation improvements across all plugins - Highlight breaking changes for plugin authors using internal get_template() - Show migration path to centralized jinja_env utilities --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cfd649b..67f312f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,33 @@ # Markata Changelog +## 0.11.0 + +### Cache Invalidation Improvements + +- Fix: feeds now properly invalidate cache when post metadata changes (title, date, slug, published, description) +- Fix: feeds now properly invalidate cache when template files are modified +- Fix: post_template now tracks template file changes for cache invalidation +- Fix: redirects now invalidate cache when template files change +- Fix: jinja_md now includes post metadata and version in cache keys +- Fix: standardized cache keys across plugins to include `__version__` for proper invalidation on updates +- Fix: render_markdown now includes backend and extensions in cache key +- Perf: heading_link replaced expensive file I/O with `__version__` in cache key + +### Template Utilities (Breaking for Plugin Authors) + +- **BREAKING**: Removed internal `get_template()` functions from `feeds.py` and `post_template.py` +- Feat: added centralized `get_template()`, `get_template_paths()`, and `get_templates_mtime()` to `jinja_env` plugin +- Feat: `get_template()` includes automatic caching with `@lru_cache` and smart fallback handling +- **Plugin authors**: Import from `markata.plugins.jinja_env` instead of using internal functions + ```python + from markata.plugins.jinja_env import get_template, get_templates_mtime + template = get_template(markata.jinja_env, 
"template.html") + ``` + +## 0.10.1 + +- Release: version bump + ## 0.10.0 - Fix: `auto_description` now more accurately returns plain text, does not cut off words, and add an ellipsis. From 12649ea0a6fb261b351bae04b0c0bf3649bf7aba Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Fri, 5 Dec 2025 15:56:02 -0600 Subject: [PATCH 12/37] ruff fixes --- markata/plugins/feeds.py | 2 -- markata/plugins/jinja_env.py | 26 +++++++++++++------------- markata/plugins/post_template.py | 1 - 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 7c87e117..7ec1575e 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -192,7 +192,6 @@ import shutil import textwrap import warnings -from functools import lru_cache from pathlib import Path from typing import TYPE_CHECKING from typing import Any @@ -217,7 +216,6 @@ from markata.hookspec import hook_impl from markata.hookspec import register_attr from markata.plugins.jinja_env import get_template -from markata.plugins.jinja_env import get_template_paths from markata.plugins.jinja_env import get_templates_mtime if TYPE_CHECKING: diff --git a/markata/plugins/jinja_env.py b/markata/plugins/jinja_env.py index a930b919..5b75fc75 100644 --- a/markata/plugins/jinja_env.py +++ b/markata/plugins/jinja_env.py @@ -216,34 +216,34 @@ def configure(markata: Markata) -> None: def get_template_paths(env: Environment) -> list[str]: """Extract template paths from Jinja2 Environment's loader. 
- + Args: env: Jinja2 Environment instance - + Returns: List of template directory paths from all FileSystemLoaders """ paths = [] loader = env.loader - + if isinstance(loader, ChoiceLoader): for sub_loader in loader.loaders: if isinstance(sub_loader, FileSystemLoader): paths.extend(sub_loader.searchpath) elif isinstance(loader, FileSystemLoader): paths.extend(loader.searchpath) - + return paths def get_templates_mtime(env: Environment) -> float: """Get latest mtime from all template directories. - + This tracks changes to any template file including includes, extends, and imports. - + Args: env: Jinja2 Environment instance - + Returns: Maximum modification time across all template files, or 0 if none found """ @@ -263,18 +263,18 @@ def get_templates_mtime(env: Environment) -> float: @lru_cache(maxsize=128) def get_template(env: Environment, template: str) -> jinja2.Template: """Get a template with fallback handling and caching. - + Tries to load the template in the following order: 1. From the Jinja2 environment (template loader) 2. As a file path (if the string is a valid file path) 3. As a string template (direct template compilation) - + Templates are cached after loading for performance. - + Args: env: Jinja2 Environment instance template: Template name, file path, or template string - + Returns: Compiled Jinja2 Template object """ @@ -283,7 +283,7 @@ def get_template(env: Environment, template: str) -> jinja2.Template: return env.get_template(template) except jinja2.TemplateNotFound: pass - + # Try to load as a file try: template_content = Path(template).read_text() @@ -292,6 +292,6 @@ def get_template(env: Environment, template: str) -> jinja2.Template: pass except OSError: # File name too long, etc. 
pass - + # Fall back to treating it as a string template return env.from_string(template) diff --git a/markata/plugins/post_template.py b/markata/plugins/post_template.py index 6a2da5c6..1f4c6f41 100644 --- a/markata/plugins/post_template.py +++ b/markata/plugins/post_template.py @@ -242,7 +242,6 @@ from markata import __version__ from markata.hookspec import hook_impl from markata.plugins.jinja_env import get_template -from markata.plugins.jinja_env import get_template_paths from markata.plugins.jinja_env import get_templates_mtime if TYPE_CHECKING: From 623cba10e2048dec020768431062f0cbf5ccfab0 Mon Sep 17 00:00:00 2001 From: autobump Date: Fri, 5 Dec 2025 21:57:22 +0000 Subject: [PATCH 13/37] =?UTF-8?q?Bump=20version:=200.11.0.dev1=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index 494f8975..95c7fe42 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev1" +__version__ = "0.11.0.dev2" From dda86037095f5e8d24eb924962ab85e1c8e0f49b Mon Sep 17 00:00:00 2001 From: Waylon Walker Date: Sun, 7 Dec 2025 16:33:06 -0600 Subject: [PATCH 14/37] Add atom support (#167) * feat: atom support --- CHANGELOG.md | 1 + markata/plugins/feeds.py | 115 ++++++++++++++++++++++++++----------- markata/templates/atom.xml | 42 ++++++++++++++ 3 files changed, 125 insertions(+), 33 deletions(-) create mode 100644 markata/templates/atom.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index 67f312f7..f0c54e7f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Fix: standardized cache keys across plugins to include `__version__` for proper invalidation on updates - Fix: render_markdown now includes backend and extensions in cache key - Perf: heading_link replaced expensive file I/O with `__version__` in cache key +- Feat: 
feeds now support atom feeds ### Template Utilities (Breaking for Plugin Authors) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 7ec1575e..68ad5d83 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -244,6 +244,8 @@ class FeedConfig(pydantic.BaseModel, JupyterMixin): tail: Optional[int] = None rss: bool = True sitemap: bool = True + atom: bool = True + atom_template: str = "atom.xml" # feed_groups: Dict[str, List[str]] = Field(default_factory=dict) # sidebar_feeds: List[str] = Field(default_factory=list) card_template: str = "card.html" @@ -471,7 +473,6 @@ def save(markata: Markata) -> None: if current_xsl != xsl: xsl_file.write_text(xsl) - def create_page( markata: Markata, feed: Feed, @@ -492,7 +493,6 @@ def create_page( "feeds", template, __version__, - # cards, markata.config.url, markata.config.description, feed.config.title, @@ -507,11 +507,13 @@ def create_page( html_partial_key = markata.make_hash(key, "partial_html") feed_rss_key = markata.make_hash(key, "rss") feed_sitemap_key = markata.make_hash(key, "sitemap") + feed_atom_key = markata.make_hash(key, "atom") feed_html_from_cache = markata.precache.get(html_key) feed_html_partial_from_cache = markata.precache.get(html_partial_key) feed_rss_from_cache = markata.precache.get(feed_rss_key) feed_sitemap_from_cache = markata.precache.get(feed_sitemap_key) + feed_atom_from_cache = markata.precache.get(feed_atom_key) output_file = Path(markata.config.output_dir) / feed.config.slug / "index.html" output_file.parent.mkdir(exist_ok=True, parents=True) @@ -529,7 +531,14 @@ def create_page( ) sitemap_output_file.parent.mkdir(exist_ok=True, parents=True) + atom_output_file = ( + Path(markata.config.output_dir) / feed.config.slug / "atom.xml" + ) + atom_output_file.parent.mkdir(exist_ok=True, parents=True) + from_cache = True + + # ---------- HTML ---------- if feed_html_from_cache is None: from_cache = False feed_html = template.render( @@ -544,6 +553,7 @@ def create_page( 
else: feed_html = feed_html_from_cache + # ---------- Partial HTML ---------- if feed_html_partial_from_cache is None: from_cache = False feed_html_partial = partial_template.render( @@ -558,47 +568,86 @@ def create_page( else: feed_html_partial = feed_html_partial_from_cache - if feed_rss_from_cache is None: - from_cache = False - rss_template = get_template(markata.jinja_env, feed.config.rss_template) - feed_rss = rss_template.render(markata=markata, feed=feed) - cache.set(feed_rss_key, feed_rss) + # ---------- RSS ---------- + if feed.config.rss: + if feed_rss_from_cache is None: + from_cache = False + rss_template = get_template(markata.jinja_env, feed.config.rss_template) + feed_rss = rss_template.render(markata=markata, feed=feed) + cache.set(feed_rss_key, feed_rss) + else: + feed_rss = feed_rss_from_cache else: - feed_rss = feed_rss_from_cache - - if feed_sitemap_from_cache is None: - from_cache = False - sitemap_template = get_template(markata.jinja_env, feed.config.sitemap_template) - feed_sitemap = sitemap_template.render(markata=markata, feed=feed) - cache.set(feed_sitemap_key, feed_sitemap) + feed_rss = None + + # ---------- Sitemap ---------- + if feed.config.sitemap: + if feed_sitemap_from_cache is None: + from_cache = False + sitemap_template = get_template(markata.jinja_env, feed.config.sitemap_template) + feed_sitemap = sitemap_template.render(markata=markata, feed=feed) + cache.set(feed_sitemap_key, feed_sitemap) + else: + feed_sitemap = feed_sitemap_from_cache else: - feed_sitemap = feed_sitemap_from_cache - - if ( - from_cache - and output_file.exists() - and partial_output_file.exists() - and rss_output_file.exists() - and sitemap_output_file.exists() - ): - return - + feed_sitemap = None + + # ---------- Atom ---------- + if feed.config.atom: + if feed_atom_from_cache is None: + from_cache = False + atom_template = get_template(markata, feed.config.atom_template) + feed_atom = atom_template.render( + markata=markata, + feed=feed, + 
datetime=datetime, # ⭐ so the template can use datetime + ) + cache.set(feed_atom_key, feed_atom) + else: + feed_atom = feed_atom_from_cache + # If everything came from cache and files exist, bail early + if ( + from_cache + and output_file.exists() + and partial_output_file.exists() + and (not feed.config.rss or rss_output_file.exists()) + and (not feed.config.sitemap or sitemap_output_file.exists()) + and (not feed.config.atom or atom_output_file.exists()) + ): + return + + # Write HTML current_html = output_file.read_text() if output_file.exists() else "" if current_html != feed_html: output_file.write_text(feed_html) + + # Write partial HTML current_partial_html = ( partial_output_file.read_text() if partial_output_file.exists() else "" ) if current_partial_html != feed_html_partial: partial_output_file.write_text(feed_html_partial) - current_rss = rss_output_file.read_text() if rss_output_file.exists() else "" - if current_rss != feed_rss: - rss_output_file.write_text(feed_rss) - current_sitemap = ( - sitemap_output_file.read_text() if sitemap_output_file.exists() else "" - ) - if current_sitemap != feed_sitemap: - sitemap_output_file.write_text(feed_sitemap) + + # Write RSS (if enabled) + if feed_rss is not None: + current_rss = rss_output_file.read_text() if rss_output_file.exists() else "" + if current_rss != feed_rss: + rss_output_file.write_text(feed_rss) + + # Write sitemap (if enabled) + if feed_sitemap is not None: + current_sitemap = ( + sitemap_output_file.read_text() if sitemap_output_file.exists() else "" + ) + if current_sitemap != feed_sitemap: + sitemap_output_file.write_text(feed_sitemap) + + # Write Atom (if enabled) + if feed_atom is not None: + current_atom = atom_output_file.read_text() if atom_output_file.exists() else "" + if current_atom != feed_atom: + atom_output_file.write_text(feed_atom) + @background.task diff --git a/markata/templates/atom.xml b/markata/templates/atom.xml new file mode 100644 index 00000000..b47325f7 --- /dev/null 
+++ b/markata/templates/atom.xml @@ -0,0 +1,42 @@ + + + {{ feed.config.title }} + {{ markata.config.url }}/{{ feed.config.slug }}/ + + + + + {# Feed-level updated (Atom likes this) #} + {% set updated_post = feed.posts[0] if feed.posts else None %} + {% if updated_post and updated_post.date %} + {{ updated_post.date.isoformat() }}Z + {% else %} + {{ datetime.datetime.utcnow().isoformat() }}Z + {% endif %} + + {% for post in feed.posts %} + + {{ post.title }} + {{ markata.config.url }}/{{ post.slug }}/ + + + {% if post.date %} + {{ post.date.isoformat() }}Z + {{ post.date.isoformat() }}Z + {% endif %} + + {% if post.description %} + + {% endif %} + + {% if post.content %} + + {% endif %} + + {% endfor %} + + From 494b60e683182fde7d6669f18f886c05d1f0d783 Mon Sep 17 00:00:00 2001 From: autobump Date: Sun, 7 Dec 2025 22:34:27 +0000 Subject: [PATCH 15/37] =?UTF-8?q?Bump=20version:=200.11.0.dev2=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index 95c7fe42..e1e6d835 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev2" +__version__ = "0.11.0.dev3" From 10fa96f00ec5b0f30b31f275300dc7b22f040d59 Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Mon, 8 Dec 2025 10:25:43 -0600 Subject: [PATCH 16/37] fix: markata has no attribute get_template --- markata/plugins/feeds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 68ad5d83..d56f06fb 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -596,7 +596,7 @@ def create_page( if feed.config.atom: if feed_atom_from_cache is None: from_cache = False - atom_template = get_template(markata, feed.config.atom_template) + atom_template = get_template(markata.jinja_env, feed.config.atom_template) feed_atom = atom_template.render( markata=markata, feed=feed, From 619336f025f2b428e01c6222f31f2f7b78e64c06 Mon Sep 17 00:00:00 2001 From: autobump Date: Mon, 8 Dec 2025 16:26:56 +0000 Subject: [PATCH 17/37] =?UTF-8?q?Bump=20version:=200.11.0.dev3=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index e1e6d835..1db8026f 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev3" +__version__ = "0.11.0.dev4" From b51059bc74794f66da3be8256a6d925e78f4bbf6 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Mon, 8 Dec 2025 13:43:04 -0600 Subject: [PATCH 18/37] Fix: `auto_description` now strips wikilinks, HTML tags, markdown-it attributes (e.g. 
{.class-name}), admonitions (!!!, !!!+, ???, ???+), and HTML comments for cleaner descriptions --- CHANGELOG.md | 1 + markata/plugins/auto_description.py | 34 ++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0c54e7f..3e66749f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Fix: jinja_md now includes post metadata and version in cache keys - Fix: standardized cache keys across plugins to include `__version__` for proper invalidation on updates - Fix: render_markdown now includes backend and extensions in cache key +- Fix: `auto_description` now strips wikilinks, HTML tags, markdown-it attributes (e.g. {.class-name}), admonitions (!!!, !!!+, ???, ???+), and HTML comments for cleaner descriptions - Perf: heading_link replaced expensive file I/O with `__version__` in cache key - Feat: feeds now support atom feeds diff --git a/markata/plugins/auto_description.py b/markata/plugins/auto_description.py index c1dc598e..ff2a70cb 100644 --- a/markata/plugins/auto_description.py +++ b/markata/plugins/auto_description.py @@ -111,9 +111,28 @@ def get_description(article: "Post") -> str: Uses markdown-it-py to parse the markdown and extracts text content from all nodes. Strips out any HTML tags, returning only plain text. Properly handles markdown links and formatting. """ + import re from bs4 import BeautifulSoup from markdown_it import MarkdownIt + content = article.content + + # Remove admonitions (e.g., !!!, !!!+, ???, ???+) + content = re.sub(r'^[!?]{3}\+? 
.*?$', '', content, flags=re.MULTILINE) + + # Remove CSS class attributes {.class-name} + content = re.sub(r'\{\.[\w\-]+\}', '', content) + + # Remove wikilinks [[link]] or [[link|text]] + content = re.sub(r'\[\[([^\]|]+)(?:\|([^\]]+))?\]\]', lambda m: m.group(2) if m.group(2) else m.group(1), content) + + # Remove HTML comments + content = re.sub(r'', '', content, flags=re.DOTALL) + + # Remove HTML tags before markdown parsing + soup = BeautifulSoup(content, "html.parser") + content = soup.get_text(separator=" ") + def extract_text(tokens): text_chunks = [] for token in tokens: @@ -125,14 +144,17 @@ def extract_text(tokens): return " ".join(text_chunks) md = MarkdownIt("commonmark") - tokens = md.parse(article.content) + tokens = md.parse(content) # Recursively extract visible text from all tokens description = extract_text(tokens) - # Remove any HTML tags using BeautifulSoup - soup = BeautifulSoup(description, "html.parser") - plain_text = soup.get_text(separator=" ", strip=True) - return plain_text + + # Clean up excessive whitespace + description = re.sub(r'\s+', ' ', description).strip() + + print(f'Generated description: {description[:60]}...') + + return description def set_description( @@ -148,7 +170,7 @@ def set_description( the configured descriptions for the article. """ key = markata.make_hash( - "auto_description", + "auto_description2", article.content, plugin_text, config, From c773bf28dacbb8091785fdf60d1afb703309cd6e Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Mon, 8 Dec 2025 13:48:14 -0600 Subject: [PATCH 19/37] fix: also include jinja blocks --- CHANGELOG.md | 2 +- markata/plugins/auto_description.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e66749f..5144be11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ - Fix: jinja_md now includes post metadata and version in cache keys - Fix: standardized cache keys across plugins to include `__version__` for proper invalidation on updates - Fix: render_markdown now includes backend and extensions in cache key -- Fix: `auto_description` now strips wikilinks, HTML tags, markdown-it attributes (e.g. {.class-name}), admonitions (!!!, !!!+, ???, ???+), and HTML comments for cleaner descriptions +- Fix: `auto_description` now strips wikilinks, HTML tags, markdown-it attributes (e.g. {.class-name}), Jinja template tags, admonitions (!!!, !!!+, ???, ???+), and HTML comments for cleaner descriptions - Perf: heading_link replaced expensive file I/O with `__version__` in cache key - Feat: feeds now support atom feeds diff --git a/markata/plugins/auto_description.py b/markata/plugins/auto_description.py index ff2a70cb..669b8ba4 100644 --- a/markata/plugins/auto_description.py +++ b/markata/plugins/auto_description.py @@ -123,6 +123,10 @@ def get_description(article: "Post") -> str: # Remove CSS class attributes {.class-name} content = re.sub(r'\{\.[\w\-]+\}', '', content) + # Remove Jinja template tags {% %} and {{ }} + content = re.sub(r'\{%.*?%\}', '', content, flags=re.DOTALL) + content = re.sub(r'\{\{.*?\}\}', '', content, flags=re.DOTALL) + # Remove wikilinks [[link]] or [[link|text]] content = re.sub(r'\[\[([^\]|]+)(?:\|([^\]]+))?\]\]', lambda m: m.group(2) if m.group(2) else m.group(1), content) @@ -152,8 +156,6 @@ def extract_text(tokens): # Clean up excessive whitespace description = re.sub(r'\s+', ' ', description).strip() - print(f'Generated description: {description[:60]}...') - return 
description From d49817fc2571875a28d36dba13a7ab2ec6ccb666 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Mon, 8 Dec 2025 14:46:31 -0600 Subject: [PATCH 20/37] Fix: `publish_html` now properly resolves custom `output_html` paths relative to `output_dir`, preventing files from being written to project root --- CHANGELOG.md | 1 + markata/plugins/publish_html.py | 24 +++++++++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5144be11..f8a82192 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Fix: standardized cache keys across plugins to include `__version__` for proper invalidation on updates - Fix: render_markdown now includes backend and extensions in cache key - Fix: `auto_description` now strips wikilinks, HTML tags, markdown-it attributes (e.g. {.class-name}), Jinja template tags, admonitions (!!!, !!!+, ???, ???+), and HTML comments for cleaner descriptions +- Fix: `publish_html` now properly resolves custom `output_html` paths relative to `output_dir`, preventing files from being written to project root - Perf: heading_link replaced expensive file I/O with `__version__` in cache key - Feat: feeds now support atom feeds diff --git a/markata/plugins/publish_html.py b/markata/plugins/publish_html.py index f6dd6d3b..bfee126c 100644 --- a/markata/plugins/publish_html.py +++ b/markata/plugins/publish_html.py @@ -167,7 +167,29 @@ def default_output_html(cls, v: Optional[Union[str, Path]], info) -> Optional[Pa def output_html_relative(cls, v: Optional[Path], info) -> Optional[Path]: if v is None: return cls.default_output_html(v, info) - return v + + # If a custom output_html is provided, ensure it's relative to output_dir + markata = info.data.get("markata") + if markata is None: + return v + + output_dir = markata.config.output_dir + + # Convert string to Path if needed + if isinstance(v, str): + v = Path(v) + + # If v is already absolute and within output_dir, keep it + if 
v.is_absolute(): + try: + v.relative_to(output_dir) + return v + except ValueError: + # Not relative to output_dir, make it so + pass + + # Make relative paths relative to output_dir + return output_dir / v @field_validator("output_html", mode="before") @classmethod From b2ceedbb061b82bc004dba78a147f802ff2fa7a4 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Mon, 8 Dec 2025 14:49:10 -0600 Subject: [PATCH 21/37] fix atom issues --- markata/templates/atom.xml | 24 ++++++++---- markata/templates/atom.xsl | 67 +++++++++++++++++++++++++++++++++ markata/templates/rss.xml | 2 +- markata/templates/rss_card.html | 39 +++++++++++++++++-- 4 files changed, 119 insertions(+), 13 deletions(-) create mode 100644 markata/templates/atom.xsl diff --git a/markata/templates/atom.xml b/markata/templates/atom.xml index b47325f7..050835d8 100644 --- a/markata/templates/atom.xml +++ b/markata/templates/atom.xml @@ -1,12 +1,18 @@ + - {{ feed.config.title }} - {{ markata.config.url }}/{{ feed.config.slug }}/ + {# Normalize once: pydantic Url -> string, strip trailing slash #} + {% set base_url = markata.config.url | string | trim('/') %} + {% set feed_slug = feed.config.slug | string | trim('/') %} - - + {{ feed.config.title|e }} + {{ markata.config.description | e }} + {{ markata.config.url | e }} + {{ (base_url ~ '/' ~ feed_slug ~ '/') | e }} + + + - {# Feed-level updated (Atom likes this) #} {% set updated_post = feed.posts[0] if feed.posts else None %} {% if updated_post and updated_post.date %} {{ updated_post.date.isoformat() }}Z @@ -15,10 +21,12 @@ {% endif %} {% for post in feed.posts %} + {% set post_slug = post.slug | string | trim('/') %} - {{ post.title }} - {{ markata.config.url }}/{{ post.slug }}/ - + {{ post.title|e }} + + {{ (base_url ~ '/' ~ post_slug ~ '/') | e }} + {% if post.date %} {{ post.date.isoformat() }}Z diff --git a/markata/templates/atom.xsl b/markata/templates/atom.xsl new file mode 100644 index 00000000..389cd800 --- /dev/null +++ 
b/markata/templates/atom.xsl @@ -0,0 +1,67 @@ +{% extends "base.xsl" %} +{% block content %} +
+
+
+

+ + + + + + + + + + +

+ +

+ + + + + + + + + + +

+ + + + + + + + + + + + + + + + + + + + + + Visit Website → + +
+ +
+

Recent Items

+
    + + + {% include 'rss_card.html' %} + +
+
+
+
+{% endblock %} diff --git a/markata/templates/rss.xml b/markata/templates/rss.xml index b0348374..0e028256 100644 --- a/markata/templates/rss.xml +++ b/markata/templates/rss.xml @@ -2,7 +2,7 @@ - {{ feed.config.name | e }} + {{ feed.config.title | e }} {{ markata.config.url | e }} {{ markata.config.description | e }} Markata diff --git a/markata/templates/rss_card.html b/markata/templates/rss_card.html index cc7f1ec8..f4d4f5d9 100644 --- a/markata/templates/rss_card.html +++ b/markata/templates/rss_card.html @@ -1,16 +1,47 @@
  • + - + + + + + + + + + + + + + + +

    - + +

    +

    - + + + + + + + + + + + +

    +
  • + From 871bafc8aa72494b16faee758c0207b2ed141f96 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Mon, 8 Dec 2025 14:49:37 -0600 Subject: [PATCH 22/37] ruff fix --- markata/plugins/auto_description.py | 15 ++++++++------- markata/plugins/publish_html.py | 10 +++++----- tests/test_feeds.py | 3 ++- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/markata/plugins/auto_description.py b/markata/plugins/auto_description.py index 669b8ba4..45b1e94c 100644 --- a/markata/plugins/auto_description.py +++ b/markata/plugins/auto_description.py @@ -112,27 +112,28 @@ def get_description(article: "Post") -> str: Strips out any HTML tags, returning only plain text. Properly handles markdown links and formatting. """ import re + from bs4 import BeautifulSoup from markdown_it import MarkdownIt content = article.content - + # Remove admonitions (e.g., !!!, !!!+, ???, ???+) content = re.sub(r'^[!?]{3}\+? .*?$', '', content, flags=re.MULTILINE) - + # Remove CSS class attributes {.class-name} content = re.sub(r'\{\.[\w\-]+\}', '', content) - + # Remove Jinja template tags {% %} and {{ }} content = re.sub(r'\{%.*?%\}', '', content, flags=re.DOTALL) content = re.sub(r'\{\{.*?\}\}', '', content, flags=re.DOTALL) - + # Remove wikilinks [[link]] or [[link|text]] content = re.sub(r'\[\[([^\]|]+)(?:\|([^\]]+))?\]\]', lambda m: m.group(2) if m.group(2) else m.group(1), content) - + # Remove HTML comments content = re.sub(r'', '', content, flags=re.DOTALL) - + # Remove HTML tags before markdown parsing soup = BeautifulSoup(content, "html.parser") content = soup.get_text(separator=" ") @@ -152,7 +153,7 @@ def extract_text(tokens): # Recursively extract visible text from all tokens description = extract_text(tokens) - + # Clean up excessive whitespace description = re.sub(r'\s+', ' ', description).strip() diff --git a/markata/plugins/publish_html.py b/markata/plugins/publish_html.py index bfee126c..ebca2303 100644 --- a/markata/plugins/publish_html.py +++ 
b/markata/plugins/publish_html.py @@ -167,18 +167,18 @@ def default_output_html(cls, v: Optional[Union[str, Path]], info) -> Optional[Pa def output_html_relative(cls, v: Optional[Path], info) -> Optional[Path]: if v is None: return cls.default_output_html(v, info) - + # If a custom output_html is provided, ensure it's relative to output_dir markata = info.data.get("markata") if markata is None: return v - + output_dir = markata.config.output_dir - + # Convert string to Path if needed if isinstance(v, str): v = Path(v) - + # If v is already absolute and within output_dir, keep it if v.is_absolute(): try: @@ -187,7 +187,7 @@ def output_html_relative(cls, v: Optional[Path], info) -> Optional[Path]: except ValueError: # Not relative to output_dir, make it so pass - + # Make relative paths relative to output_dir return output_dir / v diff --git a/tests/test_feeds.py b/tests/test_feeds.py index f993aaa3..fd955089 100644 --- a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -1,7 +1,8 @@ import rich import markata -from markata.plugins.feeds import Feed, Feeds +from markata.plugins.feeds import Feed +from markata.plugins.feeds import Feeds class DummyMarkata: From 7dcca6d0db370db4a3e9e3cd06691eedc0cef4cd Mon Sep 17 00:00:00 2001 From: autobump Date: Mon, 8 Dec 2025 20:50:46 +0000 Subject: [PATCH 23/37] =?UTF-8?q?Bump=20version:=200.11.0.dev4=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index 1db8026f..4c0379b3 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev4" +__version__ = "0.11.0.dev5" From 73be834a58aa1acacec75ea43a4108d55fc78ef3 Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Mon, 8 Dec 2025 16:38:10 -0600 Subject: [PATCH 24/37] perf: optimize file writes and feed generation - Cache expensive feed.map() calls during hash generation (~7.7s savings) - Batch directory creation in feeds plugin (~2s savings) - Only read XSL files when they exist and need comparison - Only write files when content changes in: - to_json (markata.json) - service_worker (service-worker.js) - redirects (redirect HTML files) - jinja_env (head.html template) - post_template (CSS/JS/XSL templates) - feeds (XSL files) Prevents unnecessary file system modifications and downstream syncing. --- CHANGELOG.md | 7 +++++++ markata/plugins/feeds.py | 33 ++++++++++++++++++++----------- markata/plugins/jinja_env.py | 9 +++++---- markata/plugins/post_template.py | 5 ++++- markata/plugins/redirects.py | 5 ++++- markata/plugins/service_worker.py | 4 +++- markata/plugins/to_json.py | 5 ++++- 7 files changed, 48 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8a82192..770b8556 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ ## 0.11.0 +### Performance Improvements + +- Perf: feeds now cache expensive `feed.map()` calls during hash generation (~7.7s savings) +- Perf: feeds batch directory creation operations (~2s savings) +- Perf: feeds only read XSL files when they exist and need comparison +- Perf: to_json, service_worker, redirects, jinja_env, and post_template now only write files when content changes (prevents unnecessary file system modifications and downstream syncing) + ### Cache Invalidation Improvements - Fix: feeds now properly invalidate cache when post metadata changes (title, date, slug, published, description) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index d56f06fb..a5dcdfdf 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -469,8 +469,13 @@ def save(markata: Markata) -> None: config=markata.config, ) xsl_file = Path(markata.config.output_dir) / "rss.xsl" - 
current_xsl = xsl_file.read_text() if xsl_file.exists() else "" - if current_xsl != xsl: + # Only read file if it exists and we need to compare + should_write = True + if xsl_file.exists(): + current_xsl = xsl_file.read_text() + should_write = current_xsl != xsl + + if should_write: xsl_file.write_text(xsl) def create_page( @@ -488,6 +493,16 @@ def create_page( # Get templates mtime to bust cache when any template changes templates_mtime = get_templates_mtime(markata.jinja_env) + + # Cache expensive feed.map() call for hash generation + cache_key_posts = f"feed_hash_posts_{feed.config.slug}" + if not hasattr(markata, "_feed_hash_cache"): + markata._feed_hash_cache = {} + + if cache_key_posts not in markata._feed_hash_cache: + markata._feed_hash_cache[cache_key_posts] = feed.map("str(post.to_dict())") + + posts_hash_data = markata._feed_hash_cache[cache_key_posts] key = markata.make_hash( "feeds", @@ -496,7 +511,7 @@ def create_page( markata.config.url, markata.config.description, feed.config.title, - feed.map("str(post.to_dict())"), # Track all post metadata, not just content + posts_hash_data, # Use cached post data canonical_url, str(templates_mtime), # Track template file changes # datetime.datetime.today(), @@ -516,25 +531,19 @@ def create_page( feed_atom_from_cache = markata.precache.get(feed_atom_key) output_file = Path(markata.config.output_dir) / feed.config.slug / "index.html" - output_file.parent.mkdir(exist_ok=True, parents=True) - partial_output_file = ( Path(markata.config.output_dir) / feed.config.slug / "partial" / "index.html" ) - partial_output_file.parent.mkdir(exist_ok=True, parents=True) - rss_output_file = Path(markata.config.output_dir) / feed.config.slug / "rss.xml" - rss_output_file.parent.mkdir(exist_ok=True, parents=True) - sitemap_output_file = ( Path(markata.config.output_dir) / feed.config.slug / "sitemap.xml" ) - sitemap_output_file.parent.mkdir(exist_ok=True, parents=True) - atom_output_file = ( Path(markata.config.output_dir) / 
feed.config.slug / "atom.xml" ) - atom_output_file.parent.mkdir(exist_ok=True, parents=True) + + # Create all directories in one batch + partial_output_file.parent.mkdir(exist_ok=True, parents=True) from_cache = True diff --git a/markata/plugins/jinja_env.py b/markata/plugins/jinja_env.py index 5b75fc75..4e4182d9 100644 --- a/markata/plugins/jinja_env.py +++ b/markata/plugins/jinja_env.py @@ -176,11 +176,12 @@ def configure(markata: Markata) -> None: markata.config.dynamic_templates_dir.mkdir(parents=True, exist_ok=True) head_template = markata.config.dynamic_templates_dir / "head.html" - head_template.write_text( - env_for_dynamic_render.get_template("dynamic_head.html").render( - {"markata": markata} - ), + new_content = env_for_dynamic_render.get_template("dynamic_head.html").render( + {"markata": markata} ) + current_content = head_template.read_text() if head_template.exists() else "" + if current_content != new_content: + head_template.write_text(new_content) # Set up loaders loaders = [] diff --git a/markata/plugins/post_template.py b/markata/plugins/post_template.py index 1f4c6f41..0c314442 100644 --- a/markata/plugins/post_template.py +++ b/markata/plugins/post_template.py @@ -450,7 +450,10 @@ def save(markata: "Markata") -> None: for template in linked_templates: template = get_template(markata.jinja_env, template) css = template.render(markata=markata, __version__=__version__) - Path(markata.config.output_dir / Path(template.filename).name).write_text(css) + output_path = Path(markata.config.output_dir / Path(template.filename).name) + current_content = output_path.read_text() if output_path.exists() else "" + if current_content != css: + output_path.write_text(css) @hook_impl() diff --git a/markata/plugins/redirects.py b/markata/plugins/redirects.py index 033d4457..93cbdc6f 100644 --- a/markata/plugins/redirects.py +++ b/markata/plugins/redirects.py @@ -187,4 +187,7 @@ def save(markata: "Markata") -> None: for redirect in redirects: file = 
markata.config.output_dir / redirect.original.strip("/") / "index.html" file.parent.mkdir(parents=True, exist_ok=True) - file.write_text(template.render(redirect.dict(), config=markata.config)) + new_content = template.render(redirect.dict(), config=markata.config) + current_content = file.read_text() if file.exists() else "" + if current_content != new_content: + file.write_text(new_content) diff --git a/markata/plugins/service_worker.py b/markata/plugins/service_worker.py index 04fcdce7..4185ecc6 100644 --- a/markata/plugins/service_worker.py +++ b/markata/plugins/service_worker.py @@ -129,4 +129,6 @@ def save(markata: "Markata") -> None: ) output_file = markata.config.output_dir / "service-worker.js" - output_file.write_text(service_worker_js) + current_content = output_file.read_text() if output_file.exists() else "" + if current_content != service_worker_js: + output_file.write_text(service_worker_js) diff --git a/markata/plugins/to_json.py b/markata/plugins/to_json.py index 58a13827..ac92c54a 100644 --- a/markata/plugins/to_json.py +++ b/markata/plugins/to_json.py @@ -10,4 +10,7 @@ @hook_impl def save(markata: "Markata") -> None: output_file = markata.config.output_dir / "markata.json" - output_file.write_text(json.dumps(markata.to_dict(), default=str)) + new_content = json.dumps(markata.to_dict(), default=str) + current_content = output_file.read_text() if output_file.exists() else "" + if current_content != new_content: + output_file.write_text(new_content) From 6f3498c60ab48ce7420d51a7781cc3e5741584c7 Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Mon, 8 Dec 2025 16:42:16 -0600 Subject: [PATCH 25/37] ruff fix --- markata/plugins/feeds.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index a5dcdfdf..981d6a5e 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -474,7 +474,7 @@ def save(markata: Markata) -> None: if xsl_file.exists(): current_xsl = xsl_file.read_text() should_write = current_xsl != xsl - + if should_write: xsl_file.write_text(xsl) @@ -493,15 +493,15 @@ def create_page( # Get templates mtime to bust cache when any template changes templates_mtime = get_templates_mtime(markata.jinja_env) - + # Cache expensive feed.map() call for hash generation cache_key_posts = f"feed_hash_posts_{feed.config.slug}" if not hasattr(markata, "_feed_hash_cache"): markata._feed_hash_cache = {} - + if cache_key_posts not in markata._feed_hash_cache: markata._feed_hash_cache[cache_key_posts] = feed.map("str(post.to_dict())") - + posts_hash_data = markata._feed_hash_cache[cache_key_posts] key = markata.make_hash( @@ -541,7 +541,7 @@ def create_page( atom_output_file = ( Path(markata.config.output_dir) / feed.config.slug / "atom.xml" ) - + # Create all directories in one batch partial_output_file.parent.mkdir(exist_ok=True, parents=True) From bac7c96095f507c33e01b97d92f8d0f6ad2e6a31 Mon Sep 17 00:00:00 2001 From: autobump Date: Mon, 8 Dec 2025 22:43:27 +0000 Subject: [PATCH 26/37] =?UTF-8?q?Bump=20version:=200.11.0.dev5=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index 4c0379b3..f05ab558 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev5" +__version__ = "0.11.0.dev6" From 4406bc9a3e5717b0d8692407671356278241e2b6 Mon Sep 17 00:00:00 2001 
From: "Waylon S. Walker" Date: Mon, 8 Dec 2025 18:07:31 -0600 Subject: [PATCH 27/37] perf: optimize diskcache and feed hash generation - Increase diskcache size_limit to 5GB (from default 1GB) - Reduce cull_limit to 10 (from default 100) for faster evictions - Use lightweight post identifiers (slug + content_hash) instead of expensive str(post.to_dict()) in feed hash generation The profile showed 22.5s in a single cache.set() call with: - 3.9s culling (evicting entries at size limit) - 14.65s transaction overhead - 6.4s serializing post.to_dict() for hashing These changes reduce cache contention and eliminate expensive Pydantic repr calls during hash generation. --- CHANGELOG.md | 2 ++ markata/__init__.py | 7 ++++++- markata/plugins/feeds.py | 7 +++++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 770b8556..71575303 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Performance Improvements +- Perf: increased diskcache size limit to 5GB and reduced cull_limit to minimize expensive eviction operations (saves ~4s during cache culling) +- Perf: optimized feed hash generation to use lightweight post identifiers (slug + content_hash) instead of expensive `str(post.to_dict())` serialization (saves ~6s) - Perf: feeds now cache expensive `feed.map()` calls during hash generation (~7.7s savings) - Perf: feeds batch directory creation operations (~2s savings) - Perf: feeds only read XSL files when they exist and need comparison diff --git a/markata/__init__.py b/markata/__init__.py index 2d176e26..a1f62216 100644 --- a/markata/__init__.py +++ b/markata/__init__.py @@ -172,7 +172,12 @@ def cache(self: "Markata") -> Cache: # FanoutCache(self.MARKATA_CACHE_DIR, statistics=True) if self._cache is not None: return self._cache - self._cache = Cache(self.MARKATA_CACHE_DIR, statistics=True) + self._cache = Cache( + self.MARKATA_CACHE_DIR, + statistics=True, + size_limit=5 * 1024**3, # 5GB to reduce culling frequency + 
cull_limit=10, # Evict fewer entries at a time (default is 100) + ) self._cache.expire() return self._cache diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 981d6a5e..8b812816 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -494,13 +494,16 @@ def create_page( # Get templates mtime to bust cache when any template changes templates_mtime = get_templates_mtime(markata.jinja_env) - # Cache expensive feed.map() call for hash generation + # Use simpler hash for posts instead of expensive str(post.to_dict()) + # Hash just the essential post identifiers: slug + content_hash cache_key_posts = f"feed_hash_posts_{feed.config.slug}" if not hasattr(markata, "_feed_hash_cache"): markata._feed_hash_cache = {} if cache_key_posts not in markata._feed_hash_cache: - markata._feed_hash_cache[cache_key_posts] = feed.map("str(post.to_dict())") + # Use post slugs and content hashes instead of full to_dict() + posts_data = feed.map("(post.slug, getattr(post, 'content_hash', ''))") + markata._feed_hash_cache[cache_key_posts] = str(sorted(posts_data)) posts_hash_data = markata._feed_hash_cache[cache_key_posts] From a3cf06aee252c5fb4a81a6a85068f671a106ffcb Mon Sep 17 00:00:00 2001 From: autobump Date: Tue, 9 Dec 2025 01:01:33 +0000 Subject: [PATCH 28/37] =?UTF-8?q?Bump=20version:=200.11.0.dev6=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index f05ab558..fd13bba1 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev6" +__version__ = "0.11.0.dev7" From 6650bd276161cbbd99f8ee804b7a66ad488edcc7 Mon Sep 17 00:00:00 2001 From: "Waylon S. 
Walker" Date: Mon, 8 Dec 2025 19:51:36 -0600 Subject: [PATCH 29/37] fix: correct feed cache key and output_html path resolution - Feed cache now uses slug+date+title instead of non-existent content_hash This fixes feeds showing posts but posts not being written - output_html validator now checks if path already contains output_dir before prepending it, preventing markout/markout/ duplication The content_hash attribute doesn't exist on posts, causing all feed cache keys to be identical (all empty strings), which prevented proper cache invalidation. --- markata/plugins/feeds.py | 5 +++-- markata/plugins/publish_html.py | 10 ++++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 8b812816..46f9b2af 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -501,8 +501,9 @@ def create_page( markata._feed_hash_cache = {} if cache_key_posts not in markata._feed_hash_cache: - # Use post slugs and content hashes instead of full to_dict() - posts_data = feed.map("(post.slug, getattr(post, 'content_hash', ''))") + # Use post slugs and published dates instead of full to_dict() + # This provides a stable, lightweight cache key + posts_data = feed.map("(post.slug, str(getattr(post, 'date', '')), getattr(post, 'title', ''))") markata._feed_hash_cache[cache_key_posts] = str(sorted(posts_data)) posts_hash_data = markata._feed_hash_cache[cache_key_posts] diff --git a/markata/plugins/publish_html.py b/markata/plugins/publish_html.py index ebca2303..3f7566da 100644 --- a/markata/plugins/publish_html.py +++ b/markata/plugins/publish_html.py @@ -188,8 +188,14 @@ def output_html_relative(cls, v: Optional[Path], info) -> Optional[Path]: # Not relative to output_dir, make it so pass - # Make relative paths relative to output_dir - return output_dir / v + # Check if path already starts with output_dir + try: + v.relative_to(output_dir) + # Path is already relative to output_dir + return v + except 
ValueError: + # Path doesn't start with output_dir, prepend it + return output_dir / v @field_validator("output_html", mode="before") @classmethod From 05d63967be3dfaf8eb10d5863dfe155419fa3dcb Mon Sep 17 00:00:00 2001 From: autobump Date: Tue, 9 Dec 2025 02:16:38 +0000 Subject: [PATCH 30/37] =?UTF-8?q?Bump=20version:=200.11.0.dev7=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index fd13bba1..bbe35688 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev7" +__version__ = "0.11.0.dev8" From d662ed66225e4860e3aa8682ee7e16f6165ad653 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Thu, 18 Dec 2025 10:17:06 -0600 Subject: [PATCH 31/37] feat: CLI config overrides with -s 'style.theme=nord' - Add parse_set_options() for dot notation config parsing - Add _deep_merge() for nested config merging - Support -c/--config for alternate config files - Support -o/--output-dir for output override - Support -s/--set for generic config overrides - Support MARKATA_* environment variables - Enable runtime theme switching and config override - Backward compatible with existing builds --- markata/__init__.py | 36 +++++- markata/plugins/base_cli.py | 196 +++++++++++++++++++++++++++++++- markata/plugins/config_model.py | 11 +- 3 files changed, 232 insertions(+), 11 deletions(-) diff --git a/markata/__init__.py b/markata/__init__.py index 2d176e26..09c9458a 100644 --- a/markata/__init__.py +++ b/markata/__init__.py @@ -59,6 +59,7 @@ ] DEFAULT_HOOKS = [ + "markata.plugins.mermaid", "markata.plugins.didyoumean", "markata.plugins.skip", "markata.plugins.md_it_wikilinks", @@ -123,7 +124,13 @@ class HooksConfig(pydantic.BaseModel): class Markata: - def __init__(self: "Markata", console: Console = None, config=None) -> None: + 
def __init__( + self: "Markata", + console: Console = None, + config=None, + config_overrides: Optional[Dict[str, Any]] = None, + config_file: Optional[Path] = None, + ) -> None: self.__version__ = __version__ self.stages_ran = set() self.threded = False @@ -134,6 +141,11 @@ def __init__(self: "Markata", console: Console = None, config=None) -> None: self.MARKATA_CACHE_DIR.mkdir(exist_ok=True) self._pm = pluggy.PluginManager("markata") self._pm.add_hookspecs(hookspec.MarkataSpecs) + + # Store config overrides for later use in load_config hook + self._config_overrides = config_overrides or {} + self._config_file = config_file + if config is not None: self.config = config with self.cache as cache: @@ -144,7 +156,12 @@ def __init__(self: "Markata", console: Console = None, config=None) -> None: if config is not None: raw_hooks = config else: - raw_hooks = standard_config.load("markata") + raw_hooks = standard_config.load( + "markata", + project_home=config_file.parent if config_file else ".", + overrides=config_overrides or {}, + config_file=config_file, + ) self.hooks_conf = HooksConfig.parse_obj(raw_hooks) try: default_index = self.hooks_conf.hooks.index("default") @@ -172,7 +189,12 @@ def cache(self: "Markata") -> Cache: # FanoutCache(self.MARKATA_CACHE_DIR, statistics=True) if self._cache is not None: return self._cache - self._cache = Cache(self.MARKATA_CACHE_DIR, statistics=True) + self._cache = Cache( + self.MARKATA_CACHE_DIR, + statistics=True, + size_limit=5 * 1024**3, # 5GB to reduce culling frequency + cull_limit=10, # Evict fewer entries at a time (default is 100) + ) self._cache.expire() return self._cache @@ -203,7 +225,13 @@ def __getattr__(self: "Markata", item: str) -> Any: f"Running to [purple]{stage_to_run_to}[/] to retrieve [purple]{item}[/]" ) self.run(stage_to_run_to) - return getattr(self, item) + # Check __dict__ directly to avoid infinite recursion + if item in self.__dict__: + return self.__dict__[item] + else: + raise AttributeError( + 
f"'Markata' object has no attribute '{item}' after running {stage_to_run_to}" + ) elif item == "precache": return self._precache or {} else: diff --git a/markata/plugins/base_cli.py b/markata/plugins/base_cli.py index 00a3b722..8b53eeee 100644 --- a/markata/plugins/base_cli.py +++ b/markata/plugins/base_cli.py @@ -44,6 +44,42 @@ --serve Start development server --profile Profile the build process --debug Enable debug mode +-c, --config Path to alternate config file +-o, --output-dir Override output directory +-s, --set Set config values (key=value format) +``` + +### Configuration Overrides + +Override configuration at runtime: + +```bash +# Use alternate config file +markata build -c themes/catppuccin.toml + +# Override output directory +markata build -o dist/theme-everforest + +# Set multiple config values +markata build -s output_dir=dist -s style.theme=nord + +# Combine multiple overrides +markata build -c base.toml -s output_dir=custom -s style.theme=gruvbox +``` + +### Environment Variable Overrides + +All config can be overridden with environment variables prefixed with `MARKATA_`: + +```bash +# Override output directory +MARKATA_OUTPUT_DIR=dist markata build + +# Override theme +MARKATA_STYLE__THEME=nord markata build + +# Use double underscore for nested config +MARKATA_STYLE__THEME=catppuccin MARKATA_OUTPUT_DIR=dist/catppuccin markata build ``` ### List Command @@ -110,7 +146,9 @@ import warnings from pathlib import Path from typing import TYPE_CHECKING +from typing import Any from typing import Callable +from typing import Dict from typing import List from typing import Literal from typing import Optional @@ -150,6 +188,51 @@ def make_pretty() -> None: ) +def parse_set_options(set_args: List[str]) -> Dict[str, Any]: + """Parse --set key=value arguments into a nested config dict. 
+ + Supports dot notation for nested keys: + - output_dir=dist -> {"output_dir": "dist"} + - style.theme=nord -> {"style": {"theme": "nord"}} + """ + config = {} + for arg in set_args: + if "=" not in arg: + raise ValueError(f"Invalid --set format: {arg}. Expected key=value") + + key, value = arg.split("=", 1) + keys = key.split(".") + + # Navigate/create nested dict structure + current = config + for k in keys[:-1]: + if k not in current: + current[k] = {} + current = current[k] + + # Set the value, attempting type conversion + final_key = keys[-1] + # Try to parse as JSON for complex types + try: + import json + + current[final_key] = json.loads(value) + except (json.JSONDecodeError, ValueError): + # Keep as string if not valid JSON + current[final_key] = value + + return config + + +def _deep_merge(target: Dict, source: Dict) -> None: + """Deep merge source dict into target dict.""" + for key, value in source.items(): + if key in target and isinstance(target[key], dict) and isinstance(value, dict): + _deep_merge(target[key], value) + else: + target[key] = value + + @hook_impl() def cli(app: typer.Typer, markata: "Markata") -> None: """ @@ -309,6 +392,24 @@ def build( "--pdb", ), profile: bool = True, + config_file: Optional[Path] = typer.Option( + None, + "-c", + "--config", + help="Path to alternate config file", + ), + output_dir: Optional[str] = typer.Option( + None, + "-o", + "--output-dir", + help="Override output directory", + ), + set_config: List[str] = typer.Option( + [], + "-s", + "--set", + help="Set config value (key=value, supports dot notation)", + ), ) -> None: """ Markata's primary way of building your site for production. 
@@ -319,6 +420,59 @@ def build( markata build ``` + ## Configuration Overrides + + Override configuration at runtime using multiple methods: + + ### Alternate Config File + Use a different config file with `-c` or `--config`: + ``` bash + markata build -c themes/catppuccin.toml + ``` + + ### Output Directory + Override the output directory with `-o` or `--output-dir`: + ``` bash + markata build -o dist/theme-everforest + ``` + + ### Generic Config Override + Set any config value using `-s` or `--set` with dot notation: + ``` bash + # Single value + markata build -s output_dir=dist + + # Nested config + markata build -s style.theme=nord + + # Multiple values + markata build -s output_dir=dist -s style.theme=catppuccin + + # Complex values (use JSON) + markata build -s 'nav={"home":"/","docs":"/docs"}' + ``` + + ### Environment Variables + Override any config with environment variables: + ``` bash + # Simple value + MARKATA_OUTPUT_DIR=dist markata build + + # Nested value (use double underscore) + MARKATA_STYLE__THEME=nord markata build + + # Multiple values + MARKATA_OUTPUT_DIR=dist MARKATA_STYLE__THEME=gruvbox markata build + ``` + + ### Combining Overrides + All override methods can be combined (applied in order: file -> env -> cli): + ``` bash + MARKATA_STYLE__THEME=nord markata build -c base.toml -s output_dir=custom + ``` + + ## Debugging + If you are having an issue and want to pop immediately into a debugger upon failure you can pass the `--pdb` flag to the build command. 
@@ -355,21 +509,51 @@ def build( if pretty: make_pretty() + # Save console reference before potential reinit + console = markata.console + if quiet: - markata.console.quiet = True + console.quiet = True if verbose: - markata.console.print("console options:", markata.console.options) + console.print("console options:", console.options) + + # Build config overrides from CLI arguments + config_overrides = {} + + # Add output_dir if specified + if output_dir: + config_overrides["output_dir"] = output_dir + + # Parse and merge --set options + if set_config: + set_overrides = parse_set_options(set_config) + # Deep merge set_overrides into config_overrides + _deep_merge(config_overrides, set_overrides) + + # Reinitialize markata with overrides if any were provided + if config_file or config_overrides: + from markata import Markata + + # Create a new instance with overrides + markata_instance = Markata( + console=console, + config_file=config_file, + config_overrides=config_overrides, + ) + else: + # Use the existing instance + markata_instance = markata if not profile: - markata.config.profiler.should_profile = False + markata_instance.config.profiler.should_profile = False if should_pdb: - pdb_run(markata.run) + pdb_run(markata_instance.run) else: - markata.console.log("[purple]starting the build") - markata.run() + markata_instance.console.log("[purple]starting the build") + markata_instance.run() @app.command() def list( diff --git a/markata/plugins/config_model.py b/markata/plugins/config_model.py index e7ce5a50..8baad2b2 100644 --- a/markata/plugins/config_model.py +++ b/markata/plugins/config_model.py @@ -226,7 +226,16 @@ def config_model(markata: "Markata") -> None: @register_attr("config") def load_config(markata: "Markata") -> None: if "config" not in markata.__dict__.keys(): - config = standard_config.load("markata") + # Get overrides from markata instance if available + config_overrides = getattr(markata, "_config_overrides", {}) + config_file = 
getattr(markata, "_config_file", None) + + config = standard_config.load( + "markata", + project_home=config_file.parent if config_file else ".", + overrides=config_overrides, + config_file=config_file, + ) if config == {}: markata.config = markata.Config() else: From 08dba576833d432cbc08c8d953e80aa3cc009f86 Mon Sep 17 00:00:00 2001 From: "Waylon S. Walker" Date: Thu, 18 Dec 2025 15:19:10 -0600 Subject: [PATCH 32/37] docs: Add CLI config overrides to CHANGELOG - Document -s/--set flag for runtime config overrides with dot notation - Document -c/--config and -o/--output-dir flags - Document MARKATA_* environment variable support - Note runtime theme switching capability --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71575303..b36161c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,15 @@ ## 0.11.0 +### CLI Configuration Overrides + +- Feat: Add `-s/--set` flag for runtime config overrides with dot notation (e.g., `-s 'style.theme=nord'`) +- Feat: Add `-c/--config` flag for alternate config files +- Feat: Add `-o/--output-dir` flag for output directory override +- Feat: Add support for `MARKATA_*` environment variables with nested config (e.g., `MARKATA_STYLE__THEME=nord`) +- Feat: Add `parse_set_options()` and `_deep_merge()` utilities for config handling +- Enable: Runtime theme switching and configuration without editing files + ### Performance Improvements - Perf: increased diskcache size limit to 5GB and reduced cull_limit to minimize expensive eviction operations (saves ~4s during cache culling) From 7297e4cd182a1a5b4e363dff13f0ded87a524b9e Mon Sep 17 00:00:00 2001 From: autobump Date: Thu, 18 Dec 2025 21:20:44 +0000 Subject: [PATCH 33/37] =?UTF-8?q?Bump=20version:=200.11.0.dev8=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/markata/__about__.py b/markata/__about__.py index bbe35688..0b05868b 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev8" +__version__ = "0.11.0.dev9" From 5faa4349ef72d31868bbf5fe6ae0b4c95299e20e Mon Sep 17 00:00:00 2001 From: Waylon Walker Date: Mon, 19 Jan 2026 09:41:53 -0600 Subject: [PATCH 34/37] feat: enhanced wikilink system (#172) --- CHANGELOG.md | 10 + justfile | 3 +- markata/plugins/md_it_wikilinks.py | 730 +++++++++++++++++++++++++++-- pyproject.toml | 11 +- 4 files changed, 713 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b36161c8..b08a28b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,16 @@ - Perf: heading_link replaced expensive file I/O with `__version__` in cache key - Feat: feeds now support atom feeds +### Enhanced Wikilinks System + +- Feat: Added comprehensive configuration system for wikilinks resolution with priority-based scoring +- Feat: Implemented intelligent duplicate resolution that eliminates false warnings for hierarchical patterns (e.g., `tag/python` vs `python`) +- Fix: Fixed display text override syntax `[[page|Display Text]]` - now properly splits on first pipe and preserves display text +- Feat: Added support for anchor links `[[page#anchor]]` and complex syntax `[[folder/page#anchor|Display Text]]` +- Feat: Configurable priority rules allow custom scoring patterns for different content types +- Feat: Warning suppression patterns to reduce noise for expected duplicates (e.g., `tag/*`, `category/*`) +- Feat: Multiple resolution strategies: "priority" (default), "first", or "warn" + ### Template Utilities (Breaking for Plugin Authors) - **BREAKING**: Removed internal `get_template()` functions from `feeds.py` and `post_template.py` diff --git a/justfile b/justfile index 24edbf23..2d98df77 100644 --- a/justfile +++ b/justfile @@ -34,6 +34,7 @@ lint: build-docs: #!/usr/bin/env bash set -euxo pipefail + . 
./.venv/bin/activate markata build serve: @@ -44,7 +45,7 @@ serve: ruff-fix: #!/usr/bin/env bash set -euxo pipefail - ruff check markata --fix + uv run ruff check markata --fix lint-test: lint cov diff --git a/markata/plugins/md_it_wikilinks.py b/markata/plugins/md_it_wikilinks.py index 843e8779..49205fe1 100644 --- a/markata/plugins/md_it_wikilinks.py +++ b/markata/plugins/md_it_wikilinks.py @@ -27,8 +27,213 @@ ## Configuration -This plugin requires no explicit configuration. It automatically processes wikilinks -in your markdown content. +This plugin supports comprehensive configuration for wikilink resolution. + +Configuration Options: +```toml +[plugins.md_it_wikilinks] + +# Resolution strategy for duplicate matches +# "priority" (default): Use priority scoring to select best match +# "first": Use first match found +# "warn": Always warn about duplicates +resolution_strategy = "priority" + +# Score difference threshold for clear winner selection (default: 20) +clear_winner_threshold = 20 + +# Suppress warnings for links matching these patterns +suppress_patterns = ["tag/*", "category/*", "archive/*"] + +# Custom priority rules (higher priority = higher score) +priority_rules = [ + { pattern = "pages/*", priority = 100 }, + { pattern = "posts/*", priority = 90 }, + { pattern = "blog/*", priority = 85 }, + { pattern = "tutorials/*", priority = 80 }, + { pattern = "docs/*", priority = 75 }, + { pattern = "tag/*", priority = 60 }, + { pattern = "category/*", priority = 55 }, + { pattern = "archive/*", priority = 50 }, + { pattern = "feed/*", priority = 45 }, +] + +# Behavior for broken links (default: "warn", "silent", "error") +fallback_behavior = "warn" + +# Enable/disable logging (default: true) +enable_logging = true +``` + +## Resolution Priority System + +Built-in Scores (configurable via priority_rules): +```toml +# Default scoring system (overridable via priority_rules) +exact_slug_match = 100 # Exact slug match: highest priority +path_match = 80 # Path 
match: [[folder/page]] -> high priority +feed_slug_match = 60 # Feed slug match: tag/*, category/* patterns +basename_match = 40 # Basename match: default fallback +``` + +Examples: +```toml +# Override scores for entire patterns +priority_rules = [ + { pattern = "tag/python", priority = 70 }, # Higher than default 60 + { pattern = "docs/*", priority = 95 }, # Higher than exact match! +] + +# Change entire scoring algorithm +resolution_strategy = "first" # Disable scoring, use first match +resolution_strategy = "warn" # Always warn about duplicates +clear_winner_threshold = 15 # Lower threshold = fewer warnings +``` + +## Smart Slug Resolution + +The plugin: +1. Looks up the target file in your content +2. Finds its generated slug +3. Creates a link to the final URL + +## Link Formats + +Supports multiple link styles: +- Basic: `[[filename]]` +- With text: `[[filename|Link Text]]` +- With path: `[[folder/file]]` +- With extension: `[[file.md]]` (extension stripped in output) +- With anchors: `[[filename#anchor]]` or `[[filename#anchor|Display Text]]` +- Complex: `[[folder/file#anchor|Display Text]]` + +**Edge Cases Handled:** +- **Spaces**: `[[my tag]]` → matches slug "my-tag" (spaces normalized to hyphens by Markata) +- **Empty display text**: `[[page|]]` → falls back to "page" +- **Multiple pipes**: `[[page|Text with | pipes]]` → splits only on first pipe +- **Quotes**: `[[page|"Display Text"]]` → supports quoted display text +- **Mixed anchors**: `[[page#anchor|Text]]` and `[[page|Text#anchor]]` +- **Complex expressions**: `[[tag/my tag|Posts about "my tag"]]` (full syntax support with quotes and spaces) + +## Duplicate Resolution System + +The wikilinks system uses intelligent priority-based resolution: + +How it works: +1. Creates mapping of all possible matches in markata.possible_wikilink +2. Scores each candidate using configurable priority rules +3. Selects highest-scoring match (clear winner if score difference > threshold) +4. 
Only warns for truly ambiguous cases + +## HTML Output + +Generated HTML structure: +```html +Link Text +``` + +## Uninstallation + +Since this plugin is included in the default plugin set, to disable it you must explicitly +add it to the disabled_hooks list if you are using the 'default' plugin: + +```toml +disabled_hooks = [ + "markata.plugins.md_it_wikilinks", +] +``` + +## Configuration + +This plugin supports comprehensive configuration for wikilink resolution: + +```toml +[plugins.md_it_wikilinks] +# Resolution strategy for duplicate matches +# "priority" (default): Use priority scoring to select best match +# "first": Use first match found +# "warn": Always warn about duplicates +resolution_strategy = "priority" + +# Score difference threshold for clear winner selection (default: 20) +clear_winner_threshold = 20 + +# Suppress warnings for links matching these patterns +suppress_patterns = ["tag/*", "category/*", "archive/*"] + +# Custom priority rules (higher priority = higher score) +# If no rules match, defaults to built-in scoring system +priority_rules = [ + { pattern = "pages/*", priority = 100 }, + { pattern = "posts/*", priority = 90 }, + { pattern = "blog/*", priority = 85 }, + { pattern = "tutorials/*", priority = 80 }, + { pattern = "docs/*", priority = 75 }, + { pattern = "tag/*", priority = 60 }, + { pattern = "category/*", priority = 55 }, + { pattern = "archive/*", priority = 50 }, + { pattern = "feed/*", priority = 45 }, +] + +## Priority Scoring System + +**Built-in Scores:** +- **Exact slug match**: 100 points (highest priority) +- **Path match**: 80 points (when original link includes path structure) +- **Feed slug match**: 60 points (for hierarchical feeds like `tag/*`) +- **Basename match**: 40 points (default fallback) + +**Resolution Examples:** +```toml +# For site with feed "tag/python" and page "python": +[[python]] # Matches: ["python", "tag/python"] → selects "python" (100 vs 60) +[[tag/python]] # Matches: ["python", "tag/python"] 
→ selects "tag/python" (100 vs 40) + +# For site with multiple tag feeds: +priority_rules = [ + { pattern = "tag/python", priority = 90 }, # Higher than default tag/* + { pattern = "tag/javascript", priority = 85 }, +] +``` + +# Behavior for broken links (default: "warn") +# "warn": Log warning and use fallback link +# "silent": Use fallback link silently +# "error": Log error and use fallback link +fallback_behavior = "warn" + +# Enable/disable logging (default: true) +enable_logging = true +``` + +### Priority Rules + +Priority rules allow you to customize how links are resolved when there are multiple matches: + +- **Pattern**: Glob pattern matching page slugs (supports `*` wildcard) +- **Priority**: Score value (higher = more likely to be selected) + +The plugin evaluates rules in order and uses the first matching rule. If no custom rules match, it falls back to the built-in scoring system: + +- Exact slug match: 100 points +- Path match (`[[folder/page]]`): 80 points +- Feed slug match: 60 points +- Basename match: 40 points + +### Warning Suppression + +Use `suppress_patterns` to reduce warning noise for expected duplicates: + +```toml +suppress_patterns = [ + "tag/*", # Suppress all tag-related warnings + "category/*", # Suppress category warnings + "*/index", # Suppress index page conflicts +] +``` + +By default, the plugin uses priority-based resolution that automatically selects +the best match and only warns for truly ambiguous cases. 
## Functionality @@ -64,6 +269,175 @@ - With text: `[[filename|Link Text]]` - With path: `[[folder/file]]` - With extension: `[[file.md]]` (extension stripped in output) +- With anchors: `[[filename#anchor]]` or `[[filename#anchor|Display Text]]` +- Complex: `[[folder/file#anchor|Display Text]]` + +**Edge Cases Handled:** +- **Spaces**: `[[my tag]]` → matches slug "my-tag" (spaces normalized to hyphens by Markata) +- **Empty display text**: `[[page|]]` → falls back to "page" +- **Multiple pipes**: `[[page|Text with | pipes]]` → splits only on first pipe (first `|` is separator, rest is content) +- **Quotes**: `[[page|"Display Text"]]` → supports quoted display text (quotes preserved in display text) +- **Mixed anchors**: `[[page#anchor|Text]]` and `[[page|Text#anchor]]` (anchors and display text work together) +- **Complex expressions**: `[[tag/my tag|Posts about "my tag"]]` (full syntax support with quotes and spaces) + +**Normalization Behavior:** +- **Link target normalization**: Spaces and special characters handled according to Markata slug conventions +- **Display text preservation**: Exact display text (including quotes and pipes) preserved +- **Case sensitivity**: Resolution is case-sensitive (matches Markata's slug handling) + +**Common Use Cases:** +```markdown +# Basic usage +[[thoughts]] # → /thoughts +[[tag/python]] # → /tag/python +[[docs/getting-started]] # → /docs/getting-started + +# With display text +[[thoughts|My Thoughts]] # → /thoughts (text: "My Thoughts") +[[tag/python|Python Posts]] # → /tag/python (text: "Python Posts") + +# With anchors +[[thoughts#intro]] # → /thoughts#intro +[[thoughts#intro|Introduction]] # → /thoughts#intro (text: "Introduction") + +# Complex with spaces and quotes +[[my tag|Posts about "my tag"]] # → /my-tag (text: 'Posts about "my tag"') +``` + +## Resolution Priority System + +**Built-in Scores (configurable via priority_rules):** +- **Exact slug match**: 100 points (highest priority) +- **Path match**: 80 points 
(when original link includes path structure) +- **Feed slug match**: 60 points (for hierarchical feeds like `tag/*`) +- **Basename match**: 40 points (default fallback) + +## Duplicate Resolution System + +The wikilinks system uses intelligent priority-based resolution to eliminate warning noise for common hierarchical patterns: + +### How Matches Are Created + +**Feed Slugs**: Feed configurations (e.g., `slug = "tag/python"`) +**Page Slugs**: Regular page slugs (e.g., `slug = "python"`) +**Mapping Creation**: Both are mapped in `markata.possible_wikilink` dictionary: + +```python +# For feeds in markata.feeds: +for slug in [v.config.slug for v in markata.feeds.values()]: + wikilink = slug.split("/")[-1] # Extract basename: "python" + markata.possible_wikilink[wikilink].append(slug) # Maps "python" → ["python", "tag/python"] + +# For regular pages: +for slug in markata.map("slug"): + wikilink = slug.split("/")[-1] # Extract basename: "python" + if wikilink not in markata.possible_wikilink: + markata.possible_wikilink[wikilink] = [slug] +``` + +### Resolution Priority System + +## Resolution Priority System + +**Built-in Scores (configurable via priority_rules):** +```toml +# Default scoring system (overridable via priority_rules) +exact_slug_match = 100 # Exact slug match: highest priority +path_match = 80 # Path match: [[folder/page]] -> high priority +feed_slug_match = 60 # Feed slug match: tag/*, category/* patterns +basename_match = 40 # Basename match: default fallback +``` + +**Configuration Options:** +```toml +# Method 1: Override default scores for entire patterns +priority_rules = [ + { pattern = "tag/*", priority = 70 }, # Higher than default 60 + { pattern = "category/*", priority = 80 }, # Higher than default 55 + { pattern = "docs/*", priority = 95 }, # Very high priority + { pattern = "posts/*", priority = 90 }, # High priority for content +] + +# Method 2: Change entire scoring algorithm +resolution_strategy = "first" # Disable scoring, use first 
match +resolution_strategy = "warn" # Always warn about duplicates +clear_winner_threshold = 15 # Lower threshold = fewer warnings +``` + +**Resolution Examples:** +```toml +# Example 1: Default behavior with feed "tag/python" and page "python" +# markata.possible_wikilink = {"python": ["python", "tag/python"]} + +[[python]] # → selects "python" (100 vs 60 points, clear winner) +[[tag/python]] # → selects "tag/python" (100 vs 40 points, clear winner) + +# Example 2: Custom priority rules +priority_rules = [ + { pattern = "tag/python", priority = 75 }, # Higher than default 60 + { pattern = "docs/getting-started", priority = 120 }, # Higher than exact match! +] + +[[python]] # Against "tag/python": 75 vs 60 → selects "tag/python" (custom rule wins) +[[tag/python]] # Against "tag/python": 100 vs 75 → selects "tag/python" (exact match still wins) + +# Example 3: Different resolution strategies +resolution_strategy = "first" # Ignores scoring, always first match +resolution_strategy = "warn" # Always warns about duplicates +``` + +**Advanced Configuration:** +```toml +# Fine-tune duplicate resolution behavior +clear_winner_threshold = 30 # Require larger score difference for clear winner +suppress_patterns = ["tag/*"] # Suppress warnings for all tag/* matches +fallback_behavior = "silent" # No warnings for broken links +``` + +**Example 2: Multiple Feeds with Same Basename** +```toml +# Site structure: +# posts/thoughts.md (slug: "thoughts") +# feed: tag/thoughts (slug: "tag/thoughts") +# feed: category/thoughts (slug: "category/thoughts") + +# Result in markata.possible_wikilink: +{ + "thoughts": ["thoughts", "tag/thoughts", "category/thoughts"] # Three matches! 
+} + +[[thoughts]] # → selects "thoughts" (100 vs 60 vs 55 points) +[[tag/thoughts]] # → selects "tag/thoughts" (100 vs 40 vs 55 points) +[[category/thoughts]] # → selects "category/thoughts" (100 vs 40 vs 55 points) +``` + +**Example 3: Custom Priority Rules** +```toml +# If you want specific feed patterns to have higher priority: +priority_rules = [ + { pattern = "tag/python", priority = 70 }, # Higher than default tag/* (60) + { pattern = "docs/*", priority = 95 }, # Documentation gets highest priority + { pattern = "posts/*", priority = 90 }, # Posts get high priority +] + +# Result: Custom rules override built-in scoring for matching patterns +``` + +**Configuration Priority Rules Override:** +```toml +# Custom scoring for specific patterns +priority_rules = [ + { pattern = "pages/*", priority = 100 }, # Pages get highest priority + { pattern = "tag/python", priority = 90 }, # Specific tag gets boost + { pattern = "docs/*", priority = 95 }, # Documentation gets high priority +] + +# Feed patterns with custom priorities +priority_rules = [ + { pattern = "tag/python", priority = 70 }, # Higher than default tag/* (60) + { pattern = "category/javascript", priority = 75 }, # Boost specific categories +] +``` ## HTML Output @@ -87,7 +461,10 @@ """ import logging +import re from typing import TYPE_CHECKING +from typing import Dict +from typing import List from markdown_it import MarkdownIt from markdown_it.rules_inline import StateInline @@ -101,6 +478,242 @@ logger = logging.getLogger("markata") +def get_default_config() -> Dict: + """ + Get default configuration for wikilinks plugin. 
+ + Returns: + Dictionary with default configuration values + """ + return { + "resolution_strategy": "priority", + "clear_winner_threshold": 20, + "suppress_patterns": [], + "priority_rules": [ + {"pattern": "pages/*", "priority": 100}, + {"pattern": "posts/*", "priority": 90}, + {"pattern": "blog/*", "priority": 85}, + {"pattern": "tutorials/*", "priority": 80}, + {"pattern": "docs/*", "priority": 75}, + {"pattern": "tag/*", "priority": 60}, + {"pattern": "category/*", "priority": 55}, + {"pattern": "archive/*", "priority": 50}, + {"pattern": "feed/*", "priority": 45}, + ], + "fallback_behavior": "warn", # "warn", "silent", "error" + "enable_logging": True, + } + + +def get_plugin_config(markata: "Markata") -> Dict: + """ + Get plugin configuration with defaults merged. + + Args: + markata: Markata instance + + Returns: + Merged configuration dictionary + """ + default_config = get_default_config() + user_config = ( + getattr(markata, "config", {}).get("plugins", {}).get("md_it_wikilinks", {}) + ) + + # Deep merge user config with defaults + merged_config = default_config.copy() + merged_config.update(user_config) + + # Handle nested priority_rules merging + if "priority_rules" in user_config: + merged_config["priority_rules"] = user_config["priority_rules"] + + return merged_config + + +def matches_pattern(path: str, pattern: str) -> bool: + """ + Check if a path matches a glob-like pattern. + + Args: + path: The path to check + pattern: The pattern (supports * wildcard) + + Returns: + True if path matches pattern + """ + # Convert glob pattern to regex + regex_pattern = pattern.replace("*", ".*") + return re.match(f"^{regex_pattern}$", path) is not None + + +def should_suppress_warning(link_target: str, suppress_patterns: List[str]) -> bool: + """ + Check if warning should be suppressed for a link target. 
+ + Args: + link_target: The link target to check + suppress_patterns: List of patterns to suppress + + Returns: + True if warning should be suppressed + """ + for pattern in suppress_patterns: + if matches_pattern(link_target, pattern): + return True + return False + + +def calculate_match_score( + link_target: str, candidate_slug: str, original_link: str, config: Dict +) -> int: + """ + Calculate priority score for a wikilink match. + Higher scores indicate better matches. + + Args: + link_target: The target link text + candidate_slug: The candidate slug to score + original_link: The original link text from markdown + config: Plugin configuration + + Returns: + Score for the candidate (higher is better) + """ + # Check custom priority rules first + priority_rules = config.get("priority_rules", []) + for rule in priority_rules: + pattern = rule.get("pattern", "") + priority = rule.get("priority", 50) + if matches_pattern(candidate_slug, pattern): + return priority + + # Default scoring system if no custom rules match + # Exact slug match (highest priority) + if link_target == candidate_slug: + return 100 + + # Path match - when original link includes path structure + if "/" in original_link and original_link.strip("/") == candidate_slug: + return 80 + + # Check if this is a basename match + basename = candidate_slug.split("/")[-1] + if link_target == basename: + # Heuristic: feeds typically have paths like "tag/", "category/", "archive/" + feed_prefixes = ["tag/", "category/", "archive/", "feed/", "topic/"] + if any(candidate_slug.startswith(prefix) for prefix in feed_prefixes): + return 60 # Feed slug match + else: + return 40 # Regular basename match + + return 0 + + +def resolve_best_match( + link_target: str, + possible_pages: list, + original_link: str, + markata: "Markata", + md=None, +) -> str: + """ + Resolve the best match from possible pages using priority scoring and configuration. 
+ + Args: + link_target: The target link text + possible_pages: List of possible page slugs + original_link: The original link text from markdown + markata: Markata instance + md: Markdown-it instance (optional) + + Returns: + Best matching page slug + """ + # Get full plugin configuration + config = get_plugin_config(markata) + resolution_strategy = config.get("resolution_strategy", "priority") + threshold = config.get("clear_winner_threshold", 20) + suppress_patterns = config.get("suppress_patterns", []) + fallback_behavior = config.get("fallback_behavior", "warn") + enable_logging = config.get("enable_logging", True) + + if len(possible_pages) == 1: + return possible_pages[0] + + # Check if warning should be suppressed + should_suppress = should_suppress_warning(link_target, suppress_patterns) + + # For non-priority strategies, fallback to simple behavior + if resolution_strategy == "first": + return possible_pages[0] + elif resolution_strategy == "warn": + # Always warn and use first match (unless suppressed) + if not should_suppress and enable_logging and fallback_behavior == "warn": + if md is None or md.options.get("article") is None: + debug_value = "UNKNOWN" + else: + debug_value = md.options["article"].get( + "path", + md.options["article"].get( + "title", md.options["article"].get("slug", "") + ), + ) + logger.warning( + f"wikilink [[{original_link}]] has duplicate matches ({possible_pages}) in file '{debug_value}', defaulting to the first match ({possible_pages[0]})", + ) + return possible_pages[0] + + # Priority-based resolution (default) + # Calculate scores for all candidates using custom configuration + scored_candidates = [] + for candidate in possible_pages: + score = calculate_match_score(link_target, candidate, original_link, config) + scored_candidates.append((score, candidate)) + + # Sort by score (descending) and return the highest scoring match + scored_candidates.sort(key=lambda x: x[0], reverse=True) + + # Check if we have a clear winner 
(score difference > threshold) + if len(scored_candidates) >= 2: + top_score, top_candidate = scored_candidates[0] + second_score, second_candidate = scored_candidates[1] + + # If clear winner, return it without warning + if top_score - second_score > threshold: + return top_candidate + + # If no clear winner, return top choice but log warning for ambiguity (unless suppressed) + top_score, top_candidate = scored_candidates[0] + + if ( + not should_suppress + and enable_logging + and fallback_behavior in ["warn", "error"] + ): + if md is None or md.options.get("article") is None: + debug_value = "UNKNOWN" + else: + debug_value = md.options["article"].get( + "path", + md.options["article"].get( + "title", md.options["article"].get("slug", "") + ), + ) + + message = ( + f"wikilink [[{original_link}]] has ambiguous matches ({possible_pages}) " + f"in file '{debug_value}', selecting highest priority match ({top_candidate})" + ) + + if fallback_behavior == "error": + logger.error(message) + else: + logger.warning(message) + + return top_candidate + + @hook_impl() @register_attr("possible_wikilink") def pre_render(markata: "Markata") -> None: @@ -124,9 +737,19 @@ def pre_render(markata: "Markata") -> None: markata.possible_wikilink["index"] = ["index"] for slug in [v.config.slug for v in markata.feeds.values()]: + # Register the full slug (e.g., "tag/python") + wikilink = slug + if wikilink in markata.possible_wikilink: + if slug not in markata.possible_wikilink[wikilink]: + markata.possible_wikilink[wikilink].append(slug) + else: + markata.possible_wikilink[wikilink] = [slug] + + # Register the basename (e.g., "python") wikilink = slug.split("/")[-1] if wikilink in markata.possible_wikilink: - markata.possible_wikilink[wikilink].append(slug) + if slug not in markata.possible_wikilink[wikilink]: + markata.possible_wikilink[wikilink].append(slug) else: markata.possible_wikilink[wikilink] = [slug] @@ -157,8 +780,8 @@ def wikilinks_plugin( def _wikilinks_inline(state: 
StateInline, silent: bool): try: if ( - state.srcCharCode[state.pos] != start_char - or state.srcCharCode[state.pos + 1] != start_char + ord(state.src[state.pos]) != start_char + or ord(state.src[state.pos + 1]) != start_char ): return False except IndexError: @@ -168,11 +791,11 @@ def _wikilinks_inline(state: StateInline, silent: bool): found_closing = False while True: try: - end = state.srcCharCode.index(end_char, pos) + end = state.src.find(chr(end_char), pos) except ValueError: return False try: - if state.srcCharCode[end + 1] == end_char: + if state.src[end + 1] == chr(end_char): found_closing = True break except IndexError: @@ -191,56 +814,85 @@ def _wikilinks_inline(state: StateInline, silent: bool): token = state.push("link_open", "a", 1) token.block = False token.attrSet("class", "wikilink") - if "#" in text: - link, id = text.split("#") + + # Parse display text override syntax: [[page|Display Text]] + if "|" in text: + # Split only on first pipe to allow pipes in display text + link_part, display_text = text.split("|", 1) + display_text = display_text.strip() + # Fall back to link_part if display_text is empty + if not display_text: + display_text = link_part + else: + link_part, display_text = text, None + + # Handle anchor in link part: [[page#anchor]] or [[page#anchor|Display Text]] + if "#" in link_part: + link, id = link_part.split("#", 1) link = link.strip("/") else: - link, id = text, None + link, id = link_part.strip("/"), None + + # Get configuration for handling broken links + if markata: + config = get_plugin_config(markata) + suppress_patterns = config.get("suppress_patterns", []) + fallback_behavior = config.get("fallback_behavior", "warn") + enable_logging = config.get("enable_logging", True) + else: + config = get_default_config() + suppress_patterns = config.get("suppress_patterns", []) + fallback_behavior = config.get("fallback_behavior", "warn") + enable_logging = config.get("enable_logging", True) # possible_pages = markata.filter( # 
f'str(path).split("/")[-1].split(".")[0].replace("_", "-") == "{link.replace("_", "-")}"', # ) - possible_pages = markata.possible_wikilink.get(link, []) + possible_pages = markata.possible_wikilink.get(link, []) if markata else [] if len(possible_pages) == 1: link = possible_pages[0] elif len(possible_pages) > 1: - if md.options["article"] is None: - debug_value = "UNKNOWN" - else: - debug_value = md.options["article"].get( - "path", - md.options["article"].get( - "title", md.options["article"].get("slug", "") - ), - ) - logger.warning( - f"wikilink [[{text}]] has duplicate matches ({possible_pages}) in file '{debug_value}', defaulting to the first match ({possible_pages[0]})", - ) - link = possible_pages[0] + # Use priority-based resolution instead of simple first match + link = resolve_best_match(link, possible_pages, text, markata, md) else: - if md.options["article"] is None: - debug_value = "UNKNOWN" - else: - debug_value = md.options["article"].get( - "path", - md.options["article"].get( - "title", md.options["article"].get("slug", "") - ), - ) - logger.warning( - f"wikilink [[{text}]] no matches in file '{debug_value}', defaulting to '/{text}'", - ) - link = text + # No matches found - handle according to configuration + should_suppress = should_suppress_warning(link_part, suppress_patterns) + + if ( + not should_suppress + and enable_logging + and fallback_behavior in ["warn", "error"] + ): + if md.options.get("article") is None: + debug_value = "UNKNOWN" + else: + debug_value = md.options["article"].get( + "path", + md.options["article"].get( + "title", md.options["article"].get("slug", "") + ), + ) + + message = f"wikilink [[{text}]] no matches in file '{debug_value}', defaulting to '/{link_part}'" + + if fallback_behavior == "error": + logger.error(message) + else: + logger.warning(message) + + # Fallback to original link text + link = link_part if id and not link.endswith(f"#{id}"): link = f"{link}#{id}" token.attrSet("href", f"/{link}") content_token 
= state.push("text", "", 0) - content_token.content = text + # Use display text if available, otherwise fall back to the link part + content_token.content = display_text if display_text is not None else link_part token = state.push("link_close", "a", -1) - token.content = text + token.content = display_text if display_text is not None else link_part return True diff --git a/pyproject.toml b/pyproject.toml index 404fb173..a1f46cc4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ description = "Static site generator plugins all the way down." keywords = ["static-site"] name = "markata" readme = "README.md" -requires-python = ">=3.6" +requires-python = ">=3.10" [[project.authors]] name = "Waylon Walker" @@ -207,3 +207,12 @@ unfixable = [] [tool.ruff.lint.isort] force-single-line = true + +[dependency-groups] +dev = [ + "psutil>=7.2.1", + "pytest>=9.0.2", + "pytest-mock>=3.15.1", + "pytest-tmp-files>=0.0.2", + "ruff>=0.14.13", +] From d8882d5371ce49fc7644cca393a015a496ad7416 Mon Sep 17 00:00:00 2001 From: autobump Date: Mon, 19 Jan 2026 15:43:03 +0000 Subject: [PATCH 35/37] =?UTF-8?q?Bump=20version:=200.11.0.dev9=20=E2=86=92?= =?UTF-8?q?=200.11.0.dev10?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index 0b05868b..1d454318 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev9" +__version__ = "0.11.0.dev10" From 836feb496b96f4161b2e9f2822e81a177cb1d027 Mon Sep 17 00:00:00 2001 From: Waylon Walker Date: Wed, 21 Jan 2026 21:34:48 -0600 Subject: [PATCH 36/37] feat: add pagination support to feeds (#173) ## Summary Add pagination support to feeds with three pagination types and comprehensive security hardening. 
## Features - Add feed pagination configuration (`items_per_page`, `pagination_type`, `enabled`) - Implement three pagination types: `htmx`, `manual`, `js` - HTMX infinite scroll with partial loading - JavaScript-based infinite scroll using Intersection Observer - Manual page navigation with prev/next controls - Improve feed name sanitization with Python identifier conversion ## Security - SHA-256 integrity verification for HTMX downloads (25+ versions supported) - Path traversal protection for feed slugs - XSS prevention in templates using `|tojson` filter - No CDN fallback - fail securely if HTMX download fails - Comprehensive security test suite ## Other Changes - Add pagination static assets (CSS/JS) - Add pagination templates (`feed_partial.html`, `feed_items_partial.html`, `pagination_controls.html`) - Add pagination implementation guide documentation - Bump Python minimum version due to pydantic requirements - Add helper script for managing HTMX integrity hashes --- CHANGELOG.md | 20 + docs/pagination-implementation-guide.md | 357 ++++++++++++ markata.toml | 39 +- markata/plugins/feeds.py | 638 ++++++++++++++++++++- markata/static/css/pagination.css | 180 ++++++ markata/static/js/pagination.js | 213 +++++++ markata/templates/didyoumean_partial.html | 2 - markata/templates/feed_items_partial.html | 19 + markata/templates/feed_partial.html | 86 ++- markata/templates/pagination_controls.html | 25 + pyproject.toml | 1 - scripts/README.md | 70 +++ scripts/add_htmx_hash.py | 313 ++++++++++ tests/test_feeds_security.py | 200 +++++++ 14 files changed, 2128 insertions(+), 35 deletions(-) create mode 100644 docs/pagination-implementation-guide.md create mode 100644 markata/static/css/pagination.css create mode 100644 markata/static/js/pagination.js create mode 100644 markata/templates/feed_items_partial.html create mode 100644 markata/templates/pagination_controls.html create mode 100644 scripts/README.md create mode 100755 scripts/add_htmx_hash.py create mode 100644 
tests/test_feeds_security.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b08a28b9..098baeff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,26 @@ ## 0.10.0 +### Feed Pagination + +- Feat: Add feed pagination with three types: `htmx`, `manual`, `js` +- Feat: HTMX infinite scroll with partial loading +- Feat: JavaScript infinite scroll using Intersection Observer +- Feat: Manual page navigation with prev/next controls +- Feat: Configurable `items_per_page`, `pagination_type`, and `enabled` per feed + +### Security + +- Feat: SHA-256 integrity verification for HTMX downloads (25+ versions supported) +- Feat: Path traversal protection for feed slugs +- Feat: XSS prevention in templates using `|tojson` filter +- Feat: Fail securely if HTMX download fails (no CDN fallback) +- Feat: Comprehensive security test suite for feeds + +### Other + +- Feat: Improve feed name sanitization with Python identifier conversion +- Feat: Add pagination implementation guide documentation - Fix: `auto_description` now more accurately returns plain text, does not cut off words, and add an ellipsis. - Fix: article_html now typed such that it may be a dict without warning - publish_source now only supports using post models that include a dumps command, i.e. no longer frontmatter post objects diff --git a/docs/pagination-implementation-guide.md b/docs/pagination-implementation-guide.md new file mode 100644 index 00000000..82ba6fe8 --- /dev/null +++ b/docs/pagination-implementation-guide.md @@ -0,0 +1,357 @@ +--- +title: Pagination Implementation Guide +description: Guide for implementing pagination in Markata templates with manual, HTMX, and JavaScript options + +--- + +# Pagination Implementation Guide + +This guide provides clear instructions for implementing pagination in your Markata templates. The pagination system is integrated into the feeds plugin and supports three types: manual, HTMX, and JavaScript infinite scroll. 
+ +## Core Components + +### Feeds Plugin with Pagination +**Location:** `markata/plugins/feeds.py` + +The feeds plugin includes built-in pagination support with these features: +- Automatic pagination for any feed +- Three pagination types: manual, HTMX, JavaScript +- Configurable items per page +- SEO-friendly URL generation +- Template context variables + +### Pagination Plugin (Core Logic) +**Location:** `markata/plugins/pagination.py` + +Core pagination functionality that provides: +- Pagination configuration models +- URL generation helpers +- Template rendering context +- Support for all pagination types + +## Quick Setup + +### 1. Basic Configuration + +Add to your `markata.yaml`: + +```yaml +# Enable pagination globally (optional, per-feed config also available) +pagination: + default: + enabled: true + items_per_page: 20 + pagination_type: 'manual' # 'manual', 'htmx', 'js' + +# Or configure per feed +feeds: + blog: + template: "feed.html" + partial_template: "feed_partial.html" + enabled: true + items_per_page: 10 + pagination_type: 'htmx' +``` + +### 2. Template Variables + +All pagination templates receive these variables: + +```jinja2 +{{ markata }} # Markata instance +{{ feed }} # Current feed object +{{ posts }} # Posts for current page +{{ page }} # Current page number (1-based) +{{ total_pages }} # Total number of pages +{{ has_prev }} # Boolean: has previous page? +{{ has_next }} # Boolean: has next page? +{{ prev_page }} # Previous page number or null +{{ next_page }} # Next page number or null +{{ pagination_enabled }} # Boolean: is pagination enabled? +{{ pagination_config }} # PaginationConfig object +{{ feed_name }} # Feed slug/name +{{ pagination_js_url }} # URL to pagination config JS (when using JS pagination) +``` + +## Pagination Types + +### 1. 
Manual Pagination + +**Best for:** SEO, accessibility, traditional blogs +**Features:** +- Traditional click navigation +- Page numbers +- Previous/Next buttons +- Clean permanent URLs +- Works without JavaScript + +**Configuration:** +```yaml +feeds: + blog: + pagination_type: 'manual' + items_per_page: 10 + show_page_numbers: true + max_page_links: 7 +``` + +**Template Implementation:** + +The feeds plugin automatically handles manual pagination when `pagination_type: 'manual'`. Your template just needs to include pagination controls: + +```jinja2 +{% if pagination_enabled %} +{% set config = pagination_config %} + +
    + {% if prev_page %} + {% if page > 2 %} + + ← Previous + + {% endif %} + + + {{ page }} / {{ total_pages }} + + + {% if has_next %} + + Next → + + {% endif %} +
    +{% endif %} +``` + +### 2. HTMX Pagination + +**Best for:** Progressive enhancement, modern UX with fallback +**Features:** +- Infinite scroll with 14KB HTMX library +- Progressive enhancement built-in +- SEO-friendly URLs +- Graceful JavaScript fallback + +**Configuration:** +```yaml +feeds: + blog: + pagination_type: 'htmx' + items_per_page: 15 + show_loading_skeleton: true + auto_load_threshold: 200 +``` + +**Template Implementation:** + +```jinja2 +{% if pagination_enabled and pagination_context.pagination_type == 'htmx' %} +{% if has_next %} +
    +
    +{% endif %} + + + + +{% endif %} +``` + +### 3. JavaScript Pagination + +**Best for:** Custom infinite scroll, zero external dependencies +**Features:** +- Custom infinite scroll using Intersection Observer +- Zero external dependencies +- AJAX content loading +- URL history management +- Loading indicators + +**Configuration:** +```yaml +feeds: + blog: + pagination_type: 'js' + items_per_page: 12 + show_loading_skeleton: true + auto_load_threshold: 300 + show_end_message: true +``` + +**Template Implementation:** + +```jinja2 +{% if pagination_enabled and pagination_context.pagination_type == 'js' %} + + + +
    + + +{% endif %} +``` + +## URL Structure + +The pagination system generates clean, SEO-friendly URLs: + +- **First page:** `/feed-name/` +- **Subsequent pages:** `/feed-name/page/2/`, `/feed-name/page/3/`, etc. +- **Partial files:** `/feed-name/partial/`, `/feed-name/page/2/partial/` + +## Static Assets + +### Required Files + +The pagination system includes these static files (automatically created): + +```bash +markata/static/js/ +├── pagination.js # JavaScript infinite scroll module +├── pagination-config.js # Generated config (JS pagination only) +└── htmx.org@1.9.10.min.js # Secure HTMX download + +markata/static/css/ +└── pagination.css # Pagination styles and animations +``` + +## Advanced Configuration + +### Per-Feed Customization + +```yaml +feeds: + blog: + enabled: true + pagination_type: 'manual' + items_per_page: 8 + show_page_numbers: true + max_page_links: 5 + show_loading_skeleton: false + auto_load_threshold: 100 + show_end_message: false + + news: + enabled: true + pagination_type: 'htmx' + items_per_page: 20 + show_loading_skeleton: true + auto_load_threshold: 200 + show_end_message: true + + portfolio: + enabled: true + pagination_type: 'js' + items_per_page: 12 + show_loading_skeleton: true + auto_load_threshold: 300 + show_end_message: true +``` + +### Template Includes + +Create reusable template components: + +**`includes/pagination_info.html`:** +```jinja2 +{% if pagination_enabled %} +
    + Page {{ page }} of {{ total_pages }} + Showing {{ posts|length }} items +
    +{% endif %} +``` + +**`includes/post_card.html`:** +```jinja2 +
    +

    {{ post.title }}

    + {% if post.date %} + + {% endif %} + {% if post.description %} +

    {{ post.description }}

    + {% endif %} +
    +``` + +Then in your main template: +```jinja2 +{% include "includes/pagination_info.html" %} + +
    + {% for post in posts %} + {% include "includes/post_card.html" %} + {% endfor %} +
    +``` + +## CLI Helper + +Get example configuration: + +```bash +markata pagination config-example +``` + +This outputs a ready-to-use configuration block for your `markata.yaml`. + +## Testing Your Implementation + +1. **Manual Pagination:** Verify page numbers and Previous/Next links work +2. **HTMX Pagination:** Test infinite scroll and JavaScript fallback +3. **JavaScript Pagination:** Verify auto-loading and URL updates +4. **SEO:** Check that each page has unique titles and meta tags +5. **Accessibility:** Test keyboard navigation and screen readers + +## Troubleshooting + +### Common Issues + +**Pagination not showing:** +- Ensure `enabled: true` is set for your feed +- Check that pagination plugin loads before feeds plugin + +**HTMX not working:** +- Verify HTMX script is loaded +- Check that partial template exists and is accessible + +**JavaScript errors:** +- Ensure browser supports Intersection Observer +- Check console for fetch API errors + +**URL issues:** +- Verify your web server supports clean URLs +- Check that page 1 redirects work correctly + +### Debug Mode + +Add this to templates to debug pagination data: + +```jinja2 +{% if markata.config.debug %} +
    {{ pagination_config | pprint }}
    +
    Page: {{ page }}, Total: {{ total_pages }}
    +
    Has Prev: {{ has_prev }}, Has Next: {{ has_next }}
    +{% endif %} +``` + +This comprehensive guide should help you implement any pagination type in your Markata templates. Choose the pagination type that best fits your use case and customize templates to match your site's design. \ No newline at end of file diff --git a/markata.toml b/markata.toml index 04a9f1eb..55a7024c 100644 --- a/markata.toml +++ b/markata.toml @@ -28,6 +28,9 @@ markdown_backend = 'markdown-it-py' default_cache_expire = 1209600 # subroute = "docs" +# HTMX version for pagination +htmx_version = "2.0.8" + # set the subroute if you are deploying to a subroute of a site # make sure you enable the subroute plugin # subroute = "docs" @@ -292,5 +295,39 @@ plugin = "markata.plugins.md_it_wikilinks:wikilinks_plugin" config = { markata = "markata" } [markata.glob] -glob_patterns = ["docs/**/*.md","CHANGELOG.md"] +glob_patterns = [ +"docs/**/*.md", +"pages/**/*.md", +"CHANGELOG.md", +] use_gitignore = true + +[[markata.feeds]] +slug = 'tag/htmx' +filter = "date<=today and published==True" +sort = "date" +reverse = true +description = 'Test HTMX pagination feed' +enabled = true +items_per_page = 2 +pagination_type = 'htmx' + +[[markata.feeds]] +slug = 'tag/manual' +filter = "date<=today and published==True" +sort = "date" +reverse = true +description = 'Test manual pagination feed' +enabled = true +items_per_page = 3 +pagination_type = 'manual' + +[[markata.feeds]] +slug = 'tag/js' +filter = "date<=today and published==True" +sort = "date" +reverse = true +description = 'Test JS pagination feed' +enabled = true +items_per_page = 2 +pagination_type = 'js' diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 46f9b2af..f52a2f84 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -188,7 +188,10 @@ """ +from __future__ import annotations + import datetime +import re import shutil import textwrap import warnings @@ -197,6 +200,7 @@ from typing import Any from typing import List from typing import Optional +from 
urllib.request import urlopen import jinja2 import pydantic @@ -204,15 +208,15 @@ from jinja2 import Template from jinja2 import Undefined from pydantic import ConfigDict +from pydantic import Field from pydantic import field_validator +from rich.console import Console from rich.jupyter import JupyterMixin from rich.pretty import Pretty from rich.table import Table -from markata import Markata from markata import __version__ from markata import background -from markata.errors import DeprecationWarning from markata.hookspec import hook_impl from markata.hookspec import register_attr from markata.plugins.jinja_env import get_template @@ -220,15 +224,68 @@ if TYPE_CHECKING: from frontmatter import Post - from rich.console import Console + + from markata import Markata + + +def to_pythonic_identifier(name: str) -> str: + """ + Convert a string to a valid Python identifier. + + This function handles various problematic characters that might appear + in feed names or slugs, making them suitable for use as Python attribute + names and dictionary keys. 
+ + Rules applied: + - Replace spaces, slashes, dots, and other non-alphanumeric characters with underscores + - Convert to lowercase + - Remove leading/trailing underscores + - Ensure the result starts with a letter or underscore + - Collapse multiple consecutive underscores to a single one + + Examples: + 'project-gallery' -> 'project_gallery' + 'tag/htmx' -> 'tag_htmx' + 'My Feed Name' -> 'my_feed_name' + '123start' -> '_123start' + """ + if not name: + return "_unnamed" + + # Replace non-alphanumeric characters (except underscores) with underscores + pythonic = re.sub(r"[^a-zA-Z0-9_]", "_", str(name)) + + # Convert to lowercase + pythonic = pythonic.lower() + + # Collapse multiple consecutive underscores + pythonic = re.sub(r"_+", "_", pythonic) + + # Remove leading and trailing underscores + pythonic = pythonic.strip("_") + + # Ensure it starts with a letter or underscore (not a digit) + if pythonic and pythonic[0].isdigit(): + pythonic = "_" + pythonic + + # Handle empty result or result that became empty after processing + if not pythonic: + pythonic = "_unnamed" + + return pythonic class SilentUndefined(Undefined): + """A Jinja2 Undefined subclass that silently returns empty string on errors.""" + def _fail_with_undefined_error(self, *args, **kwargs): return "" -class MarkataFilterError(RuntimeError): ... +class MarkataFilterError(RuntimeError): + """Raised when a feed filter expression fails.""" + + ... 
class FeedConfig(pydantic.BaseModel, JupyterMixin): @@ -255,6 +312,12 @@ class FeedConfig(pydantic.BaseModel, JupyterMixin): sitemap_template: str = "sitemap.xml" xsl_template: str = "rss.xsl" + # Pagination configuration + enabled: bool = False + items_per_page: int = 10 + pagination_type: str = "htmx" # htmx, manual, js + per_page: int = 10 # backwards compatibility + model_config = ConfigDict( validate_assignment=True, # Config model arbitrary_types_allowed=True, @@ -269,11 +332,11 @@ class FeedConfig(pydantic.BaseModel, JupyterMixin): @classmethod def default_name(cls, v, info) -> str: if v: - return v + return to_pythonic_identifier(str(v)) slug = info.data.get("slug") if not slug: raise ValueError("Either name or slug must be provided") - return str(slug).replace("-", "_") + return to_pythonic_identifier(str(slug)) @field_validator("slug", mode="before") @classmethod @@ -290,7 +353,7 @@ def __rich_console__(self) -> "Console": return self.markata.console @property - def __rich__(self) -> Pretty: + def __rich__(self): return lambda: Pretty(self) @@ -300,8 +363,9 @@ class Feed(pydantic.BaseModel, JupyterMixin): ## Usage ``` python - from markata import Markata - m = Markata() + if not TYPE_CHECKING: + from markata import Markata + m = Markata() # access posts for a feed m.feeds.docs.posts @@ -312,7 +376,7 @@ class Feed(pydantic.BaseModel, JupyterMixin): """ config: FeedConfig - markata: Markata = pydantic.Field(exclude=True) + markata: Any = Field(exclude=True) model_config = ConfigDict( validate_assignment=False, @@ -331,6 +395,22 @@ def name(self) -> str: @property def posts(self): + # Get posts from instance state or compute normally + return self._get_posts() + + def _get_posts(self, override_posts=None): + """ + Get posts with optional override for pagination. 
+ + Args: + override_posts: If provided, returns these posts instead of computing + + Returns: + PrettyList of posts + """ + if override_posts is not None: + return PrettyList(override_posts) + posts = self.map("post") if self.config.head is not None and self.config.tail is not None: head_posts = posts[: self.config.head] @@ -393,6 +473,8 @@ def dump_bytecode(self, bucket): class FeedsConfig(pydantic.BaseModel): feeds: List[FeedConfig] = [FeedConfig(slug="archive")] + htmx_version: str = "2.0.8" + skip_htmx_integrity_check: bool = False @property def jinja_env(self): @@ -430,10 +512,313 @@ def __rich__(self) -> Pretty: @hook_impl(tryfirst=True) +@register_attr("config_models") def config_model(markata: Markata) -> None: markata.config_models.append(FeedsConfig) +@hook_impl(tryfirst=True) +def htmx_config_model(markata: Markata) -> None: + """Register HTMX configuration model with validation.""" + + class HtmxConfig(pydantic.BaseModel): + version: str = "2.0.8" + + model_config = ConfigDict( + validate_assignment=True, + extra="forbid", + ) + + markata.config_models.append(HtmxConfig) + + +@hook_impl +def configure(markata: Markata) -> None: + """ + Configure feeds during configuration phase. + """ + _download_htmx_if_needed(markata) + _copy_pagination_static_files(markata, Path(markata.config.output_dir)) + + +def _download_htmx_if_needed(markata: Markata) -> None: + """ + Download HTMX library to static directory if needed with integrity verification. 
+ """ + import hashlib + from urllib.error import HTTPError + from urllib.error import URLError + from urllib.request import Request + + htmx_version = markata.config.htmx_version + htmx_filename = "htmx.min.js" + htmx_static_path = Path(markata.config.output_dir) / "static" / "js" / htmx_filename + htmx_url = f"https://unpkg.com/htmx.org@{htmx_version}/dist/htmx.min.js" + + # Known SHA-256 hashes for HTMX versions + HTMX_INTEGRITY_HASHES = { + "1.9.10": "b3bdcf5c741897a53648b1207fff0469a0d61901429ba1f6e88f98ebd84e669e", + "2.0.8": "22283ef68cb7545914f0a88a1bdedc7256a703d1d580c1d255217d0a50d31313", + "2.0.7": "60231ae6ba9db3825eb15a261122d5f55921c4d53b66bf637dc18b4ee27c79f9", + "2.0.6": "b6768eed4f3af85b73a75054701bd60e17cac718aef2b7f6b254e5e0e2045616", + "2.0.5": "f601807715bde32e458b73821e16c5641a3d90dfb670f6ebd986f128b8222fcf", + "2.0.4": "e209dda5c8235479f3166defc7750e1dbcd5a5c1808b7792fc2e6733768fb447", + "2.0.3": "491955cd1810747d7d7b9ccb936400afb760e06d25d53e4572b64b6563b2784e", + "2.0.2": "e1746d9759ec0d43c5c284452333a310bb5fd7285ebac4b2dc9bf44d72b5a887", + "2.0.1": "6d4aaa4b0d3e8b4c91f8d97b92a361a19b1bd4544dea3f668fdc3e62a63995df", + "2.0.0": "0fc57ba0e655504d282bb6ec1c3d89240cde9f2ce1c393d5b38a95c5bc6da875", + "1.9.12": "449317ade7881e949510db614991e195c3a099c4c791c24dacec55f9f4a2a452", + "1.9.11": "d15107cc7f040a9e83b1b66176fd927ad40b5e0255813a03f8ccfeed46ee42b0", + "1.9.9": "96a334a9570a382cf9c61a1f86d55870ba1c65e166cc5bcae98ddd8cdabeb886", + "1.9.8": "c4fce4dc5cc9c8c3c9bf1aa788d54bb2cb25cd27114eb06551494ff61c30d6fb", + "1.9.7": "30c95cb75e7f7c9471c2bf43fa3db0a30a39077764295b15c405869fed7e5764", + "1.9.6": "cbb723c305cf6d6315c890909815523588509e2e092a59f8cfc4a885829689d5", + "1.9.5": "76a9887f1ce3bf8f88bea3b327f1e74b9d9b42e1dd9cb8237a87a74261d5d042", + "1.9.4": "5c88af44013df62fde8a5e4fdf524d8a16834a28b1d15e34ae0994ac27cd4c7e", + "1.9.3": "8f567d21cbe0553643db48866b2377a3bbb9247f8d924428002c2b847f28b23c", + "1.9.2": 
"fd346e9c8639d4624893fc455f2407a09b418301736dd18ebbb07764637fb478", + "1.9.1": "d7bff1d0f45e3418fa820d8a6f0de1ca5e87562f218a0f06add08652c7691a9c", + "1.9.0": "97df3adfbf23b873d9a3a80f7143d801a32604ba29de9a33f21a92a171076aa8", + "1.8.5": "705fb60063bf5270b7077409b848b57ea24d2277b806aa04efea513287bf63a6", + "1.8.4": "df72edb141a16578945a0356c8a6a37239015251962071639b99b0184691ed1d", + "1.8.3": "df811b5d27b3dddfec9a858b437b0c7302a56959450f0f9c133ef356c25fcf1c", + "1.8.2": "91e7fb193c4a6a5d3bb56ed0a7007933664e7803da389a696de61147a6f66058", + "1.8.1": "1a1c942f7bb50dcc2198b2f3c6cc64199332e32a5ba08e7bd2215aa0a1966a55", + "1.8.0": "914e05e274362f2e166fc5a8cf6272e2042d9b9e50647678c64c579dcb5fa441", + } + + expected_hash = HTMX_INTEGRITY_HASHES.get(htmx_version) + if not expected_hash: + if markata.config.skip_htmx_integrity_check: + markata.console.warn( + f"No integrity hash available for HTMX version {htmx_version}, skipping verification" + ) + expected_hash = None + else: + raise ValueError( + f"No integrity hash available for HTMX version {htmx_version}. " + f"You can add 'skip_htmx_integrity_check: true' to your config to skip verification, " + f"or add the hash to HTMX_INTEGRITY_HASHES in markata/plugins/feeds.py" + ) + + # Download if file doesn't exist + if not htmx_static_path.exists(): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=ResourceWarning) + + # Ensure static/js directory exists + htmx_static_path.parent.mkdir(parents=True, exist_ok=True) + + # Download HTMX with timeout and integrity verification + request = Request(htmx_url, headers={"User-Agent": "Markata/1.0"}) + with urlopen(request, timeout=10) as response: + content = response.read() + + # Verify content integrity if hash is available + if expected_hash: + actual_hash = hashlib.sha256(content).hexdigest() + if actual_hash != expected_hash: + raise ValueError( + f"HTMX integrity check failed. 
Expected: {expected_hash}, Got: {actual_hash}" + ) + + htmx_static_path.write_bytes(content) + + verification_status = ( + "verified" if expected_hash else "without verification" + ) + markata.console.print( + f"Downloaded HTMX {htmx_version} to {htmx_static_path} ({verification_status})" + ) + + except (URLError, HTTPError, ValueError) as e: + markata.console.error(f"Failed to download HTMX: {e}") + # Critical security: no fallback to CDN + raise RuntimeError( + f"HTMX download failed: {e}. Cannot proceed without verified HTMX." + ) + except Exception as e: + markata.console.error(f"Unexpected error downloading HTMX: {e}") + raise RuntimeError(f"HTMX download failed: {e}") + + return True + + +def _generate_pagination_js( + markata: Markata, pagination_config: dict, output_dir: Path +) -> str: + """ + Generate JavaScript file for pagination and return its path. + + Args: + markata: Markata instance + pagination_config: Pagination configuration data + output_dir: Output directory for JS file + + Returns: + Path to generated JS file relative to output_dir + """ + import json + + js_content = f"""// Generated JavaScript for pagination +window.paginationData = {json.dumps(pagination_config)}; +""" + + js_dir = output_dir / "static" / "js" + js_dir.mkdir(parents=True, exist_ok=True) + + js_file = js_dir / "pagination-config.js" + js_file.write_text(js_content) + + return "/static/js/pagination-config.js" + + +def _copy_pagination_static_files(markata: Markata, output_dir: Path) -> None: + """ + Copy pagination static files (JS and CSS) from markata package to output directory. 
+ + Args: + markata: Markata instance + output_dir: Output directory for static files + """ + import importlib.resources + + # Get the markata static directory + static_package = importlib.resources.files("markata") / "static" + + # Copy pagination.js + js_src = static_package / "js" / "pagination.js" + js_dst_dir = output_dir / "static" / "js" + js_dst_dir.mkdir(parents=True, exist_ok=True) + js_dst = js_dst_dir / "pagination.js" + + if js_src.is_file(): + js_dst.write_text(js_src.read_text()) + markata.console.print(f"Copied pagination.js to {js_dst}") + + # Copy pagination.css + css_src = static_package / "css" / "pagination.css" + css_dst_dir = output_dir / "static" / "css" + css_dst_dir.mkdir(parents=True, exist_ok=True) + css_dst = css_dst_dir / "pagination.css" + + if css_src.is_file(): + css_dst.write_text(css_src.read_text()) + markata.console.print(f"Copied pagination.css to {css_dst}") + + +def _sanitize_feed_slug(slug: str) -> str: + """ + Sanitize feed slug to prevent path traversal attacks. + + Args: + slug: User-provided feed slug + + Returns: + Sanitized slug safe for filesystem use + + Raises: + ValueError: If slug contains dangerous characters + """ + import re + + if not slug: + raise ValueError("Feed slug cannot be empty") + + # Remove path traversal sequences (allow forward slashes for nested paths) + if ".." 
in slug or "\\" in slug: + raise ValueError(f"Invalid characters in feed slug: {slug}") + + # Allow alphanumeric characters, hyphens, underscores, and forward slashes for nested paths + if not re.match(r"^[a-zA-Z0-9_/-]+$", slug): + raise ValueError(f"Feed slug contains invalid characters: {slug}") + + # Prevent leading or trailing slashes and double slashes + if slug.startswith("/") or slug.endswith("/") or "//" in slug: + raise ValueError(f"Feed slug has invalid slash usage: {slug}") + + # Sanitize by removing any path traversal attempts + safe_slug = slug.replace("..", "") + + # Additional safety check + if safe_slug != slug: + raise ValueError(f"Feed slug attempts path traversal: {slug}") + + return safe_slug + + +def _ensure_head_links(markata: Markata) -> None: + """ + Ensure pagination CSS and JS links are in markata.config.head.link + without duplicating existing links. + """ + pagination_css_href = "/static/css/pagination.css" + pagination_js_config_href = "/static/js/pagination-config.js" + pagination_js_href = "/static/js/pagination.js" + htmx_version = markata.config.htmx_version + htmx_static_href = "/static/js/htmx.min.js" + + # Try to download HTMX first + if not _download_htmx_if_needed(markata): + # Fallback to CDN if download fails + htmx_cdn_href = f"https://unpkg.com/htmx.org@{htmx_version}" + else: + htmx_cdn_href = htmx_static_href + + # Helper function to get href from link (supports both dicts and objects) + def get_href(link): + if hasattr(link, "href"): + return link.href + return link.get("href", "") + + # Helper function to get src from script (supports both dicts and objects) + def get_src(script): + if hasattr(script, "src"): + return script.src + return script.get("src", "") + + # Check if pagination CSS is already in head.links + css_exists = any( + get_href(link) == pagination_css_href for link in markata.config.head.link + ) + + # Add CSS link if not already present + if not css_exists: + markata.config.head.link.append( + {"rel": 
"stylesheet", "href": pagination_css_href} + ) + + # Check if pagination JS config is already in head.script + js_config_exists = any( + get_src(script) == pagination_js_config_href + for script in markata.config.head.script + ) + + # Check if pagination JS is already in head.script + js_exists = any( + get_src(script) == pagination_js_href for script in markata.config.head.script + ) + + # Add JS config link if not already present + if not js_config_exists: + markata.config.head.script.append({"src": pagination_js_config_href}) + + # Add JS link if not already present + if not js_exists: + markata.config.head.script.append({"src": pagination_js_href}) + + # Check if HTMX is already in head.script + htmx_exists = any( + get_src(script) in [htmx_cdn_href, htmx_static_href] + for script in markata.config.head.script + ) + + # Add HTMX link if not already present + if not htmx_exists: + markata.config.head.script.append({"src": htmx_cdn_href}) + + @hook_impl @register_attr("feeds") def pre_render(markata: Markata) -> None: @@ -448,13 +833,21 @@ def save(markata: Markata) -> None: """ Creates a new feed page for each page in the config. 
""" + _ensure_head_links(markata) with markata.cache as cache: for feed in markata.feeds.values(): - create_page( - markata, - feed, - cache, - ) + if feed.config.enabled: + create_paginated_feed( + markata, + feed, + cache, + ) + else: + create_page( + markata, + feed, + cache, + ) home = Path(str(markata.config.output_dir)) / "index.html" archive = Path(str(markata.config.output_dir)) / "archive" / "index.html" @@ -478,6 +871,7 @@ def save(markata: Markata) -> None: if should_write: xsl_file.write_text(xsl) + def create_page( markata: Markata, feed: Feed, @@ -489,7 +883,10 @@ def create_page( template = get_template(markata.jinja_env, feed.config.template) partial_template = get_template(markata.jinja_env, feed.config.partial_template) - canonical_url = f"{markata.config.url}/{feed.config.slug}/" + + # Security: Sanitize feed slug to prevent path traversal attacks + safe_slug = _sanitize_feed_slug(feed.config.slug) + canonical_url = f"{markata.config.url}/{safe_slug}/" # Get templates mtime to bust cache when any template changes templates_mtime = get_templates_mtime(markata.jinja_env) @@ -503,7 +900,9 @@ def create_page( if cache_key_posts not in markata._feed_hash_cache: # Use post slugs and published dates instead of full to_dict() # This provides a stable, lightweight cache key - posts_data = feed.map("(post.slug, str(getattr(post, 'date', '')), getattr(post, 'title', ''))") + posts_data = feed.map( + "(post.slug, str(getattr(post, 'date', '')), getattr(post, 'title', ''))" + ) markata._feed_hash_cache[cache_key_posts] = str(sorted(posts_data)) posts_hash_data = markata._feed_hash_cache[cache_key_posts] @@ -534,17 +933,13 @@ def create_page( feed_sitemap_from_cache = markata.precache.get(feed_sitemap_key) feed_atom_from_cache = markata.precache.get(feed_atom_key) - output_file = Path(markata.config.output_dir) / feed.config.slug / "index.html" + output_file = Path(markata.config.output_dir) / safe_slug / "index.html" partial_output_file = ( - 
Path(markata.config.output_dir) / feed.config.slug / "partial" / "index.html" - ) - rss_output_file = Path(markata.config.output_dir) / feed.config.slug / "rss.xml" - sitemap_output_file = ( - Path(markata.config.output_dir) / feed.config.slug / "sitemap.xml" - ) - atom_output_file = ( - Path(markata.config.output_dir) / feed.config.slug / "atom.xml" + Path(markata.config.output_dir) / safe_slug / "partial" / "index.html" ) + rss_output_file = Path(markata.config.output_dir) / safe_slug / "rss.xml" + sitemap_output_file = Path(markata.config.output_dir) / safe_slug / "sitemap.xml" + atom_output_file = Path(markata.config.output_dir) / safe_slug / "atom.xml" # Create all directories in one batch partial_output_file.parent.mkdir(exist_ok=True, parents=True) @@ -597,7 +992,9 @@ def create_page( if feed.config.sitemap: if feed_sitemap_from_cache is None: from_cache = False - sitemap_template = get_template(markata.jinja_env, feed.config.sitemap_template) + sitemap_template = get_template( + markata.jinja_env, feed.config.sitemap_template + ) feed_sitemap = sitemap_template.render(markata=markata, feed=feed) cache.set(feed_sitemap_key, feed_sitemap) else: @@ -662,6 +1059,187 @@ def create_page( atom_output_file.write_text(feed_atom) +def create_paginated_feed( + markata: Markata, + feed: Feed, + cache, +) -> None: + """ + Create paginated feed pages. 
+ """ + posts = feed.posts + per_page = getattr(feed.config, "items_per_page", feed.config.per_page) + + # Validate per_page to prevent division by zero + if per_page <= 0: + raise ValueError( + f"items_per_page must be a positive integer, got {per_page} for feed '{feed.config.slug}'" + ) + + total_posts = len(posts) + + # Handle empty feeds gracefully + if total_posts == 0: + total_pages = 1 # Still create one empty page + else: + total_pages = (total_posts + per_page - 1) // per_page + + # Security: Sanitize feed slug to prevent path traversal attacks + safe_slug = _sanitize_feed_slug(feed.config.slug) + + template = get_template(markata, feed.config.template) + canonical_url = f"{markata.config.url}/{safe_slug}/" + + for page_num in range(1, total_pages + 1): + start_idx = (page_num - 1) * per_page + end_idx = start_idx + per_page + page_posts = posts[start_idx:end_idx] + + # Create pagination context + pagination_context = { + "current_page": page_num, + "total_pages": total_pages, + "total_posts": total_posts, + "per_page": per_page, + "has_prev": page_num > 1, + "has_next": page_num < total_pages, + "prev_page": page_num - 1 if page_num > 1 else None, + "next_page": page_num + 1 if page_num < total_pages else None, + "pagination_type": feed.config.pagination_type, + } + + # Generate JS config file if JS pagination is used + pagination_js_url = None + if feed.config.pagination_type == "js": + pagination_config = { + "enabled": True, + "type": feed.config.pagination_type, + "page": page_num, + "totalPages": total_pages, + "totalPosts": total_posts, + "itemsShown": len(page_posts), + "feedName": safe_slug, + "hasNext": page_num < total_pages, + "config": { + "pagination_type": feed.config.pagination_type, + "posts_per_page": getattr(feed.config, "posts_per_page", None), + "template": getattr(feed.config, "template", None), + }, + } + pagination_js_url = _generate_pagination_js( + markata, pagination_config, Path(markata.config.output_dir) + ) + + # Create a feed 
object for this page (no state mutation) + page_feed = Feed(config=feed.config, markata=feed.markata) + + key = markata.make_hash( + "feeds", + "paginated", + template, + __version__, + markata.config.url, + markata.config.description, + feed.config.title, + [p.content for p in page_posts], + canonical_url, + page_num, + pagination_context, + ) + + html_key = markata.make_hash(key, "html") + html_partial_key = markata.make_hash(key, "partial_html") + + # Determine output file paths + if page_num == 1: + # First page goes to the main feed index + output_file = Path(markata.config.output_dir) / safe_slug / "index.html" + else: + # Subsequent pages go to numbered subdirectories + output_file = ( + Path(markata.config.output_dir) + / safe_slug + / str(page_num) + / "index.html" + ) + + partial_output_file = output_file.parent / "partial" / "index.html" + output_file.parent.mkdir(exist_ok=True, parents=True) + partial_output_file.parent.mkdir(exist_ok=True, parents=True) + + # Check cache + feed_html_from_cache = markata.precache.get(html_key) + feed_html_partial_from_cache = markata.precache.get(html_partial_key) + + from_cache = True + if feed_html_from_cache is None: + from_cache = False + feed_html = template.render( + markata=markata, + __version__=__version__, + post=feed.config.model_dump(), + url=markata.config.url, + config=markata.config, + feed=page_feed, + pagination_enabled=True, + pagination_config=pagination_context, + pagination_context=pagination_context, + title=feed.config.title, + page=page_num, + total_pages=total_pages, + total_posts=total_posts, + has_next=pagination_context["has_next"], + has_prev=pagination_context["has_prev"], + next_page=pagination_context["next_page"], + prev_page=pagination_context["prev_page"], + feed_name=safe_slug, + posts=page_posts, + page_posts=page_posts, + pagination_js_url=pagination_js_url, + ) + cache.set(html_key, feed_html) + else: + feed_html = feed_html_from_cache + + if feed_html_partial_from_cache is None: + 
from_cache = False + # For HTMX partials, use items-only template to avoid duplicating page structure + items_partial_template = get_template(markata, "feed_items_partial.html") + feed_html_partial = items_partial_template.render( + markata=markata, + __version__=__version__, + post=feed.config.model_dump(), + url=markata.config.url, + config=markata.config, + feed=page_feed, + card_template=feed.config.card_template, + posts=page_posts, + page_posts=page_posts, + has_next=pagination_context["has_next"], + next_page=pagination_context["next_page"], + feed_name=safe_slug, + page=page_num, + total_pages=total_pages, + total_posts=total_posts, + pagination_context=pagination_context, + ) + cache.set(html_partial_key, feed_html_partial) + else: + feed_html_partial = feed_html_partial_from_cache + + if from_cache and output_file.exists() and partial_output_file.exists(): + continue + + current_html = output_file.read_text() if output_file.exists() else "" + if current_html != feed_html: + output_file.write_text(feed_html) + + current_partial_html = ( + partial_output_file.read_text() if partial_output_file.exists() else "" + ) + if current_partial_html != feed_html_partial: + partial_output_file.write_text(feed_html_partial) + @background.task def create_card( @@ -803,7 +1381,7 @@ def refresh(self): for feed_config in self.markata.config.feeds: # Ensure feed has a name, falling back to slug if needed if feed_config.name is None and feed_config.slug is not None: - feed_config.name = feed_config.slug.replace("-", "_") + feed_config.name = to_pythonic_identifier(str(feed_config.slug)) elif feed_config.name is None and feed_config.slug is None: feed_config.slug = "archive" feed_config.name = "archive" @@ -824,10 +1402,10 @@ def items(self): return [(key, self[key]) for key in self.config] def __getitem__(self, key: str) -> Any: - return getattr(self, key.replace("-", "_").lower()) + return getattr(self, to_pythonic_identifier(str(key))) def get(self, key: str, default: Any 
= None) -> Any: - return getattr(self, key.replace("-", "_").lower(), default) + return getattr(self, to_pythonic_identifier(str(key)), default) def _dict_panel(self, config) -> str: """pretty print configs with rich""" diff --git a/markata/static/css/pagination.css b/markata/static/css/pagination.css new file mode 100644 index 00000000..f688fe17 --- /dev/null +++ b/markata/static/css/pagination.css @@ -0,0 +1,180 @@ +/* Pagination Styles */ + +.loading-indicator { + display: flex; + align-items: center; + justify-content: center; + gap: 0.75rem; + padding: 2rem; + color: var(--text-color-muted, #6b7280); +} + +.spinner { + width: 1.5rem; + height: 1.5rem; + border: 2px solid var(--border-color, #e5e7eb); + border-top: 2px solid var(--primary-bg, #3b82f6); + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} + +.pagination-info { + color: var(--text-color-muted, #6b7280); + font-size: 0.875rem; +} + +/* Manual Pagination */ +.pagination { + display: flex; + flex-direction: column; + align-items: center; + gap: 1rem; + margin: 2rem 0; + padding: 1rem; + background: var(--bg-color, #fff); + border-radius: 8px; + border: 1px solid var(--border-color, #e5e7eb); +} + +.pagination-links { + display: flex; + gap: 0.5rem; + flex-wrap: wrap; + justify-content: center; +} + +.page-link { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 2.5rem; + height: 2.5rem; + padding: 0 0.75rem; + border: 1px solid var(--border-color, #e5e7eb); + border-radius: 6px; + background: var(--bg-color, #fff); + color: var(--text-color, #374151); + text-decoration: none; + font-size: 0.875rem; + font-weight: 500; + transition: all 0.2s ease; +} + +.page-link:hover { + background: var(--hover-bg, #f3f4f6); + border-color: var(--hover-border, #d1d5db); + color: var(--text-color, #374151); +} + +.page-link.current { + background: var(--primary-bg, #3b82f6); 
+ border-color: var(--primary-border, #3b82f6); + color: var(--primary-text, #fff); + font-weight: 600; +} + +.page-link[aria-label="First page"], +.page-link[aria-label="Last page"] { + font-size: 0.75rem; + min-width: auto; + padding: 0 0.5rem; +} + +/* Error and End Messages */ +.end-message { + text-align: center; + color: var(--text-color-muted, #6b7280); + font-style: italic; + margin-top: 1rem; +} + +.error-message { + text-align: center; + padding: 2rem; + background: var(--error-bg, #fef2f2); + border: 1px solid var(--error-border, #fecaca); + border-radius: 6px; + color: var(--error-text, #dc2626); + margin: 2rem 0; +} + +.error-message a { + color: var(--error-text, #dc2626); + text-decoration: underline; +} + +.error-message a:hover { + text-decoration: none; +} + +/* Responsive design */ +@media (max-width: 640px) { + .pagination-links { + gap: 0.25rem; + } + + .page-link { + min-width: 2rem; + height: 2rem; + padding: 0 0.5rem; + font-size: 0.75rem; + } + + .page-link[aria-label="First page"], + .page-link[aria-label="Last page"] { + display: none; + } +} + +/* Dark mode support */ +@media (prefers-color-scheme: dark) { + .loading-indicator, + .pagination-info, + .end-message { + color: var(--text-color-muted-dark, #9ca3af); + } + + .spinner { + border-color: var(--border-color-dark, #374151); + border-top-color: var(--primary-bg-dark, #2563eb); + } + + .pagination { + background: var(--bg-color-dark, #1f2937); + border-color: var(--border-color-dark, #374151); + } + + .page-link { + background: var(--bg-color-dark, #1f2937); + border-color: var(--border-color-dark, #374151); + color: var(--text-color-dark, #f9fafb); + } + + .page-link:hover { + background: var(--hover-bg-dark, #374151); + border-color: var(--hover-border-dark, #4b5563); + } + + .error-message { + background: var(--error-bg-dark, #7f1d1d); + border-color: var(--error-border-dark, #991b1b); + color: var(--error-text-dark, #fecaca); + } + + .error-message a { + color: 
var(--error-text-dark, #fecaca); + } +} + +/* Accessible focus styles */ +.loading-indicator:focus, +.error-message:focus, +.page-link:focus { + outline: 2px solid var(--primary-bg, #3b82f6); + outline-offset: 2px; +} \ No newline at end of file diff --git a/markata/static/js/pagination.js b/markata/static/js/pagination.js new file mode 100644 index 00000000..6f222b9c --- /dev/null +++ b/markata/static/js/pagination.js @@ -0,0 +1,213 @@ +// JavaScript-based infinite scroll pagination +class InfiniteScroll { + constructor(paginationData) { + this.currentPage = paginationData.page; + this.totalPages = paginationData.totalPages; + this.totalPosts = paginationData.totalPosts; + this.itemsShown = paginationData.itemsShown; + this.feedName = paginationData.feedName; + this.loading = false; + this.retryCount = 0; + this.maxRetries = 3; + + this.setupObserver(); + + // Check if we need to load more content initially + // (when initial content doesn't fill the viewport) + this.checkInitialFill(); + } + + setupObserver() { + // Create a persistent element at the bottom to observe + this.createPersistentTrigger(); + + this.observer = new IntersectionObserver((entries) => { + if (entries[0].isIntersecting && !this.loading) { + this.loadMore(); + } + }, { + rootMargin: '100px' + }); + + this.observeTrigger(); + } + + createPersistentTrigger() { + // Create a persistent trigger that won't be replaced + this.persistentTrigger = document.createElement('div'); + this.persistentTrigger.id = 'js-scroll-trigger'; + this.persistentTrigger.style.height = '1px'; + this.persistentTrigger.style.width = '100%'; + + // Insert it before the template trigger + const templateTrigger = document.getElementById('scroll-trigger'); + if (templateTrigger) { + templateTrigger.parentNode.insertBefore(this.persistentTrigger, templateTrigger); + } else { + // Fallback: add to end of feed container + const feed = document.getElementById('feed'); + if (feed) { + feed.appendChild(this.persistentTrigger); + 
} + } + } + + observeTrigger() { + if (this.persistentTrigger && this.observer) { + this.observer.observe(this.persistentTrigger); + } + } + + checkInitialFill() { + // Wait a frame for layout to complete + requestAnimationFrame(() => { + this.fillViewportIfNeeded(); + }); + } + + fillViewportIfNeeded() { + // If we're already loading, no more pages, or exceeded retries, stop + if (this.loading || this.currentPage >= this.totalPages || this.retryCount >= this.maxRetries) return; + + // Check if the trigger is visible in the viewport + // (meaning content doesn't fill the page) + if (this.isTriggerVisible()) { + this.loadMore().then((success) => { + if (success) { + // Reset retry count on success + this.retryCount = 0; + // After loading, check again if we need more + // Use requestAnimationFrame to wait for DOM update + requestAnimationFrame(() => { + this.fillViewportIfNeeded(); + }); + } + }); + } + } + + isTriggerVisible() { + if (!this.persistentTrigger) return false; + + const rect = this.persistentTrigger.getBoundingClientRect(); + const viewportHeight = window.innerHeight || document.documentElement.clientHeight; + + // Check if the trigger is within the viewport (with some margin) + return rect.top < viewportHeight + 100; + } + + async loadMore() { + if (this.currentPage >= this.totalPages) return false; + + this.loading = true; + this.showLoading(); + + try { + const nextPage = this.currentPage + 1; + const response = await fetch(`/${this.feedName}/${nextPage}/`); + + // Check if response is ok (status 200-299) + if (!response.ok) { + throw new Error(`HTTP error! 
status: ${response.status}`); + } + + const html = await response.text(); + + const parser = new DOMParser(); + const doc = parser.parseFromString(html, 'text/html'); + const newItems = doc.querySelectorAll('#feed li'); + const container = document.getElementById('feed'); + + if (!container) { + throw new Error('Feed container not found'); + } + + newItems.forEach(item => container.appendChild(item)); + + this.currentPage = nextPage; + this.itemsShown += newItems.length; + + // Update pagination info + this.updatePaginationInfo(); + + // Remove our persistent trigger if this was the last page + if (this.currentPage >= this.totalPages) { + if (this.persistentTrigger) { + this.persistentTrigger.remove(); + } + } + + return true; + + } catch (error) { + console.error('Failed to load more content:', error); + this.retryCount++; + + // Show error message if we've exceeded retries + if (this.retryCount >= this.maxRetries) { + this.showError('Failed to load more content. Please refresh the page.'); + } + + return false; + } finally { + this.loading = false; + this.hideLoading(); + } + } + + showLoading() { + const indicator = document.querySelector('.loading-indicator'); + if (indicator) indicator.style.display = 'flex'; + } + + hideLoading() { + const indicator = document.querySelector('.loading-indicator'); + if (indicator) indicator.style.display = 'none'; + } + + showError(message) { + const container = document.getElementById('feed'); + if (container) { + const errorDiv = document.createElement('div'); + errorDiv.className = 'error-message'; + errorDiv.textContent = message; + container.appendChild(errorDiv); + } + } + + updatePaginationInfo() { + const currentPageEl = document.getElementById('current-page'); + const itemsShownEl = document.getElementById('items-shown'); + + if (currentPageEl) currentPageEl.textContent = this.currentPage; + if (itemsShownEl) itemsShownEl.textContent = this.itemsShown; + } + + // Clean up observer on page unload + destroy() { + if 
(this.observer) { + this.observer.disconnect(); + } + } +} + +// Feature detection and initialization +if ('IntersectionObserver' in window && window.paginationData) { + let infiniteScroll; + + // Initialize when DOM is ready + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', () => { + infiniteScroll = new InfiniteScroll(window.paginationData); + }); + } else { + infiniteScroll = new InfiniteScroll(window.paginationData); + } + + // Clean up on page unload to prevent memory leaks + window.addEventListener('beforeunload', () => { + if (infiniteScroll) { + infiniteScroll.destroy(); + } + }); +} diff --git a/markata/templates/didyoumean_partial.html b/markata/templates/didyoumean_partial.html index 8ab727fe..59702a99 100644 --- a/markata/templates/didyoumean_partial.html +++ b/markata/templates/didyoumean_partial.html @@ -13,8 +13,6 @@ + +{% if has_next %} +
    +
    +{% endif %} \ No newline at end of file diff --git a/markata/templates/feed_partial.html b/markata/templates/feed_partial.html index d9a14deb..eb8d6f5c 100644 --- a/markata/templates/feed_partial.html +++ b/markata/templates/feed_partial.html @@ -1,12 +1,96 @@

    {{ title }}

    + {% if pagination_enabled %} +
    + Page {{ page }} of {{ total_pages }} + Showing {{ posts|length }} of {{ total_posts }} items +
    + {% endif %} + + {% if pagination_enabled and pagination_context.pagination_type == 'js' %} + + + {% endif %}
      - {% for post in feed.posts %} + {% for post in posts %} {% include card_template or feed.config.card_template or config.feeds.card_template %} {% endfor %}
    + + {% if pagination_enabled %} + + {# Manual pagination controls - shown for all types as fallback #} + {% include "pagination_controls.html" %} + + {% if pagination_context.pagination_type == 'htmx' %} + + {% if has_next %} +
    +
    + {% endif %} + + + + + + {% endif %} + + {% if pagination_context.pagination_type == 'js' %} + +
    + + + + + {% endif %} + + {% endif %}
    diff --git a/markata/templates/pagination_controls.html b/markata/templates/pagination_controls.html new file mode 100644 index 00000000..86423e82 --- /dev/null +++ b/markata/templates/pagination_controls.html @@ -0,0 +1,25 @@ +{# Manual pagination controls - reusable fragment #} +{# Used directly for manual pagination, and as fallback for js/htmx #} +
    + {% if prev_page %} + {% if page > 2 %} + + ← Previous + + {% endif %} + + + {{ page }} / {{ total_pages }} + + + {% if has_next %} + + Next → + + {% endif %} +
    diff --git a/pyproject.toml b/pyproject.toml index a1f46cc4..75240649 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,6 @@ dependencies = [ "pathspec", "pillow", "pluggy", - "polyfactory", "pydantic>=2.0", "pydantic_extra_types>=2.0", "pydantic_settings", diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 00000000..beb8440b --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,70 @@ +# Development Scripts + +This directory contains utility scripts for Markata developers. + +## HTMX Hash Management + +### `add_htmx_hash.py` + +A utility script to add HTMX integrity hashes to feeds plugin when new HTMX versions are released. + +**Usage:** +```bash +# Add a new HTMX version hash +python scripts/add_htmx_hash.py 2.0.8 + +# List all available versions and their hash status +python scripts/add_htmx_hash.py --list + +# Add hashes for all missing versions +python scripts/add_htmx_hash.py --add-all + +# Replace an existing hash without prompting +python scripts/add_htmx_hash.py 2.0.8 --replace + +# Show detailed output +python scripts/add_htmx_hash.py --list --verbose + +# Add all versions, skipping failed ones +python scripts/add_htmx_hash.py --add-all --skip-failed + +# Show help +python scripts/add_htmx_hash.py --help +``` + +**Features:** +- **Smart Hash Retrieval**: First tries GitHub API for official hashes, falls back to unpkg.com +- **Version Management**: Lists all available HTMX versions from GitHub releases +- **Batch Operations**: Add all missing versions with `--add-all` +- **Status Tracking**: See which versions have hashes and which are missing +- **Safety**: Prompts before replacing existing hashes (unless `--replace` used) +- **Verbose Mode**: Detailed output for debugging and monitoring + +**What it does:** +1. Fetches all HTMX versions from GitHub releases API +2. For single versions: Gets hash from GitHub API or downloads from unpkg.com and calculates SHA-256 +3. 
Updates `HTMX_INTEGRITY_HASHES` dictionary in `markata/plugins/feeds.py` +4. For batch operations: Processes all missing versions automatically + +**When to use:** +- **Single Version**: When a new HTMX version is released +- **List Mode**: To see current hash coverage and available versions +- **Batch Mode**: To populate hashes for many versions at once +- **Development**: When setting up a new development environment + +**Examples:** +```bash +# Quick check of current status +python scripts/add_htmx_hash.py --list + +# Add the latest version +python scripts/add_htmx_hash.py 2.0.7 + +# Populate all missing hashes (great for initial setup) +python scripts/add_htmx_hash.py --add-all + +# Add with verbose output to see what's happening +python scripts/add_htmx_hash.py 2.0.7 --verbose +``` + +This ensures that HTMX files downloaded by Markata are verified for integrity and provides developers with easy tools to maintain the hash database. \ No newline at end of file diff --git a/scripts/add_htmx_hash.py b/scripts/add_htmx_hash.py new file mode 100755 index 00000000..b92406a9 --- /dev/null +++ b/scripts/add_htmx_hash.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +""" +Development script to add HTMX integrity hashes to feeds plugin. + +This script should be used by developers when a new HTMX version is released. +It will: +1. Download/Get specified HTMX version +2. Calculate its SHA-256 hash +3. 
Add it to HTMX_INTEGRITY_HASHES dictionary in feeds.py + +Usage: + python scripts/add_htmx_hash.py 2.0.8 + python scripts/add_htmx_hash.py --list + python scripts/add_htmx_hash.py --add-all + python scripts/add_htmx_hash.py 1.9.10 --replace +""" + +import argparse +import hashlib +import json +import re +from pathlib import Path +from typing import Optional +from urllib.request import Request, urlopen +from urllib.error import URLError, HTTPError + + +def get_htmx_versions(): + """Get list of all available HTMX versions from GitHub releases API.""" + try: + url = "https://api.github.com/repos/bigskysoftware/htmx/releases" + request = Request(url, headers={"User-Agent": "Markata-Dev/1.0"}) + with urlopen(request, timeout=10) as response: + releases_data = json.loads(response.read().decode("utf-8")) + + # Extract version numbers from tag names (remove 'v' prefix) + versions = [] + for release in releases_data: + if "tag_name" in release and release["tag_name"].startswith("v"): + version = release["tag_name"][1:] # Remove 'v' prefix + versions.append(version) + + # Filter out duplicate and sort by semantic version + unique_versions = list(set(versions)) + unique_versions.sort( + key=lambda v: [int(x) for x in re.findall(r"\d+", v)], reverse=True + ) + + return unique_versions + except Exception as e: + print(f"Error fetching HTMX versions: {e}") + return [] + + +def get_htmx_hash_from_github(version: str) -> Optional[str]: + """Get HTMX hash directly from GitHub releases API.""" + try: + url = "https://api.github.com/repos/bigskysoftware/htmx/releases" + request = Request(url, headers={"User-Agent": "Markata-Dev/1.0"}) + with urlopen(request, timeout=10) as response: + releases_data = json.loads(response.read().decode("utf-8")) + + # Find release with matching version + for release in releases_data: + if release["tag_name"] == f"v{version}": + # Look for htmx.min.js asset + for asset in release.get("assets", []): + if asset["name"] == "htmx.min.js": + # Extract hash 
from digest (remove 'sha256:' prefix) + digest = asset.get("digest", "") + if digest.startswith("sha256:"): + return digest[7:] # Remove 'sha256:' prefix + break + + return None + except Exception as e: + print(f"Error fetching hash from GitHub: {e}") + return None + + +def add_htmx_hash(version: str, replace: bool = False, verbose: bool = False) -> bool: + """Add HTMX version hash to feeds.py. Returns True if successful.""" + # First try to get hash from GitHub API (more reliable) + sha256_hash = get_htmx_hash_from_github(version) + + if not sha256_hash: + # Fall back to downloading from unpkg.com + try: + url = f"https://unpkg.com/htmx.org@{version}/dist/htmx.min.js" + + if verbose: + print(f"Downloading HTMX {version} from {url}") + + request = Request(url, headers={"User-Agent": "Markata-Dev/1.0"}) + with urlopen(request, timeout=10) as response: + content = response.read() + sha256_hash = hashlib.sha256(content).hexdigest() + if verbose: + print(f"SHA-256 hash: {sha256_hash}") + except (URLError, HTTPError) as e: + print(f"Error: Failed to download HTMX: {e}") + return False + else: + if verbose: + print(f"Got HTMX {version} hash from GitHub API") + + try: + # Find and update the feeds.py file + project_root = Path(__file__).parent.parent + feeds_file = project_root / "markata" / "plugins" / "feeds.py" + + if not feeds_file.exists(): + print(f"Error: Could not find feeds.py at {feeds_file}") + return False + + with open(feeds_file, "r") as f: + file_content = f.read() + + # Find HTMX_INTEGRITY_HASHES dictionary + pattern = r"(HTMX_INTEGRITY_HASHES = \{[^}]+)}" + match = re.search(pattern, file_content, re.DOTALL) + + if not match: + print("Error: Could not find HTMX_INTEGRITY_HASHES in feeds.py") + return False + + # Add new hash + new_hash_entry = f' "{version}": "{sha256_hash}"' + existing_dict = match.group(1) + + # Check if version already exists + if f'"{version}":' in existing_dict: + if verbose: + print(f"Warning: HTMX version {version} already exists 
in hashes") + if not replace: + response = input("Replace existing hash? [y/N]: ") + if response.lower() != "y": + print("Cancelled.") + return False + + # Replace existing entry + new_dict = re.sub( + rf' "{version}": "[^"]*"', new_hash_entry, existing_dict + ) + else: + # Add new entry (before the closing brace) + new_dict = existing_dict.rstrip() + f",\n{new_hash_entry}" + + # Update the file + updated_content = file_content.replace(match.group(0), new_dict + "}") + + with open(feeds_file, "w") as f: + f.write(updated_content) + + print(f"āœ… Added HTMX {version} hash to feeds.py") + if verbose: + print(f"šŸ“ File updated: {feeds_file}") + return True + + except Exception as e: + print(f"Error: {e}") + return False + + +def add_all_htmx_versions(verbose: bool = False, skip_failed: bool = False) -> None: + """Add hashes for all available HTMX versions.""" + versions = get_htmx_versions() + if not versions: + print("Could not fetch HTMX versions") + return + + print(f"Found {len(versions)} HTMX versions") + + # Get existing versions to avoid duplicates + project_root = Path(__file__).parent.parent + feeds_file = project_root / "markata" / "plugins" / "feeds.py" + + with open(feeds_file, "r") as f: + file_content = f.read() + + pattern = r"HTMX_INTEGRITY_HASHES = \{([^}]+)}" + match = re.search(pattern, file_content, re.DOTALL) + existing_versions = set() + if match: + existing_matches = re.findall(r'"([^"]+)":', match.group(1)) + existing_versions = set(existing_matches) + + if verbose: + print(f"Existing versions: {sorted(existing_versions)}") + + # Filter out existing versions + new_versions = [v for v in versions if v not in existing_versions] + + if not new_versions: + print("All available versions already have hashes!") + return + + print(f"Adding {len(new_versions)} new versions...") + + success_count = 0 + for version in new_versions: + if verbose: + print(f"\nProcessing {version}...") + + success = add_htmx_hash(version, replace=True, verbose=False) + if 
success: + success_count += 1 + elif not skip_failed: + print(f"Failed to add {version}, stopping. Use --skip-failed to continue.") + break + + print(f"\nāœ… Successfully added {success_count}/{len(new_versions)} versions") + + +def list_htmx_versions(verbose: bool = False) -> None: + """List all available HTMX versions.""" + versions = get_htmx_versions() + if not versions: + print("Could not fetch HTMX versions") + return + + # Get existing versions + project_root = Path(__file__).parent.parent + feeds_file = project_root / "markata" / "plugins" / "feeds.py" + + with open(feeds_file, "r") as f: + file_content = f.read() + + pattern = r"HTMX_INTEGRITY_HASHES = \{([^}]+)}" + match = re.search(pattern, file_content, re.DOTALL) + existing_versions = set() + if match: + existing_matches = re.findall(r'"([^"]+)":', match.group(1)) + existing_versions = set(existing_matches) + + print("HTMX Versions:") + print("=" * 50) + + for version in versions[:20]: # Show first 20 to avoid too much output + status = "āœ…" if version in existing_versions else "āŒ" + print(f" {status} {version}") + + if len(versions) > 20: + print(f" ... 
and {len(versions) - 20} more versions") + + print( + f"\nSummary: {len(existing_versions)} versions have hashes, {len(versions) - len(existing_versions)} missing" + ) + + if verbose: + print(f"\nAll versions: {versions}") + print(f"Existing versions: {sorted(existing_versions)}") + print( + f"Missing versions: {[v for v in versions if v not in existing_versions]}" + ) + + +def main(): + parser = argparse.ArgumentParser( + description="Add HTMX integrity hash to feeds.py", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + parser.add_argument("version", nargs="?", help="HTMX version (e.g., 2.0.8, 1.9.10)") + + parser.add_argument( + "--list", + action="store_true", + help="List all available HTMX versions and their hash status", + ) + + parser.add_argument( + "--add-all", + action="store_true", + help="Add hashes for all missing HTMX versions", + ) + + parser.add_argument( + "--replace", action="store_true", help="Replace existing hash without prompting" + ) + + parser.add_argument("--verbose", action="store_true", help="Show detailed output") + + parser.add_argument( + "--skip-failed", + action="store_true", + help="Continue adding versions even if some fail (used with --add-all)", + ) + + args = parser.parse_args() + + if args.list: + list_htmx_versions(args.verbose) + elif args.add_all: + add_all_htmx_versions(args.verbose, args.skip_failed) + elif args.version: + success = add_htmx_hash(args.version, args.replace, args.verbose) + if not success: + exit(1) + else: + parser.print_help() + print("\nExamples:") + print(" python add_htmx_hash.py 2.0.8 # Add specific version") + print(" python add_htmx_hash.py --list # List all versions") + print( + " python add_htmx_hash.py --add-all # Add all missing versions" + ) + print(" python add_htmx_hash.py 2.0.8 --replace # Replace existing hash") + + +if __name__ == "__main__": + main() diff --git a/tests/test_feeds_security.py b/tests/test_feeds_security.py new file mode 100644 index 
00000000..e649e2e7 --- /dev/null +++ b/tests/test_feeds_security.py @@ -0,0 +1,200 @@ +import pytest +import tempfile +import shutil +from pathlib import Path +from unittest.mock import Mock, patch +import hashlib + +from markata.plugins.feeds import Feed, _sanitize_feed_slug, _download_htmx_if_needed +from markata import Markata + + +class TestSecurity: + """Test suite for security vulnerabilities in feeds plugin.""" + + def test_path_traversal_prevention(self): + """Test that path traversal attacks are prevented in feed slugs.""" + + # Malicious slugs that should be rejected + malicious_slugs = [ + "../../../etc/passwd", + "..\\..\\windows\\system32\\config\\sam", + "normal/../../../etc/passwd", + "normal\\..\\..\\windows\\system32", + "etc/passwd", + "C:\\Windows\\System32", + "/etc/shadow", + "", + ".", + "./hidden", + "hidden/.", + ] + + for slug in malicious_slugs: + with pytest.raises( + ValueError, match=r"(Invalid characters|cannot be empty)" + ): + _sanitize_feed_slug(slug) + + def test_safe_slug_validation(self): + """Test that safe slugs are allowed.""" + + safe_slugs = [ + "blog", + "my-feed", + "news_posts", + "test123", + "a", + "my_blog_posts_2023", + "feed-with-dashes", + ] + + for slug in safe_slugs: + result = _sanitize_feed_slug(slug) + assert result == slug + + def test_htmx_integrity_verification(self): + """Test that HTMX download verifies file integrity.""" + + # Mock the responses with wrong hash + mock_content = b"malicious javascript content" + mock_response = Mock() + mock_response.read.return_value = mock_content + + with patch("markata.plugins.feeds.urlopen", return_value=mock_response): + with patch("pathlib.Path.exists", return_value=False): + with patch("pathlib.Path.parent"): + with patch("pathlib.Path.write_bytes"): + mock_markata = Mock() + mock_markata.config.htmx_version = "1.9.10" + mock_markata.config.output_dir = "/tmp/test" + + with pytest.raises(RuntimeError, match="HTMX download failed"): + 
_download_htmx_if_needed(mock_markata) + + def test_htmx_timeout_protection(self): + """Test that HTMX download has timeout protection.""" + + mock_markata = Mock() + mock_markata.config.htmx_version = "1.9.10" + mock_markata.config.output_dir = "/tmp/test" + + # Mock urlopen to raise timeout + with patch( + "markata.plugins.feeds.urlopen", + side_effect=TimeoutError("Request timed out"), + ): + with pytest.raises(RuntimeError, match="HTMX download failed"): + _download_htmx_if_needed(mock_markata) + + def test_xss_prevention_in_template_context(self): + """Test that template context doesn't contain dangerous config data.""" + + # Create a feed with potentially dangerous config + dangerous_config = { + "pagination_type": "js", + "posts_per_page": 10, + "template": '', + "card_template": "dangerous-template.html", + "xss_payload": '', + "admin_password": "secret123", + "api_key": "sk-1234567890", + } + + # Safe config should only include essential pagination settings + safe_config = { + "pagination_type": dangerous_config["pagination_type"], + "posts_per_page": dangerous_config["posts_per_page"], + "template": dangerous_config["template"], + } + + # Verify only safe keys are included + for key in dangerous_config: + if key not in safe_config: + assert key not in safe_config, ( + f"Dangerous key '{key}' should not be in safe config" + ) + + def test_canonical_url_sanitization(self): + """Test that canonical URLs use sanitized slugs.""" + + mock_markata = Mock() + mock_markata.config.url = "https://example.com" + + # Test with safe slug + safe_slug = "my-blog-feed" + feed_config = Mock() + feed_config.slug = safe_slug + + feed = Feed(config=feed_config, markata=mock_markata) + + # The canonical URL should use the safe slug + expected_url = f"https://example.com/{safe_slug}/" + # This would be tested in actual template rendering + + def test_feed_file_path_security(self): + """Test that feed file paths cannot escape output directory.""" + + with 
tempfile.TemporaryDirectory() as temp_dir: + output_dir = Path(temp_dir) + + # Try to create a feed with malicious slug + malicious_slugs = [ + "../outside", + "normal/../../../etc/passwd", + "normal\\..\\..\\windows\\system32", + ] + + for malicious_slug in malicious_slugs: + with pytest.raises(ValueError): + _sanitize_feed_slug(malicious_slug) + + # Ensure no files can be created outside output directory + safe_slug = _sanitize_feed_slug("safe-feed") + file_path = output_dir / safe_slug / "index.html" + + # Verify path is within output directory + assert file_path.resolve().is_relative_to(output_dir.resolve()) + + def test_template_injection_prevention(self): + """Test that template injection is prevented in feed names.""" + + dangerous_names = [ + "{{7*7}}", # Template injection + "${7*7}", # Expression injection + "", + "javascript:void(0)", + "data:text/html,", + ] + + for dangerous_name in dangerous_names: + # These should be sanitized or rejected + sanitized = _sanitize_feed_slug(dangerous_name) + # Should either be rejected or sanitized to safe version + assert "{{" not in sanitized + assert "}}" not in sanitized + assert " Date: Thu, 22 Jan 2026 03:35:52 +0000 Subject: [PATCH 37/37] =?UTF-8?q?Bump=20version:=200.11.0.dev10=20?= =?UTF-8?q?=E2=86=92=200.11.0.dev11?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- markata/__about__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/markata/__about__.py b/markata/__about__.py index 1d454318..24d8e989 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.11.0.dev10" +__version__ = "0.11.0.dev11"