diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cfd649b..098baeff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,86 @@ # Markata Changelog +## 0.11.0 + +### CLI Configuration Overrides + +- Feat: Add `-s/--set` flag for runtime config overrides with dot notation (e.g., `-s 'style.theme=nord'`) +- Feat: Add `-c/--config` flag for alternate config files +- Feat: Add `-o/--output-dir` flag for output directory override +- Feat: Add support for `MARKATA_*` environment variables with nested config (e.g., `MARKATA_STYLE__THEME=nord`) +- Feat: Add `parse_set_options()` and `_deep_merge()` utilities for config handling +- Enable: Runtime theme switching and configuration without editing files + +### Performance Improvements + +- Perf: increased diskcache size limit to 5GB and reduced cull_limit to minimize expensive eviction operations (saves ~4s during cache culling) +- Perf: optimized feed hash generation to use lightweight post identifiers (slug + content_hash) instead of expensive `str(post.to_dict())` serialization (saves ~6s) +- Perf: feeds now cache expensive `feed.map()` calls during hash generation (~7.7s savings) +- Perf: feeds batch directory creation operations (~2s savings) +- Perf: feeds only read XSL files when they exist and need comparison +- Perf: to_json, service_worker, redirects, jinja_env, and post_template now only write files when content changes (prevents unnecessary file system modifications and downstream syncing) + +### Cache Invalidation Improvements + +- Fix: feeds now properly invalidate cache when post metadata changes (title, date, slug, published, description) +- Fix: feeds now properly invalidate cache when template files are modified +- Fix: post_template now tracks template file changes for cache invalidation +- Fix: redirects now invalidate cache when template files change +- Fix: jinja_md now includes post metadata and version in cache keys +- Fix: standardized cache keys across plugins to include `__version__` for proper 
invalidation on updates +- Fix: render_markdown now includes backend and extensions in cache key +- Fix: `auto_description` now strips wikilinks, HTML tags, markdown-it attributes (e.g. {.class-name}), Jinja template tags, admonitions (!!!, !!!+, ???, ???+), and HTML comments for cleaner descriptions +- Fix: `publish_html` now properly resolves custom `output_html` paths relative to `output_dir`, preventing files from being written to project root +- Perf: heading_link replaced expensive file I/O with `__version__` in cache key +- Feat: feeds now support atom feeds + +### Enhanced Wikilinks System + +- Feat: Added comprehensive configuration system for wikilinks resolution with priority-based scoring +- Feat: Implemented intelligent duplicate resolution that eliminates false warnings for hierarchical patterns (e.g., `tag/python` vs `python`) +- Fix: Fixed display text override syntax `[[page|Display Text]]` - now properly splits on first pipe and preserves display text +- Feat: Added support for anchor links `[[page#anchor]]` and complex syntax `[[folder/page#anchor|Display Text]]` +- Feat: Configurable priority rules allow custom scoring patterns for different content types +- Feat: Warning suppression patterns to reduce noise for expected duplicates (e.g., `tag/*`, `category/*`) +- Feat: Multiple resolution strategies: "priority" (default), "first", or "warn" + +### Template Utilities (Breaking for Plugin Authors) + +- **BREAKING**: Removed internal `get_template()` functions from `feeds.py` and `post_template.py` +- Feat: added centralized `get_template()`, `get_template_paths()`, and `get_templates_mtime()` to `jinja_env` plugin +- Feat: `get_template()` includes automatic caching with `@lru_cache` and smart fallback handling +- **Plugin authors**: Import from `markata.plugins.jinja_env` instead of using internal functions + ```python + from markata.plugins.jinja_env import get_template, get_templates_mtime + template = get_template(markata.jinja_env, 
"template.html") + ``` + +## 0.10.1 + +- Release: version bump + ## 0.10.0 +### Feed Pagination + +- Feat: Add feed pagination with three types: `htmx`, `manual`, `js` +- Feat: HTMX infinite scroll with partial loading +- Feat: JavaScript infinite scroll using Intersection Observer +- Feat: Manual page navigation with prev/next controls +- Feat: Configurable `items_per_page`, `pagination_type`, and `enabled` per feed + +### Security + +- Feat: SHA-256 integrity verification for HTMX downloads (25+ versions supported) +- Feat: Path traversal protection for feed slugs +- Feat: XSS prevention in templates using `|tojson` filter +- Feat: Fail securely if HTMX download fails (no CDN fallback) +- Feat: Comprehensive security test suite for feeds + +### Other + +- Feat: Improve feed name sanitization with Python identifier conversion +- Feat: Add pagination implementation guide documentation - Fix: `auto_description` now more accurately returns plain text, does not cut off words, and add an ellipsis. - Fix: article_html now typed such that it may be a dict without warning - publish_source now only supports using post models that include a dumps command, i.e. no longer frontmatter post objects diff --git a/docs/pagination-implementation-guide.md b/docs/pagination-implementation-guide.md new file mode 100644 index 00000000..82ba6fe8 --- /dev/null +++ b/docs/pagination-implementation-guide.md @@ -0,0 +1,357 @@ +--- +title: Pagination Implementation Guide +description: Guide for implementing pagination in Markata templates with manual, HTMX, and JavaScript options + +--- + +# Pagination Implementation Guide + +This guide provides clear instructions for implementing pagination in your Markata templates. The pagination system is integrated into the feeds plugin and supports three types: manual, HTMX, and JavaScript infinite scroll. 
+ +## Core Components + +### Feeds Plugin with Pagination +**Location:** `markata/plugins/feeds.py` + +The feeds plugin includes built-in pagination support with these features: +- Automatic pagination for any feed +- Three pagination types: manual, HTMX, JavaScript +- Configurable items per page +- SEO-friendly URL generation +- Template context variables + +### Pagination Plugin (Core Logic) +**Location:** `markata/plugins/pagination.py` + +Core pagination functionality that provides: +- Pagination configuration models +- URL generation helpers +- Template rendering context +- Support for all pagination types + +## Quick Setup + +### 1. Basic Configuration + +Add to your `markata.yaml`: + +```yaml +# Enable pagination globally (optional, per-feed config also available) +pagination: + default: + enabled: true + items_per_page: 20 + pagination_type: 'manual' # 'manual', 'htmx', 'js' + +# Or configure per feed +feeds: + blog: + template: "feed.html" + partial_template: "feed_partial.html" + enabled: true + items_per_page: 10 + pagination_type: 'htmx' +``` + +### 2. Template Variables + +All pagination templates receive these variables: + +```jinja2 +{{ markata }} # Markata instance +{{ feed }} # Current feed object +{{ posts }} # Posts for current page +{{ page }} # Current page number (1-based) +{{ total_pages }} # Total number of pages +{{ has_prev }} # Boolean: has previous page? +{{ has_next }} # Boolean: has next page? +{{ prev_page }} # Previous page number or null +{{ next_page }} # Next page number or null +{{ pagination_enabled }} # Boolean: is pagination enabled? +{{ pagination_config }} # PaginationConfig object +{{ feed_name }} # Feed slug/name +{{ pagination_js_url }} # URL to pagination config JS (when using JS pagination) +``` + +## Pagination Types + +### 1. 
Manual Pagination + +**Best for:** SEO, accessibility, traditional blogs +**Features:** +- Traditional click navigation +- Page numbers +- Previous/Next buttons +- Clean permanent URLs +- Works without JavaScript + +**Configuration:** +```yaml +feeds: + blog: + pagination_type: 'manual' + items_per_page: 10 + show_page_numbers: true + max_page_links: 7 +``` + +**Template Implementation:** + +The feeds plugin automatically handles manual pagination when `pagination_type: 'manual'`. Your template just needs to include pagination controls: + +```jinja2 +{% if pagination_enabled %} +{% set config = pagination_config %} + +
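+ {% if prev_page %} + {% if page > 2 %} + <a href="/{{ feed_name }}/page/{{ prev_page }}/"> + {% else %} + <a href="/{{ feed_name }}/"> + {% endif %} + ← Previous + </a> + {% endif %} + + <span> + {{ page }} / {{ total_pages }} + </span> + + {% if has_next %} + <a href="/{{ feed_name }}/page/{{ next_page }}/"> + Next → + </a> + {% endif %} +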
+{% endif %} +``` + +### 2. HTMX Pagination + +**Best for:** Progressive enhancement, modern UX with fallback +**Features:** +- Infinite scroll with 14KB HTMX library +- Progressive enhancement built-in +- SEO-friendly URLs +- Graceful JavaScript fallback + +**Configuration:** +```yaml +feeds: + blog: + pagination_type: 'htmx' + items_per_page: 15 + show_loading_skeleton: true + auto_load_threshold: 200 +``` + +**Template Implementation:** + +```jinja2 +{% if pagination_enabled and pagination_config.pagination_type == 'htmx' %} +{% if has_next %} +
+
+{% endif %} + + + + +{% endif %} +``` + +### 3. JavaScript Pagination + +**Best for:** Custom infinite scroll, zero external dependencies +**Features:** +- Custom infinite scroll using Intersection Observer +- Zero external dependencies +- AJAX content loading +- URL history management +- Loading indicators + +**Configuration:** +```yaml +feeds: + blog: + pagination_type: 'js' + items_per_page: 12 + show_loading_skeleton: true + auto_load_threshold: 300 + show_end_message: true +``` + +**Template Implementation:** + +```jinja2 +{% if pagination_enabled and pagination_config.pagination_type == 'js' %} + + + +
+ + +{% endif %} +``` + +## URL Structure + +The pagination system generates clean, SEO-friendly URLs: + +- **First page:** `/feed-name/` +- **Subsequent pages:** `/feed-name/page/2/`, `/feed-name/page/3/`, etc. +- **Partial files:** `/feed-name/partial/`, `/feed-name/page/2/partial/` + +## Static Assets + +### Required Files + +The pagination system includes these static files (automatically created): + +```bash +markata/static/js/ +├── pagination.js # JavaScript infinite scroll module +├── pagination-config.js # Generated config (JS pagination only) +└── htmx.org@1.9.10.min.js # Secure HTMX download + +markata/static/css/ +└── pagination.css # Pagination styles and animations +``` + +## Advanced Configuration + +### Per-Feed Customization + +```yaml +feeds: + blog: + enabled: true + pagination_type: 'manual' + items_per_page: 8 + show_page_numbers: true + max_page_links: 5 + show_loading_skeleton: false + auto_load_threshold: 100 + show_end_message: false + + news: + enabled: true + pagination_type: 'htmx' + items_per_page: 20 + show_loading_skeleton: true + auto_load_threshold: 200 + show_end_message: true + + portfolio: + enabled: true + pagination_type: 'js' + items_per_page: 12 + show_loading_skeleton: true + auto_load_threshold: 300 + show_end_message: true +``` + +### Template Includes + +Create reusable template components: + +**`includes/pagination_info.html`:** +```jinja2 +{% if pagination_enabled %} +
+ Page {{ page }} of {{ total_pages }} + Showing {{ posts|length }} items +
+{% endif %} +``` + +**`includes/post_card.html`:** +```jinja2 +
+

{{ post.title }}

+ {% if post.date %} + + {% endif %} + {% if post.description %} +

{{ post.description }}

+ {% endif %} +
+``` + +Then in your main template: +```jinja2 +{% include "includes/pagination_info.html" %} + +
+ {% for post in posts %} + {% include "includes/post_card.html" %} + {% endfor %} +
+``` + +## CLI Helper + +Get example configuration: + +```bash +markata pagination config-example +``` + +This outputs a ready-to-use configuration block for your `markata.yaml`. + +## Testing Your Implementation + +1. **Manual Pagination:** Verify page numbers and Previous/Next links work +2. **HTMX Pagination:** Test infinite scroll and JavaScript fallback +3. **JavaScript Pagination:** Verify auto-loading and URL updates +4. **SEO:** Check that each page has unique titles and meta tags +5. **Accessibility:** Test keyboard navigation and screen readers + +## Troubleshooting + +### Common Issues + +**Pagination not showing:** +- Ensure `enabled: true` is set for your feed +- Check that pagination plugin loads before feeds plugin + +**HTMX not working:** +- Verify HTMX script is loaded +- Check that partial template exists and is accessible + +**JavaScript errors:** +- Ensure browser supports Intersection Observer +- Check console for fetch API errors + +**URL issues:** +- Verify your web server supports clean URLs +- Check that page 1 redirects work correctly + +### Debug Mode + +Add this to templates to debug pagination data: + +```jinja2 +{% if markata.config.debug %} +
{{ pagination_config | pprint }}
+
Page: {{ page }}, Total: {{ total_pages }}
+
Has Prev: {{ has_prev }}, Has Next: {{ has_next }}
+{% endif %} +``` + +This comprehensive guide should help you implement any pagination type in your Markata templates. Choose the pagination type that best fits your use case and customize templates to match your site's design. \ No newline at end of file diff --git a/justfile b/justfile index 24edbf23..2d98df77 100644 --- a/justfile +++ b/justfile @@ -34,6 +34,7 @@ lint: build-docs: #!/usr/bin/env bash set -euxo pipefail + . ./.venv/bin/activate markata build serve: @@ -44,7 +45,7 @@ serve: ruff-fix: #!/usr/bin/env bash set -euxo pipefail - ruff check markata --fix + uv run ruff check markata --fix lint-test: lint cov diff --git a/markata.toml b/markata.toml index 04a9f1eb..55a7024c 100644 --- a/markata.toml +++ b/markata.toml @@ -28,6 +28,9 @@ markdown_backend = 'markdown-it-py' default_cache_expire = 1209600 # subroute = "docs" +# HTMX version for pagination +htmx_version = "2.0.8" + # set the subroute if you are deploying to a subroute of a site # make sure you enable the subroute plugin # subroute = "docs" @@ -292,5 +295,39 @@ plugin = "markata.plugins.md_it_wikilinks:wikilinks_plugin" config = { markata = "markata" } [markata.glob] -glob_patterns = ["docs/**/*.md","CHANGELOG.md"] +glob_patterns = [ +"docs/**/*.md", +"pages/**/*.md", +"CHANGELOG.md", +] use_gitignore = true + +[[markata.feeds]] +slug = 'tag/htmx' +filter = "date<=today and published==True" +sort = "date" +reverse = true +description = 'Test HTMX pagination feed' +enabled = true +items_per_page = 2 +pagination_type = 'htmx' + +[[markata.feeds]] +slug = 'tag/manual' +filter = "date<=today and published==True" +sort = "date" +reverse = true +description = 'Test manual pagination feed' +enabled = true +items_per_page = 3 +pagination_type = 'manual' + +[[markata.feeds]] +slug = 'tag/js' +filter = "date<=today and published==True" +sort = "date" +reverse = true +description = 'Test JS pagination feed' +enabled = true +items_per_page = 2 +pagination_type = 'js' diff --git a/markata/__about__.py 
b/markata/__about__.py index 61fb31ca..24d8e989 100644 --- a/markata/__about__.py +++ b/markata/__about__.py @@ -1 +1 @@ -__version__ = "0.10.0" +__version__ = "0.11.0.dev11" diff --git a/markata/__init__.py b/markata/__init__.py index 2d176e26..09c9458a 100644 --- a/markata/__init__.py +++ b/markata/__init__.py @@ -59,6 +59,7 @@ ] DEFAULT_HOOKS = [ + "markata.plugins.mermaid", "markata.plugins.didyoumean", "markata.plugins.skip", "markata.plugins.md_it_wikilinks", @@ -123,7 +124,13 @@ class HooksConfig(pydantic.BaseModel): class Markata: - def __init__(self: "Markata", console: Console = None, config=None) -> None: + def __init__( + self: "Markata", + console: Console = None, + config=None, + config_overrides: Optional[Dict[str, Any]] = None, + config_file: Optional[Path] = None, + ) -> None: self.__version__ = __version__ self.stages_ran = set() self.threded = False @@ -134,6 +141,11 @@ def __init__(self: "Markata", console: Console = None, config=None) -> None: self.MARKATA_CACHE_DIR.mkdir(exist_ok=True) self._pm = pluggy.PluginManager("markata") self._pm.add_hookspecs(hookspec.MarkataSpecs) + + # Store config overrides for later use in load_config hook + self._config_overrides = config_overrides or {} + self._config_file = config_file + if config is not None: self.config = config with self.cache as cache: @@ -144,7 +156,12 @@ def __init__(self: "Markata", console: Console = None, config=None) -> None: if config is not None: raw_hooks = config else: - raw_hooks = standard_config.load("markata") + raw_hooks = standard_config.load( + "markata", + project_home=config_file.parent if config_file else ".", + overrides=config_overrides or {}, + config_file=config_file, + ) self.hooks_conf = HooksConfig.parse_obj(raw_hooks) try: default_index = self.hooks_conf.hooks.index("default") @@ -172,7 +189,12 @@ def cache(self: "Markata") -> Cache: # FanoutCache(self.MARKATA_CACHE_DIR, statistics=True) if self._cache is not None: return self._cache - self._cache = 
Cache(self.MARKATA_CACHE_DIR, statistics=True) + self._cache = Cache( + self.MARKATA_CACHE_DIR, + statistics=True, + size_limit=5 * 1024**3, # 5GB to reduce culling frequency + cull_limit=10, # Evict fewer entries at a time (default is 100) + ) self._cache.expire() return self._cache @@ -203,7 +225,13 @@ def __getattr__(self: "Markata", item: str) -> Any: f"Running to [purple]{stage_to_run_to}[/] to retrieve [purple]{item}[/]" ) self.run(stage_to_run_to) - return getattr(self, item) + # Check __dict__ directly to avoid infinite recursion + if item in self.__dict__: + return self.__dict__[item] + else: + raise AttributeError( + f"'Markata' object has no attribute '{item}' after running {stage_to_run_to}" + ) elif item == "precache": return self._precache or {} else: diff --git a/markata/plugins/auto_description.py b/markata/plugins/auto_description.py index 5046e507..45b1e94c 100644 --- a/markata/plugins/auto_description.py +++ b/markata/plugins/auto_description.py @@ -93,6 +93,7 @@ from bs4 import MarkupResemblesLocatorWarning +from markata import __version__ from markata.hookspec import hook_impl warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) @@ -110,9 +111,33 @@ def get_description(article: "Post") -> str: Uses markdown-it-py to parse the markdown and extracts text content from all nodes. Strips out any HTML tags, returning only plain text. Properly handles markdown links and formatting. """ + import re + from bs4 import BeautifulSoup from markdown_it import MarkdownIt + content = article.content + + # Remove admonitions (e.g., !!!, !!!+, ???, ???+) + content = re.sub(r'^[!?]{3}\+? 
 .*?$', '', content, flags=re.MULTILINE) + + # Remove CSS class attributes {.class-name} + content = re.sub(r'\{\.[\w\-]+\}', '', content) + + # Remove Jinja template tags {% %} and {{ }} + content = re.sub(r'\{%.*?%\}', '', content, flags=re.DOTALL) + content = re.sub(r'\{\{.*?\}\}', '', content, flags=re.DOTALL) + + # Remove wikilinks [[link]] or [[link|text]] + content = re.sub(r'\[\[([^\]|]+)(?:\|([^\]]+))?\]\]', lambda m: m.group(2) if m.group(2) else m.group(1), content) + + # Remove HTML comments + content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL) + + # Remove HTML tags before markdown parsing + soup = BeautifulSoup(content, "html.parser") + content = soup.get_text(separator=" ") + def extract_text(tokens): text_chunks = [] for token in tokens: @@ -124,14 +149,15 @@ def extract_text(tokens): return " ".join(text_chunks) md = MarkdownIt("commonmark") - tokens = md.parse(article.content) + tokens = md.parse(content) # Recursively extract visible text from all tokens description = extract_text(tokens) - # Remove any HTML tags using BeautifulSoup - soup = BeautifulSoup(description, "html.parser") - plain_text = soup.get_text(separator=" ", strip=True) - return plain_text + + # Clean up excessive whitespace + description = re.sub(r'\s+', ' ', description).strip() + + return description def set_description( @@ -147,10 +173,11 @@ def set_description( the configured descriptions for the article. 
""" key = markata.make_hash( - "auto_description", + "auto_description2", article.content, plugin_text, config, + __version__, ) description_from_cache = markata.cache.get(key) diff --git a/markata/plugins/base_cli.py b/markata/plugins/base_cli.py index 00a3b722..8b53eeee 100644 --- a/markata/plugins/base_cli.py +++ b/markata/plugins/base_cli.py @@ -44,6 +44,42 @@ --serve Start development server --profile Profile the build process --debug Enable debug mode +-c, --config Path to alternate config file +-o, --output-dir Override output directory +-s, --set Set config values (key=value format) +``` + +### Configuration Overrides + +Override configuration at runtime: + +```bash +# Use alternate config file +markata build -c themes/catppuccin.toml + +# Override output directory +markata build -o dist/theme-everforest + +# Set multiple config values +markata build -s output_dir=dist -s style.theme=nord + +# Combine multiple overrides +markata build -c base.toml -s output_dir=custom -s style.theme=gruvbox +``` + +### Environment Variable Overrides + +All config can be overridden with environment variables prefixed with `MARKATA_`: + +```bash +# Override output directory +MARKATA_OUTPUT_DIR=dist markata build + +# Override theme +MARKATA_STYLE__THEME=nord markata build + +# Use double underscore for nested config +MARKATA_STYLE__THEME=catppuccin MARKATA_OUTPUT_DIR=dist/catppuccin markata build ``` ### List Command @@ -110,7 +146,9 @@ import warnings from pathlib import Path from typing import TYPE_CHECKING +from typing import Any from typing import Callable +from typing import Dict from typing import List from typing import Literal from typing import Optional @@ -150,6 +188,51 @@ def make_pretty() -> None: ) +def parse_set_options(set_args: List[str]) -> Dict[str, Any]: + """Parse --set key=value arguments into a nested config dict. 
+ + Supports dot notation for nested keys: + - output_dir=dist -> {"output_dir": "dist"} + - style.theme=nord -> {"style": {"theme": "nord"}} + """ + config = {} + for arg in set_args: + if "=" not in arg: + raise ValueError(f"Invalid --set format: {arg}. Expected key=value") + + key, value = arg.split("=", 1) + keys = key.split(".") + + # Navigate/create nested dict structure + current = config + for k in keys[:-1]: + if k not in current: + current[k] = {} + current = current[k] + + # Set the value, attempting type conversion + final_key = keys[-1] + # Try to parse as JSON for complex types + try: + import json + + current[final_key] = json.loads(value) + except (json.JSONDecodeError, ValueError): + # Keep as string if not valid JSON + current[final_key] = value + + return config + + +def _deep_merge(target: Dict, source: Dict) -> None: + """Deep merge source dict into target dict.""" + for key, value in source.items(): + if key in target and isinstance(target[key], dict) and isinstance(value, dict): + _deep_merge(target[key], value) + else: + target[key] = value + + @hook_impl() def cli(app: typer.Typer, markata: "Markata") -> None: """ @@ -309,6 +392,24 @@ def build( "--pdb", ), profile: bool = True, + config_file: Optional[Path] = typer.Option( + None, + "-c", + "--config", + help="Path to alternate config file", + ), + output_dir: Optional[str] = typer.Option( + None, + "-o", + "--output-dir", + help="Override output directory", + ), + set_config: List[str] = typer.Option( + [], + "-s", + "--set", + help="Set config value (key=value, supports dot notation)", + ), ) -> None: """ Markata's primary way of building your site for production. 
@@ -319,6 +420,59 @@ def build( markata build ``` + ## Configuration Overrides + + Override configuration at runtime using multiple methods: + + ### Alternate Config File + Use a different config file with `-c` or `--config`: + ``` bash + markata build -c themes/catppuccin.toml + ``` + + ### Output Directory + Override the output directory with `-o` or `--output-dir`: + ``` bash + markata build -o dist/theme-everforest + ``` + + ### Generic Config Override + Set any config value using `-s` or `--set` with dot notation: + ``` bash + # Single value + markata build -s output_dir=dist + + # Nested config + markata build -s style.theme=nord + + # Multiple values + markata build -s output_dir=dist -s style.theme=catppuccin + + # Complex values (use JSON) + markata build -s 'nav={"home":"/","docs":"/docs"}' + ``` + + ### Environment Variables + Override any config with environment variables: + ``` bash + # Simple value + MARKATA_OUTPUT_DIR=dist markata build + + # Nested value (use double underscore) + MARKATA_STYLE__THEME=nord markata build + + # Multiple values + MARKATA_OUTPUT_DIR=dist MARKATA_STYLE__THEME=gruvbox markata build + ``` + + ### Combining Overrides + All override methods can be combined (applied in order: file -> env -> cli): + ``` bash + MARKATA_STYLE__THEME=nord markata build -c base.toml -s output_dir=custom + ``` + + ## Debugging + If you are having an issue and want to pop immediately into a debugger upon failure you can pass the `--pdb` flag to the build command. 
@@ -355,21 +509,51 @@ def build( if pretty: make_pretty() + # Save console reference before potential reinit + console = markata.console + if quiet: - markata.console.quiet = True + console.quiet = True if verbose: - markata.console.print("console options:", markata.console.options) + console.print("console options:", console.options) + + # Build config overrides from CLI arguments + config_overrides = {} + + # Add output_dir if specified + if output_dir: + config_overrides["output_dir"] = output_dir + + # Parse and merge --set options + if set_config: + set_overrides = parse_set_options(set_config) + # Deep merge set_overrides into config_overrides + _deep_merge(config_overrides, set_overrides) + + # Reinitialize markata with overrides if any were provided + if config_file or config_overrides: + from markata import Markata + + # Create a new instance with overrides + markata_instance = Markata( + console=console, + config_file=config_file, + config_overrides=config_overrides, + ) + else: + # Use the existing instance + markata_instance = markata if not profile: - markata.config.profiler.should_profile = False + markata_instance.config.profiler.should_profile = False if should_pdb: - pdb_run(markata.run) + pdb_run(markata_instance.run) else: - markata.console.log("[purple]starting the build") - markata.run() + markata_instance.console.log("[purple]starting the build") + markata_instance.run() @app.command() def list( diff --git a/markata/plugins/config_model.py b/markata/plugins/config_model.py index e7ce5a50..8baad2b2 100644 --- a/markata/plugins/config_model.py +++ b/markata/plugins/config_model.py @@ -226,7 +226,16 @@ def config_model(markata: "Markata") -> None: @register_attr("config") def load_config(markata: "Markata") -> None: if "config" not in markata.__dict__.keys(): - config = standard_config.load("markata") + # Get overrides from markata instance if available + config_overrides = getattr(markata, "_config_overrides", {}) + config_file = 
getattr(markata, "_config_file", None) + + config = standard_config.load( + "markata", + project_home=config_file.parent if config_file else ".", + overrides=config_overrides, + config_file=config_file, + ) if config == {}: markata.config = markata.Config() else: diff --git a/markata/plugins/feeds.py b/markata/plugins/feeds.py index 6e04763c..f52a2f84 100644 --- a/markata/plugins/feeds.py +++ b/markata/plugins/feeds.py @@ -188,16 +188,19 @@ """ +from __future__ import annotations + import datetime +import re import shutil import textwrap import warnings -from functools import lru_cache from pathlib import Path from typing import TYPE_CHECKING from typing import Any from typing import List from typing import Optional +from urllib.request import urlopen import jinja2 import pydantic @@ -205,29 +208,84 @@ from jinja2 import Template from jinja2 import Undefined from pydantic import ConfigDict +from pydantic import Field from pydantic import field_validator +from rich.console import Console from rich.jupyter import JupyterMixin from rich.pretty import Pretty from rich.table import Table -from markata import Markata from markata import __version__ from markata import background -from markata.errors import DeprecationWarning from markata.hookspec import hook_impl from markata.hookspec import register_attr +from markata.plugins.jinja_env import get_template +from markata.plugins.jinja_env import get_templates_mtime if TYPE_CHECKING: from frontmatter import Post - from rich.console import Console + + from markata import Markata + + +def to_pythonic_identifier(name: str) -> str: + """ + Convert a string to a valid Python identifier. + + This function handles various problematic characters that might appear + in feed names or slugs, making them suitable for use as Python attribute + names and dictionary keys. 
+ + Rules applied: + - Replace spaces, slashes, dots, and other non-alphanumeric characters with underscores + - Convert to lowercase + - Remove leading/trailing underscores + - Ensure the result starts with a letter or underscore + - Collapse multiple consecutive underscores to a single one + + Examples: + 'project-gallery' -> 'project_gallery' + 'tag/htmx' -> 'tag_htmx' + 'My Feed Name' -> 'my_feed_name' + '123start' -> '_123start' + """ + if not name: + return "_unnamed" + + # Replace non-alphanumeric characters (except underscores) with underscores + pythonic = re.sub(r"[^a-zA-Z0-9_]", "_", str(name)) + + # Convert to lowercase + pythonic = pythonic.lower() + + # Collapse multiple consecutive underscores + pythonic = re.sub(r"_+", "_", pythonic) + + # Remove leading and trailing underscores + pythonic = pythonic.strip("_") + + # Ensure it starts with a letter or underscore (not a digit) + if pythonic and pythonic[0].isdigit(): + pythonic = "_" + pythonic + + # Handle empty result or result that became empty after processing + if not pythonic: + pythonic = "_unnamed" + + return pythonic class SilentUndefined(Undefined): + """A Jinja2 Undefined subclass that silently returns empty string on errors.""" + def _fail_with_undefined_error(self, *args, **kwargs): return "" -class MarkataFilterError(RuntimeError): ... +class MarkataFilterError(RuntimeError): + """Raised when a feed filter expression fails.""" + + ... 
class FeedConfig(pydantic.BaseModel, JupyterMixin): @@ -243,6 +301,8 @@ class FeedConfig(pydantic.BaseModel, JupyterMixin): tail: Optional[int] = None rss: bool = True sitemap: bool = True + atom: bool = True + atom_template: str = "atom.xml" # feed_groups: Dict[str, List[str]] = Field(default_factory=dict) # sidebar_feeds: List[str] = Field(default_factory=list) card_template: str = "card.html" @@ -252,6 +312,12 @@ class FeedConfig(pydantic.BaseModel, JupyterMixin): sitemap_template: str = "sitemap.xml" xsl_template: str = "rss.xsl" + # Pagination configuration + enabled: bool = False + items_per_page: int = 10 + pagination_type: str = "htmx" # htmx, manual, js + per_page: int = 10 # backwards compatibility + model_config = ConfigDict( validate_assignment=True, # Config model arbitrary_types_allowed=True, @@ -266,11 +332,11 @@ class FeedConfig(pydantic.BaseModel, JupyterMixin): @classmethod def default_name(cls, v, info) -> str: if v: - return v + return to_pythonic_identifier(str(v)) slug = info.data.get("slug") if not slug: raise ValueError("Either name or slug must be provided") - return str(slug).replace("-", "_") + return to_pythonic_identifier(str(slug)) @field_validator("slug", mode="before") @classmethod @@ -287,7 +353,7 @@ def __rich_console__(self) -> "Console": return self.markata.console @property - def __rich__(self) -> Pretty: + def __rich__(self): return lambda: Pretty(self) @@ -297,8 +363,9 @@ class Feed(pydantic.BaseModel, JupyterMixin): ## Usage ``` python - from markata import Markata - m = Markata() + if not TYPE_CHECKING: + from markata import Markata + m = Markata() # access posts for a feed m.feeds.docs.posts @@ -309,7 +376,7 @@ class Feed(pydantic.BaseModel, JupyterMixin): """ config: FeedConfig - markata: Markata = pydantic.Field(exclude=True) + markata: Any = Field(exclude=True) model_config = ConfigDict( validate_assignment=False, @@ -328,6 +395,22 @@ def name(self) -> str: @property def posts(self): + # Get posts from instance state or 
compute normally + return self._get_posts() + + def _get_posts(self, override_posts=None): + """ + Get posts with optional override for pagination. + + Args: + override_posts: If provided, returns these posts instead of computing + + Returns: + PrettyList of posts + """ + if override_posts is not None: + return PrettyList(override_posts) + posts = self.map("post") if self.config.head is not None and self.config.tail is not None: head_posts = posts[: self.config.head] @@ -390,6 +473,8 @@ def dump_bytecode(self, bucket): class FeedsConfig(pydantic.BaseModel): feeds: List[FeedConfig] = [FeedConfig(slug="archive")] + htmx_version: str = "2.0.8" + skip_htmx_integrity_check: bool = False @property def jinja_env(self): @@ -427,10 +512,313 @@ def __rich__(self) -> Pretty: @hook_impl(tryfirst=True) +@register_attr("config_models") def config_model(markata: Markata) -> None: markata.config_models.append(FeedsConfig) +@hook_impl(tryfirst=True) +def htmx_config_model(markata: Markata) -> None: + """Register HTMX configuration model with validation.""" + + class HtmxConfig(pydantic.BaseModel): + version: str = "2.0.8" + + model_config = ConfigDict( + validate_assignment=True, + extra="forbid", + ) + + markata.config_models.append(HtmxConfig) + + +@hook_impl +def configure(markata: Markata) -> None: + """ + Configure feeds during configuration phase. + """ + _download_htmx_if_needed(markata) + _copy_pagination_static_files(markata, Path(markata.config.output_dir)) + + +def _download_htmx_if_needed(markata: Markata) -> None: + """ + Download HTMX library to static directory if needed with integrity verification. 
+ """ + import hashlib + from urllib.error import HTTPError + from urllib.error import URLError + from urllib.request import Request + + htmx_version = markata.config.htmx_version + htmx_filename = "htmx.min.js" + htmx_static_path = Path(markata.config.output_dir) / "static" / "js" / htmx_filename + htmx_url = f"https://unpkg.com/htmx.org@{htmx_version}/dist/htmx.min.js" + + # Known SHA-256 hashes for HTMX versions + HTMX_INTEGRITY_HASHES = { + "1.9.10": "b3bdcf5c741897a53648b1207fff0469a0d61901429ba1f6e88f98ebd84e669e", + "2.0.8": "22283ef68cb7545914f0a88a1bdedc7256a703d1d580c1d255217d0a50d31313", + "2.0.7": "60231ae6ba9db3825eb15a261122d5f55921c4d53b66bf637dc18b4ee27c79f9", + "2.0.6": "b6768eed4f3af85b73a75054701bd60e17cac718aef2b7f6b254e5e0e2045616", + "2.0.5": "f601807715bde32e458b73821e16c5641a3d90dfb670f6ebd986f128b8222fcf", + "2.0.4": "e209dda5c8235479f3166defc7750e1dbcd5a5c1808b7792fc2e6733768fb447", + "2.0.3": "491955cd1810747d7d7b9ccb936400afb760e06d25d53e4572b64b6563b2784e", + "2.0.2": "e1746d9759ec0d43c5c284452333a310bb5fd7285ebac4b2dc9bf44d72b5a887", + "2.0.1": "6d4aaa4b0d3e8b4c91f8d97b92a361a19b1bd4544dea3f668fdc3e62a63995df", + "2.0.0": "0fc57ba0e655504d282bb6ec1c3d89240cde9f2ce1c393d5b38a95c5bc6da875", + "1.9.12": "449317ade7881e949510db614991e195c3a099c4c791c24dacec55f9f4a2a452", + "1.9.11": "d15107cc7f040a9e83b1b66176fd927ad40b5e0255813a03f8ccfeed46ee42b0", + "1.9.9": "96a334a9570a382cf9c61a1f86d55870ba1c65e166cc5bcae98ddd8cdabeb886", + "1.9.8": "c4fce4dc5cc9c8c3c9bf1aa788d54bb2cb25cd27114eb06551494ff61c30d6fb", + "1.9.7": "30c95cb75e7f7c9471c2bf43fa3db0a30a39077764295b15c405869fed7e5764", + "1.9.6": "cbb723c305cf6d6315c890909815523588509e2e092a59f8cfc4a885829689d5", + "1.9.5": "76a9887f1ce3bf8f88bea3b327f1e74b9d9b42e1dd9cb8237a87a74261d5d042", + "1.9.4": "5c88af44013df62fde8a5e4fdf524d8a16834a28b1d15e34ae0994ac27cd4c7e", + "1.9.3": "8f567d21cbe0553643db48866b2377a3bbb9247f8d924428002c2b847f28b23c", + "1.9.2": 
"fd346e9c8639d4624893fc455f2407a09b418301736dd18ebbb07764637fb478", + "1.9.1": "d7bff1d0f45e3418fa820d8a6f0de1ca5e87562f218a0f06add08652c7691a9c", + "1.9.0": "97df3adfbf23b873d9a3a80f7143d801a32604ba29de9a33f21a92a171076aa8", + "1.8.5": "705fb60063bf5270b7077409b848b57ea24d2277b806aa04efea513287bf63a6", + "1.8.4": "df72edb141a16578945a0356c8a6a37239015251962071639b99b0184691ed1d", + "1.8.3": "df811b5d27b3dddfec9a858b437b0c7302a56959450f0f9c133ef356c25fcf1c", + "1.8.2": "91e7fb193c4a6a5d3bb56ed0a7007933664e7803da389a696de61147a6f66058", + "1.8.1": "1a1c942f7bb50dcc2198b2f3c6cc64199332e32a5ba08e7bd2215aa0a1966a55", + "1.8.0": "914e05e274362f2e166fc5a8cf6272e2042d9b9e50647678c64c579dcb5fa441", + } + + expected_hash = HTMX_INTEGRITY_HASHES.get(htmx_version) + if not expected_hash: + if markata.config.skip_htmx_integrity_check: + markata.console.warn( + f"No integrity hash available for HTMX version {htmx_version}, skipping verification" + ) + expected_hash = None + else: + raise ValueError( + f"No integrity hash available for HTMX version {htmx_version}. " + f"You can add 'skip_htmx_integrity_check: true' to your config to skip verification, " + f"or add the hash to HTMX_INTEGRITY_HASHES in markata/plugins/feeds.py" + ) + + # Download if file doesn't exist + if not htmx_static_path.exists(): + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=ResourceWarning) + + # Ensure static/js directory exists + htmx_static_path.parent.mkdir(parents=True, exist_ok=True) + + # Download HTMX with timeout and integrity verification + request = Request(htmx_url, headers={"User-Agent": "Markata/1.0"}) + with urlopen(request, timeout=10) as response: + content = response.read() + + # Verify content integrity if hash is available + if expected_hash: + actual_hash = hashlib.sha256(content).hexdigest() + if actual_hash != expected_hash: + raise ValueError( + f"HTMX integrity check failed. 
Expected: {expected_hash}, Got: {actual_hash}" + ) + + htmx_static_path.write_bytes(content) + + verification_status = ( + "verified" if expected_hash else "without verification" + ) + markata.console.print( + f"Downloaded HTMX {htmx_version} to {htmx_static_path} ({verification_status})" + ) + + except (URLError, HTTPError, ValueError) as e: + markata.console.error(f"Failed to download HTMX: {e}") + # Critical security: no fallback to CDN + raise RuntimeError( + f"HTMX download failed: {e}. Cannot proceed without verified HTMX." + ) + except Exception as e: + markata.console.error(f"Unexpected error downloading HTMX: {e}") + raise RuntimeError(f"HTMX download failed: {e}") + + return True + + +def _generate_pagination_js( + markata: Markata, pagination_config: dict, output_dir: Path +) -> str: + """ + Generate JavaScript file for pagination and return its path. + + Args: + markata: Markata instance + pagination_config: Pagination configuration data + output_dir: Output directory for JS file + + Returns: + Path to generated JS file relative to output_dir + """ + import json + + js_content = f"""// Generated JavaScript for pagination +window.paginationData = {json.dumps(pagination_config)}; +""" + + js_dir = output_dir / "static" / "js" + js_dir.mkdir(parents=True, exist_ok=True) + + js_file = js_dir / "pagination-config.js" + js_file.write_text(js_content) + + return "/static/js/pagination-config.js" + + +def _copy_pagination_static_files(markata: Markata, output_dir: Path) -> None: + """ + Copy pagination static files (JS and CSS) from markata package to output directory. 
+ + Args: + markata: Markata instance + output_dir: Output directory for static files + """ + import importlib.resources + + # Get the markata static directory + static_package = importlib.resources.files("markata") / "static" + + # Copy pagination.js + js_src = static_package / "js" / "pagination.js" + js_dst_dir = output_dir / "static" / "js" + js_dst_dir.mkdir(parents=True, exist_ok=True) + js_dst = js_dst_dir / "pagination.js" + + if js_src.is_file(): + js_dst.write_text(js_src.read_text()) + markata.console.print(f"Copied pagination.js to {js_dst}") + + # Copy pagination.css + css_src = static_package / "css" / "pagination.css" + css_dst_dir = output_dir / "static" / "css" + css_dst_dir.mkdir(parents=True, exist_ok=True) + css_dst = css_dst_dir / "pagination.css" + + if css_src.is_file(): + css_dst.write_text(css_src.read_text()) + markata.console.print(f"Copied pagination.css to {css_dst}") + + +def _sanitize_feed_slug(slug: str) -> str: + """ + Sanitize feed slug to prevent path traversal attacks. + + Args: + slug: User-provided feed slug + + Returns: + Sanitized slug safe for filesystem use + + Raises: + ValueError: If slug contains dangerous characters + """ + import re + + if not slug: + raise ValueError("Feed slug cannot be empty") + + # Remove path traversal sequences (allow forward slashes for nested paths) + if ".." 
in slug or "\\" in slug: + raise ValueError(f"Invalid characters in feed slug: {slug}") + + # Allow alphanumeric characters, hyphens, underscores, and forward slashes for nested paths + if not re.match(r"^[a-zA-Z0-9_/-]+$", slug): + raise ValueError(f"Feed slug contains invalid characters: {slug}") + + # Prevent leading or trailing slashes and double slashes + if slug.startswith("/") or slug.endswith("/") or "//" in slug: + raise ValueError(f"Feed slug has invalid slash usage: {slug}") + + # Sanitize by removing any path traversal attempts + safe_slug = slug.replace("..", "") + + # Additional safety check + if safe_slug != slug: + raise ValueError(f"Feed slug attempts path traversal: {slug}") + + return safe_slug + + +def _ensure_head_links(markata: Markata) -> None: + """ + Ensure pagination CSS and JS links are in markata.config.head.link + without duplicating existing links. + """ + pagination_css_href = "/static/css/pagination.css" + pagination_js_config_href = "/static/js/pagination-config.js" + pagination_js_href = "/static/js/pagination.js" + htmx_version = markata.config.htmx_version + htmx_static_href = "/static/js/htmx.min.js" + + # Try to download HTMX first + if not _download_htmx_if_needed(markata): + # Fallback to CDN if download fails + htmx_cdn_href = f"https://unpkg.com/htmx.org@{htmx_version}" + else: + htmx_cdn_href = htmx_static_href + + # Helper function to get href from link (supports both dicts and objects) + def get_href(link): + if hasattr(link, "href"): + return link.href + return link.get("href", "") + + # Helper function to get src from script (supports both dicts and objects) + def get_src(script): + if hasattr(script, "src"): + return script.src + return script.get("src", "") + + # Check if pagination CSS is already in head.links + css_exists = any( + get_href(link) == pagination_css_href for link in markata.config.head.link + ) + + # Add CSS link if not already present + if not css_exists: + markata.config.head.link.append( + {"rel": 
"stylesheet", "href": pagination_css_href} + ) + + # Check if pagination JS config is already in head.script + js_config_exists = any( + get_src(script) == pagination_js_config_href + for script in markata.config.head.script + ) + + # Check if pagination JS is already in head.script + js_exists = any( + get_src(script) == pagination_js_href for script in markata.config.head.script + ) + + # Add JS config link if not already present + if not js_config_exists: + markata.config.head.script.append({"src": pagination_js_config_href}) + + # Add JS link if not already present + if not js_exists: + markata.config.head.script.append({"src": pagination_js_href}) + + # Check if HTMX is already in head.script + htmx_exists = any( + get_src(script) in [htmx_cdn_href, htmx_static_href] + for script in markata.config.head.script + ) + + # Add HTMX link if not already present + if not htmx_exists: + markata.config.head.script.append({"src": htmx_cdn_href}) + + @hook_impl @register_attr("feeds") def pre_render(markata: Markata) -> None: @@ -440,44 +828,33 @@ def pre_render(markata: Markata) -> None: markata.feeds = Feeds(markata) -@lru_cache() -def get_template(markata, template): - try: - return markata.jinja_env.get_template(template) - except jinja2.TemplateNotFound: - # try to load it as a file - ... - - try: - return Template(Path(template).read_text(), undefined=SilentUndefined) - except FileNotFoundError: - # default to load it as a string - ... - except OSError: # thrown by File name too long - # default to load it as a string - ... - return Template(template, undefined=SilentUndefined) - - @hook_impl def save(markata: Markata) -> None: """ Creates a new feed page for each page in the config. 
""" + _ensure_head_links(markata) with markata.cache as cache: for feed in markata.feeds.values(): - create_page( - markata, - feed, - cache, - ) + if feed.config.enabled: + create_paginated_feed( + markata, + feed, + cache, + ) + else: + create_page( + markata, + feed, + cache, + ) home = Path(str(markata.config.output_dir)) / "index.html" archive = Path(str(markata.config.output_dir)) / "archive" / "index.html" if not home.exists() and archive.exists(): shutil.copy(str(archive), str(home)) - xsl_template = get_template(markata, feed.config.xsl_template) + xsl_template = get_template(markata.jinja_env, feed.config.xsl_template) xsl = xsl_template.render( markata=markata, __version__=__version__, @@ -485,8 +862,13 @@ def save(markata: Markata) -> None: config=markata.config, ) xsl_file = Path(markata.config.output_dir) / "rss.xsl" - current_xsl = xsl_file.read_text() if xsl_file.exists() else "" - if current_xsl != xsl: + # Only read file if it exists and we need to compare + should_write = True + if xsl_file.exists(): + current_xsl = xsl_file.read_text() + should_write = current_xsl != xsl + + if should_write: xsl_file.write_text(xsl) @@ -499,20 +881,42 @@ def create_page( create an html unorderd list of posts. 
""" - template = get_template(markata, feed.config.template) - partial_template = get_template(markata, feed.config.partial_template) - canonical_url = f"{markata.config.url}/{feed.config.slug}/" + template = get_template(markata.jinja_env, feed.config.template) + partial_template = get_template(markata.jinja_env, feed.config.partial_template) + + # Security: Sanitize feed slug to prevent path traversal attacks + safe_slug = _sanitize_feed_slug(feed.config.slug) + canonical_url = f"{markata.config.url}/{safe_slug}/" + + # Get templates mtime to bust cache when any template changes + templates_mtime = get_templates_mtime(markata.jinja_env) + + # Use simpler hash for posts instead of expensive str(post.to_dict()) + # Hash just the essential post identifiers: slug + content_hash + cache_key_posts = f"feed_hash_posts_{feed.config.slug}" + if not hasattr(markata, "_feed_hash_cache"): + markata._feed_hash_cache = {} + + if cache_key_posts not in markata._feed_hash_cache: + # Use post slugs and published dates instead of full to_dict() + # This provides a stable, lightweight cache key + posts_data = feed.map( + "(post.slug, str(getattr(post, 'date', '')), getattr(post, 'title', ''))" + ) + markata._feed_hash_cache[cache_key_posts] = str(sorted(posts_data)) + + posts_hash_data = markata._feed_hash_cache[cache_key_posts] key = markata.make_hash( "feeds", template, __version__, - # cards, markata.config.url, markata.config.description, feed.config.title, - feed.map("content"), + posts_hash_data, # Use cached post data canonical_url, + str(templates_mtime), # Track template file changes # datetime.datetime.today(), # markata.config, ) @@ -521,29 +925,28 @@ def create_page( html_partial_key = markata.make_hash(key, "partial_html") feed_rss_key = markata.make_hash(key, "rss") feed_sitemap_key = markata.make_hash(key, "sitemap") + feed_atom_key = markata.make_hash(key, "atom") feed_html_from_cache = markata.precache.get(html_key) feed_html_partial_from_cache = 
markata.precache.get(html_partial_key) feed_rss_from_cache = markata.precache.get(feed_rss_key) feed_sitemap_from_cache = markata.precache.get(feed_sitemap_key) + feed_atom_from_cache = markata.precache.get(feed_atom_key) - output_file = Path(markata.config.output_dir) / feed.config.slug / "index.html" - output_file.parent.mkdir(exist_ok=True, parents=True) - + output_file = Path(markata.config.output_dir) / safe_slug / "index.html" partial_output_file = ( - Path(markata.config.output_dir) / feed.config.slug / "partial" / "index.html" + Path(markata.config.output_dir) / safe_slug / "partial" / "index.html" ) - partial_output_file.parent.mkdir(exist_ok=True, parents=True) + rss_output_file = Path(markata.config.output_dir) / safe_slug / "rss.xml" + sitemap_output_file = Path(markata.config.output_dir) / safe_slug / "sitemap.xml" + atom_output_file = Path(markata.config.output_dir) / safe_slug / "atom.xml" - rss_output_file = Path(markata.config.output_dir) / feed.config.slug / "rss.xml" - rss_output_file.parent.mkdir(exist_ok=True, parents=True) - - sitemap_output_file = ( - Path(markata.config.output_dir) / feed.config.slug / "sitemap.xml" - ) - sitemap_output_file.parent.mkdir(exist_ok=True, parents=True) + # Create all directories in one batch + partial_output_file.parent.mkdir(exist_ok=True, parents=True) from_cache = True + + # ---------- HTML ---------- if feed_html_from_cache is None: from_cache = False feed_html = template.render( @@ -558,6 +961,7 @@ def create_page( else: feed_html = feed_html_from_cache + # ---------- Partial HTML ---------- if feed_html_partial_from_cache is None: from_cache = False feed_html_partial = partial_template.render( @@ -572,47 +976,269 @@ def create_page( else: feed_html_partial = feed_html_partial_from_cache - if feed_rss_from_cache is None: - from_cache = False - rss_template = get_template(markata, feed.config.rss_template) - feed_rss = rss_template.render(markata=markata, feed=feed) - cache.set(feed_rss_key, feed_rss) + # 
---------- RSS ---------- + if feed.config.rss: + if feed_rss_from_cache is None: + from_cache = False + rss_template = get_template(markata.jinja_env, feed.config.rss_template) + feed_rss = rss_template.render(markata=markata, feed=feed) + cache.set(feed_rss_key, feed_rss) + else: + feed_rss = feed_rss_from_cache else: - feed_rss = feed_rss_from_cache - - if feed_sitemap_from_cache is None: - from_cache = False - sitemap_template = get_template(markata, feed.config.sitemap_template) - feed_sitemap = sitemap_template.render(markata=markata, feed=feed) - cache.set(feed_sitemap_key, feed_sitemap) + feed_rss = None + + # ---------- Sitemap ---------- + if feed.config.sitemap: + if feed_sitemap_from_cache is None: + from_cache = False + sitemap_template = get_template( + markata.jinja_env, feed.config.sitemap_template + ) + feed_sitemap = sitemap_template.render(markata=markata, feed=feed) + cache.set(feed_sitemap_key, feed_sitemap) + else: + feed_sitemap = feed_sitemap_from_cache else: - feed_sitemap = feed_sitemap_from_cache - - if ( - from_cache - and output_file.exists() - and partial_output_file.exists() - and rss_output_file.exists() - and sitemap_output_file.exists() - ): - return - + feed_sitemap = None + + # ---------- Atom ---------- + if feed.config.atom: + if feed_atom_from_cache is None: + from_cache = False + atom_template = get_template(markata.jinja_env, feed.config.atom_template) + feed_atom = atom_template.render( + markata=markata, + feed=feed, + datetime=datetime, # ⭐ so the template can use datetime + ) + cache.set(feed_atom_key, feed_atom) + else: + feed_atom = feed_atom_from_cache + # If everything came from cache and files exist, bail early + if ( + from_cache + and output_file.exists() + and partial_output_file.exists() + and (not feed.config.rss or rss_output_file.exists()) + and (not feed.config.sitemap or sitemap_output_file.exists()) + and (not feed.config.atom or atom_output_file.exists()) + ): + return + + # Write HTML current_html = 
output_file.read_text() if output_file.exists() else "" if current_html != feed_html: output_file.write_text(feed_html) + + # Write partial HTML current_partial_html = ( partial_output_file.read_text() if partial_output_file.exists() else "" ) if current_partial_html != feed_html_partial: partial_output_file.write_text(feed_html_partial) - current_rss = rss_output_file.read_text() if rss_output_file.exists() else "" - if current_rss != feed_rss: - rss_output_file.write_text(feed_rss) - current_sitemap = ( - sitemap_output_file.read_text() if sitemap_output_file.exists() else "" - ) - if current_sitemap != feed_sitemap: - sitemap_output_file.write_text(feed_sitemap) + + # Write RSS (if enabled) + if feed_rss is not None: + current_rss = rss_output_file.read_text() if rss_output_file.exists() else "" + if current_rss != feed_rss: + rss_output_file.write_text(feed_rss) + + # Write sitemap (if enabled) + if feed_sitemap is not None: + current_sitemap = ( + sitemap_output_file.read_text() if sitemap_output_file.exists() else "" + ) + if current_sitemap != feed_sitemap: + sitemap_output_file.write_text(feed_sitemap) + + # Write Atom (if enabled) + if feed_atom is not None: + current_atom = atom_output_file.read_text() if atom_output_file.exists() else "" + if current_atom != feed_atom: + atom_output_file.write_text(feed_atom) + + +def create_paginated_feed( + markata: Markata, + feed: Feed, + cache, +) -> None: + """ + Create paginated feed pages. 
+ """ + posts = feed.posts + per_page = getattr(feed.config, "items_per_page", feed.config.per_page) + + # Validate per_page to prevent division by zero + if per_page <= 0: + raise ValueError( + f"items_per_page must be a positive integer, got {per_page} for feed '{feed.config.slug}'" + ) + + total_posts = len(posts) + + # Handle empty feeds gracefully + if total_posts == 0: + total_pages = 1 # Still create one empty page + else: + total_pages = (total_posts + per_page - 1) // per_page + + # Security: Sanitize feed slug to prevent path traversal attacks + safe_slug = _sanitize_feed_slug(feed.config.slug) + + template = get_template(markata, feed.config.template) + canonical_url = f"{markata.config.url}/{safe_slug}/" + + for page_num in range(1, total_pages + 1): + start_idx = (page_num - 1) * per_page + end_idx = start_idx + per_page + page_posts = posts[start_idx:end_idx] + + # Create pagination context + pagination_context = { + "current_page": page_num, + "total_pages": total_pages, + "total_posts": total_posts, + "per_page": per_page, + "has_prev": page_num > 1, + "has_next": page_num < total_pages, + "prev_page": page_num - 1 if page_num > 1 else None, + "next_page": page_num + 1 if page_num < total_pages else None, + "pagination_type": feed.config.pagination_type, + } + + # Generate JS config file if JS pagination is used + pagination_js_url = None + if feed.config.pagination_type == "js": + pagination_config = { + "enabled": True, + "type": feed.config.pagination_type, + "page": page_num, + "totalPages": total_pages, + "totalPosts": total_posts, + "itemsShown": len(page_posts), + "feedName": safe_slug, + "hasNext": page_num < total_pages, + "config": { + "pagination_type": feed.config.pagination_type, + "posts_per_page": getattr(feed.config, "posts_per_page", None), + "template": getattr(feed.config, "template", None), + }, + } + pagination_js_url = _generate_pagination_js( + markata, pagination_config, Path(markata.config.output_dir) + ) + + # Create a feed 
object for this page (no state mutation) + page_feed = Feed(config=feed.config, markata=feed.markata) + + key = markata.make_hash( + "feeds", + "paginated", + template, + __version__, + markata.config.url, + markata.config.description, + feed.config.title, + [p.content for p in page_posts], + canonical_url, + page_num, + pagination_context, + ) + + html_key = markata.make_hash(key, "html") + html_partial_key = markata.make_hash(key, "partial_html") + + # Determine output file paths + if page_num == 1: + # First page goes to the main feed index + output_file = Path(markata.config.output_dir) / safe_slug / "index.html" + else: + # Subsequent pages go to numbered subdirectories + output_file = ( + Path(markata.config.output_dir) + / safe_slug + / str(page_num) + / "index.html" + ) + + partial_output_file = output_file.parent / "partial" / "index.html" + output_file.parent.mkdir(exist_ok=True, parents=True) + partial_output_file.parent.mkdir(exist_ok=True, parents=True) + + # Check cache + feed_html_from_cache = markata.precache.get(html_key) + feed_html_partial_from_cache = markata.precache.get(html_partial_key) + + from_cache = True + if feed_html_from_cache is None: + from_cache = False + feed_html = template.render( + markata=markata, + __version__=__version__, + post=feed.config.model_dump(), + url=markata.config.url, + config=markata.config, + feed=page_feed, + pagination_enabled=True, + pagination_config=pagination_context, + pagination_context=pagination_context, + title=feed.config.title, + page=page_num, + total_pages=total_pages, + total_posts=total_posts, + has_next=pagination_context["has_next"], + has_prev=pagination_context["has_prev"], + next_page=pagination_context["next_page"], + prev_page=pagination_context["prev_page"], + feed_name=safe_slug, + posts=page_posts, + page_posts=page_posts, + pagination_js_url=pagination_js_url, + ) + cache.set(html_key, feed_html) + else: + feed_html = feed_html_from_cache + + if feed_html_partial_from_cache is None: + 
from_cache = False + # For HTMX partials, use items-only template to avoid duplicating page structure + items_partial_template = get_template(markata, "feed_items_partial.html") + feed_html_partial = items_partial_template.render( + markata=markata, + __version__=__version__, + post=feed.config.model_dump(), + url=markata.config.url, + config=markata.config, + feed=page_feed, + card_template=feed.config.card_template, + posts=page_posts, + page_posts=page_posts, + has_next=pagination_context["has_next"], + next_page=pagination_context["next_page"], + feed_name=safe_slug, + page=page_num, + total_pages=total_pages, + total_posts=total_posts, + pagination_context=pagination_context, + ) + cache.set(html_partial_key, feed_html_partial) + else: + feed_html_partial = feed_html_partial_from_cache + + if from_cache and output_file.exists() and partial_output_file.exists(): + continue + + current_html = output_file.read_text() if output_file.exists() else "" + if current_html != feed_html: + output_file.write_text(feed_html) + + current_partial_html = ( + partial_output_file.read_text() if partial_output_file.exists() else "" + ) + if current_partial_html != feed_html_partial: + partial_output_file.write_text(feed_html_partial) @background.task @@ -630,32 +1256,11 @@ def create_card( if template is None: template = markata.config.get("feeds_config", {}).get("card_template", None) - # Get template modification time if template exists - template_mtime = 0 - if template: - template_path = None - # Check user template paths first - for path in markata.jinja_env.template_paths: - potential_path = Path(path) / template - if potential_path.exists(): - template_path = potential_path - break - - # Check package templates if not found in user paths - if not template_path: - import importlib - - package_template = ( - importlib.resources.files("markata") / "templates" / template - ) - if package_template.exists(): - template_path = package_template - - if template_path: - 
template_mtime = template_path.stat().st_mtime + # Get templates mtime to bust cache when any template changes + templates_mtime = get_templates_mtime(markata.jinja_env) key = markata.make_hash( - "feeds", template, str(post), post.content, str(template_mtime) + "feeds", template, str(post.to_dict()), str(templates_mtime) ) card = markata.precache.get(key) @@ -776,7 +1381,7 @@ def refresh(self): for feed_config in self.markata.config.feeds: # Ensure feed has a name, falling back to slug if needed if feed_config.name is None and feed_config.slug is not None: - feed_config.name = feed_config.slug.replace("-", "_") + feed_config.name = to_pythonic_identifier(str(feed_config.slug)) elif feed_config.name is None and feed_config.slug is None: feed_config.slug = "archive" feed_config.name = "archive" @@ -797,10 +1402,10 @@ def items(self): return [(key, self[key]) for key in self.config] def __getitem__(self, key: str) -> Any: - return getattr(self, key.replace("-", "_").lower()) + return getattr(self, to_pythonic_identifier(str(key))) def get(self, key: str, default: Any = None) -> Any: - return getattr(self, key.replace("-", "_").lower(), default) + return getattr(self, to_pythonic_identifier(str(key)), default) def _dict_panel(self, config) -> str: """pretty print configs with rich""" diff --git a/markata/plugins/heading_link.py b/markata/plugins/heading_link.py index b2a3f2cc..7fe32a35 100644 --- a/markata/plugins/heading_link.py +++ b/markata/plugins/heading_link.py @@ -69,12 +69,12 @@ """ import re -from pathlib import Path from typing import TYPE_CHECKING from bs4 import BeautifulSoup from markata import Markata +from markata import __version__ from markata.hookspec import hook_impl if TYPE_CHECKING: @@ -93,7 +93,7 @@ def post_render(markata: Markata) -> None: key = markata.make_hash( "heading_link", "post_render", - Path(__file__).read_text(), + __version__, article.content, article.html, ) diff --git a/markata/plugins/jinja_env.py b/markata/plugins/jinja_env.py 
index 30b4cf3f..4e4182d9 100644 --- a/markata/plugins/jinja_env.py +++ b/markata/plugins/jinja_env.py @@ -61,6 +61,7 @@ def render_template(markata, content): - Silent undefined behavior means undefined variables render as empty strings """ +from functools import lru_cache from pathlib import Path from typing import List @@ -175,11 +176,12 @@ def configure(markata: Markata) -> None: markata.config.dynamic_templates_dir.mkdir(parents=True, exist_ok=True) head_template = markata.config.dynamic_templates_dir / "head.html" - head_template.write_text( - env_for_dynamic_render.get_template("dynamic_head.html").render( - {"markata": markata} - ), + new_content = env_for_dynamic_render.get_template("dynamic_head.html").render( + {"markata": markata} ) + current_content = head_template.read_text() if head_template.exists() else "" + if current_content != new_content: + head_template.write_text(new_content) # Set up loaders loaders = [] @@ -211,3 +213,86 @@ def configure(markata: Markata) -> None: # Register the environment on the config's private attribute markata.jinja_env = env + + +def get_template_paths(env: Environment) -> list[str]: + """Extract template paths from Jinja2 Environment's loader. + + Args: + env: Jinja2 Environment instance + + Returns: + List of template directory paths from all FileSystemLoaders + """ + paths = [] + loader = env.loader + + if isinstance(loader, ChoiceLoader): + for sub_loader in loader.loaders: + if isinstance(sub_loader, FileSystemLoader): + paths.extend(sub_loader.searchpath) + elif isinstance(loader, FileSystemLoader): + paths.extend(loader.searchpath) + + return paths + + +def get_templates_mtime(env: Environment) -> float: + """Get latest mtime from all template directories. + + This tracks changes to any template file including includes, extends, and imports. 
+ + Args: + env: Jinja2 Environment instance + + Returns: + Maximum modification time across all template files, or 0 if none found + """ + max_mtime = 0 + for template_dir in get_template_paths(env): + template_path = Path(template_dir) + if template_path.exists(): + for path in template_path.rglob('*'): + if path.is_file(): + try: + max_mtime = max(max_mtime, path.stat().st_mtime) + except (OSError, FileNotFoundError): + continue + return max_mtime + + +@lru_cache(maxsize=128) +def get_template(env: Environment, template: str) -> jinja2.Template: + """Get a template with fallback handling and caching. + + Tries to load the template in the following order: + 1. From the Jinja2 environment (template loader) + 2. As a file path (if the string is a valid file path) + 3. As a string template (direct template compilation) + + Templates are cached after loading for performance. + + Args: + env: Jinja2 Environment instance + template: Template name, file path, or template string + + Returns: + Compiled Jinja2 Template object + """ + # Try to load from environment first + try: + return env.get_template(template) + except jinja2.TemplateNotFound: + pass + + # Try to load as a file + try: + template_content = Path(template).read_text() + return env.from_string(template_content) + except FileNotFoundError: + pass + except OSError: # File name too long, etc. 
+ pass + + # Fall back to treating it as a string template + return env.from_string(template) diff --git a/markata/plugins/jinja_md.py b/markata/plugins/jinja_md.py index 5e9112a2..6ed29b77 100644 --- a/markata/plugins/jinja_md.py +++ b/markata/plugins/jinja_md.py @@ -217,7 +217,15 @@ def pre_render(markata: "Markata") -> None: for post in markata.filter("jinja==True"): if post.get("jinja", True) and not ignore_spec.match_file(post["path"]): try: - key = markata.make_hash("jina_md", "pre_render", post.content) + # Include post metadata and markata version in cache key + # since these affect the rendered output + key = markata.make_hash( + "jinja_md", + "pre_render", + post.content, + str(post.to_dict()), # Include all post metadata + __version__, # Include markata version + ) content_from_cache = markata.precache.get(key) if content_from_cache is None and post.content is not None: post.content = jinja_env.from_string(post.content).render( diff --git a/markata/plugins/md_it_wikilinks.py b/markata/plugins/md_it_wikilinks.py index 843e8779..49205fe1 100644 --- a/markata/plugins/md_it_wikilinks.py +++ b/markata/plugins/md_it_wikilinks.py @@ -27,8 +27,213 @@ ## Configuration -This plugin requires no explicit configuration. It automatically processes wikilinks -in your markdown content. +This plugin supports comprehensive configuration for wikilink resolution. 
+ +Configuration Options: +```toml +[plugins.md_it_wikilinks] + +# Resolution strategy for duplicate matches +# "priority" (default): Use priority scoring to select best match +# "first": Use first match found +# "warn": Always warn about duplicates +resolution_strategy = "priority" + +# Score difference threshold for clear winner selection (default: 20) +clear_winner_threshold = 20 + +# Suppress warnings for links matching these patterns +suppress_patterns = ["tag/*", "category/*", "archive/*"] + +# Custom priority rules (higher priority = higher score) +priority_rules = [ + { pattern = "pages/*", priority = 100 }, + { pattern = "posts/*", priority = 90 }, + { pattern = "blog/*", priority = 85 }, + { pattern = "tutorials/*", priority = 80 }, + { pattern = "docs/*", priority = 75 }, + { pattern = "tag/*", priority = 60 }, + { pattern = "category/*", priority = 55 }, + { pattern = "archive/*", priority = 50 }, + { pattern = "feed/*", priority = 45 }, +] + +# Behavior for broken links (default: "warn", "silent", "error") +fallback_behavior = "warn" + +# Enable/disable logging (default: true) +enable_logging = true +``` + +## Resolution Priority System + +Built-in Scores (configurable via priority_rules): +```toml +# Default scoring system (overridable via priority_rules) +exact_slug_match = 100 # Exact slug match: highest priority +path_match = 80 # Path match: [[folder/page]] -> high priority +feed_slug_match = 60 # Feed slug match: tag/*, category/* patterns +basename_match = 40 # Basename match: default fallback +``` + +Examples: +```toml +# Override scores for entire patterns +priority_rules = [ + { pattern = "tag/python", priority = 70 }, # Higher than default 60 + { pattern = "docs/*", priority = 95 }, # Higher than exact match! 
+] + +# Change entire scoring algorithm +resolution_strategy = "first" # Disable scoring, use first match +resolution_strategy = "warn" # Always warn about duplicates +clear_winner_threshold = 15 # Lower threshold = fewer warnings +``` + +## Smart Slug Resolution + +The plugin: +1. Looks up the target file in your content +2. Finds its generated slug +3. Creates a link to the final URL + +## Link Formats + +Supports multiple link styles: +- Basic: `[[filename]]` +- With text: `[[filename|Link Text]]` +- With path: `[[folder/file]]` +- With extension: `[[file.md]]` (extension stripped in output) +- With anchors: `[[filename#anchor]]` or `[[filename#anchor|Display Text]]` +- Complex: `[[folder/file#anchor|Display Text]]` + +**Edge Cases Handled:** +- **Spaces**: `[[my tag]]` → matches slug "my-tag" (spaces normalized to hyphens by Markata) +- **Empty display text**: `[[page|]]` → falls back to "page" +- **Multiple pipes**: `[[page|Text with | pipes]]` → splits only on first pipe +- **Quotes**: `[[page|"Display Text"]]` → supports quoted display text +- **Mixed anchors**: `[[page#anchor|Text]]` and `[[page|Text#anchor]]` +- **Complex expressions**: `[[tag/my tag|Posts about "my tag"]]` (full syntax support with quotes and spaces) + +## Duplicate Resolution System + +The wikilinks system uses intelligent priority-based resolution: + +How it works: +1. Creates mapping of all possible matches in markata.possible_wikilink +2. Scores each candidate using configurable priority rules +3. Selects highest-scoring match (clear winner if score difference > threshold) +4. 
Only warns for truly ambiguous cases + +## HTML Output + +Generated HTML structure: +```html +<a class="wikilink" href="/target-slug">Link Text</a> +``` + +## Uninstallation + +Since this plugin is included in the default plugin set, to disable it you must explicitly +add it to the disabled_hooks list if you are using the 'default' plugin: + +```toml +disabled_hooks = [ + "markata.plugins.md_it_wikilinks", +] +``` + +## Configuration + +This plugin supports comprehensive configuration for wikilink resolution: + +```toml +[plugins.md_it_wikilinks] +# Resolution strategy for duplicate matches +# "priority" (default): Use priority scoring to select best match +# "first": Use first match found +# "warn": Always warn about duplicates +resolution_strategy = "priority" + +# Score difference threshold for clear winner selection (default: 20) +clear_winner_threshold = 20 + +# Suppress warnings for links matching these patterns +suppress_patterns = ["tag/*", "category/*", "archive/*"] + +# Custom priority rules (higher priority = higher score) +# If no rules match, defaults to built-in scoring system +priority_rules = [ + { pattern = "pages/*", priority = 100 }, + { pattern = "posts/*", priority = 90 }, + { pattern = "blog/*", priority = 85 }, + { pattern = "tutorials/*", priority = 80 }, + { pattern = "docs/*", priority = 75 }, + { pattern = "tag/*", priority = 60 }, + { pattern = "category/*", priority = 55 }, + { pattern = "archive/*", priority = 50 }, + { pattern = "feed/*", priority = 45 }, +] + +## Priority Scoring System + +**Built-in Scores:** +- **Exact slug match**: 100 points (highest priority) +- **Path match**: 80 points (when original link includes path structure) +- **Feed slug match**: 60 points (for hierarchical feeds like `tag/*`) +- **Basename match**: 40 points (default fallback) + +**Resolution Examples:** +```toml +# For site with feed "tag/python" and page "python": +[[python]] # Matches: ["python", "tag/python"] → selects "python" (100 vs 60) +[[tag/python]] # Matches: ["python", "tag/python"] 
→ selects "tag/python" (100 vs 40) + +# For site with multiple tag feeds: +priority_rules = [ + { pattern = "tag/python", priority = 90 }, # Higher than default tag/* + { pattern = "tag/javascript", priority = 85 }, +] +``` + +# Behavior for broken links (default: "warn") +# "warn": Log warning and use fallback link +# "silent": Use fallback link silently +# "error": Log error and use fallback link +fallback_behavior = "warn" + +# Enable/disable logging (default: true) +enable_logging = true +``` + +### Priority Rules + +Priority rules allow you to customize how links are resolved when there are multiple matches: + +- **Pattern**: Glob pattern matching page slugs (supports `*` wildcard) +- **Priority**: Score value (higher = more likely to be selected) + +The plugin evaluates rules in order and uses the first matching rule. If no custom rules match, it falls back to the built-in scoring system: + +- Exact slug match: 100 points +- Path match (`[[folder/page]]`): 80 points +- Feed slug match: 60 points +- Basename match: 40 points + +### Warning Suppression + +Use `suppress_patterns` to reduce warning noise for expected duplicates: + +```toml +suppress_patterns = [ + "tag/*", # Suppress all tag-related warnings + "category/*", # Suppress category warnings + "*/index", # Suppress index page conflicts +] +``` + +By default, the plugin uses priority-based resolution that automatically selects +the best match and only warns for truly ambiguous cases. 
## Functionality @@ -64,6 +269,175 @@ - With text: `[[filename|Link Text]]` - With path: `[[folder/file]]` - With extension: `[[file.md]]` (extension stripped in output) +- With anchors: `[[filename#anchor]]` or `[[filename#anchor|Display Text]]` +- Complex: `[[folder/file#anchor|Display Text]]` + +**Edge Cases Handled:** +- **Spaces**: `[[my tag]]` → matches slug "my-tag" (spaces normalized to hyphens by Markata) +- **Empty display text**: `[[page|]]` → falls back to "page" +- **Multiple pipes**: `[[page|Text with | pipes]]` → splits only on first pipe (first `|` is separator, rest is content) +- **Quotes**: `[[page|"Display Text"]]` → supports quoted display text (quotes preserved in display text) +- **Mixed anchors**: `[[page#anchor|Text]]` and `[[page|Text#anchor]]` (anchors and display text work together) +- **Complex expressions**: `[[tag/my tag|Posts about "my tag"]]` (full syntax support with quotes and spaces) + +**Normalization Behavior:** +- **Link target normalization**: Spaces and special characters handled according to Markata slug conventions +- **Display text preservation**: Exact display text (including quotes and pipes) preserved +- **Case sensitivity**: Resolution is case-sensitive (matches Markata's slug handling) + +**Common Use Cases:** +```markdown +# Basic usage +[[thoughts]] # → /thoughts +[[tag/python]] # → /tag/python +[[docs/getting-started]] # → /docs/getting-started + +# With display text +[[thoughts|My Thoughts]] # → /thoughts (text: "My Thoughts") +[[tag/python|Python Posts]] # → /tag/python (text: "Python Posts") + +# With anchors +[[thoughts#intro]] # → /thoughts#intro +[[thoughts#intro|Introduction]] # → /thoughts#intro (text: "Introduction") + +# Complex with spaces and quotes +[[my tag|Posts about "my tag"]] # → /my-tag (text: 'Posts about "my tag"') +``` + +## Resolution Priority System + +**Built-in Scores (configurable via priority_rules):** +- **Exact slug match**: 100 points (highest priority) +- **Path match**: 80 points 
(when original link includes path structure) +- **Feed slug match**: 60 points (for hierarchical feeds like `tag/*`) +- **Basename match**: 40 points (default fallback) + +## Duplicate Resolution System + +The wikilinks system uses intelligent priority-based resolution to eliminate warning noise for common hierarchical patterns: + +### How Matches Are Created + +**Feed Slugs**: Feed configurations (e.g., `slug = "tag/python"`) +**Page Slugs**: Regular page slugs (e.g., `slug = "python"`) +**Mapping Creation**: Both are mapped in `markata.possible_wikilink` dictionary: + +```python +# For feeds in markata.feeds: +for slug in [v.config.slug for v in markata.feeds.values()]: + wikilink = slug.split("/")[-1] # Extract basename: "python" + markata.possible_wikilink[wikilink].append(slug) # Maps "python" → ["python", "tag/python"] + +# For regular pages: +for slug in markata.map("slug"): + wikilink = slug.split("/")[-1] # Extract basename: "python" + if wikilink not in markata.possible_wikilink: + markata.possible_wikilink[wikilink] = [slug] +``` + +### Resolution Priority System + +## Resolution Priority System + +**Built-in Scores (configurable via priority_rules):** +```toml +# Default scoring system (overridable via priority_rules) +exact_slug_match = 100 # Exact slug match: highest priority +path_match = 80 # Path match: [[folder/page]] -> high priority +feed_slug_match = 60 # Feed slug match: tag/*, category/* patterns +basename_match = 40 # Basename match: default fallback +``` + +**Configuration Options:** +```toml +# Method 1: Override default scores for entire patterns +priority_rules = [ + { pattern = "tag/*", priority = 70 }, # Higher than default 60 + { pattern = "category/*", priority = 80 }, # Higher than default 55 + { pattern = "docs/*", priority = 95 }, # Very high priority + { pattern = "posts/*", priority = 90 }, # High priority for content +] + +# Method 2: Change entire scoring algorithm +resolution_strategy = "first" # Disable scoring, use first 
match +resolution_strategy = "warn" # Always warn about duplicates +clear_winner_threshold = 15 # Lower threshold = fewer warnings +``` + +**Resolution Examples:** +```toml +# Example 1: Default behavior with feed "tag/python" and page "python" +# markata.possible_wikilink = {"python": ["python", "tag/python"]} + +[[python]] # → selects "python" (100 vs 60 points, clear winner) +[[tag/python]] # → selects "tag/python" (100 vs 40 points, clear winner) + +# Example 2: Custom priority rules +priority_rules = [ + { pattern = "tag/python", priority = 75 }, # Higher than default 60 + { pattern = "docs/getting-started", priority = 120 }, # Higher than exact match! +] + +[[python]] # "python" (exact match, 100) vs "tag/python" (custom rule, 75) → selects "python" (clear winner) +[[tag/python]] # Against "tag/python": 100 vs 75 → selects "tag/python" (exact match still wins) + +# Example 3: Different resolution strategies +resolution_strategy = "first" # Ignores scoring, always first match +resolution_strategy = "warn" # Always warns about duplicates +``` + +**Advanced Configuration:** +```toml +# Fine-tune duplicate resolution behavior +clear_winner_threshold = 30 # Require larger score difference for clear winner +suppress_patterns = ["tag/*"] # Suppress warnings for all tag/* matches +fallback_behavior = "silent" # No warnings for broken links +``` + +**Example 2: Multiple Feeds with Same Basename** +```toml +# Site structure: +# posts/thoughts.md (slug: "thoughts") +# feed: tag/thoughts (slug: "tag/thoughts") +# feed: category/thoughts (slug: "category/thoughts") + +# Result in markata.possible_wikilink: +{ + "thoughts": ["thoughts", "tag/thoughts", "category/thoughts"] # Three matches! 
+} + +[[thoughts]] # → selects "thoughts" (100 vs 60 vs 55 points) +[[tag/thoughts]] # → selects "tag/thoughts" (100 vs 40 vs 55 points) +[[category/thoughts]] # → selects "category/thoughts" (100 vs 40 vs 55 points) +``` + +**Example 3: Custom Priority Rules** +```toml +# If you want specific feed patterns to have higher priority: +priority_rules = [ + { pattern = "tag/python", priority = 70 }, # Higher than default tag/* (60) + { pattern = "docs/*", priority = 95 }, # Documentation gets highest priority + { pattern = "posts/*", priority = 90 }, # Posts get high priority +] + +# Result: Custom rules override built-in scoring for matching patterns +``` + +**Configuration Priority Rules Override:** +```toml +# Custom scoring for specific patterns +priority_rules = [ + { pattern = "pages/*", priority = 100 }, # Pages get highest priority + { pattern = "tag/python", priority = 90 }, # Specific tag gets boost + { pattern = "docs/*", priority = 95 }, # Documentation gets high priority +] + +# Feed patterns with custom priorities +priority_rules = [ + { pattern = "tag/python", priority = 70 }, # Higher than default tag/* (60) + { pattern = "category/javascript", priority = 75 }, # Boost specific categories +] +``` ## HTML Output @@ -87,7 +461,10 @@ """ import logging +import re from typing import TYPE_CHECKING +from typing import Dict +from typing import List from markdown_it import MarkdownIt from markdown_it.rules_inline import StateInline @@ -101,6 +478,242 @@ logger = logging.getLogger("markata") +def get_default_config() -> Dict: + """ + Get default configuration for wikilinks plugin. 
+ + Returns: + Dictionary with default configuration values + """ + return { + "resolution_strategy": "priority", + "clear_winner_threshold": 20, + "suppress_patterns": [], + "priority_rules": [ + {"pattern": "pages/*", "priority": 100}, + {"pattern": "posts/*", "priority": 90}, + {"pattern": "blog/*", "priority": 85}, + {"pattern": "tutorials/*", "priority": 80}, + {"pattern": "docs/*", "priority": 75}, + {"pattern": "tag/*", "priority": 60}, + {"pattern": "category/*", "priority": 55}, + {"pattern": "archive/*", "priority": 50}, + {"pattern": "feed/*", "priority": 45}, + ], + "fallback_behavior": "warn", # "warn", "silent", "error" + "enable_logging": True, + } + + +def get_plugin_config(markata: "Markata") -> Dict: + """ + Get plugin configuration with defaults merged. + + Args: + markata: Markata instance + + Returns: + Merged configuration dictionary + """ + default_config = get_default_config() + user_config = ( + getattr(markata, "config", {}).get("plugins", {}).get("md_it_wikilinks", {}) + ) + + # Shallow merge: each user-supplied key replaces the default value + merged_config = default_config.copy() + merged_config.update(user_config) + + # priority_rules is replaced wholesale (not merged) when the user supplies it + if "priority_rules" in user_config: + merged_config["priority_rules"] = user_config["priority_rules"] + + return merged_config + + +def matches_pattern(path: str, pattern: str) -> bool: + """ + Check if a path matches a glob-like pattern. + + Args: + path: The path to check + pattern: The pattern (supports * wildcard) + + Returns: + True if path matches pattern + """ + # Escape regex metacharacters so that only "*" acts as a wildcard + regex_pattern = re.escape(pattern).replace(r"\*", ".*") + return re.match(f"^{regex_pattern}$", path) is not None + + +def should_suppress_warning(link_target: str, suppress_patterns: List[str]) -> bool: + """ + Check if warning should be suppressed for a link target. 
+ + Args: + link_target: The link target to check + suppress_patterns: List of patterns to suppress + + Returns: + True if warning should be suppressed + """ + for pattern in suppress_patterns: + if matches_pattern(link_target, pattern): + return True + return False + + +def calculate_match_score( + link_target: str, candidate_slug: str, original_link: str, config: Dict +) -> int: + """ + Calculate priority score for a wikilink match. + Higher scores indicate better matches. + + Args: + link_target: The target link text + candidate_slug: The candidate slug to score + original_link: The original link text from markdown + config: Plugin configuration + + Returns: + Score for the candidate (higher is better) + """ + # Check custom priority rules first + priority_rules = config.get("priority_rules", []) + for rule in priority_rules: + pattern = rule.get("pattern", "") + priority = rule.get("priority", 50) + if matches_pattern(candidate_slug, pattern): + return priority + + # Default scoring system if no custom rules match + # Exact slug match (highest priority) + if link_target == candidate_slug: + return 100 + + # Path match - when original link includes path structure + if "/" in original_link and original_link.strip("/") == candidate_slug: + return 80 + + # Check if this is a basename match + basename = candidate_slug.split("/")[-1] + if link_target == basename: + # Heuristic: feeds typically have paths like "tag/", "category/", "archive/" + feed_prefixes = ["tag/", "category/", "archive/", "feed/", "topic/"] + if any(candidate_slug.startswith(prefix) for prefix in feed_prefixes): + return 60 # Feed slug match + else: + return 40 # Regular basename match + + return 0 + + +def resolve_best_match( + link_target: str, + possible_pages: list, + original_link: str, + markata: "Markata", + md=None, +) -> str: + """ + Resolve the best match from possible pages using priority scoring and configuration. 
+ + Args: + link_target: The target link text + possible_pages: List of possible page slugs + original_link: The original link text from markdown + markata: Markata instance + md: Markdown-it instance (optional) + + Returns: + Best matching page slug + """ + # Get full plugin configuration + config = get_plugin_config(markata) + resolution_strategy = config.get("resolution_strategy", "priority") + threshold = config.get("clear_winner_threshold", 20) + suppress_patterns = config.get("suppress_patterns", []) + fallback_behavior = config.get("fallback_behavior", "warn") + enable_logging = config.get("enable_logging", True) + + if len(possible_pages) == 1: + return possible_pages[0] + + # Check if warning should be suppressed + should_suppress = should_suppress_warning(link_target, suppress_patterns) + + # For non-priority strategies, fallback to simple behavior + if resolution_strategy == "first": + return possible_pages[0] + elif resolution_strategy == "warn": + # Always warn and use first match (unless suppressed) + if not should_suppress and enable_logging and fallback_behavior == "warn": + if md is None or md.options.get("article") is None: + debug_value = "UNKNOWN" + else: + debug_value = md.options["article"].get( + "path", + md.options["article"].get( + "title", md.options["article"].get("slug", "") + ), + ) + logger.warning( + f"wikilink [[{original_link}]] has duplicate matches ({possible_pages}) in file '{debug_value}', defaulting to the first match ({possible_pages[0]})", + ) + return possible_pages[0] + + # Priority-based resolution (default) + # Calculate scores for all candidates using custom configuration + scored_candidates = [] + for candidate in possible_pages: + score = calculate_match_score(link_target, candidate, original_link, config) + scored_candidates.append((score, candidate)) + + # Sort by score (descending) and return the highest scoring match + scored_candidates.sort(key=lambda x: x[0], reverse=True) + + # Check if we have a clear winner 
(score difference > threshold) + if len(scored_candidates) >= 2: + top_score, top_candidate = scored_candidates[0] + second_score, second_candidate = scored_candidates[1] + + # If clear winner, return it without warning + if top_score - second_score > threshold: + return top_candidate + + # If no clear winner, return top choice but log warning for ambiguity (unless suppressed) + top_score, top_candidate = scored_candidates[0] + + if ( + not should_suppress + and enable_logging + and fallback_behavior in ["warn", "error"] + ): + if md is None or md.options.get("article") is None: + debug_value = "UNKNOWN" + else: + debug_value = md.options["article"].get( + "path", + md.options["article"].get( + "title", md.options["article"].get("slug", "") + ), + ) + + message = ( + f"wikilink [[{original_link}]] has ambiguous matches ({possible_pages}) " + f"in file '{debug_value}', selecting highest priority match ({top_candidate})" + ) + + if fallback_behavior == "error": + logger.error(message) + else: + logger.warning(message) + + return top_candidate + + @hook_impl() @register_attr("possible_wikilink") def pre_render(markata: "Markata") -> None: @@ -124,9 +737,19 @@ def pre_render(markata: "Markata") -> None: markata.possible_wikilink["index"] = ["index"] for slug in [v.config.slug for v in markata.feeds.values()]: + # Register the full slug (e.g., "tag/python") + wikilink = slug + if wikilink in markata.possible_wikilink: + if slug not in markata.possible_wikilink[wikilink]: + markata.possible_wikilink[wikilink].append(slug) + else: + markata.possible_wikilink[wikilink] = [slug] + + # Register the basename (e.g., "python") wikilink = slug.split("/")[-1] if wikilink in markata.possible_wikilink: - markata.possible_wikilink[wikilink].append(slug) + if slug not in markata.possible_wikilink[wikilink]: + markata.possible_wikilink[wikilink].append(slug) else: markata.possible_wikilink[wikilink] = [slug] @@ -157,8 +780,8 @@ def wikilinks_plugin( def _wikilinks_inline(state: 
StateInline, silent: bool): try: if ( - state.srcCharCode[state.pos] != start_char - or state.srcCharCode[state.pos + 1] != start_char + ord(state.src[state.pos]) != start_char + or ord(state.src[state.pos + 1]) != start_char ): return False except IndexError: @@ -168,11 +791,11 @@ def _wikilinks_inline(state: StateInline, silent: bool): found_closing = False while True: try: - end = state.srcCharCode.index(end_char, pos) + end = state.src.index(chr(end_char), pos) except ValueError: return False try: - if state.srcCharCode[end + 1] == end_char: + if state.src[end + 1] == chr(end_char): found_closing = True break except IndexError: @@ -191,56 +814,85 @@ def _wikilinks_inline(state: StateInline, silent: bool): token = state.push("link_open", "a", 1) token.block = False token.attrSet("class", "wikilink") - if "#" in text: - link, id = text.split("#") + + # Parse display text override syntax: [[page|Display Text]] + if "|" in text: + # Split only on first pipe to allow pipes in display text + link_part, display_text = text.split("|", 1) + display_text = display_text.strip() + # Fall back to link_part if display_text is empty + if not display_text: + display_text = link_part + else: + link_part, display_text = text, None + + # Handle anchor in link part: [[page#anchor]] or [[page#anchor|Display Text]] + if "#" in link_part: + link, id = link_part.split("#", 1) link = link.strip("/") else: - link, id = text, None + link, id = link_part.strip("/"), None + + # Get configuration for handling broken links + if markata: + config = get_plugin_config(markata) + suppress_patterns = config.get("suppress_patterns", []) + fallback_behavior = config.get("fallback_behavior", "warn") + enable_logging = config.get("enable_logging", True) + else: + config = get_default_config() + suppress_patterns = config.get("suppress_patterns", []) + fallback_behavior = config.get("fallback_behavior", "warn") + enable_logging = config.get("enable_logging", True) # possible_pages = markata.filter( # 
f'str(path).split("/")[-1].split(".")[0].replace("_", "-") == "{link.replace("_", "-")}"', # ) - possible_pages = markata.possible_wikilink.get(link, []) + possible_pages = markata.possible_wikilink.get(link, []) if markata else [] if len(possible_pages) == 1: link = possible_pages[0] elif len(possible_pages) > 1: - if md.options["article"] is None: - debug_value = "UNKNOWN" - else: - debug_value = md.options["article"].get( - "path", - md.options["article"].get( - "title", md.options["article"].get("slug", "") - ), - ) - logger.warning( - f"wikilink [[{text}]] has duplicate matches ({possible_pages}) in file '{debug_value}', defaulting to the first match ({possible_pages[0]})", - ) - link = possible_pages[0] + # Use priority-based resolution instead of simple first match + link = resolve_best_match(link, possible_pages, text, markata, md) else: - if md.options["article"] is None: - debug_value = "UNKNOWN" - else: - debug_value = md.options["article"].get( - "path", - md.options["article"].get( - "title", md.options["article"].get("slug", "") - ), - ) - logger.warning( - f"wikilink [[{text}]] no matches in file '{debug_value}', defaulting to '/{text}'", - ) - link = text + # No matches found - handle according to configuration + should_suppress = should_suppress_warning(link_part, suppress_patterns) + + if ( + not should_suppress + and enable_logging + and fallback_behavior in ["warn", "error"] + ): + if md.options.get("article") is None: + debug_value = "UNKNOWN" + else: + debug_value = md.options["article"].get( + "path", + md.options["article"].get( + "title", md.options["article"].get("slug", "") + ), + ) + + message = f"wikilink [[{text}]] no matches in file '{debug_value}', defaulting to '/{link_part}'" + + if fallback_behavior == "error": + logger.error(message) + else: + logger.warning(message) + + # Fallback to original link text + link = link_part if id and not link.endswith(f"#{id}"): link = f"{link}#{id}" token.attrSet("href", f"/{link}") content_token 
= state.push("text", "", 0) - content_token.content = text + # Use display text if available, otherwise fall back to the link part + content_token.content = display_text if display_text is not None else link_part token = state.push("link_close", "a", -1) - token.content = text + token.content = display_text if display_text is not None else link_part return True diff --git a/markata/plugins/post_template.py b/markata/plugins/post_template.py index 5a037861..0c314442 100644 --- a/markata/plugins/post_template.py +++ b/markata/plugins/post_template.py @@ -241,6 +241,8 @@ from markata import __version__ from markata.hookspec import hook_impl +from markata.plugins.jinja_env import get_template +from markata.plugins.jinja_env import get_templates_mtime if TYPE_CHECKING: from markata import Markata @@ -386,27 +388,15 @@ def dynamic_templates_in_templates_dir(cls, value): return templates_dir -_template_cache = {} - - -def get_template(markata, template): - """Get a template from the cache or compile it.""" - cache_key = str(template) - if cache_key in _template_cache: - return _template_cache[cache_key] - - if isinstance(template, str): - template = markata.jinja_env.get_template(template) - _template_cache[cache_key] = template - return template - - def render_article(markata, cache, article): """Render an article using cached templates.""" + templates_mtime = get_templates_mtime(markata.jinja_env) + key = markata.make_hash( "post_template", __version__, article.key, + str(templates_mtime), # Track template file changes ) html = markata.precache.get(key) @@ -414,12 +404,12 @@ def render_article(markata, cache, article): return html if isinstance(article.template, str): - template = get_template(markata, article.template) + template = get_template(markata.jinja_env, article.template) html = render_template(markata, article, template) if isinstance(article.template, dict): html = { - slug: render_template(markata, article, get_template(markata, template)) + slug: 
render_template(markata, article, get_template(markata.jinja_env, template)) for slug, template in article.template.items() } cache.set(key, html, expire=markata.config.default_cache_expire) @@ -458,9 +448,12 @@ def save(markata: "Markata") -> None: if t.endswith("css") or t.endswith("js") or t.endswith("xsl") ] for template in linked_templates: - template = get_template(markata, template) + template = get_template(markata.jinja_env, template) css = template.render(markata=markata, __version__=__version__) - Path(markata.config.output_dir / Path(template.filename).name).write_text(css) + output_path = Path(markata.config.output_dir / Path(template.filename).name) + current_content = output_path.read_text() if output_path.exists() else "" + if current_content != css: + output_path.write_text(css) @hook_impl() diff --git a/markata/plugins/publish_html.py b/markata/plugins/publish_html.py index f6dd6d3b..3f7566da 100644 --- a/markata/plugins/publish_html.py +++ b/markata/plugins/publish_html.py @@ -167,7 +167,35 @@ def default_output_html(cls, v: Optional[Union[str, Path]], info) -> Optional[Pa def output_html_relative(cls, v: Optional[Path], info) -> Optional[Path]: if v is None: return cls.default_output_html(v, info) - return v + + # If a custom output_html is provided, ensure it's relative to output_dir + markata = info.data.get("markata") + if markata is None: + return v + + output_dir = markata.config.output_dir + + # Convert string to Path if needed + if isinstance(v, str): + v = Path(v) + + # If v is already absolute and within output_dir, keep it + if v.is_absolute(): + try: + v.relative_to(output_dir) + return v + except ValueError: + # Not relative to output_dir, make it so + pass + + # Check if path already starts with output_dir + try: + v.relative_to(output_dir) + # Path is already relative to output_dir + return v + except ValueError: + # Path doesn't start with output_dir, prepend it + return output_dir / v @field_validator("output_html", 
mode="before") @classmethod diff --git a/markata/plugins/redirects.py b/markata/plugins/redirects.py index 87e3ed82..93cbdc6f 100644 --- a/markata/plugins/redirects.py +++ b/markata/plugins/redirects.py @@ -160,14 +160,6 @@ def save(markata: "Markata") -> None: else: raw_redirects = [] - key = markata.make_hash("redirects", "raw_redirects", raw_redirects) - with markata.cache as cache: - cache.get(key) - if cache.get(key) == "done": - return - - cache.set(key, "done", expire=markata.config.default_cache_expire) - redirects = [ Redirect(original=s[0], new=s[1], markata=markata) for r in raw_redirects @@ -178,9 +170,24 @@ def save(markata: "Markata") -> None: template_file = Path(str(markata.config.get("redirect_template"))) else: template_file = DEFAULT_REDIRECT_TEMPLATE + + # Get template mtime to bust cache when template changes + template_mtime = template_file.stat().st_mtime if template_file.exists() else 0 + + key = markata.make_hash("redirects", "raw_redirects", raw_redirects, str(template_mtime)) + with markata.cache as cache: + cache.get(key) + if cache.get(key) == "done": + return + + cache.set(key, "done", expire=markata.config.default_cache_expire) + template = Template(template_file.read_text()) for redirect in redirects: file = markata.config.output_dir / redirect.original.strip("/") / "index.html" file.parent.mkdir(parents=True, exist_ok=True) - file.write_text(template.render(redirect.dict(), config=markata.config)) + new_content = template.render(redirect.dict(), config=markata.config) + current_content = file.read_text() if file.exists() else "" + if current_content != new_content: + file.write_text(new_content) diff --git a/markata/plugins/render_markdown.py b/markata/plugins/render_markdown.py index cfe8a21f..c56c3457 100644 --- a/markata/plugins/render_markdown.py +++ b/markata/plugins/render_markdown.py @@ -104,6 +104,7 @@ import pydantic +from markata import __version__ from markata.hookspec import hook_impl from markata.hookspec import 
register_attr from markata.plugins.md_it_highlight_code import highlight_code @@ -288,7 +289,14 @@ def render_article_parallel(markata, config, cache, article): article.html = "" return article, "" - key = markata.make_hash("render_markdown", "render", content) + key = markata.make_hash( + "render_markdown", + "render", + content, + __version__, + markata.config.render_markdown.backend.value, + str(markata.config.render_markdown.extensions), + ) html_from_cache = markata.precache.get(key) if html_from_cache is not None: diff --git a/markata/plugins/seo.py b/markata/plugins/seo.py index 0064f904..162ecee0 100644 --- a/markata/plugins/seo.py +++ b/markata/plugins/seo.py @@ -174,6 +174,7 @@ def render(markata: Markata) -> None: twitter_card, article.metadata["title"], str(config_seo), + __version__, ) html_from_cache = markata.precache.get(key) diff --git a/markata/plugins/service_worker.py b/markata/plugins/service_worker.py index 04fcdce7..4185ecc6 100644 --- a/markata/plugins/service_worker.py +++ b/markata/plugins/service_worker.py @@ -129,4 +129,6 @@ def save(markata: "Markata") -> None: ) output_file = markata.config.output_dir / "service-worker.js" - output_file.write_text(service_worker_js) + current_content = output_file.read_text() if output_file.exists() else "" + if current_content != service_worker_js: + output_file.write_text(service_worker_js) diff --git a/markata/plugins/to_json.py b/markata/plugins/to_json.py index 58a13827..ac92c54a 100644 --- a/markata/plugins/to_json.py +++ b/markata/plugins/to_json.py @@ -10,4 +10,7 @@ @hook_impl def save(markata: "Markata") -> None: output_file = markata.config.output_dir / "markata.json" - output_file.write_text(json.dumps(markata.to_dict(), default=str)) + new_content = json.dumps(markata.to_dict(), default=str) + current_content = output_file.read_text() if output_file.exists() else "" + if current_content != new_content: + output_file.write_text(new_content) diff --git a/markata/static/css/pagination.css 
b/markata/static/css/pagination.css new file mode 100644 index 00000000..f688fe17 --- /dev/null +++ b/markata/static/css/pagination.css @@ -0,0 +1,180 @@ +/* Pagination Styles */ + +.loading-indicator { + display: flex; + align-items: center; + justify-content: center; + gap: 0.75rem; + padding: 2rem; + color: var(--text-color-muted, #6b7280); +} + +.spinner { + width: 1.5rem; + height: 1.5rem; + border: 2px solid var(--border-color, #e5e7eb); + border-top: 2px solid var(--primary-bg, #3b82f6); + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} + +.pagination-info { + color: var(--text-color-muted, #6b7280); + font-size: 0.875rem; +} + +/* Manual Pagination */ +.pagination { + display: flex; + flex-direction: column; + align-items: center; + gap: 1rem; + margin: 2rem 0; + padding: 1rem; + background: var(--bg-color, #fff); + border-radius: 8px; + border: 1px solid var(--border-color, #e5e7eb); +} + +.pagination-links { + display: flex; + gap: 0.5rem; + flex-wrap: wrap; + justify-content: center; +} + +.page-link { + display: inline-flex; + align-items: center; + justify-content: center; + min-width: 2.5rem; + height: 2.5rem; + padding: 0 0.75rem; + border: 1px solid var(--border-color, #e5e7eb); + border-radius: 6px; + background: var(--bg-color, #fff); + color: var(--text-color, #374151); + text-decoration: none; + font-size: 0.875rem; + font-weight: 500; + transition: all 0.2s ease; +} + +.page-link:hover { + background: var(--hover-bg, #f3f4f6); + border-color: var(--hover-border, #d1d5db); + color: var(--text-color, #374151); +} + +.page-link.current { + background: var(--primary-bg, #3b82f6); + border-color: var(--primary-border, #3b82f6); + color: var(--primary-text, #fff); + font-weight: 600; +} + +.page-link[aria-label="First page"], +.page-link[aria-label="Last page"] { + font-size: 0.75rem; + min-width: auto; + padding: 0 0.5rem; +} + +/* Error and 
End Messages */ +.end-message { + text-align: center; + color: var(--text-color-muted, #6b7280); + font-style: italic; + margin-top: 1rem; +} + +.error-message { + text-align: center; + padding: 2rem; + background: var(--error-bg, #fef2f2); + border: 1px solid var(--error-border, #fecaca); + border-radius: 6px; + color: var(--error-text, #dc2626); + margin: 2rem 0; +} + +.error-message a { + color: var(--error-text, #dc2626); + text-decoration: underline; +} + +.error-message a:hover { + text-decoration: none; +} + +/* Responsive design */ +@media (max-width: 640px) { + .pagination-links { + gap: 0.25rem; + } + + .page-link { + min-width: 2rem; + height: 2rem; + padding: 0 0.5rem; + font-size: 0.75rem; + } + + .page-link[aria-label="First page"], + .page-link[aria-label="Last page"] { + display: none; + } +} + +/* Dark mode support */ +@media (prefers-color-scheme: dark) { + .loading-indicator, + .pagination-info, + .end-message { + color: var(--text-color-muted-dark, #9ca3af); + } + + .spinner { + border-color: var(--border-color-dark, #374151); + border-top-color: var(--primary-bg-dark, #2563eb); + } + + .pagination { + background: var(--bg-color-dark, #1f2937); + border-color: var(--border-color-dark, #374151); + } + + .page-link { + background: var(--bg-color-dark, #1f2937); + border-color: var(--border-color-dark, #374151); + color: var(--text-color-dark, #f9fafb); + } + + .page-link:hover { + background: var(--hover-bg-dark, #374151); + border-color: var(--hover-border-dark, #4b5563); + } + + .error-message { + background: var(--error-bg-dark, #7f1d1d); + border-color: var(--error-border-dark, #991b1b); + color: var(--error-text-dark, #fecaca); + } + + .error-message a { + color: var(--error-text-dark, #fecaca); + } +} + +/* Accessible focus styles */ +.loading-indicator:focus, +.error-message:focus, +.page-link:focus { + outline: 2px solid var(--primary-bg, #3b82f6); + outline-offset: 2px; +} \ No newline at end of file diff --git 
a/markata/static/js/pagination.js b/markata/static/js/pagination.js new file mode 100644 index 00000000..6f222b9c --- /dev/null +++ b/markata/static/js/pagination.js @@ -0,0 +1,213 @@ +// JavaScript-based infinite scroll pagination +class InfiniteScroll { + constructor(paginationData) { + this.currentPage = paginationData.page; + this.totalPages = paginationData.totalPages; + this.totalPosts = paginationData.totalPosts; + this.itemsShown = paginationData.itemsShown; + this.feedName = paginationData.feedName; + this.loading = false; + this.retryCount = 0; + this.maxRetries = 3; + + this.setupObserver(); + + // Check if we need to load more content initially + // (when initial content doesn't fill the viewport) + this.checkInitialFill(); + } + + setupObserver() { + // Create a persistent element at the bottom to observe + this.createPersistentTrigger(); + + this.observer = new IntersectionObserver((entries) => { + if (entries[0].isIntersecting && !this.loading) { + this.loadMore(); + } + }, { + rootMargin: '100px' + }); + + this.observeTrigger(); + } + + createPersistentTrigger() { + // Create a persistent trigger that won't be replaced + this.persistentTrigger = document.createElement('div'); + this.persistentTrigger.id = 'js-scroll-trigger'; + this.persistentTrigger.style.height = '1px'; + this.persistentTrigger.style.width = '100%'; + + // Insert it before the template trigger + const templateTrigger = document.getElementById('scroll-trigger'); + if (templateTrigger) { + templateTrigger.parentNode.insertBefore(this.persistentTrigger, templateTrigger); + } else { + // Fallback: add to end of feed container + const feed = document.getElementById('feed'); + if (feed) { + feed.appendChild(this.persistentTrigger); + } + } + } + + observeTrigger() { + if (this.persistentTrigger && this.observer) { + this.observer.observe(this.persistentTrigger); + } + } + + checkInitialFill() { + // Wait a frame for layout to complete + requestAnimationFrame(() => { + 
this.fillViewportIfNeeded(); + }); + } + + fillViewportIfNeeded() { + // If we're already loading, no more pages, or exceeded retries, stop + if (this.loading || this.currentPage >= this.totalPages || this.retryCount >= this.maxRetries) return; + + // Check if the trigger is visible in the viewport + // (meaning content doesn't fill the page) + if (this.isTriggerVisible()) { + this.loadMore().then((success) => { + if (success) { + // Reset retry count on success + this.retryCount = 0; + // After loading, check again if we need more + // Use requestAnimationFrame to wait for DOM update + requestAnimationFrame(() => { + this.fillViewportIfNeeded(); + }); + } + }); + } + } + + isTriggerVisible() { + if (!this.persistentTrigger) return false; + + const rect = this.persistentTrigger.getBoundingClientRect(); + const viewportHeight = window.innerHeight || document.documentElement.clientHeight; + + // Check if the trigger is within the viewport (with some margin) + return rect.top < viewportHeight + 100; + } + + async loadMore() { + if (this.currentPage >= this.totalPages) return false; + + this.loading = true; + this.showLoading(); + + try { + const nextPage = this.currentPage + 1; + const response = await fetch(`/${this.feedName}/${nextPage}/`); + + // Check if response is ok (status 200-299) + if (!response.ok) { + throw new Error(`HTTP error! 
status: ${response.status}`); + } + + const html = await response.text(); + + const parser = new DOMParser(); + const doc = parser.parseFromString(html, 'text/html'); + const newItems = doc.querySelectorAll('#feed li'); + const container = document.getElementById('feed'); + + if (!container) { + throw new Error('Feed container not found'); + } + + newItems.forEach(item => container.appendChild(item)); + + this.currentPage = nextPage; + this.itemsShown += newItems.length; + + // Update pagination info + this.updatePaginationInfo(); + + // Remove our persistent trigger if this was the last page + if (this.currentPage >= this.totalPages) { + if (this.persistentTrigger) { + this.persistentTrigger.remove(); + } + } + + return true; + + } catch (error) { + console.error('Failed to load more content:', error); + this.retryCount++; + + // Show error message if we've exceeded retries + if (this.retryCount >= this.maxRetries) { + this.showError('Failed to load more content. Please refresh the page.'); + } + + return false; + } finally { + this.loading = false; + this.hideLoading(); + } + } + + showLoading() { + const indicator = document.querySelector('.loading-indicator'); + if (indicator) indicator.style.display = 'flex'; + } + + hideLoading() { + const indicator = document.querySelector('.loading-indicator'); + if (indicator) indicator.style.display = 'none'; + } + + showError(message) { + const container = document.getElementById('feed'); + if (container) { + const errorDiv = document.createElement('div'); + errorDiv.className = 'error-message'; + errorDiv.textContent = message; + container.appendChild(errorDiv); + } + } + + updatePaginationInfo() { + const currentPageEl = document.getElementById('current-page'); + const itemsShownEl = document.getElementById('items-shown'); + + if (currentPageEl) currentPageEl.textContent = this.currentPage; + if (itemsShownEl) itemsShownEl.textContent = this.itemsShown; + } + + // Clean up observer on page unload + destroy() { + if 
(this.observer) { + this.observer.disconnect(); + } + } +} + +// Feature detection and initialization +if ('IntersectionObserver' in window && window.paginationData) { + let infiniteScroll; + + // Initialize when DOM is ready + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', () => { + infiniteScroll = new InfiniteScroll(window.paginationData); + }); + } else { + infiniteScroll = new InfiniteScroll(window.paginationData); + } + + // Clean up on page unload to prevent memory leaks + window.addEventListener('beforeunload', () => { + if (infiniteScroll) { + infiniteScroll.destroy(); + } + }); +} diff --git a/markata/templates/atom.xml b/markata/templates/atom.xml new file mode 100644 index 00000000..050835d8 --- /dev/null +++ b/markata/templates/atom.xml @@ -0,0 +1,50 @@ + + + + {# Normalize once: pydantic Url -> string, strip trailing slash #} + {% set base_url = markata.config.url | string | trim('/') %} + {% set feed_slug = feed.config.slug | string | trim('/') %} + + {{ feed.config.title|e }} + {{ markata.config.description | e }} + {{ markata.config.url | e }} + {{ (base_url ~ '/' ~ feed_slug ~ '/') | e }} + + + + + {% set updated_post = feed.posts[0] if feed.posts else None %} + {% if updated_post and updated_post.date %} + {{ updated_post.date.isoformat() }}Z + {% else %} + {{ datetime.datetime.utcnow().isoformat() }}Z + {% endif %} + + {% for post in feed.posts %} + {% set post_slug = post.slug | string | trim('/') %} + + {{ post.title|e }} + + {{ (base_url ~ '/' ~ post_slug ~ '/') | e }} + + + {% if post.date %} + {{ post.date.isoformat() }}Z + {{ post.date.isoformat() }}Z + {% endif %} + + {% if post.description %} + + {% endif %} + + {% if post.content %} + + {% endif %} + + {% endfor %} + + diff --git a/markata/templates/atom.xsl b/markata/templates/atom.xsl new file mode 100644 index 00000000..389cd800 --- /dev/null +++ b/markata/templates/atom.xsl @@ -0,0 +1,67 @@ +{% extends "base.xsl" %} +{% block content %} +
+
+
+

+ + + + + + + + + + +

+ +

+ + + + + + + + + + +

+ + + + + + + + + + + + + + + + + + + + + + Visit Website → + +
+ +
+

Recent Items

+
    + + + {% include 'rss_card.html' %} + +
+
+
+
+{% endblock %} diff --git a/markata/templates/didyoumean_partial.html b/markata/templates/didyoumean_partial.html index 8ab727fe..59702a99 100644 --- a/markata/templates/didyoumean_partial.html +++ b/markata/templates/didyoumean_partial.html @@ -13,8 +13,6 @@ + +{% if has_next %} +
+
+{% endif %} \ No newline at end of file diff --git a/markata/templates/feed_partial.html b/markata/templates/feed_partial.html index d9a14deb..eb8d6f5c 100644 --- a/markata/templates/feed_partial.html +++ b/markata/templates/feed_partial.html @@ -1,12 +1,96 @@

{{ title }}

+ {% if pagination_enabled %} +
+ Page {{ page }} of {{ total_pages }} + Showing {{ posts|length }} of {{ total_posts }} items +
+ {% endif %} + + {% if pagination_enabled and pagination_context.pagination_type == 'js' %} + + + {% endif %}
+ + {% if pagination_enabled %} + + {# Manual pagination controls - shown for all types as fallback #} + {% include "pagination_controls.html" %} + + {% if pagination_context.pagination_type == 'htmx' %} + + {% if has_next %} +
+
+ {% endif %} + + + + + + {% endif %} + + {% if pagination_context.pagination_type == 'js' %} + +
+ + + + + {% endif %} + + {% endif %}
diff --git a/markata/templates/pagination_controls.html b/markata/templates/pagination_controls.html new file mode 100644 index 00000000..86423e82 --- /dev/null +++ b/markata/templates/pagination_controls.html @@ -0,0 +1,25 @@ +{# Manual pagination controls - reusable fragment #} +{# Used directly for manual pagination, and as fallback for js/htmx #} +
+ {% if prev_page %} + {% if page > 2 %} + + ← Previous + + {% endif %} + + + {{ page }} / {{ total_pages }} + + + {% if has_next %} + + Next → + + {% endif %} +
diff --git a/markata/templates/rss.xml b/markata/templates/rss.xml index b0348374..0e028256 100644 --- a/markata/templates/rss.xml +++ b/markata/templates/rss.xml @@ -2,7 +2,7 @@ - {{ feed.config.name | e }} + {{ feed.config.title | e }} {{ markata.config.url | e }} {{ markata.config.description | e }} Markata diff --git a/markata/templates/rss_card.html b/markata/templates/rss_card.html index cc7f1ec8..f4d4f5d9 100644 --- a/markata/templates/rss_card.html +++ b/markata/templates/rss_card.html @@ -1,16 +1,47 @@
  • + - + + + + + + + + + + + + + + +

    - + +

    +

    - + + + + + + + + + + + +

    +
  • + diff --git a/pyproject.toml b/pyproject.toml index 404fb173..75240649 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,6 @@ dependencies = [ "pathspec", "pillow", "pluggy", - "polyfactory", "pydantic>=2.0", "pydantic_extra_types>=2.0", "pydantic_settings", @@ -65,7 +64,7 @@ description = "Static site generator plugins all the way down." keywords = ["static-site"] name = "markata" readme = "README.md" -requires-python = ">=3.6" +requires-python = ">=3.10" [[project.authors]] name = "Waylon Walker" @@ -207,3 +206,12 @@ unfixable = [] [tool.ruff.lint.isort] force-single-line = true + +[dependency-groups] +dev = [ + "psutil>=7.2.1", + "pytest>=9.0.2", + "pytest-mock>=3.15.1", + "pytest-tmp-files>=0.0.2", + "ruff>=0.14.13", +] diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 00000000..beb8440b --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,70 @@ +# Development Scripts + +This directory contains utility scripts for Markata developers. + +## HTMX Hash Management + +### `add_htmx_hash.py` + +A utility script to add HTMX integrity hashes to feeds plugin when new HTMX versions are released. 
+ +**Usage:** +```bash +# Add a new HTMX version hash +python scripts/add_htmx_hash.py 2.0.8 + +# List all available versions and their hash status +python scripts/add_htmx_hash.py --list + +# Add hashes for all missing versions +python scripts/add_htmx_hash.py --add-all + +# Replace an existing hash without prompting +python scripts/add_htmx_hash.py 2.0.8 --replace + +# Show detailed output +python scripts/add_htmx_hash.py --list --verbose + +# Add all versions, skipping failed ones +python scripts/add_htmx_hash.py --add-all --skip-failed + +# Show help +python scripts/add_htmx_hash.py --help +``` + +**Features:** +- **Smart Hash Retrieval**: First tries GitHub API for official hashes, falls back to unpkg.com +- **Version Management**: Lists all available HTMX versions from GitHub releases +- **Batch Operations**: Add all missing versions with `--add-all` +- **Status Tracking**: See which versions have hashes and which are missing +- **Safety**: Prompts before replacing existing hashes (unless `--replace` used) +- **Verbose Mode**: Detailed output for debugging and monitoring + +**What it does:** +1. Fetches all HTMX versions from GitHub releases API +2. For single versions: Gets hash from GitHub API or downloads from unpkg.com and calculates SHA-256 +3. Updates `HTMX_INTEGRITY_HASHES` dictionary in `markata/plugins/feeds.py` +4. 
For batch operations: Processes all missing versions automatically + +**When to use:** +- **Single Version**: When a new HTMX version is released +- **List Mode**: To see current hash coverage and available versions +- **Batch Mode**: To populate hashes for many versions at once +- **Development**: When setting up a new development environment + +**Examples:** +```bash +# Quick check of current status +python scripts/add_htmx_hash.py --list + +# Add the latest version +python scripts/add_htmx_hash.py 2.0.7 + +# Populate all missing hashes (great for initial setup) +python scripts/add_htmx_hash.py --add-all + +# Add with verbose output to see what's happening +python scripts/add_htmx_hash.py 2.0.7 --verbose +``` + +This ensures that HTMX files downloaded by Markata are verified for integrity and provides developers with easy tools to maintain the hash database. \ No newline at end of file diff --git a/scripts/add_htmx_hash.py b/scripts/add_htmx_hash.py new file mode 100755 index 00000000..b92406a9 --- /dev/null +++ b/scripts/add_htmx_hash.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +""" +Development script to add HTMX integrity hashes to feeds plugin. + +This script should be used by developers when a new HTMX version is released. +It will: +1. Download/Get specified HTMX version +2. Calculate its SHA-256 hash +3. 
"""
Development script to add HTMX integrity hashes to feeds plugin.

This script should be used by developers when a new HTMX version is released.
It will:
1. Download/Get specified HTMX version
2. Calculate its SHA-256 hash
3. Add it to HTMX_INTEGRITY_HASHES dictionary in feeds.py

Usage:
    python scripts/add_htmx_hash.py 2.0.8
    python scripts/add_htmx_hash.py --list
    python scripts/add_htmx_hash.py --add-all
    python scripts/add_htmx_hash.py 1.9.10 --replace
"""

import argparse
import hashlib
import json
import re
import sys
from http.client import HTTPException
from pathlib import Path
from typing import Iterable
from typing import List
from typing import Optional
from typing import Set
from typing import Tuple
from urllib.error import HTTPError
from urllib.error import URLError
from urllib.parse import quote
from urllib.request import Request
from urllib.request import urlopen

GITHUB_RELEASES_API = "https://api.github.com/repos/bigskysoftware/htmx/releases"
UNPKG_URL_TEMPLATE = "https://unpkg.com/htmx.org@{version}/dist/htmx.min.js"
USER_AGENT = "Markata-Dev/1.0"
REQUEST_TIMEOUT = 10  # seconds, applied to every HTTP request
RELEASES_PER_PAGE = 100  # largest page size the GitHub REST API accepts
MAX_RELEASE_PAGES = 10  # safety bound so a misbehaving API cannot loop forever
LIST_LIMIT = 20  # number of versions printed by --list

# resolve() keeps the path correct even when __file__ is relative (e.g. the
# script is launched from inside scripts/ on older interpreters).
FEEDS_FILE = (
    Path(__file__).resolve().parent.parent / "markata" / "plugins" / "feeds.py"
)

# Everything a failed fetch/decode can raise: URLError (an OSError) covers
# connection and HTTP errors, OSError covers socket timeouts, ValueError covers
# bad JSON / bad UTF-8, HTTPException covers truncated responses.
_FETCH_ERRORS = (URLError, OSError, ValueError, HTTPException)

# Versions end up inside URLs and regexes, so only accept tag-like text.
_VERSION_RE = re.compile(r"[0-9A-Za-z][0-9A-Za-z.+-]*")
_SHA256_RE = re.compile(r"[0-9a-fA-F]{64}")
_ENTRY_KEY_RE = re.compile(r'"([^"\n]+)"\s*:')
# head = assignment up to "{", body = dict contents, tail = closing "}".
# An optional type annotation between the name and "=" is tolerated.
_HASHES_DICT_RE = re.compile(
    r"(?P<head>HTMX_INTEGRITY_HASHES(?:\s*:[^=\n]+)?\s*=\s*\{)"
    r"(?P<body>[^}]*)"
    r"(?P<tail>\})"
)


def version_sort_key(version: str) -> Tuple[Tuple[int, ...], int, str, Tuple[int, ...]]:
    """Return a sort key that orders version strings semantically.

    The numeric release part is compared first; a final release sorts above
    any pre-release of the same number (``2.0.0`` > ``2.0.0-beta1``), and
    pre-releases are ordered by label and then by their numbers.
    """
    match = re.match(r"(\d+(?:\.\d+)*)(.*)", version, re.DOTALL)
    if match is None:
        return ((), 0, version, ())
    release = tuple(int(part) for part in match.group(1).split("."))
    suffix = match.group(2)
    label = "".join(re.findall(r"[A-Za-z]+", suffix)).lower()
    numbers = tuple(int(number) for number in re.findall(r"\d+", suffix))
    return (release, 0 if suffix else 1, label, numbers)


def parse_existing_versions(source: str) -> Set[str]:
    """Return the version keys found in HTMX_INTEGRITY_HASHES within *source*.

    Returns an empty set when the dictionary is not present.
    """
    match = _HASHES_DICT_RE.search(source)
    if match is None:
        return set()
    return set(_ENTRY_KEY_RE.findall(match.group("body")))


def missing_versions(versions: Iterable[str], existing: Set[str]) -> List[str]:
    """Return the items of *versions* that are not in *existing*, in order."""
    return [version for version in versions if version not in existing]


def upsert_hash_entry(source: str, version: str, sha256_hash: str) -> Optional[str]:
    """Return *source* with ``version -> sha256_hash`` set in the hashes dict.

    An existing entry for *version* is replaced in place; otherwise a new
    entry is appended before the closing brace. Returns None when the
    HTMX_INTEGRITY_HASHES dictionary cannot be found.
    """
    match = _HASHES_DICT_RE.search(source)
    if match is None:
        return None

    body = match.group("body")
    new_entry = f'"{version}": "{sha256_hash}"'
    # re.escape: "." in a version must not match arbitrary characters.
    entry_re = re.compile(rf'"{re.escape(version)}"\s*:\s*"[^"]*"')

    if entry_re.search(body):
        # A callable replacement avoids template (backslash) interpretation.
        new_body = entry_re.sub(lambda _match: new_entry, body, count=1)
    else:
        indent_match = re.search(r'^([ \t]+)"', body, re.MULTILINE)
        indent = indent_match.group(1) if indent_match else "    "
        kept = body.rstrip()
        trailing = body[len(kept):]
        # Keep whatever indentation preceded the closing brace.
        closer_indent = trailing.rpartition("\n")[2] if "\n" in trailing else ""
        # Only add a separator when the last entry has no trailing comma,
        # otherwise the result would contain ",," and break feeds.py.
        if kept and not kept.endswith(","):
            kept += ","
        new_body = f"{kept}\n{indent}{new_entry},\n{closer_indent}"

    return source[: match.start("body")] + new_body + source[match.end("body"):]


def _http_get(url: str) -> bytes:
    """Fetch *url* and return the raw response body."""
    request = Request(url, headers={"User-Agent": USER_AGENT})
    with urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        return response.read()


def _fetch_releases() -> List[dict]:
    """Return every HTMX release object from GitHub, following pagination.

    Raises one of ``_FETCH_ERRORS`` on network or decoding failure.
    """
    releases: List[dict] = []
    for page in range(1, MAX_RELEASE_PAGES + 1):
        url = f"{GITHUB_RELEASES_API}?per_page={RELEASES_PER_PAGE}&page={page}"
        batch = json.loads(_http_get(url).decode("utf-8"))
        if not isinstance(batch, list):
            raise ValueError("unexpected response from GitHub releases API")
        releases.extend(item for item in batch if isinstance(item, dict))
        if len(batch) < RELEASES_PER_PAGE:  # short page means last page
            break
    return releases


def _asset_sha256(release: dict) -> Optional[str]:
    """Return the SHA-256 hex digest of the htmx.min.js asset, if published."""
    for asset in release.get("assets") or []:
        if not isinstance(asset, dict) or asset.get("name") != "htmx.min.js":
            continue
        digest = asset.get("digest") or ""
        if digest.startswith("sha256:"):
            hex_digest = digest[len("sha256:"):]
            if _SHA256_RE.fullmatch(hex_digest):
                return hex_digest.lower()
    return None


def _hash_from_unpkg(version: str, verbose: bool) -> Optional[str]:
    """Download htmx.min.js from unpkg and return its SHA-256, or None."""
    url = UNPKG_URL_TEMPLATE.format(version=version)
    if verbose:
        print(f"Downloading HTMX {version} from {url}")
    try:
        content = _http_get(url)
    except _FETCH_ERRORS as e:
        print(f"Error: Failed to download HTMX: {e}")
        return None
    if not content:
        # Never record the hash of an empty response as a valid integrity hash.
        print("Error: Failed to download HTMX: empty response")
        return None
    sha256_hash = hashlib.sha256(content).hexdigest()
    if verbose:
        print(f"SHA-256 hash: {sha256_hash}")
    return sha256_hash


def _read_feeds_source() -> Optional[str]:
    """Return the text of feeds.py, or None (after printing why) on failure."""
    try:
        return FEEDS_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        print(f"Error: Could not find feeds.py at {FEEDS_FILE}")
    except OSError as e:
        print(f"Error: {e}")
    return None


def _load_existing_versions() -> Optional[Set[str]]:
    """Return versions already hashed in feeds.py, or None if unreadable."""
    source = _read_feeds_source()
    if source is None:
        return None
    return parse_existing_versions(source)


def _confirm(prompt: str) -> bool:
    """Ask a yes/no question; anything but "y" (or closed stdin) means no."""
    try:
        return input(prompt).strip().lower() == "y"
    except EOFError:
        return False


def get_htmx_versions() -> List[str]:
    """Get list of all available HTMX versions from GitHub releases API.

    Returns versions (tag names without the leading "v"), newest first.
    Returns an empty list, after printing the error, when the fetch fails.
    """
    try:
        releases = _fetch_releases()
    except _FETCH_ERRORS as e:
        print(f"Error fetching HTMX versions: {e}")
        return []

    versions = set()
    for release in releases:
        tag = release.get("tag_name")
        if isinstance(tag, str) and tag.startswith("v") and len(tag) > 1:
            versions.add(tag[1:])  # Remove 'v' prefix
    return sorted(versions, key=version_sort_key, reverse=True)


def get_htmx_hash_from_github(version: str) -> Optional[str]:
    """Get HTMX hash directly from GitHub releases API.

    Looks the release up by tag so the result does not depend on which
    page of the release list the version happens to be on. Returns the
    SHA-256 hex digest of the ``htmx.min.js`` asset, or None when the
    release, the asset, or its digest is unavailable.
    """
    # NOTE(review): assumes the release asset is byte-identical to the file
    # feeds.py downloads at build time — confirm against the plugin's URL.
    url = f"{GITHUB_RELEASES_API}/tags/v{quote(version, safe='')}"
    try:
        release = json.loads(_http_get(url).decode("utf-8"))
    except HTTPError as e:
        if e.code != 404:  # 404 simply means "no such release"
            print(f"Error fetching hash from GitHub: {e}")
        return None
    except _FETCH_ERRORS as e:
        print(f"Error fetching hash from GitHub: {e}")
        return None

    if not isinstance(release, dict):
        return None
    return _asset_sha256(release)


def add_htmx_hash(version: str, replace: bool = False, verbose: bool = False) -> bool:
    """Add HTMX version hash to feeds.py. Returns True if successful.

    Args:
        version: HTMX version without the "v" prefix (e.g. "2.0.8").
        replace: Overwrite an existing entry without prompting.
        verbose: Print progress details.
    """
    if not isinstance(version, str) or not _VERSION_RE.fullmatch(version):
        print(f"Error: Invalid HTMX version: {version!r}")
        return False

    # First try to get hash from GitHub API, then fall back to unpkg.com
    sha256_hash = get_htmx_hash_from_github(version)
    if sha256_hash:
        if verbose:
            print(f"Got HTMX {version} hash from GitHub API")
    else:
        sha256_hash = _hash_from_unpkg(version, verbose)
        if sha256_hash is None:
            return False

    source = _read_feeds_source()
    if source is None:
        return False

    if _HASHES_DICT_RE.search(source) is None:
        print("Error: Could not find HTMX_INTEGRITY_HASHES in feeds.py")
        return False

    if version in parse_existing_versions(source):
        if verbose:
            print(f"Warning: HTMX version {version} already exists in hashes")
        if not replace and not _confirm("Replace existing hash? [y/N]: "):
            print("Cancelled.")
            return False

    updated_content = upsert_hash_entry(source, version, sha256_hash)
    if updated_content is None:
        print("Error: Could not find HTMX_INTEGRITY_HASHES in feeds.py")
        return False

    try:
        FEEDS_FILE.write_text(updated_content, encoding="utf-8")
    except OSError as e:
        print(f"Error: {e}")
        return False

    print(f"✅ Added HTMX {version} hash to feeds.py")
    if verbose:
        print(f"📝 File updated: {FEEDS_FILE}")
    return True


def add_all_htmx_versions(verbose: bool = False, skip_failed: bool = False) -> None:
    """Add hashes for all available HTMX versions.

    Args:
        verbose: Print the versions already present and per-version progress.
        skip_failed: Keep going after a version fails instead of stopping.
    """
    versions = get_htmx_versions()
    if not versions:
        print("Could not fetch HTMX versions")
        return

    print(f"Found {len(versions)} HTMX versions")

    existing_versions = _load_existing_versions()
    if existing_versions is None:
        return

    if verbose:
        print(f"Existing versions: {sorted(existing_versions)}")

    new_versions = missing_versions(versions, existing_versions)
    if not new_versions:
        print("All available versions already have hashes!")
        return

    print(f"Adding {len(new_versions)} new versions...")

    success_count = 0
    for version in new_versions:
        if verbose:
            print(f"\nProcessing {version}...")

        if add_htmx_hash(version, replace=True, verbose=False):
            success_count += 1
        elif not skip_failed:
            print(f"Failed to add {version}, stopping. Use --skip-failed to continue.")
            break

    print(f"\n✅ Successfully added {success_count}/{len(new_versions)} versions")


def list_htmx_versions(verbose: bool = False) -> None:
    """List all available HTMX versions and whether feeds.py has their hash."""
    versions = get_htmx_versions()
    if not versions:
        print("Could not fetch HTMX versions")
        return

    existing_versions = _load_existing_versions()
    if existing_versions is None:
        return

    # Count what is actually missing; feeds.py may hold versions that are not
    # in the fetched list, so a plain length difference would be wrong.
    missing = missing_versions(versions, existing_versions)

    print("HTMX Versions:")
    print("=" * 50)

    for version in versions[:LIST_LIMIT]:  # Cap output to keep it readable
        status = "✅" if version in existing_versions else "❌"
        print(f"  {status} {version}")

    if len(versions) > LIST_LIMIT:
        print(f"  ... and {len(versions) - LIST_LIMIT} more versions")

    print(
        f"\nSummary: {len(existing_versions)} versions have hashes, "
        f"{len(missing)} missing"
    )

    if verbose:
        print(f"\nAll versions: {versions}")
        print(f"Existing versions: {sorted(existing_versions)}")
        print(f"Missing versions: {missing}")


def main(argv: Optional[List[str]] = None) -> None:
    """Parse command-line arguments and run the requested action.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Exits with status 1 when adding a single version fails.
    """
    parser = argparse.ArgumentParser(
        description="Add HTMX integrity hash to feeds.py",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    parser.add_argument("version", nargs="?", help="HTMX version (e.g., 2.0.8, 1.9.10)")

    parser.add_argument(
        "--list",
        action="store_true",
        help="List all available HTMX versions and their hash status",
    )

    parser.add_argument(
        "--add-all",
        action="store_true",
        help="Add hashes for all missing HTMX versions",
    )

    parser.add_argument(
        "--replace", action="store_true", help="Replace existing hash without prompting"
    )

    parser.add_argument("--verbose", action="store_true", help="Show detailed output")

    parser.add_argument(
        "--skip-failed",
        action="store_true",
        help="Continue adding versions even if some fail (used with --add-all)",
    )

    args = parser.parse_args(argv)

    if args.list:
        list_htmx_versions(args.verbose)
    elif args.add_all:
        add_all_htmx_versions(args.verbose, args.skip_failed)
    elif args.version:
        if not add_htmx_hash(args.version, args.replace, args.verbose):
            # sys.exit, not the site-provided exit(), which may be absent.
            sys.exit(1)
    else:
        parser.print_help()
        print("\nExamples:")
        print("  python add_htmx_hash.py 2.0.8              # Add specific version")
        print("  python add_htmx_hash.py --list             # List all versions")
        print("  python add_htmx_hash.py --add-all          # Add all missing versions")
        print("  python add_htmx_hash.py 2.0.8 --replace    # Replace existing hash")


if __name__ == "__main__":
    main()
a/tests/test_feeds.py +++ b/tests/test_feeds.py @@ -1,7 +1,8 @@ import rich import markata -from markata.plugins.feeds import Feed, Feeds +from markata.plugins.feeds import Feed +from markata.plugins.feeds import Feeds class DummyMarkata: diff --git a/tests/test_feeds_security.py b/tests/test_feeds_security.py new file mode 100644 index 00000000..e649e2e7 --- /dev/null +++ b/tests/test_feeds_security.py @@ -0,0 +1,200 @@ +import pytest +import tempfile +import shutil +from pathlib import Path +from unittest.mock import Mock, patch +import hashlib + +from markata.plugins.feeds import Feed, _sanitize_feed_slug, _download_htmx_if_needed +from markata import Markata + + +class TestSecurity: + """Test suite for security vulnerabilities in feeds plugin.""" + + def test_path_traversal_prevention(self): + """Test that path traversal attacks are prevented in feed slugs.""" + + # Malicious slugs that should be rejected + malicious_slugs = [ + "../../../etc/passwd", + "..\\..\\windows\\system32\\config\\sam", + "normal/../../../etc/passwd", + "normal\\..\\..\\windows\\system32", + "etc/passwd", + "C:\\Windows\\System32", + "/etc/shadow", + "", + ".", + "./hidden", + "hidden/.", + ] + + for slug in malicious_slugs: + with pytest.raises( + ValueError, match=r"(Invalid characters|cannot be empty)" + ): + _sanitize_feed_slug(slug) + + def test_safe_slug_validation(self): + """Test that safe slugs are allowed.""" + + safe_slugs = [ + "blog", + "my-feed", + "news_posts", + "test123", + "a", + "my_blog_posts_2023", + "feed-with-dashes", + ] + + for slug in safe_slugs: + result = _sanitize_feed_slug(slug) + assert result == slug + + def test_htmx_integrity_verification(self): + """Test that HTMX download verifies file integrity.""" + + # Mock the responses with wrong hash + mock_content = b"malicious javascript content" + mock_response = Mock() + mock_response.read.return_value = mock_content + + with patch("markata.plugins.feeds.urlopen", return_value=mock_response): + with 
patch("pathlib.Path.exists", return_value=False): + with patch("pathlib.Path.parent"): + with patch("pathlib.Path.write_bytes"): + mock_markata = Mock() + mock_markata.config.htmx_version = "1.9.10" + mock_markata.config.output_dir = "/tmp/test" + + with pytest.raises(RuntimeError, match="HTMX download failed"): + _download_htmx_if_needed(mock_markata) + + def test_htmx_timeout_protection(self): + """Test that HTMX download has timeout protection.""" + + mock_markata = Mock() + mock_markata.config.htmx_version = "1.9.10" + mock_markata.config.output_dir = "/tmp/test" + + # Mock urlopen to raise timeout + with patch( + "markata.plugins.feeds.urlopen", + side_effect=TimeoutError("Request timed out"), + ): + with pytest.raises(RuntimeError, match="HTMX download failed"): + _download_htmx_if_needed(mock_markata) + + def test_xss_prevention_in_template_context(self): + """Test that template context doesn't contain dangerous config data.""" + + # Create a feed with potentially dangerous config + dangerous_config = { + "pagination_type": "js", + "posts_per_page": 10, + "template": '', + "card_template": "dangerous-template.html", + "xss_payload": '', + "admin_password": "secret123", + "api_key": "sk-1234567890", + } + + # Safe config should only include essential pagination settings + safe_config = { + "pagination_type": dangerous_config["pagination_type"], + "posts_per_page": dangerous_config["posts_per_page"], + "template": dangerous_config["template"], + } + + # Verify only safe keys are included + for key in dangerous_config: + if key not in safe_config: + assert key not in safe_config, ( + f"Dangerous key '{key}' should not be in safe config" + ) + + def test_canonical_url_sanitization(self): + """Test that canonical URLs use sanitized slugs.""" + + mock_markata = Mock() + mock_markata.config.url = "https://example.com" + + # Test with safe slug + safe_slug = "my-blog-feed" + feed_config = Mock() + feed_config.slug = safe_slug + + feed = Feed(config=feed_config, 
markata=mock_markata) + + # The canonical URL should use the safe slug + expected_url = f"https://example.com/{safe_slug}/" + # This would be tested in actual template rendering + + def test_feed_file_path_security(self): + """Test that feed file paths cannot escape output directory.""" + + with tempfile.TemporaryDirectory() as temp_dir: + output_dir = Path(temp_dir) + + # Try to create a feed with malicious slug + malicious_slugs = [ + "../outside", + "normal/../../../etc/passwd", + "normal\\..\\..\\windows\\system32", + ] + + for malicious_slug in malicious_slugs: + with pytest.raises(ValueError): + _sanitize_feed_slug(malicious_slug) + + # Ensure no files can be created outside output directory + safe_slug = _sanitize_feed_slug("safe-feed") + file_path = output_dir / safe_slug / "index.html" + + # Verify path is within output directory + assert file_path.resolve().is_relative_to(output_dir.resolve()) + + def test_template_injection_prevention(self): + """Test that template injection is prevented in feed names.""" + + dangerous_names = [ + "{{7*7}}", # Template injection + "${7*7}", # Expression injection + "", + "javascript:void(0)", + "data:text/html,", + ] + + for dangerous_name in dangerous_names: + # These should be sanitized or rejected + sanitized = _sanitize_feed_slug(dangerous_name) + # Should either be rejected or sanitized to safe version + assert "{{" not in sanitized + assert "}}" not in sanitized + assert "