diff --git a/.yamllint.yml b/.yamllint.yml new file mode 100644 index 0000000..a89f5d4 --- /dev/null +++ b/.yamllint.yml @@ -0,0 +1,6 @@ +--- +extends: default +rules: + truthy: disable + line-length: + max: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..9e500f2 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [0.2.0] - 2026-03-23 + +### Fixed + +- Prevent orphaned headings at page breaks — headings no longer render alone at the bottom of a page with their content flowing to the next page. Uses `CondPageBreak`, `KeepTogether`, and `keepWithNext` for robust prevention. Thanks to [@0xlaveen](https://github.com/0xlaveen) for identifying this issue and proposing the fix in [#1](https://github.com/araa47/markpdf/pull/1). + +### Added + +- Tests for heading orphan prevention (structural and integration). + +## [0.1.0] - 2026-03-22 + +### Added + +- Initial release: markdown to PDF with light/dark themes, code blocks, tables, lists, images, blockquotes, task lists, extended formatting, and async remote image fetching. diff --git a/README.md b/README.md index d6995cc..a655228 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,21 @@ -# markpdf +
+
+
+ Beautiful PDFs from markdown. One command, zero config. +
+ + + +--- ```bash markpdf report.md @@ -8,18 +23,24 @@ markpdf report.md ## Install -Agent skill (Claude Code, Cursor, Codex, Gemini CLI): +**Agent skill** (Claude Code, Cursor, Codex, Gemini CLI): ```bash npx skills add araa47/markpdf ``` -CLI: +**CLI**: ```bash uv tool install git+https://github.com/araa47/markpdf ``` +Or with pip: + +```bash +pip install markpdf +``` + ## Usage ```bash @@ -27,6 +48,7 @@ markpdf report.md # creates report.pdf markpdf report.md --dark # dark mode markpdf report.md -o final.pdf # custom output path markpdf report.md -k # keep sections on same page +markpdf report.md -v # verbose output ``` ## Output @@ -48,13 +70,33 @@ markpdf report.md -k # keep sections on same page > Source: [`tests/fixtures/showcase.md`](tests/fixtures/showcase.md) | Full PDFs: [`examples/`](examples/) +## Features + +- **Full markdown** -- headers, lists, tables, code blocks, blockquotes, images, task lists +- **Extended syntax** -- `==highlight==`, `^super^`, `~sub~`, `~~strike~~` +- **Light & dark themes** -- shadcn/ui zinc palette +- **Smart page breaks** -- headings stay with their content, no orphans +- **Remote images** fetched concurrently +- **Async I/O** with optional uvloop +- **Single command**, agent-friendly -- no browser, no LaTeX, no config + ## Why markpdf? -Most messaging apps (Slack, Discord, Teams, WhatsApp, email) don't render markdown. `markpdf` turns it into a polished PDF — no browser, no LaTeX, no config. +Most messaging apps (Slack, Discord, Teams, WhatsApp, email) don't render markdown. `markpdf` turns it into a polished PDF -- no browser, no LaTeX, no config. + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup. + +```bash +uv sync --all-extras --dev +uv run pytest +``` + +## Changelog + +See [CHANGELOG.md](CHANGELOG.md) for release history. + +## License -- Full markdown — headers, lists, tables, code blocks, blockquotes, images, task lists -- Extended syntax — `==highlight==`, `^super^`, `~sub~`, `~~strike~~` -- Light & dark themes — shadcn/ui zinc palette -- Remote images fetched concurrently -- Async I/O with optional uvloop -- Single binary-style command, agent-friendly +[MIT](LICENSE) diff --git a/assets/logo.png b/assets/logo.png new file mode 100644 index 0000000..9ea4848 Binary files /dev/null and b/assets/logo.png differ diff --git a/ignore-spelling-words.txt b/ignore-spelling-words.txt new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index 63c52c6..2d16c4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "markpdf" -version = "0.1.0" +version = "0.2.0" description = "Agent-friendly markdown to PDF. Beautiful docs from the terminal." readme = "README.md" license = "MIT" diff --git a/src/markpdf/cli.py b/src/markpdf/cli.py index 2c91701..af65278 100644 --- a/src/markpdf/cli.py +++ b/src/markpdf/cli.py @@ -10,7 +10,7 @@ import aiohttp import typer -from .parser import BLOCK_IMAGE, HEADER_BLOCKS, parse_markdown +from .parser import BLOCK_IMAGE, parse_markdown from .renderer import build_story, create_styles, group_sections, render_pdf from .themes import THEME_DARK, THEME_LIGHT @@ -23,13 +23,9 @@ ) -async def fetch_remote_image( - url: str, session: aiohttp.ClientSession -) -> str | None: +async def fetch_remote_image(url: str, session: aiohttp.ClientSession) -> str | None: try: - async with session.get( - url, timeout=aiohttp.ClientTimeout(total=15) - ) as resp: + async with session.get(url, timeout=aiohttp.ClientTimeout(total=15)) as resp: if resp.status != 200: return None data = await resp.read() @@ -57,9 +53,7 @@ async def prefetch_images( return {} if verbose: - print( - f" Fetching {len(remote_urls)} remote image(s) concurrently..." - ) + print(f" Fetching {len(remote_urls)} remote image(s) concurrently...") resolved: dict[str, str | None] = {} async with aiohttp.ClientSession() as session: @@ -110,9 +104,7 @@ async def convert( remote_cache = await prefetch_images(blocks, verbose) styles = create_styles(theme) - story = build_story( - blocks, styles, theme, md_path, remote_cache, verbose - ) + story = build_story(blocks, styles, theme, md_path, remote_cache, verbose) if keep_together: story = group_sections(story) diff --git a/src/markpdf/parser.py b/src/markpdf/parser.py index da22b3c..c7776e7 100644 --- a/src/markpdf/parser.py +++ b/src/markpdf/parser.py @@ -80,9 +80,7 @@ def parse_markdown(content: str, verbose: bool = False) -> list[tuple[str, Any]] while i < len(lines) and not lines[i].strip().startswith("```"): code_lines.append(lines[i]) i += 1 - blocks.append( - (BLOCK_CODE, {"lang": lang, "code": "\n".join(code_lines)}) - ) + blocks.append((BLOCK_CODE, {"lang": lang, "code": "\n".join(code_lines)})) if verbose: print(f" [Code] {len(code_lines)} lines ({lang or 'plain'})") i += 1 @@ -91,16 +89,12 @@ def parse_markdown(content: str, verbose: bool = False) -> list[tuple[str, Any]] if "|" in stripped and stripped.startswith("|"): table_lines = [line] i += 1 - while ( - i < len(lines) and "|" in lines[i].strip() and lines[i].strip() - ): + while i < len(lines) and "|" in lines[i].strip() and lines[i].strip(): table_lines.append(lines[i]) i += 1 if len(table_lines) >= 2: headers, rows = parse_table(table_lines) - blocks.append( - (BLOCK_TABLE, {"headers": headers, "rows": rows}) - ) + blocks.append((BLOCK_TABLE, {"headers": headers, "rows": rows})) continue if stripped.startswith(">"): @@ -116,9 +110,7 @@ def parse_markdown(content: str, verbose: bool = False) -> list[tuple[str, Any]] while i < len(lines): item_line = lines[i].strip() if re.match(r"^[-*+]\s", item_line): - task_match = re.match( - r"^[-*+]\s+\[([ xX])\]\s*(.*)$", item_line - ) + task_match = re.match(r"^[-*+]\s+\[([ xX])\]\s*(.*)$", item_line) if task_match: checked = task_match.group(1).lower() == "x" prefix = "\u2611 " if checked else "\u2610 " diff --git a/src/markpdf/renderer.py b/src/markpdf/renderer.py index fabb03b..8da58e8 100644 --- a/src/markpdf/renderer.py +++ b/src/markpdf/renderer.py @@ -2,7 +2,7 @@ import re from pathlib import Path -from typing import Any +from typing import Any, TypedDict from PIL import Image as PILImage from reportlab.lib import colors @@ -11,6 +11,7 @@ from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet from reportlab.lib.units import inch from reportlab.platypus import ( + CondPageBreak, HRFlowable, Image, KeepTogether, @@ -40,7 +41,16 @@ BLOCK_TABLE, ) -HEADING_DEFS = [ + +class _HeadingDef(TypedDict): + size: int + tk: str + sb: int + sa: int + ld: int + + +HEADING_DEFS: list[_HeadingDef] = [ {"size": 30, "tk": "h1", "sb": 20, "sa": 6, "ld": 36}, {"size": 22, "tk": "h2", "sb": 24, "sa": 4, "ld": 28}, {"size": 17, "tk": "h3", "sb": 18, "sa": 4, "ld": 23}, @@ -80,9 +90,7 @@ def process_inline_formatting(text: str, theme: dict[str, str | None]) -> str: text = re.sub(r"___(.+?)___", r"\1", text) text = re.sub(r"\*\*(.+?)\*\*", r"\1", text) text = re.sub(r"__(.+?)__", r"\1", text) - text = re.sub( - r"(?\1", text - ) + text = re.sub(r"(?\1", text) text = re.sub(r"(?\1", text) text = re.sub( r"\[([^\]]+)\]\(([^)]+)\)", @@ -114,6 +122,7 @@ def create_styles(theme: dict[str, str | None]) -> dict: spaceBefore=hd["sb"], leading=hd["ld"], fontName="Helvetica-Bold", + keepWithNext=True, ) ) @@ -229,9 +238,7 @@ def resolve_image( return None -def load_image_flowable( - path: str, max_w: float, max_h: float -) -> Image | None: +def load_image_flowable(path: str, max_w: float, max_h: float) -> Image | None: try: pil = PILImage.open(path) iw, ih = pil.size @@ -298,27 +305,30 @@ def build_story( ): level = int(btype[1]) text = _fmt(content, theme) - story.append( - Paragraph(text, styles[f"Heading{level}Custom"]) - ) + # Avoid orphaned headings: break page if insufficient space + story.append(CondPageBreak(1.2 * inch)) + heading_para = Paragraph(text, styles[f"Heading{level}Custom"]) if level == 2: - story.append(Spacer(1, 2)) story.append( - HRFlowable( - width="100%", - thickness=0.5, - color=colors.HexColor(theme["border"]), - spaceBefore=0, - spaceAfter=8, + KeepTogether( + [ + heading_para, + Spacer(1, 2), + HRFlowable( + width="100%", + thickness=0.5, + color=colors.HexColor(theme["border"]), + spaceBefore=0, + spaceAfter=8, + ), + ] ) ) else: - story.append(Spacer(1, 4)) + story.append(KeepTogether([heading_para, Spacer(1, 4)])) elif btype == BLOCK_PARA: - story.append( - Paragraph(_fmt(content, theme), styles["Normal"]) - ) + story.append(Paragraph(_fmt(content, theme), styles["Normal"])) story.append(Spacer(1, 4)) elif btype == BLOCK_CODE: @@ -349,14 +359,10 @@ def build_story( headers = content.get("headers", []) rows = content.get("rows", []) h_p = [ - Paragraph(_fmt(h, theme), styles["TableHeader"]) - for h in headers + Paragraph(_fmt(h, theme), styles["TableHeader"]) for h in headers ] r_p = [ - [ - Paragraph(_fmt(c, theme), styles["TableCell"]) - for c in r - ] + [Paragraph(_fmt(c, theme), styles["TableCell"]) for c in r] for r in rows ] nc = len(headers) @@ -423,9 +429,7 @@ def build_story( elif btype == BLOCK_BULLET_LIST: items = [ - ListItem( - Paragraph(_fmt(t, theme), styles["ListItem"]) - ) + ListItem(Paragraph(_fmt(t, theme), styles["ListItem"])) for t in content ] story.append( @@ -444,9 +448,7 @@ def build_story( elif btype == BLOCK_NUMBER_LIST: items = [ - ListItem( - Paragraph(_fmt(t, theme), styles["ListItem"]) - ) + ListItem(Paragraph(_fmt(t, theme), styles["ListItem"])) for t in content ] story.append( @@ -454,9 +456,7 @@ def build_story( items, bulletType="1", bulletFontSize=10, - bulletColor=colors.HexColor( - theme["muted_foreground"] - ), + bulletColor=colors.HexColor(theme["muted_foreground"]), leftIndent=18, spaceBefore=4, spaceAfter=4, @@ -466,12 +466,8 @@ def build_story( elif btype == BLOCK_QUOTE: formatted = _fmt(content, theme) - para = Paragraph( - f"{formatted}", styles["BlockQuote"] - ) - tbl = Table( - [["", para]], colWidths=[3, None], hAlign="LEFT" - ) + para = Paragraph(f"{formatted}", styles["BlockQuote"]) + tbl = Table([["", para]], colWidths=[3, None], hAlign="LEFT") tbl.setStyle( TableStyle( [ @@ -505,9 +501,7 @@ def build_story( alt = content.get("alt", "") resolved = resolve_image(src, md_path, remote_cache) if resolved: - flowable = load_image_flowable( - resolved, avail_w, avail_h - ) + flowable = load_image_flowable(resolved, avail_w, avail_h) if flowable: elems: list = [flowable] if alt: diff --git a/tests/fixtures/empty.md b/tests/fixtures/empty.md index 8b13789..e69de29 100644 --- a/tests/fixtures/empty.md +++ b/tests/fixtures/empty.md @@ -1 +0,0 @@ - diff --git a/tests/test_markpdf.py b/tests/test_markpdf.py index cfa1768..5a1a3c1 100644 --- a/tests/test_markpdf.py +++ b/tests/test_markpdf.py @@ -22,6 +22,7 @@ def _require_markpdf(): def run_markpdf( md_path: Path, output_path: Path, verbose: bool = False, dark: bool = False ) -> subprocess.CompletedProcess: + assert MARKPDF is not None cmd = [MARKPDF, str(md_path), "-o", str(output_path)] if verbose: cmd.append("-v") @@ -34,7 +35,9 @@ def run_markpdf( def test_fixture_produces_pdf(md_file: Path, tmp_path: Path): output = tmp_path / f"{md_file.stem}.pdf" result = run_markpdf(md_file, output, verbose=True) - assert result.returncode == 0, f"Failed for {md_file.name}:\n{result.stderr}\n{result.stdout}" + assert ( + result.returncode == 0 + ), f"Failed for {md_file.name}:\n{result.stderr}\n{result.stdout}" assert output.exists(), f"PDF not created for {md_file.name}" assert output.stat().st_size > 0, f"PDF is empty for {md_file.name}" assert output.read_bytes()[:5] == b"%PDF-", f"Not a valid PDF for {md_file.name}" @@ -44,16 +47,22 @@ def test_fixture_produces_pdf(md_file: Path, tmp_path: Path): def test_fixture_dark_mode(md_file: Path, tmp_path: Path): output = tmp_path / f"{md_file.stem}-dark.pdf" result = run_markpdf(md_file, output, dark=True) - assert result.returncode == 0, f"Dark mode failed for {md_file.name}:\n{result.stderr}" + assert ( + result.returncode == 0 + ), f"Dark mode failed for {md_file.name}:\n{result.stderr}" assert output.exists() assert output.read_bytes()[:5] == b"%PDF-" def test_default_output_name(tmp_path: Path): + assert MARKPDF is not None md = FIXTURES_DIR / "headers.md" result = subprocess.run( [MARKPDF, str(md)], - capture_output=True, text=True, timeout=30, cwd=str(tmp_path), + capture_output=True, + text=True, + timeout=30, + cwd=str(tmp_path), ) assert result.returncode == 0 assert (tmp_path / "headers.pdf").exists() @@ -83,3 +92,59 @@ def test_empty_file(tmp_path: Path): result = run_markpdf(FIXTURES_DIR / "empty.md", output) assert result.returncode == 0 assert output.exists() + + +def test_heading_orphan_prevention(): + """Headings should be wrapped with CondPageBreak and KeepTogether to avoid orphans.""" + from reportlab.platypus import CondPageBreak, KeepTogether + + from markpdf.parser import parse_markdown + from markpdf.renderer import build_story, create_styles + from markpdf.themes import THEME_LIGHT + + md = "# Title\n\nSome text.\n\n## Section\n\nMore text.\n\n### Subsection\n\nDetails." + blocks = parse_markdown(md) + styles = create_styles(THEME_LIGHT) + story = build_story(blocks, styles, THEME_LIGHT, Path("."), {}) + + cond_breaks = [f for f in story if isinstance(f, CondPageBreak)] + keep_togethers = [f for f in story if isinstance(f, KeepTogether)] + + # 3 headings -> 3 CondPageBreak + 3 KeepTogether (one per heading block) + assert len(cond_breaks) == 3, f"Expected 3 CondPageBreak, got {len(cond_breaks)}" + assert ( + len(keep_togethers) >= 3 + ), f"Expected >=3 KeepTogether, got {len(keep_togethers)}" + + +def test_heading_keepwithnext_style(): + """All heading styles should have keepWithNext=True.""" + from markpdf.renderer import create_styles + from markpdf.themes import THEME_LIGHT + + styles = create_styles(THEME_LIGHT) + for level in range(1, 7): + style = styles[f"Heading{level}Custom"] + assert style.keepWithNext is True, f"Heading{level}Custom missing keepWithNext" + + +def test_heading_orphan_long_document(tmp_path: Path): + """A long document with many sections should produce a valid PDF with orphan prevention.""" + # Generate a markdown document with many sections to force page breaks + sections = [] + for i in range(20): + sections.append(f"## Section {i + 1}") + sections.append("") + # Add enough content per section to push headings near page boundaries + for j in range(8): + sections.append(f"This is paragraph {j + 1} of section {i + 1}. " * 4) + sections.append("") + md_content = "\n".join(sections) + + md_file = tmp_path / "long_headings.md" + md_file.write_text(md_content) + output = tmp_path / "long_headings.pdf" + result = run_markpdf(md_file, output) + assert result.returncode == 0 + assert output.exists() + assert output.stat().st_size > 1000 diff --git a/uv.lock b/uv.lock index 6dace2d..2161e5c 100644 --- a/uv.lock +++ b/uv.lock @@ -573,7 +573,7 @@ wheels = [ [[package]] name = "markpdf" -version = "0.1.0" +version = "0.2.0" source = { editable = "." } dependencies = [ { name = "aiofiles" },