-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconvert.py
More file actions
163 lines (133 loc) · 5.21 KB
/
convert.py
File metadata and controls
163 lines (133 loc) · 5.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/env python3
"""One-time: convert Ghost JSON export to markdown files with frontmatter."""
import json
import re
import os
from datetime import datetime
from pathlib import Path
# Ghost JSON export expected to sit next to this script.
DATA = Path(__file__).parent / "data.json"
# Output directories for converted markdown posts and pages.
CONTENT_DIR = Path(__file__).parent / "content" / "posts"
PAGES_DIR = Path(__file__).parent / "content" / "pages"
# Real blog URL substituted for Ghost's "__GHOST_URL__" placeholder.
GHOST_URL = "https://zerion.io/blog"
def html_to_markdown(html_str):
    """Normalize a Ghost HTML fragment for embedding in a markdown file.

    Markdown renderers accept raw HTML, so no tag conversion is performed;
    this only rewrites ``__GHOST_URL__`` placeholders to the real blog URL
    and trims surrounding whitespace.  Falsy input yields "".
    """
    if not html_str:
        return ""
    return html_str.replace("__GHOST_URL__", GHOST_URL).strip()
def slugify_date(iso_str):
    """Return the YYYY-MM-DD prefix of an ISO timestamp, or a placeholder.

    Falsy input (None, "") maps to the sortable sentinel "0000-00-00".
    """
    return iso_str[:10] if iso_str else "0000-00-00"
def escape_yaml(s):
"""Escape a string for YAML frontmatter."""
if not s:
return ""
# Quote if it contains special chars
if any(c in s for c in [':', '"', "'", '#', '{', '}', '[', ']', ',', '&', '*', '?', '|', '-', '<', '>', '=', '!', '%', '@', '`']):
return '"' + s.replace('\\', '\\\\').replace('"', '\\"') + '"'
return s
def get_excerpt(plaintext, max_len=200):
    """Flatten *plaintext* to one line and truncate at a word boundary.

    Newlines become spaces; a truncated excerpt ends with an ellipsis.
    Falsy input yields "".
    """
    if not plaintext:
        return ""
    flat = plaintext.strip().replace("\n", " ")
    if len(flat) <= max_len:
        return flat
    # Cut at max_len, then back off to the last space so no word is split.
    return flat[:max_len].rsplit(" ", 1)[0] + "\u2026"
def _build_author_lookup(data, users):
    """Map post_id -> list of user dicts (missing author ids yield None entries,
    filtered out later by the caller)."""
    post_authors = {}
    for pa in data["posts_authors"]:
        post_authors.setdefault(pa["post_id"], []).append(users.get(pa["author_id"]))
    return post_authors


def _build_tag_lookup(data, tags):
    """Map post_id -> list of public tag dicts (internal tags are skipped)."""
    post_tags = {}
    for pt in data["posts_tags"]:
        tag = tags.get(pt["tag_id"])
        if tag and tag["visibility"] == "public":
            post_tags.setdefault(pt["post_id"], []).append(tag)
    return post_tags


def _render_frontmatter(p, authors_list, tag_names, excerpt, date_str):
    """Render the YAML frontmatter block (list of lines) for one post/page."""
    published_at = p.get("published_at", "")
    feature_image = (p.get("feature_image") or "").replace("__GHOST_URL__", GHOST_URL)
    lines = [
        "---",
        f"title: {escape_yaml(p['title'] or 'Untitled')}",
        f"slug: {p['slug']}",
        f"date: {date_str}",
    ]
    if published_at:
        lines.append(f"published_at: {published_at}")
    if feature_image:
        lines.append(f"feature_image: {feature_image}")
    if p.get("featured", 0):
        lines.append("featured: true")
    if authors_list:
        lines.append("authors:")
        for author in authors_list:
            lines.append(f"  - name: {escape_yaml(author['name'])}")
            lines.append(f"    slug: {author['slug']}")
            avatar = (author.get("profile_image") or "").replace("__GHOST_URL__", GHOST_URL)
            if avatar:
                lines.append(f"    avatar: {avatar}")
    if tag_names:
        lines.append("tags:")
        lines.extend(f"  - {escape_yaml(t)}" for t in tag_names)
    if excerpt:
        lines.append(f"excerpt: {escape_yaml(excerpt)}")
    lines.append("---")
    return lines


def main():
    """Convert the Ghost JSON export to markdown posts/pages plus authors.json.

    Reads DATA, writes one frontmatter-prefixed .md file per published post
    (``date-slug.md``) or page (``slug.md``), and dumps an author index to
    content/authors.json for the build script.
    """
    # Explicit UTF-8: the export and output must not depend on the locale.
    with open(DATA, encoding="utf-8") as f:
        data = json.load(f)["db"][0]["data"]

    users = {u["id"]: u for u in data["users"]}
    tags = {t["id"]: t for t in data["tags"]}
    post_authors = _build_author_lookup(data, users)
    post_tags = _build_tag_lookup(data, tags)

    for p in data["posts"]:
        if p["status"] != "published":
            continue
        is_page = p["type"] == "page"
        out_dir = PAGES_DIR if is_page else CONTENT_DIR
        out_dir.mkdir(parents=True, exist_ok=True)

        date_str = slugify_date(p.get("published_at", ""))
        authors_list = [a for a in post_authors.get(p["id"], []) if a]
        # Tags starting with "#" are Ghost-internal; keep only visible names.
        tag_names = [
            t["name"] for t in post_tags.get(p["id"], []) if not t["name"].startswith("#")
        ]
        excerpt = get_excerpt(p.get("plaintext"))

        fm_lines = _render_frontmatter(p, authors_list, tag_names, excerpt, date_str)
        content_html = html_to_markdown(p.get("html", ""))

        # File name: date-slug.md for posts, slug.md for pages.
        filename = f"{p['slug']}.md" if is_page else f"{date_str}-{p['slug']}.md"
        (out_dir / filename).write_text(
            "\n".join(fm_lines) + "\n\n" + content_html + "\n", encoding="utf-8"
        )

    print(f"Converted {len(list(CONTENT_DIR.glob('*.md')))} posts to content/posts/")
    print(f"Converted {len(list(PAGES_DIR.glob('*.md')))} pages to content/pages/")

    # Author index keyed by slug, consumed by the build script.
    authors_out = {
        u["slug"]: {
            "name": u["name"],
            "slug": u["slug"],
            "bio": u.get("bio") or "",
            "profile_image": (u.get("profile_image") or "").replace(
                "__GHOST_URL__", GHOST_URL
            ),
        }
        for u in users.values()
    }
    authors_path = Path(__file__).parent / "content" / "authors.json"
    with open(authors_path, "w", encoding="utf-8") as f:
        json.dump(authors_out, f, indent=2)
    print(f"Wrote {len(authors_out)} authors to content/authors.json")


if __name__ == "__main__":
    main()