-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmd_to_json.py
More file actions
153 lines (136 loc) · 5.14 KB
/
md_to_json.py
File metadata and controls
153 lines (136 loc) · 5.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import json
import re
from pathlib import Path
import blog_generator
class MarkdownConverter:
    """Convert a simple Markdown document into a nested JSON structure.

    The resulting structure is ``{"title": str, "sections": [...]}``. Every
    ``##`` heading opens a section and every ``###`` heading opens a
    subsection nested under the section that is open at that point. Content
    items are appended to the innermost open container and are one of:

    * a plain string (one per non-empty paragraph line),
    * ``{"code": [lines...]}`` for a fenced code block, with an additional
      ``"notes"`` entry when the fence declares a language,
    * ``{"notes": {...}}`` for a bullet list.
    """

    # Compiled once; both patterns are applied to every input line.
    _HEADER_RE = re.compile(r'^(#{1,3})\s+(.+)')
    _LIST_ITEM_RE = re.compile(r'^\s*[-*+] ')

    def __init__(self):
        self.output = {
            "title": "",
            "sections": []
        }
        # Section / subsection currently being filled (None when not open).
        self.current_section = None
        self.current_subsection = None

    def convert(self, md_path, json_path):
        """Parse the Markdown file at *md_path* and write JSON to *json_path*.

        Args:
            md_path: Path of the UTF-8 encoded Markdown input file.
            json_path: Path of the JSON file to create or overwrite.
        """
        lines = Path(md_path).read_text(encoding='utf-8').split('\n')

        # Parser state.
        in_code = False
        code_lines = []
        code_lang = ""
        list_items = []  # bullet items collected for the list in progress

        for line in lines:
            # Fenced code blocks.
            if line.startswith('```'):
                if in_code:
                    # Closing fence.
                    self._add_code_block(code_lines, code_lang)
                    code_lines = []
                    code_lang = ""
                    in_code = False
                else:
                    # Opening fence: emit a pending list first so that the
                    # document order is preserved.
                    list_items = self._flush_list(list_items)
                    code_lang = line[3:].strip()
                    in_code = True
                continue

            if in_code:
                code_lines.append(line)
                continue

            # Headings.
            header_match = self._HEADER_RE.match(line)
            if header_match:
                # A pending list belongs to the container that was open
                # before this heading, so emit it before switching.
                list_items = self._flush_list(list_items)

                level = len(header_match.group(1))
                text = header_match.group(2).strip()

                if level == 1:
                    self.output["title"] = text
                elif level == 2:
                    self._finalize_current()
                    self.current_section = {
                        "id": text,
                        "title": text,
                        "content": []
                    }
                else:
                    # Close only the previous subsection; the parent section
                    # stays open so the new subsection nests under it.
                    self._finalize_subsection()
                    self.current_subsection = {
                        "id": text,
                        "title": text,
                        "content": []
                    }
                continue

            # Bullet list items.
            if self._LIST_ITEM_RE.match(line):
                list_items.append(self._LIST_ITEM_RE.sub('', line, count=1).strip())
                continue

            # Any other line terminates the list in progress.
            list_items = self._flush_list(list_items)

            # Plain paragraph line.
            stripped = line.strip()
            if stripped:
                self._add_paragraph(stripped)

        # Flush whatever is still open. Content must be emitted before the
        # containers are closed, otherwise it lands in the wrong place.
        if in_code:
            # Unterminated fence: keep the collected lines instead of
            # silently dropping them.
            self._add_code_block(code_lines, code_lang)
        self._flush_list(list_items)
        self._finalize_current()

        # Write the result.
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(self.output, f, indent=2, ensure_ascii=False)

    def _flush_list(self, items):
        """Emit *items* as a list note if non-empty; return a fresh list."""
        if items:
            self._add_list(items)
        return []

    def _add_paragraph(self, text):
        """Append a paragraph string to the current content container."""
        self._get_current_content().append(text)

    def _add_code_block(self, code_lines, lang):
        """Append a code block, annotated with *lang* when it is non-empty.

        Args:
            code_lines: The lines between the opening and closing fence.
            lang: Text following the opening fence, or an empty string.
        """
        code_block = {
            "code": code_lines
        }
        if lang:
            code_block["notes"] = {
                "title": "语言说明",
                "items": [f"编程语言: {lang}"]
            }
        # Append the annotated block itself; building it and then appending
        # a bare {"code": ...} dict would discard the language note.
        self._get_current_content().append(code_block)

    def _add_list(self, items):
        """Append a bullet list to the current container as a notes entry."""
        self._get_current_content().append({
            "notes": {
                "title": "要点说明",
                "items": items
            }
        })

    def _get_current_content(self):
        """Return the content list that new items should be appended to.

        Preference order: the open subsection, the open section, the last
        section already stored in the output, and finally a newly created
        untitled section.
        """
        if self.current_subsection is not None:
            return self.current_subsection.setdefault("content", [])
        if self.current_section is not None:
            return self.current_section.setdefault("content", [])
        if self.output["sections"]:
            return self.output["sections"][-1].setdefault("content", [])
        new_section = {"title": "", "content": []}
        self.output["sections"].append(new_section)
        return new_section["content"]

    def _finalize_subsection(self):
        """Close the open subsection, attaching it to its parent section.

        A subsection that appears while no section is open is wrapped in a
        new untitled section so that it is not lost.
        """
        if self.current_subsection is None:
            return
        if self.current_section is not None:
            self.current_section.setdefault("subsections", []).append(
                self.current_subsection
            )
        else:
            self.output["sections"].append(
                {"title": "", "subsections": [self.current_subsection]}
            )
        self.current_subsection = None

    def _finalize_current(self):
        """Close the open subsection (if any) and then the open section."""
        self._finalize_subsection()
        if self.current_section is not None:
            self.output["sections"].append(self.current_section)
            self.current_section = None
# Example usage: convert input.md to config.json, then pass that JSON file
# and template.html to blog_generator to produce output.html.
if __name__ == "__main__":
    MarkdownConverter().convert("input.md", "config.json")
    generator = blog_generator.BlogGenerator("template.html")
    generator.generate("config.json", "output.html")