Skip to content

Commit 97b0533

Browse files
committed
Refactor mathematical definitions and properties in various documents
- Enhanced clarity in the definition of basis transformations and their properties. - Improved explanations of Hilbert spaces and Fourier analysis. - Clarified the concept of orthogonality in linear spaces. - Updated the definition and properties of orthogonal projections. - Refined the explanation of linear transformations and their matrix representations. - Revised the definitions and properties related to determinants and permutations. - Added details on the properties of matroids and their bases.
1 parent a35e803 commit 97b0533

59 files changed

Lines changed: 937 additions & 176 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

Other/说明/本地排除文件.md

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,25 @@ git sparse-checkout set /* !'assets/' !'Other/'
3434
* **恢复显示**: 如果某天你又需要那个目录了,只需重新应用规则,把它从排除列表中去掉即可。要恢复所有文件,可执行:
3535
```bash
3636
git sparse-checkout reapply
37-
```
37+
```
38+
39+
## 6. 稀疏提交
40+
41+
如果你想让本地只保留需要的数学内容,不把前端、部署或其他配置文件拉到本地磁盘,可以用这个方案。
42+
这个方案只影响本地显示,不影响远端仓库,也不影响别人正常使用。
43+
操作流程:
44+
45+
1. 克隆仓库后,在仓库根目录启用 `sparse-checkout`
46+
2. 按自己的需要写排除规则,只保留常用目录。
47+
3. 后续 `pull / commit / push` 照常使用,不需要额外步骤。
48+
49+
如果只是本地不想下载某些文件或目录,比如 GitHub Actions 部署相关的 `static.yml`,可以直接用这套方法。
50+
51+
```bash
52+
git show origin/main:.github/workflows/static.yml
53+
```
54+
55+
如果临时需要查看远端的部署配置,也不必恢复全部文件,可以直接读取远端内容,例如:
56+
```bash
57+
git ls-tree -r --name-only origin/main .github/workflows
58+
```
Lines changed: 358 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,358 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
r"""
5+
检测并修复 Markdown 数学公式中的异常 Unicode 字符。
6+
7+
目标:
8+
1. 扫描 `$...$`、`$$...$$`、`\(...\)`、`\[...\]` 中的异常字符。
9+
2. 安全替换常见 OCR / 输入法问题:
10+
- 非 ASCII 数字 -> ASCII 数字
11+
- 全角 / 阿拉伯标点 -> ASCII 或 TeX 命令
12+
- 中文文本 -> `\text{中文}`
13+
3. 对无法安全自动修复的字符保留并报告。
14+
15+
默认行为:仅检测,不改文件。
16+
使用 `--write` 才会写回文件。
17+
18+
默认只处理高风险脏字符,不处理纯风格差异。
19+
例如 `N(0, 1)` 与 `N(0,1)`、`(1, 2)` 与 `(1,2)` 默认都不会报。
20+
如果确实想统一这类写法,可以加 `--aggressive`。
21+
"""
22+
23+
from __future__ import annotations
24+
25+
import argparse
26+
import re
27+
import sys
28+
import unicodedata
29+
from dataclasses import dataclass
30+
from pathlib import Path
31+
from typing import Callable, Iterable, List, Sequence, Tuple
32+
33+
34+
# One delimited math region: $$...$$ (display), $...$ (inline, backslash
# escapes allowed, no raw newline), \(...\) or \[...\].
MATH_PATTERN = re.compile(
    r"(\$\$[\s\S]+?\$\$|\$(?:\\.|[^$\\\n])+\$|\\\([\s\S]+?\\\)|\\\[[\s\S]+?\\\])",
    re.MULTILINE,
)

# Text-mode macros whose arguments must be shielded from normalization.
TEXT_MACRO_PATTERN = re.compile(r"\\(?:text|mbox|mathrm|operatorname)\s*\{[^{}]*\}")
# Runs of CJK ideographs (Ext-A, unified, and compatibility blocks).
CJK_PATTERN = re.compile(r"[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]+")

# One-to-one replacements considered always safe inside math mode:
# full-width punctuation/digits and common lookalike symbols mapped to
# ASCII equivalents or TeX commands.
SAFE_CHAR_MAP = {
    "(": "(",
    ")": ")",
    "[": "[",
    "]": "]",
    "{": "{",
    "}": "}",
    ",": ",",
    "、": ",",
    ":": ":",
    ";": ";",
    "。": ".",
    ".": ".",
    "!": "!",
    "?": "?",
    "+": "+",
    "-": "-",
    "—": "-",
    "–": "-",
    "−": "-",
    "=": "=",
    "<": "<",
    ">": ">",
    "|": "|",
    "¦": "|",
    "/": "/",
    "%": r"\%",
    "×": r"\times",
    "÷": r"\div",
    "·": r"\cdot",
    "•": r"\cdot",
    "∙": r"\cdot",
    "⋅": r"\cdot",
    "∶": ":",
    "′": "'",
    "″": "''",
    "“": '"',
    "”": '"',
    "‘": "'",
    "’": "'",
    "٫": ".",
    "٬": ",",
    "﹣": "-",
    "0": "0",
    "1": "1",
    "2": "2",
    "3": "3",
    "4": "4",
    "5": "5",
    "6": "6",
    "7": "7",
    "8": "8",
    "9": "9",
}

# Extra rules used only with --aggressive: compact whitespace around digit
# separators and between adjacent digits (pure style-level changes).
AGGRESSIVE_SPACE_PATTERNS: list[tuple[re.Pattern[str], str | Callable[[re.Match[str]], str]]] = [
    (re.compile(r"(?<=\d)\s*[.,]\s*(?=\d)"), lambda m: m.group(0).strip()),
    (re.compile(r"(?<=\d)\s+(?=\d)"), ""),
]
101+
102+
103+
@dataclass
class Issue:
    """One detected anomaly inside a single delimited math region."""

    file_path: Path    # file the math region was found in
    line_number: int   # 1-based line of the region's opening delimiter
    kind: str          # category label, e.g. "中文文本" / "Unicode 数字"
    original: str      # math region as found, delimiters included
    suggested: str     # normalized replacement for the region
    snippet: str       # whitespace-collapsed preview used in reports
111+
112+
113+
@dataclass
class ProcessResult:
    """Outcome of normalizing one file's content."""

    changed: bool        # True when any math region was rewritten
    content: str         # full file text with normalized math spliced in
    issues: List[Issue]  # every issue detected across the file
118+
119+
120+
def iter_markdown_files(paths: Sequence[Path]) -> Iterable[Path]:
    """Yield every Markdown file reachable from *paths* (files or directories).

    Missing paths are reported to stderr and skipped; non-``.md`` files
    are silently ignored.
    """
    for candidate in paths:
        if not candidate.exists():
            print(f"警告: 路径不存在,跳过: {candidate}", file=sys.stderr)
        elif candidate.is_dir():
            # Recurse into the whole tree below a directory argument.
            yield from candidate.rglob("*.md")
        elif candidate.is_file() and candidate.suffix.lower() == ".md":
            yield candidate
131+
132+
133+
def compute_line_number(text: str, index: int) -> int:
    """Return the 1-based line number of character offset *index* in *text*."""
    # Number of newline-separated segments in the prefix == line number.
    return len(text[:index].split("\n"))
135+
136+
137+
def snippet_from_math(math_text: str) -> str:
    """Collapse all whitespace runs and truncate to 120 chars for reporting."""
    compact = " ".join(math_text.split())
    if len(compact) > 120:
        return compact[:120] + "..."
    return compact
140+
141+
142+
def normalize_unicode_digit(char: str) -> str | None:
    """Map *char* to a safe ASCII/TeX replacement, or None when no mapping exists.

    Lookup order: the explicit safe-character table, then the Unicode
    decimal value, then the Unicode numeric value restricted to single
    integer digits 0-9.
    """
    mapped = SAFE_CHAR_MAP.get(char)
    if mapped is not None:
        return mapped

    try:
        return str(unicodedata.decimal(char))
    except (TypeError, ValueError):
        pass

    try:
        value = unicodedata.numeric(char)
    except (TypeError, ValueError):
        return None

    # Only accept clean single-digit integers (rejects fractions like ½).
    if float(value).is_integer() and 0 <= int(value) <= 9:
        return str(int(value))
    return None
159+
160+
161+
def protect_text_macros(content: str) -> Tuple[str, List[str]]:
    """Mask \\text-like macros with placeholders; return (masked text, saved macros)."""
    saved: List[str] = []

    def stash(match: re.Match[str]) -> str:
        # Remember the macro verbatim and leave an index-tagged marker behind.
        saved.append(match.group(0))
        return f"@@TEXT_PLACEHOLDER_{len(saved) - 1}@@"

    return TEXT_MACRO_PATTERN.sub(stash, content), saved
169+
170+
171+
def restore_text_macros(content: str, placeholders: Sequence[str]) -> str:
    """Substitute each @@TEXT_PLACEHOLDER_i@@ marker back with its saved macro."""
    restored = content
    for position, macro in enumerate(placeholders):
        restored = restored.replace(f"@@TEXT_PLACEHOLDER_{position}@@", macro)
    return restored
175+
176+
177+
def wrap_cjk_runs(content: str) -> str:
    """Wrap every run of CJK ideographs in a TeX \\text{...} group."""
    def wrap(match: re.Match[str]) -> str:
        return r"\text{" + match.group(0) + "}"

    return CJK_PATTERN.sub(wrap, content)
179+
180+
181+
def contains_cjk(text: str) -> bool:
    """Return True when *text* holds at least one CJK ideograph."""
    return CJK_PATTERN.search(text) is not None
183+
184+
185+
def contains_non_ascii_digit(text: str) -> bool:
    """Return True when a non-ASCII char in *text* has a differing safe replacement."""
    return any(
        (replacement := normalize_unicode_digit(char)) is not None
        and replacement != char
        for char in text
        if ord(char) >= 128  # ASCII chars never need replacing
    )
193+
194+
195+
def contains_mapped_punctuation(text: str) -> bool:
    """Return True when *text* contains any character covered by SAFE_CHAR_MAP."""
    for char in text:
        if char in SAFE_CHAR_MAP:
            return True
    return False
197+
198+
199+
def strip_math_delimiter(math_text: str) -> str:
    """Drop the surrounding math delimiters ($$, $, \\(\\), \\[\\]) if present."""
    # Two-character delimiter pairs; $$ must be tried before single $.
    for opener, closer in (("$$", "$$"), (r"\(", r"\)"), (r"\[", r"\]")):
        if math_text.startswith(opener) and math_text.endswith(closer):
            return math_text[2:-2]
    if math_text.startswith("$") and math_text.endswith("$"):
        return math_text[1:-1]
    return math_text
209+
210+
211+
def rebuild_math(delimited_math: str, new_body: str) -> str:
    """Wrap *new_body* in the same delimiters that *delimited_math* used."""
    for opener, closer in (("$$", "$$"), (r"\(", r"\)"), (r"\[", r"\]")):
        if delimited_math.startswith(opener) and delimited_math.endswith(closer):
            return f"{opener}{new_body}{closer}"
    if delimited_math.startswith("$") and delimited_math.endswith("$"):
        return f"${new_body}$"
    # Unrecognized delimiters: return the body as-is.
    return new_body
221+
222+
223+
def normalize_math_body(content: str, aggressive: bool = False) -> str:
    """Normalize one math body: map unsafe chars, wrap CJK runs, compact spaces.

    ``\\text{...}``-style macro contents are protected via placeholders so
    their text is never rewritten.  With ``aggressive=True`` the extra
    space-compaction rules are applied as well.
    """
    masked, saved = protect_text_macros(content)

    # Pass 1: per-character digit/symbol normalization.
    pieces: List[str] = []
    for char in masked:
        mapped = normalize_unicode_digit(char)
        pieces.append(char if mapped is None else mapped)
    result = "".join(pieces)

    # Pass 2: the safe punctuation table, then wrap remaining CJK text.
    result = "".join(SAFE_CHAR_MAP.get(char, char) for char in result)
    result = wrap_cjk_runs(result)
    # Collapse an accidentally doubled \times produced by the mapping.
    result = re.sub(r"\\times\s*\\times", r"\\times", result)

    if aggressive:
        for pattern, replacement in AGGRESSIVE_SPACE_PATTERNS:
            result = pattern.sub(replacement, result)

    return restore_text_macros(result, saved)
241+
242+
243+
def analyze_difference(file_path: Path, original_math: str, new_math: str, start_index: int, full_text: str) -> List[Issue]:
    """Classify why *original_math* was rewritten to *new_math*.

    Returns one Issue per detected category (CJK text, Unicode digits,
    full-width/special symbols).  If the two texts differ but none of the
    specific detectors fires — e.g. only the doubled ``\\times`` collapse
    or an ``--aggressive`` space rule applied — a generic issue is emitted.
    Previously such changes produced no issue at all, and because ``main``
    skips files without issues, they were neither reported nor written
    back even with ``--write``.
    """
    if original_math == new_math:
        return []

    original_body = strip_math_delimiter(original_math)
    line_number = compute_line_number(full_text, start_index)
    snippet = snippet_from_math(original_math)
    issues: List[Issue] = []

    if contains_cjk(original_body):
        issues.append(Issue(file_path, line_number, "中文文本", original_math, new_math, snippet))

    if contains_non_ascii_digit(original_body):
        issues.append(Issue(file_path, line_number, "Unicode 数字", original_math, new_math, snippet))

    if contains_mapped_punctuation(original_body):
        issues.append(Issue(file_path, line_number, "全角/特殊符号", original_math, new_math, snippet))

    if not issues:
        # Fallback: the normalizer changed something the detectors above do
        # not classify; report it generically rather than hiding the rewrite.
        issues.append(Issue(file_path, line_number, "其他归一化", original_math, new_math, snippet))

    return issues
262+
263+
264+
def process_content(file_path: Path, content: str, aggressive: bool = False) -> ProcessResult:
    """Run the normalizer over every math region of *content* and collect issues.

    Non-math text is copied through untouched; each math region is stripped,
    normalized, re-delimited, and compared against the original.
    """
    issues: List[Issue] = []
    rebuilt: List[str] = []
    cursor = 0
    changed = False

    for match in MATH_PATTERN.finditer(content):
        # Copy the plain text between the previous region and this one.
        rebuilt.append(content[cursor:match.start()])
        raw = match.group(0)
        body = normalize_math_body(strip_math_delimiter(raw), aggressive=aggressive)
        fixed = rebuild_math(raw, body)

        issues.extend(analyze_difference(file_path, raw, fixed, match.start(), content))
        changed = changed or fixed != raw

        rebuilt.append(fixed)
        cursor = match.end()

    rebuilt.append(content[cursor:])
    return ProcessResult(changed=changed, content="".join(rebuilt), issues=issues)
285+
286+
287+
def print_issue(issue: Issue) -> None:
    """Print one issue report: kind, location, snippet, and optional suggestion."""
    print(f"[{issue.kind}] {issue.file_path}:{issue.line_number}")
    print(f" 片段: {issue.snippet}")
    if issue.original == issue.suggested:
        return
    truncated = len(issue.suggested) > 160
    print(f" 建议: {issue.suggested[:160]}{'...' if truncated else ''}")
294+
295+
296+
def parse_args() -> argparse.Namespace:
    """Define and parse the command-line options for the scanner."""
    parser = argparse.ArgumentParser(
        description="检测并修复 Markdown 数学公式中的异常 Unicode 字符"
    )
    parser.add_argument(
        "paths",
        nargs="*",
        help="要扫描的文件或目录;默认扫描 概率论/ 线性代数/ 微积分/",
    )
    # All remaining options are simple boolean flags.
    for flag, text in (
        ("--write", "写回文件"),
        ("--quiet", "仅输出汇总"),
        ("--aggressive", "启用激进归一化:额外压缩数字附近空格与部分标点空格,可能产生更多纯风格改动"),
    ):
        parser.add_argument(flag, action="store_true", help=text)
    return parser.parse_args()
311+
312+
313+
def main() -> int:
    """Scan target Markdown files, report issues, and optionally write fixes.

    Returns 0 on success, 1 when no Markdown files were found.
    """
    # Script is assumed to live two levels below the repository root
    # (e.g. tools/scripts/) — TODO confirm against the actual layout.
    repo_root = Path(__file__).resolve().parents[2]
    args = parse_args()

    if args.paths:
        # Relative CLI paths are resolved against the repo root, not the CWD.
        targets = [Path(path) if Path(path).is_absolute() else repo_root / path for path in args.paths]
    else:
        # Default subject-matter directories when no paths are given.
        targets = [repo_root / name for name in ("概率论", "线性代数", "微积分")]

    # De-duplicate (a file may be reachable via several targets) and sort.
    markdown_files = sorted(set(iter_markdown_files(targets)))
    if not markdown_files:
        print("未找到任何 Markdown 文件")
        return 1

    total_issues = 0
    changed_files = 0

    for file_path in markdown_files:
        content = file_path.read_text(encoding="utf-8")
        result = process_content(file_path, content, aggressive=args.aggressive)
        if not result.issues:
            # Files without detected issues are skipped entirely.
            continue

        total_issues += len(result.issues)
        if not args.quiet:
            for issue in result.issues:
                print_issue(issue)

        # Only rewrite files when explicitly requested and actually changed.
        if args.write and result.changed:
            file_path.write_text(result.content, encoding="utf-8")
            changed_files += 1
            if not args.quiet:
                print(f" 已写回: {file_path}")
        print()

    print("\n扫描完成")
    print(f"- 扫描文件数: {len(markdown_files)}")
    print(f"- 发现问题数: {total_issues}")
    print(f"- 写回文件数: {changed_files if args.write else 0}")
    print(f"- 模式: {'写入' if args.write else '只读检测'}")
    print(f"- 归一化级别: {'激进' if args.aggressive else '保守'}")
    return 0
355+
356+
357+
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status.
    raise SystemExit(main())

0 commit comments

Comments
 (0)