Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ os-cargo-config:
printf '\n' >> $(OS_DIR)/.cargo/config.toml; \
cat "$(OS_DIR)/cargo-vendor-config.toml" >> $(OS_DIR)/.cargo/config.toml; \
echo "[OSCOMP] 已追加 vendored 源(离线构建)"; \
echo "[OSCOMP] 重建 vendor 校验和(评测机过滤删除了 .cargo-checksum.json)"; \
python3 scripts/restore_vendor_checksums.py $(OS_DIR); \
fi; \
else \
echo "[OSCOMP] 保留现有 $(OS_DIR)/.cargo/config.toml(本地开发)"; \
Expand Down
123 changes: 123 additions & 0 deletions scripts/restore_vendor_checksums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
"""重建 os/vendor/*/.cargo-checksum.json(OSCOMP 评测机会过滤隐藏文件,删掉它)。

cargo 的 vendored directory 源要求每个 crate 目录下有 .cargo-checksum.json。
评测机 clone 时按"隐藏文件"递归删除了它,导致离线构建在加载依赖阶段失败。
本脚本从 Cargo.lock(非隐藏,过滤后存活)离线重建这些文件。

用法:
restore_vendor_checksums.py [OS_DIR] # 重建(缺失才写)
restore_vendor_checksums.py --verify [OS_DIR] # 仅校验:比对计算值与现有文件
默认 OS_DIR = <脚本目录>/../os
"""
import json
import os
import re
import sys


def parse_lock(path):
    """Parse a Cargo.lock file into a checksum lookup table.

    Only ``[[package]]`` tables are inspected. Keys that appear before the
    first package (e.g. the top-level ``version = 3``) or inside any other
    table (e.g. ``[[patch.unused]]``, which also carries ``name`` and
    ``version`` keys, or ``[metadata]``) are ignored so they can never be
    attached to a neighbouring package's checksum.

    Args:
        path: Path to the Cargo.lock file (read as UTF-8).

    Returns:
        A dict mapping ``(name, version)`` to the ``checksum`` string.
        Packages without a checksum (path or git dependencies) are omitted.

    Raises:
        OSError: If the file cannot be opened.
    """
    wanted = ("name", "version", "checksum")
    cks = {}
    fields = {}
    in_package = False

    with open(path, encoding="utf-8") as f:
        for line in f:
            s = line.strip()
            if s.startswith("["):
                # Any table header closes the table before it: commit the
                # finished package, then start from a clean slate.
                if in_package and all(fields.get(k) for k in wanted):
                    cks[(fields["name"], fields["version"])] = fields["checksum"]
                fields = {}
                in_package = s == "[[package]]"
            elif in_package:
                key, sep, value = s.partition("=")
                key = key.strip()
                if sep and key in wanted:
                    fields[key] = value.strip().strip('"')

    # The last package has no following header to trigger the commit.
    if in_package and all(fields.get(k) for k in wanted):
        cks[(fields["name"], fields["version"])] = fields["checksum"]
    return cks
Comment on lines +19 to +37

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

问题分析
当前 parse_lock 的实现没有限制解析范围在 [[package]] 块内。如果 Cargo.lock 中包含其他非 package 的配置段(例如 [metadata]、[workspace]),或者某个 package 是没有 checksum 的本地/Git 依赖,其解析出的 name、version 可能会被后续其他段的 key-value 污染,导致在文件末尾或下一个 package 处错误地关联了不属于它的校验和。

改进建议
引入 in_package 状态标志,仅在 [[package]] 块内解析 name、version、checksum,并在遇到其他 [...] 段或文件结束时安全地提交当前 package。

def parse_lock(path):
    """Build a ``{(name, version): checksum}`` table from a Cargo.lock file.

    Only keys found inside ``[[package]]`` tables are read; a package is
    recorded only when name, version and checksum are all present and
    non-empty, so crates without a checksum are left out.
    """
    prefixes = ("name =", "version =", "checksum =")
    registry = {}
    entry = [None, None, None]  # slots follow the order of `prefixes`
    inside_package = False

    with open(path, encoding="utf-8") as handle:
        for raw in handle:
            text = raw.strip()
            if text.startswith("["):
                # Every table header ends the previous table: store the
                # completed package (if any) and reset the scratch slots.
                if inside_package and all(entry):
                    registry[(entry[0], entry[1])] = entry[2]
                entry = [None, None, None]
                inside_package = text == "[[package]]"
                continue
            if not inside_package:
                continue
            for slot, prefix in enumerate(prefixes):
                if text.startswith(prefix):
                    entry[slot] = text.split("=", 1)[1].strip().strip('"')
                    break

    # The final package is not followed by another header.
    if inside_package and all(entry):
        registry[(entry[0], entry[1])] = entry[2]
    return registry



def pkg_name_ver(cargo_toml):
    """Read the crate name and version from a vendored Cargo.toml.

    Only the ``[package]`` table is inspected; ``name``/``version`` keys in
    other tables (dependencies, ``[lib]``, ``[[bin]]`` ...) are ignored.
    Values may be written as TOML basic strings (``"..."``) or literal
    strings (``'...'``); the opening and closing quote must be the same.

    Args:
        cargo_toml: Path to the Cargo.toml file (read as UTF-8).

    Returns:
        A ``(name, version)`` tuple. Either element is ``None`` when the
        corresponding key was not found in ``[package]``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Compiled once, outside the line loop. Group 2 holds a double-quoted
    # value, group 3 a single-quoted one.
    key_value = re.compile(r'''(name|version)\s*=\s*(?:"([^"]+)"|'([^']+)')''')
    found = {}
    in_pkg = False
    with open(cargo_toml, encoding="utf-8") as f:
        for line in f:
            s = line.strip()
            if s.startswith("["):
                in_pkg = s == "[package]"
                continue
            if in_pkg:
                m = key_value.match(s)
                if m:
                    # First occurrence wins, matching a top-down read.
                    found.setdefault(m.group(1), m.group(2) or m.group(3))
            if "name" in found and "version" in found:
                break
    return found.get("name"), found.get("version")
Comment on lines +40 to +61

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

问题分析
当前正则表达式仅支持双引号包裹的字符串(如 name = "foo")。虽然 cargo vendor 自动生成的 Cargo.toml 通常使用双引号,但 TOML 规范同样允许单引号(如 name = 'foo')。为了提高脚本的健壮性,建议兼容单引号和双引号。

改进建议
修改正则表达式,使用 ["\'] 匹配单双引号。

def pkg_name_ver(cargo_toml):
    """Return ``(name, version)`` from the ``[package]`` table of a Cargo.toml.

    Values wrapped in either single or double quotes are accepted. A field
    that is not found is returned as ``None``.
    """
    name_re = re.compile(r'name\s*=\s*["\']([^"\']+)["\']')
    version_re = re.compile(r'version\s*=\s*["\']([^"\']+)["\']')

    pkg_name = None
    pkg_version = None
    inside_package = False

    with open(cargo_toml, encoding="utf-8") as handle:
        for raw in handle:
            text = raw.strip()
            if text.startswith("["):
                # A table header switches scope; only [package] is of interest.
                inside_package = text == "[package]"
                continue
            if inside_package:
                if pkg_name is None:
                    hit = name_re.match(text)
                    if hit is not None:
                        pkg_name = hit.group(1)
                if pkg_version is None:
                    hit = version_re.match(text)
                    if hit is not None:
                        pkg_version = hit.group(1)
            # Stop reading as soon as both fields are known.
            if pkg_name and pkg_version:
                break
    return pkg_name, pkg_version



def main(argv):
verify = "--verify" in argv
argv = [a for a in argv if a != "--verify"]
script_dir = os.path.dirname(os.path.abspath(__file__))
os_dir = os.path.abspath(argv[1]) if len(argv) > 1 else os.path.normpath(
os.path.join(script_dir, "..", "os"))
lock = os.path.join(os_dir, "Cargo.lock")
vendor = os.path.join(os_dir, "vendor")

if not os.path.isdir(vendor):
print(f"[checksum] 无 {vendor},跳过")
return 0
cks = parse_lock(lock)

made = kept = miss = mismatch = ok = 0
for d in sorted(os.listdir(vendor)):
cdir = os.path.join(vendor, d)
ctoml = os.path.join(cdir, "Cargo.toml")
if not os.path.isfile(ctoml):
continue
name, ver = pkg_name_ver(ctoml)
ck = cks.get((name, ver))
ckfile = os.path.join(cdir, ".cargo-checksum.json")

if verify:
if not os.path.isfile(ckfile):
print(f"[checksum] (verify) 缺文件 {d}")
miss += 1
continue
with open(ckfile) as f:
cur = json.load(f).get("package")
Comment on lines +93 to +94

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

问题分析
在读取 .cargo-checksum.json 时,open(ckfile) 没有指定 encoding="utf-8"。虽然该文件通常只包含 ASCII 字符,但在非 UTF-8 默认编码的系统环境(例如某些 Windows 环境)下,显式指定 encoding="utf-8" 可以避免潜在的 UnicodeDecodeError,并与脚本中其他 open 调用保持一致。

改进建议
在 open 函数中显式添加 encoding="utf-8" 参数。

Suggested change
with open(ckfile) as f:
cur = json.load(f).get("package")
with open(ckfile, encoding="utf-8") as f:
cur = json.load(f).get("package")

if cur == ck:
ok += 1
else:
print(f"[checksum] (verify) 不匹配 {name} {ver}: lock={ck} file={cur}")
mismatch += 1
continue

if os.path.exists(ckfile):
kept += 1
continue
if ck is None:
print(f"[checksum] 警告: Cargo.lock 无 {name} {ver}(目录 {d})")
data = {"files": {}}
miss += 1
else:
data = {"files": {}, "package": ck}
with open(ckfile, "w", encoding="utf-8") as f:
json.dump(data, f)
made += 1

if verify:
print(f"[checksum] verify: 匹配={ok} 不匹配={mismatch} 缺文件={miss}")
return 1 if mismatch else 0
print(f"[checksum] 重建={made} 保留={kept} 缺checksum={miss}")
return 0


if __name__ == "__main__":
sys.exit(main(sys.argv))
Loading