Skip to content

Commit 104e379

Browse files
committed
Deduplicate canonicalization, add comments
1 parent cb259c3 commit 104e379

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

scripts/properties_to_json.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@
7272
CSV_PATH.glob("*.csv"), key=lambda p: float(p.stem.lstrip("v")), reverse=True
7373
)
7474

75+
def canonicalize(s):
    """Return *s* lower-cased with every non-word character removed.

    Used to compare two "Type" strings while ignoring differences in
    whitespace, punctuation and letter case. Word characters (letters,
    digits and the underscore) are kept.

    Args:
        s: The string to normalise.

    Returns:
        The normalised string.
    """
    # Operate on the argument itself; reading a global such as ``item`` here
    # would make every call return the same value regardless of input.
    return re.sub(r"\W", "", s).lower()
78+
7579
for csv_path in paths:
7680
version = csv_path.stem
7781
# header = ["Parent Type", "Property", "Type", "Description"]
@@ -96,10 +100,12 @@
96100
# over new ones.
97101
# update the versions for these here and break to avoid duplicate rows
98102
if item["Property"] == existing_item["Property"] and item["Parent Type"] == existing_item["Parent Type"]:
99-
if re.sub("\\W", "", item["Type"]).lower() != re.sub("\\W", "", existing_item["Type"]).lower():
103+
if canonicalize(item["Type"]) != canonicalize(existing_item["Type"]):
104+
# both types meaningfully differ
100105
item["versions"] = [version]
101106
json_items.append(item)
102107
elif item["Type"] == existing_item["Type"]:
108+
# canonical forms match, so any difference is probably just typesetting. keep the newest one
103109
item["Type"] = existing_item["Type"]
104110
if version not in existing_item["versions"]:
105111
existing_item["versions"].append(version)

0 commit comments

Comments
 (0)