-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
63 lines (47 loc) · 1.84 KB
/
utils.py
File metadata and controls
63 lines (47 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import re
from typing import Any
def normalize_keyword(keyword: str) -> str:
    """Return *keyword* lower-cased with surrounding whitespace removed."""
    trimmed = keyword.strip()
    return trimmed.lower()
def sort_text_values(values: list[str]) -> list[str]:
    """Return a new list with *values* in case-insensitive order.

    Strings are compared primarily by their ``casefold()`` form. Strings
    that differ only by case are then ordered by their raw value, so the
    result is the same no matter what order the inputs arrive in.

    Args:
        values: Strings to sort. The input is not modified.

    Returns:
        A new, sorted list containing the same strings.
    """
    # The raw string is the tie-breaker: without it, case variants such as
    # "b" and "B" would simply keep their input order (stable sort).
    return sorted(values, key=lambda item: (item.casefold(), item))
def normalize_scalar_field(value: Any) -> str:
    """Return ``str(value)`` with surrounding whitespace removed.

    ``None`` is mapped to the empty string.
    """
    return "" if value is None else str(value).strip()
def normalize_labeled_list_field(value: Any) -> str:
    """Render *value* as a comma-separated string.

    * ``None`` yields the empty string.
    * A ``list``, ``tuple`` or ``set`` has every item converted with
      ``str()`` and stripped; items that end up empty are dropped, the
      rest are ordered with ``sort_text_values`` and joined with ``","``.
    * Any other value is returned as ``str(value).strip()``.
    """
    if value is None:
        return ""
    if not isinstance(value, (list, tuple, set)):
        return str(value).strip()
    stripped_items = (str(item).strip() for item in value)
    non_empty = [text for text in stripped_items if text]
    return ",".join(sort_text_values(non_empty))
def normalize_unlabeled_list_field(value: Any) -> str:
    """Render *value* as a space-separated string.

    * ``None`` yields the empty string.
    * A ``list``, ``tuple`` or ``set`` has every item converted with
      ``str()`` and stripped; items that end up empty are dropped, the
      rest are ordered with ``sort_text_values`` and joined with ``" "``.
    * Any other value is returned as ``str(value).strip()``.
    """
    if value is None:
        return ""
    if not isinstance(value, (list, tuple, set)):
        return str(value).strip()
    stripped_items = (str(item).strip() for item in value)
    non_empty = [text for text in stripped_items if text]
    return " ".join(sort_text_values(non_empty))
def cleanse_text(value: str) -> str:
    """Reduce *value* to trimmed, single-spaced ASCII text.

    Whitespace is collapsed *before* non-ASCII characters are dropped, so
    Unicode whitespace (for example the non-breaking space U+00A0) still
    separates words instead of being deleted along with the other
    non-ASCII characters, which would glue neighbouring words together.

    Args:
        value: Text to cleanse; it is coerced with ``str()`` first.

    Returns:
        The text with every whitespace run replaced by one space, all
        non-ASCII characters removed, and no leading or trailing spaces.
    """
    text = str(value)
    # On str patterns \s also matches Unicode whitespace; turn every run
    # into a plain ASCII space while those characters are still present.
    text = re.sub(r"\s+", " ", text)
    # Drop whatever cannot be represented in ASCII.
    text = text.encode("ascii", "ignore").decode("ascii")
    # Removing a non-ASCII character that sat between two spaces leaves a
    # double space behind, so collapse once more before trimming.
    text = re.sub(r"\s+", " ", text)
    return text.strip()
def cleansed_normalize_labeled_list_field(value: Any) -> str:
    """Render *value* as a cleansed, de-duplicated, comma-separated string.

    * ``None`` yields the empty string.
    * A ``list``, ``tuple`` or ``set`` has every item passed through
      ``cleanse_text``; empty results are dropped and items whose
      ``casefold()`` form was already seen are skipped (the first
      occurrence wins). The survivors are ordered with
      ``sort_text_values`` and joined with ``","``.
    * Any other value is returned as ``cleanse_text(str(value))``.
    """
    if value is None:
        return ""
    if not isinstance(value, (list, tuple, set)):
        return cleanse_text(str(value))

    # Maps casefolded key -> first cleansed spelling encountered; dicts keep
    # insertion order, so values() reproduces first-seen order.
    first_seen: dict[str, str] = {}
    for item in value:
        text = cleanse_text(str(item))
        if text:
            first_seen.setdefault(text.casefold(), text)
    return ",".join(sort_text_values(list(first_seen.values())))
def cleansed_normalize_scalar_field(value: Any) -> str:
    """Return *value* converted with ``str()`` and run through ``cleanse_text``.

    ``None`` is mapped to the empty string without calling ``cleanse_text``.
    """
    return "" if value is None else cleanse_text(str(value))