Skip to content

Commit d3f2cfd

Browse files
Opeyem1a and claude committed
fix: normalize names to alphanumeric for robust CSV matching
Handles NBSP and other non-standard characters introduced by Excel/Google Sheets exports. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6d8e998 commit d3f2cfd

1 file changed

Lines changed: 21 additions & 13 deletions

File tree

runs/whitelist_blacklist/transform.py

Lines changed: 21 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,31 @@
11
import csv
22
import hashlib
33
import json
4+
import re
45
from typing import Dict, List, Set
56

67
from algorithms.dataclasses.enums import Relationship
78
from algorithms.dataclasses.student import Student
89

910

11+
def _to_key(s: str) -> str:
12+
"""Strip to alphanumeric only for robust name matching across export formats."""
13+
return re.sub(r"[^a-zA-Z0-9]", "", s).lower()
14+
15+
1016
def _make_id(first: str, last: str) -> int:
    """Derive a deterministic integer ID from a student's first and last name.

    The two names are concatenated and normalized with ``_to_key``; the
    SHA-256 hex digest of that key is read as an integer and reduced
    modulo 10**9.
    """
    digest = hashlib.sha256(_to_key(first + last).encode()).hexdigest()
    return int(digest, 16) % 1_000_000_000
1319

1420

1521
def _parse_names(field: str) -> Set[str]:
16-
"""Parse 'part 1: FirstName LastName; part 2: ...' into a set of unique full names."""
22+
"""Parse 'part 1: FirstName LastName; part 2: ...' into a set of alphanumeric keys."""
1723
names = set()
1824
for part in field.split(";"):
1925
if ":" in part:
20-
name_str = part.split(":", 1)[1].strip()
21-
if name_str:
22-
names.add(name_str)
26+
key = _to_key(part.split(":", 1)[1])
27+
if key:
28+
names.add(key)
2329
return names
2430

2531

@@ -30,25 +36,27 @@ def get_students(csv_path: str) -> List[Student]:
3036
rows.append(row)
3137

3238
name_to_id: Dict[str, int] = {
33-
f"{row['First_name']} {row['Last_name']}": _make_id(row["First_name"], row["Last_name"])
39+
_to_key(f"{row['First_name']}{row['Last_name']}"): _make_id(
40+
row["First_name"], row["Last_name"]
41+
)
3442
for row in rows
3543
}
3644

3745
students = []
3846
for row in rows:
3947
first, last = row["First_name"], row["Last_name"]
4048
sid = _make_id(first, last)
41-
full_name = f"{first} {last}"
49+
full_name = f"{re.sub(r'[^a-zA-Z0-9]', '', first)} {re.sub(r'[^a-zA-Z0-9]', '', last)}"
4250

4351
relationships: Dict[int, Relationship] = {}
4452

45-
for name in _parse_names(row.get("whitelist", "")):
46-
if name in name_to_id and name_to_id[name] != sid:
47-
relationships[name_to_id[name]] = Relationship.FRIEND
53+
for key in _parse_names(row.get("whitelist", "")):
54+
if key in name_to_id and name_to_id[key] != sid:
55+
relationships[name_to_id[key]] = Relationship.FRIEND
4856

49-
for name in _parse_names(row.get("blacklist", "")):
50-
if name in name_to_id and name_to_id[name] != sid:
51-
relationships[name_to_id[name]] = Relationship.ENEMY
57+
for key in _parse_names(row.get("blacklist", "")):
58+
if key in name_to_id and name_to_id[key] != sid:
59+
relationships[name_to_id[key]] = Relationship.ENEMY
5260

5361
students.append(
5462
Student(

0 commit comments

Comments
 (0)