Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions demo/harmonize_example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ It includes a small input CSV, a rules JSON file, and a Python script that perfo
- Splits `name` (stored as `"Last, First"`) into two new columns:
- `given_name`
- `family_name`
- Maps `visit_type_code` to `visit_type_label` using an enum-to-enum rule.
(Note: this primitive currently requires numeric mapping values.)
- Maps `visit_type_code` (e.g., BL/FU/SC) to `visit_type_label` using an enum-to-enum rule.

## Run it
From the repository root:
Expand All @@ -29,10 +28,10 @@ This writes `demo/harmonize_example/output.csv`.

## Expected output columns
The output CSV includes:
- `age_years`
- `weight_kg`
- `given_name`
- `family_name`
- `age_years`
- `weight_kg`
- `visit_type_label`
- `source dataset` (set to `"demo"` for each row)
- `original_id` (the original row index)
Expand Down
6 changes: 3 additions & 3 deletions demo/harmonize_example/input.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
age,weight_lbs,name,visit_type_code
10,78.26,"Smith, Alice",1
5,44.5,"Jones, Bob",2
8,92.2,"Nguyen, Carol",3
10,78.26,"Smith, Alice",BL
5,44.5,"Jones, Bob",FU
8,92.2,"Nguyen, Carol",SC
8 changes: 4 additions & 4 deletions demo/harmonize_example/output.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
age_years,weight_kg,family_name,visit_type_label,given_name,source dataset,original_id
10,35.5,Smith,1,Alice,input.csv,0
5,20.18,Jones,2,Bob,input.csv,1
8,41.82,Nguyen,3,Carol,input.csv,2
given_name,family_name,age_years,weight_kg,visit_type_label,source dataset,original_id
Alice,Smith,10,35.5,baseline,demo,0
Bob,Jones,5,20.18,follow_up,demo,1
Carol,Nguyen,8,41.82,screening,demo,2
8 changes: 4 additions & 4 deletions demo/harmonize_example/rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@
{
"operation": "enum_to_enum",
"mapping": {
"1": 1,
"2": 2,
"3": 3
"BL": "baseline",
"FU": "follow_up",
"SC": "screening"
},
"default": 0,
"default": "unknown",
"strict": false
}
]
Expand Down
2 changes: 1 addition & 1 deletion demo/harmonize_example/run_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ def main() -> None:

# Reorder columns for readability in the demo output.
preferred_order = [
"family_name",
"given_name",
"family_name",
"age_years",
"weight_kg",
"visit_type_label",
Expand Down
22 changes: 16 additions & 6 deletions src/harmonization_framework/primitives/enum2enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ class EnumToEnum(PrimitiveOperation):
If strict is True, missing mappings raise a KeyError.
If strict is False, missing mappings return the configured default (or None).
"""
def __init__(self, mapping: Dict[int, int], default: Any = None, strict: bool = False):
def __init__(self, mapping: Dict[Any, Any], default: Any = None, strict: bool = False):
"""
Create a mapping from source enum values to target enum values.

Args:
mapping: Dict of source -> target values.
mapping: Dict of source -> target values (string or numeric keys/values).
default: Value to return when a mapping is missing (strict=False only).
strict: When True, raise a KeyError for missing mappings.
"""
Expand Down Expand Up @@ -57,10 +57,20 @@ def from_serialization(cls, serialization):
"""
Reconstruct an EnumToEnum mapping from a serialized dict.
"""
mapping = {
int(key): int(value)
for key, value in serialization["mapping"].items()
}
mapping = serialization["mapping"]

def is_int_like(value: Any) -> bool:
    """Return True when ``value`` can safely be converted with ``int()``.

    Accepts ``int`` instances (but not ``bool``) and strings that, after
    trimming surrounding whitespace, consist of an optional single leading
    ``-`` followed by one or more decimal digits.

    Args:
        value: Candidate mapping key or value.

    Returns:
        True if ``int(value)`` is expected to succeed, False otherwise.
    """
    if isinstance(value, bool):
        # bool is a subclass of int; reject it explicitly before the int check.
        return False
    if isinstance(value, int):
        return True
    if isinstance(value, str):
        text = value.strip()
        # Drop at most one sign: stripping every leading "-" would accept
        # inputs such as "--5", for which int() raises ValueError.
        if text.startswith("-"):
            text = text[1:]
        # isdecimal() (unlike isdigit()) rejects characters such as "²" that
        # int() cannot parse; it is also False for the empty string.
        return text.isdecimal()
    return False

if mapping and all(is_int_like(k) for k in mapping) and all(is_int_like(v) for v in mapping.values()):
mapping = {int(key): int(value) for key, value in mapping.items()}
default = serialization.get("default")
strict = bool(serialization.get("strict", False))
return EnumToEnum(mapping, default=default, strict=strict)
26 changes: 26 additions & 0 deletions tests/test_primitives_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,32 @@ def test_enum_to_enum_serialization_and_transform():
assert primitive.transform([1, 2]) == [10, 20]


def test_enum_to_enum_string_mapping_roundtrip():
    """A string-to-string mapping round-trips through from_serialization/to_dict."""
    serialized = {
        "operation": "enum_to_enum",
        "mapping": {"BL": "baseline", "FU": "follow_up"},
        "default": "unknown",
        "strict": False,
    }

    primitive = EnumToEnum.from_serialization(serialized)

    # Serializing the reconstructed primitive must reproduce the input exactly.
    assert primitive.to_dict() == serialized

    # Scalar lookup.
    assert primitive.transform("BL") == "baseline"

    # List lookup; "ZZ" has no mapping entry and is expected to yield the default.
    codes = ["BL", "FU", "ZZ"]
    expected_labels = ["baseline", "follow_up", "unknown"]
    assert primitive.transform(codes) == expected_labels


def test_enum_to_enum_string_keys_preserved():
    """Digit-like string keys stay strings when the mapped values are not numeric."""
    serialized = {
        "operation": "enum_to_enum",
        "mapping": {"1": "one", "2": "two"},
        "strict": False,
    }

    primitive = EnumToEnum.from_serialization(serialized)

    # The round trip must reproduce the payload, including the string keys.
    assert primitive.to_dict() == serialized

    # Lookup uses the string key "1".
    label = primitive.transform("1")
    assert label == "one"


def test_enum_to_enum_default_for_missing_value():
primitive = EnumToEnum({1: 10}, default=-1)
assert primitive.transform(2) == -1
Expand Down