From 57a8716fa1fd70c2d68e166352830b9ebe7865ee Mon Sep 17 00:00:00 2001 From: Matthew Horridge Date: Tue, 27 Jan 2026 10:34:36 -0800 Subject: [PATCH 1/2] Support string mappings in EnumToEnum Allow EnumToEnum to load string-coded mappings without forced int coercion. Keep numeric mapping behavior by converting only when keys and values are int-like. Add tests covering string mappings and string keys. --- .../primitives/enum2enum.py | 22 +++++++++++----- tests/test_primitives_serialization.py | 26 +++++++++++++++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/harmonization_framework/primitives/enum2enum.py b/src/harmonization_framework/primitives/enum2enum.py index d2215ac..3744413 100644 --- a/src/harmonization_framework/primitives/enum2enum.py +++ b/src/harmonization_framework/primitives/enum2enum.py @@ -8,12 +8,12 @@ class EnumToEnum(PrimitiveOperation): If strict is True, missing mappings raise a KeyError. If strict is False, missing mappings return the configured default (or None). """ - def __init__(self, mapping: Dict[int, int], default: Any = None, strict: bool = False): + def __init__(self, mapping: Dict[Any, Any], default: Any = None, strict: bool = False): """ Create a mapping from source enum values to target enum values. Args: - mapping: Dict of source -> target values. + mapping: Dict of source -> target values (string or numeric keys/values). default: Value to return when a mapping is missing (strict=False only). strict: When True, raise a KeyError for missing mappings. """ @@ -57,10 +57,20 @@ def from_serialization(cls, serialization): """ Reconstruct an EnumToEnum mapping from a serialized dict. """ - mapping = { - int(key): int(value) - for key, value in serialization["mapping"].items() - } + mapping = serialization["mapping"] + + def is_int_like(value: Any) -> bool: + if isinstance(value, bool): + return False + if isinstance(value, int): + return True + if isinstance(value, str): + stripped = value.strip() + return stripped.lstrip("-").isdigit() and stripped != "" + return False + + if mapping and all(is_int_like(k) for k in mapping) and all(is_int_like(v) for v in mapping.values()): + mapping = {int(key): int(value) for key, value in mapping.items()} default = serialization.get("default") strict = bool(serialization.get("strict", False)) return EnumToEnum(mapping, default=default, strict=strict) diff --git a/tests/test_primitives_serialization.py b/tests/test_primitives_serialization.py index 7597025..6edda9d 100644 --- a/tests/test_primitives_serialization.py +++ b/tests/test_primitives_serialization.py @@ -98,6 +98,32 @@ def test_enum_to_enum_serialization_and_transform(): assert primitive.transform([1, 2]) == [10, 20] +def test_enum_to_enum_string_mapping_roundtrip(): + payload = { + "operation": "enum_to_enum", + "mapping": {"BL": "baseline", "FU": "follow_up"}, + "default": "unknown", + "strict": False, + } + + roundtrip = EnumToEnum.from_serialization(payload) + assert roundtrip.to_dict() == payload + assert roundtrip.transform("BL") == "baseline" + assert roundtrip.transform(["BL", "FU", "ZZ"]) == ["baseline", "follow_up", "unknown"] + + +def test_enum_to_enum_string_keys_preserved(): + payload = { + "operation": "enum_to_enum", + "mapping": {"1": "one", "2": "two"}, + "strict": False, + } + + roundtrip = EnumToEnum.from_serialization(payload) + assert roundtrip.to_dict() == payload + assert roundtrip.transform("1") == "one" + + def test_enum_to_enum_default_for_missing_value(): primitive = EnumToEnum({1: 10}, default=-1) assert primitive.transform(2) == -1 From f020cdcc4ae6a590bcddd2c14d2cb4bd4c3da43f Mon Sep 17 00:00:00 2001 From: Matthew Horridge Date: Tue, 27 Jan 2026 10:40:19 -0800 Subject: [PATCH 2/2] Update demo enum mappings and output order Switch visit_type_code example to string-coded enum mappings. Update demo input, rules, output, and README to reflect string labels. Reorder output so given_name and family_name are adjacent. Closes #77. --- demo/harmonize_example/README.md | 7 +++---- demo/harmonize_example/input.csv | 6 +++--- demo/harmonize_example/output.csv | 8 ++++---- demo/harmonize_example/rules.json | 8 ++++---- demo/harmonize_example/run_example.py | 2 +- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/demo/harmonize_example/README.md b/demo/harmonize_example/README.md index 590f6d7..a408d57 100644 --- a/demo/harmonize_example/README.md +++ b/demo/harmonize_example/README.md @@ -15,8 +15,7 @@ It includes a small input CSV, a rules JSON file, and a Python script that perfo - Splits `name` (stored as `"Last, First"`) into two new columns: - `given_name` - `family_name` -- Maps `visit_type_code` to `visit_type_label` using an enum-to-enum rule. - (Note: this primitive currently requires numeric mapping values.) +- Maps `visit_type_code` (e.g., BL/FU/SC) to `visit_type_label` using an enum-to-enum rule. ## Run it From the repository root: @@ -29,10 +28,10 @@ This writes `demo/harmonize_example/output.csv`. ## Expected output columns The output CSV includes: -- `age_years` -- `weight_kg` - `given_name` - `family_name` +- `age_years` +- `weight_kg` - `visit_type_label` - `source dataset` (set to `"demo"` for each row) - `original_id` (the original row index) diff --git a/demo/harmonize_example/input.csv b/demo/harmonize_example/input.csv index 8c10d93..f6aea93 100644 --- a/demo/harmonize_example/input.csv +++ b/demo/harmonize_example/input.csv @@ -1,4 +1,4 @@ age,weight_lbs,name,visit_type_code -10,78.26,"Smith, Alice",1 -5,44.5,"Jones, Bob",2 -8,92.2,"Nguyen, Carol",3 +10,78.26,"Smith, Alice",BL +5,44.5,"Jones, Bob",FU +8,92.2,"Nguyen, Carol",SC diff --git a/demo/harmonize_example/output.csv b/demo/harmonize_example/output.csv index 3e7cb19..cf0fc3c 100644 --- a/demo/harmonize_example/output.csv +++ b/demo/harmonize_example/output.csv @@ -1,4 +1,4 @@ -age_years,weight_kg,family_name,visit_type_label,given_name,source dataset,original_id -10,35.5,Smith,1,Alice,input.csv,0 -5,20.18,Jones,2,Bob,input.csv,1 -8,41.82,Nguyen,3,Carol,input.csv,2 +given_name,family_name,age_years,weight_kg,visit_type_label,source dataset,original_id +Alice,Smith,10,35.5,baseline,demo,0 +Bob,Jones,5,20.18,follow_up,demo,1 +Carol,Nguyen,8,41.82,screening,demo,2 diff --git a/demo/harmonize_example/rules.json b/demo/harmonize_example/rules.json index 8e55d21..b67bbae 100644 --- a/demo/harmonize_example/rules.json +++ b/demo/harmonize_example/rules.json @@ -67,11 +67,11 @@ { "operation": "enum_to_enum", "mapping": { - "1": 1, - "2": 2, - "3": 3 + "BL": "baseline", + "FU": "follow_up", + "SC": "screening" }, - "default": 0, + "default": "unknown", "strict": false } ] diff --git a/demo/harmonize_example/run_example.py b/demo/harmonize_example/run_example.py index 0399499..324095c 100644 --- a/demo/harmonize_example/run_example.py +++ b/demo/harmonize_example/run_example.py @@ -52,8 +52,8 @@ def main() -> None: # Reorder columns for readability in the demo output. preferred_order = [ - "family_name", "given_name", + "family_name", "age_years", "weight_kg", "visit_type_label",