Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion demo/harmonize_example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ It includes a small input CSV, a rules JSON file, and a Python script that perfo

## What the example does
- Renames `age` to `age_years` (pass-through).
- Converts `weight_lbs` to `weight_kg` (multiply by 0.453592).
- Converts `weight_lbs` to `weight_kg` (multiply by 0.453592) and formats to two decimals.
- Splits `name` (stored as `"Last, First"`) into two new columns:
- `given_name`
- `family_name`
Expand Down
2 changes: 1 addition & 1 deletion demo/harmonize_example/output.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
given_name,family_name,age_years,weight_kg,visit_type_label,source dataset,original_id
Alice,Smith,10,35.5,baseline,demo,0
Alice,Smith,10,35.50,baseline,demo,0
Bob,Jones,5,20.18,follow_up,demo,1
Carol,Nguyen,8,41.82,screening,demo,2
7 changes: 1 addition & 6 deletions demo/harmonize_example/rules.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,8 @@
"scaling_factor": 0.453592
},
{
"operation": "round",
"operation": "format_number",
"precision": 2
},
{
"operation": "cast",
"source": "float",
"target": "text"
}
]
}
Expand Down
4 changes: 3 additions & 1 deletion src/harmonization_framework/harmonization_rule.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Any, List
from .element import DataElement
from .primitives.base import PrimitiveOperation
from .primitives import PrimitiveVocabulary, Bin, Cast, ConvertDate, ConvertUnits, DoNothing, EnumToEnum, NormalizeText, Offset, Reduce, Round, Scale, Substitute, Threshold, Truncate
from .primitives import PrimitiveVocabulary, Bin, Cast, ConvertDate, ConvertUnits, DoNothing, EnumToEnum, FormatNumber, NormalizeText, Offset, Reduce, Round, Scale, Substitute, Threshold, Truncate

import json

Expand Down Expand Up @@ -61,6 +61,8 @@ def from_serialization(cls, serialization):
primitive = DoNothing.from_serialization(operation)
case PrimitiveVocabulary.ENUM_TO_ENUM.value:
primitive = EnumToEnum.from_serialization(operation)
case PrimitiveVocabulary.FORMAT_NUMBER.value:
primitive = FormatNumber.from_serialization(operation)
case PrimitiveVocabulary.NORMALIZE_TEXT.value:
primitive = NormalizeText.from_serialization(operation)
case PrimitiveVocabulary.OFFSET.value:
Expand Down
1 change: 1 addition & 0 deletions src/harmonization_framework/primitives/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .dates import ConvertDate
from .donothing import DoNothing
from .enum2enum import EnumToEnum
from .format_number import FormatNumber
from .normalize import NormalizeText
from .offset import Offset
from .reduce import Reduce
Expand Down
38 changes: 38 additions & 0 deletions src/harmonization_framework/primitives/format_number.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from .base import PrimitiveOperation, support_iterable
from typing import Union

class FormatNumber(PrimitiveOperation):
"""
Format numeric values to a fixed number of decimal places.

Output is a string, intended for stable presentation (e.g., CSV output).
"""
def __init__(self, precision: int):
if not isinstance(precision, int):
raise TypeError(f"Precision must be an integer, got {type(precision).__name__}")
if precision < 0:
raise ValueError("Precision must be non-negative")
self.precision = precision

def __str__(self):
return f"Format number to {self.precision} decimal places"

def to_dict(self):
"""Serialize this operation to a JSON-friendly dict."""
return {
"operation": "format_number",
"precision": self.precision,
}

@support_iterable
def transform(self, value: Union[int, float]) -> str:
"""Format the numeric value to the configured decimal precision."""
if not isinstance(value, (int, float)) or isinstance(value, bool):
raise TypeError(f"FormatNumber expects a numeric value, got {type(value).__name__}")
return f"{value:.{self.precision}f}"

@classmethod
def from_serialization(cls, serialization):
"""Reconstruct a FormatNumber operation from a serialized dict."""
precision = int(serialization["precision"])
return FormatNumber(precision)
1 change: 1 addition & 0 deletions src/harmonization_framework/primitives/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class PrimitiveVocabulary(Enum):
CONVERT_UNITS = "convert_units"
DO_NOTHING = "do_nothing"
ENUM_TO_ENUM = "enum_to_enum"
FORMAT_NUMBER = "format_number"
NORMALIZE_TEXT = "normalize_text"
OFFSET = "offset"
REDUCE = "reduce"
Expand Down
24 changes: 24 additions & 0 deletions tests/test_primitives_serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ConvertUnits,
DoNothing,
EnumToEnum,
FormatNumber,
NormalizeText,
Offset,
Reduce,
Expand Down Expand Up @@ -136,6 +137,29 @@ def test_enum_to_enum_strict_raises_for_missing_value():
primitive.transform(2)


def test_format_number_serialization_and_transform():
payload = {"operation": "format_number", "precision": 2}

roundtrip = FormatNumber.from_serialization(payload)
assert roundtrip.to_dict() == payload
assert roundtrip.transform(35.5) == "35.50"
assert roundtrip.transform(3) == "3.00"
assert roundtrip.transform([1.234, 2]) == ["1.23", "2.00"]


def test_format_number_rejects_invalid_precision():
with pytest.raises(TypeError, match="Precision must be an integer"):
FormatNumber("2") # type: ignore[arg-type]
with pytest.raises(ValueError, match="non-negative"):
FormatNumber(-1)


def test_format_number_rejects_non_numeric():
primitive = FormatNumber(2)
with pytest.raises(TypeError, match="numeric"):
primitive.transform("nope")


def test_round_serialization_and_transform():
primitive = Round(2)
payload = primitive.to_dict()
Expand Down