Skip to content

Commit 4e23ea0

Browse files
Merge pull request #80 from bmir-radx/feat/format-number-primitive
Add format_number primitive
2 parents 8367447 + eae04c3 commit 4e23ea0

8 files changed

Lines changed: 70 additions & 9 deletions

File tree

demo/harmonize_example/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ It includes a small input CSV, a rules JSON file, and a Python script that perfo
1111

1212
## What the example does
1313
- Renames `age` to `age_years` (pass-through).
14-
- Converts `weight_lbs` to `weight_kg` (multiply by 0.453592).
14+
- Converts `weight_lbs` to `weight_kg` (multiply by 0.453592) and formats to two decimals.
1515
- Splits `name` (stored as `"Last, First"`) into two new columns:
1616
- `given_name`
1717
- `family_name`

demo/harmonize_example/output.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
given_name,family_name,age_years,weight_kg,visit_type_label,source dataset,original_id
2-
Alice,Smith,10,35.5,baseline,demo,0
2+
Alice,Smith,10,35.50,baseline,demo,0
33
Bob,Jones,5,20.18,follow_up,demo,1
44
Carol,Nguyen,8,41.82,screening,demo,2

demo/harmonize_example/rules.json

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,8 @@
1616
"scaling_factor": 0.453592
1717
},
1818
{
19-
"operation": "round",
19+
"operation": "format_number",
2020
"precision": 2
21-
},
22-
{
23-
"operation": "cast",
24-
"source": "float",
25-
"target": "text"
2621
}
2722
]
2823
}

src/harmonization_framework/harmonization_rule.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Any, List
22
from .element import DataElement
33
from .primitives.base import PrimitiveOperation
4-
from .primitives import PrimitiveVocabulary, Bin, Cast, ConvertDate, ConvertUnits, DoNothing, EnumToEnum, NormalizeText, Offset, Reduce, Round, Scale, Substitute, Threshold, Truncate
4+
from .primitives import PrimitiveVocabulary, Bin, Cast, ConvertDate, ConvertUnits, DoNothing, EnumToEnum, FormatNumber, NormalizeText, Offset, Reduce, Round, Scale, Substitute, Threshold, Truncate
55

66
import json
77

@@ -61,6 +61,8 @@ def from_serialization(cls, serialization):
6161
primitive = DoNothing.from_serialization(operation)
6262
case PrimitiveVocabulary.ENUM_TO_ENUM.value:
6363
primitive = EnumToEnum.from_serialization(operation)
64+
case PrimitiveVocabulary.FORMAT_NUMBER.value:
65+
primitive = FormatNumber.from_serialization(operation)
6466
case PrimitiveVocabulary.NORMALIZE_TEXT.value:
6567
primitive = NormalizeText.from_serialization(operation)
6668
case PrimitiveVocabulary.OFFSET.value:

src/harmonization_framework/primitives/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .dates import ConvertDate
55
from .donothing import DoNothing
66
from .enum2enum import EnumToEnum
7+
from .format_number import FormatNumber
78
from .normalize import NormalizeText
89
from .offset import Offset
910
from .reduce import Reduce
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from .base import PrimitiveOperation, support_iterable
2+
from typing import Union
3+
4+
class FormatNumber(PrimitiveOperation):
    """
    Format numeric values to a fixed number of decimal places.

    Output is a string, intended for stable presentation (e.g., CSV output).
    """

    def __init__(self, precision: int):
        """
        Create a formatter that emits ``precision`` digits after the decimal point.

        Args:
            precision: Number of decimal places; must be a non-negative integer.

        Raises:
            TypeError: If ``precision`` is not an integer. Booleans are rejected
                even though ``bool`` is a subclass of ``int``.
            ValueError: If ``precision`` is negative.
        """
        # bool passes isinstance(..., int); without this explicit exclusion
        # FormatNumber(True) would be accepted here and only fail later in
        # transform() with an invalid format spec (".Truef"). transform()
        # already rejects bool values, so the constructor mirrors that rule.
        if isinstance(precision, bool) or not isinstance(precision, int):
            raise TypeError(f"Precision must be an integer, got {type(precision).__name__}")
        if precision < 0:
            raise ValueError("Precision must be non-negative")
        self.precision = precision

    def __str__(self):
        """Return a short human-readable description of the operation."""
        return f"Format number to {self.precision} decimal places"

    def to_dict(self):
        """Serialize this operation to a JSON-friendly dict."""
        return {
            "operation": "format_number",
            "precision": self.precision,
        }

    @support_iterable
    def transform(self, value: Union[int, float]) -> str:
        """
        Format the numeric value to the configured decimal precision.

        Args:
            value: An ``int`` or ``float``. Booleans are rejected.

        Returns:
            The value rendered in fixed-point notation with exactly
            ``self.precision`` digits after the decimal point.

        Raises:
            TypeError: If ``value`` is not an ``int``/``float`` or is a ``bool``.
        """
        # NOTE(review): @support_iterable presumably applies this element-wise
        # when given an iterable -- confirm against primitives.base.
        if not isinstance(value, (int, float)) or isinstance(value, bool):
            raise TypeError(f"FormatNumber expects a numeric value, got {type(value).__name__}")
        return f"{value:.{self.precision}f}"

    @classmethod
    def from_serialization(cls, serialization):
        """
        Reconstruct a FormatNumber operation from a serialized dict.

        Args:
            serialization: Mapping with a ``"precision"`` entry; the entry is
                coerced with ``int()`` before validation.

        Returns:
            A new instance of ``cls`` configured with that precision.
        """
        precision = int(serialization["precision"])
        # Build via cls so subclasses round-trip to their own type.
        return cls(precision)

src/harmonization_framework/primitives/vocabulary.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ class PrimitiveVocabulary(Enum):
77
CONVERT_UNITS = "convert_units"
88
DO_NOTHING = "do_nothing"
99
ENUM_TO_ENUM = "enum_to_enum"
10+
FORMAT_NUMBER = "format_number"
1011
NORMALIZE_TEXT = "normalize_text"
1112
OFFSET = "offset"
1213
REDUCE = "reduce"

tests/test_primitives_serialization.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
ConvertUnits,
88
DoNothing,
99
EnumToEnum,
10+
FormatNumber,
1011
NormalizeText,
1112
Offset,
1213
Reduce,
@@ -136,6 +137,29 @@ def test_enum_to_enum_strict_raises_for_missing_value():
136137
primitive.transform(2)
137138

138139

140+
def test_format_number_serialization_and_transform():
    """FormatNumber round-trips through its dict form and pads to two decimals."""
    spec = {"operation": "format_number", "precision": 2}
    formatter = FormatNumber.from_serialization(spec)

    # Re-serializing the rebuilt primitive must reproduce the input payload.
    assert formatter.to_dict() == spec

    # Float, int, and list inputs, each paired with the expected formatted output.
    expectations = (
        (35.5, "35.50"),
        (3, "3.00"),
        ([1.234, 2], ["1.23", "2.00"]),
    )
    for raw, formatted in expectations:
        assert formatter.transform(raw) == formatted
148+
149+
150+
def test_format_number_rejects_invalid_precision():
    """The constructor rejects a string precision and a negative precision."""
    # (bad argument, expected exception type, regex the message must match)
    rejected = (
        ("2", TypeError, "Precision must be an integer"),
        (-1, ValueError, "non-negative"),
    )
    for bad_precision, error_type, message in rejected:
        with pytest.raises(error_type, match=message):
            FormatNumber(bad_precision)  # type: ignore[arg-type]
155+
156+
157+
def test_format_number_rejects_non_numeric():
    """transform() raises TypeError when handed a string instead of a number."""
    formatter = FormatNumber(2)
    with pytest.raises(TypeError, match="numeric"):
        formatter.transform("nope")
161+
162+
139163
def test_round_serialization_and_transform():
140164
primitive = Round(2)
141165
payload = primitive.to_dict()

0 commit comments

Comments
 (0)