bmir-radx · matthewhorridge · Feb 18, 2026 · Feb 17, 2026 · Feb 17, 2026 · Feb 18, 2026
diff --git a/README.md b/README.md
@@ -135,6 +135,7 @@ All settings are provided in the operation dict for rule serialization.
 | `normalize_boolean` | Normalize truthy/falsy values to booleans. | `truthy` (list, optional; defaults below)<br>`falsy` (list, optional; defaults below)<br>`strict` (bool, default `true`)<br>`default` (optional; used when `strict=false`) |
 | `normalize_text` | Apply a single text normalization. | `normalization` (`strip`, `lower`, `upper`, `remove_accents`, `remove_punctuation`, `remove_special_characters`) |
 | `offset` | Add an offset to numeric values. | `offset` (number) |
+| `parse_array` | Parse array-like values into a list for downstream operations. | `format` (`json` (default) or `delimiter`)<br>`delimiter` (string; used for `delimiter` format, default `\|`, supports `\\n` for newline)<br>`item_type` (`auto`, `string`, `integer`, `float`, `boolean`)<br>`strict` (bool, default `true`)<br>`default` (optional; used when `strict=false`)<br>`allow_singleton` (bool, default `false`) |
 | `reduce` | Reduce a list of values to one value. | `reduction` (`any`, `none`, `all`, `one-hot`, `sum`); expects a list/tuple input; one-hot returns index or None |
 | `round` | Round numeric values to a given precision. | `precision` (int, >=0); uses Python `round` semantics |
 | `scale` | Multiply numeric values by a factor. | `scaling_factor` (number) |
@@ -162,6 +163,7 @@ Each operation is represented by a JSON-friendly dict. Examples:
 | `normalize_boolean` | `{"operation":"normalize_boolean","truthy":["yes","y","1"],"falsy":["no","n","0"],"strict":true}` |
 | `normalize_text` | `{"operation":"normalize_text","normalization":"lower"}` |
 | `offset` | `{"operation":"offset","offset":2.5}` |
+| `parse_array` | `{"operation":"parse_array","format":"json","item_type":"integer","strict":true}` |
 | `reduce` | `{"operation":"reduce","reduction":"one-hot"}` |
 | `round` | `{"operation":"round","precision":2}` |
 | `scale` | `{"operation":"scale","scaling_factor":0.453592}` |
@@ -176,3 +178,31 @@ If you use the `normalize_boolean` primitive without specifying `truthy` or
 
 - truthy: `["true", "t", "yes", "y", "1", 1, true, "on"]`
 - falsy: `["false", "f", "no", "n", "0", 0, false, "off", ""]`
+
+### Chaining `parse_array` and `reduce` for CSV data
+
+When arrays are serialized as text in CSV (for example `"[8,8,8,8,6]"` or
+`"8|8|8|8|6"`), chain `parse_array` before `reduce`:
+
+```json
+{
+  "source": "week_hours",
+  "target": "total_hours",
+  "operations": [
+    {"operation": "parse_array", "format": "json", "item_type": "integer", "strict": true},
+    {"operation": "reduce", "reduction": "sum"}
+  ]
+}
+```
+
+For delimiter-separated input (for example `"8|8|8|8|6"`), use:
+
+```json
+{"operation": "parse_array", "format": "delimiter", "delimiter": "|", "item_type": "integer"}
+```
+
+For newline-separated input, use:
+
+```json
+{"operation": "parse_array", "format": "delimiter", "delimiter": "\\n", "item_type": "integer"}
+```
diff --git a/demo/primitives_ui/input.csv b/demo/primitives_ui/input.csv
@@ -0,0 +1,4 @@
+age_years,zip_code_text,visit_date_iso,weight_kg,record_id,bmi,smoker_response,city_raw,thermometer_c,week_hours,medication_dose_mg,price_usd,name_last_first,pulse_rate,username
+34,02139,2026-02-17,70.5,REC-0001,27.345,Yes,"  New York  ",36.6,"[8,8,8,8,6]",2.675,19.99,"DOE, Jane",220,alexandria
+12,60614,2025-12-31,82.0,REC-0002,18.0,No,"  San Francisco  ",37.1,"[10,10,10,10,5]",1.005,3.5,"SMITH, John",35,bo
+70,98101,2024-07-04,95.2,REC-0003,31.889,unknown,"  Austin  ",36.2,"[0,0,0,0,0]",0.999,120.0,"LEE, Ada",88,charlie
diff --git a/demo/primitives_ui/rules.json b/demo/primitives_ui/rules.json
@@ -0,0 +1,219 @@
+{
+  "age_years": {
+    "age_group": {
+      "source": "age_years",
+      "target": "age_group",
+      "operations": [
+        {
+          "operation": "bin",
+          "bins": [
+            {"label": 1, "start": 0, "end": 12},
+            {"label": 2, "start": 13, "end": 17},
+            {"label": 3, "start": 18, "end": 64},
+            {"label": 4, "start": 65, "end": 120}
+          ]
+        }
+      ]
+    }
+  },
+  "age_group": {
+    "age_group_label": {
+      "source": "age_group",
+      "target": "age_group_label",
+      "operations": [
+        {
+          "operation": "enum_to_enum",
+          "mapping": {
+            "1": "child",
+            "2": "teen",
+            "3": "adult",
+            "4": "senior"
+          },
+          "default": "unknown",
+          "strict": false
+        }
+      ]
+    }
+  },
+  "zip_code_text": {
+    "zip_code": {
+      "source": "zip_code_text",
+      "target": "zip_code",
+      "operations": [
+        {
+          "operation": "cast",
+          "source": "text",
+          "target": "integer"
+        }
+      ]
+    }
+  },
+  "visit_date_iso": {
+    "visit_date_us": {
+      "source": "visit_date_iso",
+      "target": "visit_date_us",
+      "operations": [
+        {
+          "operation": "convert_date",
+          "source_format": "%Y-%m-%d",
+          "target_format": "%m/%d/%Y"
+        }
+      ]
+    }
+  },
+  "weight_kg": {
+    "weight_lb": {
+      "source": "weight_kg",
+      "target": "weight_lb",
+      "operations": [
+        {
+          "operation": "convert_units",
+          "source_unit": "kg",
+          "target_unit": "lb"
+        }
+      ]
+    }
+  },
+  "record_id": {
+    "record_id_copy": {
+      "source": "record_id",
+      "target": "record_id_copy",
+      "operations": [
+        {
+          "operation": "do_nothing"
+        }
+      ]
+    }
+  },
+  "bmi": {
+    "bmi_formatted": {
+      "source": "bmi",
+      "target": "bmi_formatted",
+      "operations": [
+        {
+          "operation": "format_number",
+          "precision": 1
+        }
+      ]
+    }
+  },
+  "smoker_response": {
+    "is_smoker": {
+      "source": "smoker_response",
+      "target": "is_smoker",
+      "operations": [
+        {
+          "operation": "normalize_boolean",
+          "truthy": ["true", "t", "yes", "y", "1", 1, true, "on"],
+          "falsy": ["false", "f", "no", "n", "0", 0, false, "off", ""],
+          "strict": false,
+          "default": null
+        }
+      ]
+    }
+  },
+  "city_raw": {
+    "city_normalized": {
+      "source": "city_raw",
+      "target": "city_normalized",
+      "operations": [
+        {
+          "operation": "normalize_text",
+          "normalization": "lower"
+        }
+      ]
+    }
+  },
+  "thermometer_c": {
+    "calibrated_c": {
+      "source": "thermometer_c",
+      "target": "calibrated_c",
+      "operations": [
+        {
+          "operation": "offset",
+          "offset": 0.5
+        }
+      ]
+    }
+  },
+  "week_hours": {
+    "total_hours": {
+      "source": "week_hours",
+      "target": "total_hours",
+      "operations": [
+        {
+          "operation": "parse_array",
+          "format": "json",
+          "item_type": "integer",
+          "strict": true
+        },
+        {
+          "operation": "reduce",
+          "reduction": "sum"
+        }
+      ]
+    }
+  },
+  "medication_dose_mg": {
+    "medication_dose_mg_rounded": {
+      "source": "medication_dose_mg",
+      "target": "medication_dose_mg_rounded",
+      "operations": [
+        {
+          "operation": "round",
+          "precision": 2
+        }
+      ]
+    }
+  },
+  "price_usd": {
+    "price_cents": {
+      "source": "price_usd",
+      "target": "price_cents",
+      "operations": [
+        {
+          "operation": "scale",
+          "scaling_factor": 100
+        }
+      ]
+    }
+  },
+  "name_last_first": {
+    "name_first_last": {
+      "source": "name_last_first",
+      "target": "name_first_last",
+      "operations": [
+        {
+          "operation": "substitute",
+          "expression": "^\\s*([^,]+),\\s*(.+)$",
+          "substitution": "\\2 \\1"
+        }
+      ]
+    }
+  },
+  "pulse_rate": {
+    "pulse_rate_clamped": {
+      "source": "pulse_rate",
+      "target": "pulse_rate_clamped",
+      "operations": [
+        {
+          "operation": "threshold",
+          "lower": 40,
+          "upper": 200
+        }
+      ]
+    }
+  },
+  "username": {
+    "username_short": {
+      "source": "username",
+      "target": "username_short",
+      "operations": [
+        {
+          "operation": "truncate",
+          "length": 8
+        }
+      ]
+    }
+  }
+}
diff --git a/src/harmonization_framework/harmonization_rule.py b/src/harmonization_framework/harmonization_rule.py
@@ -1,7 +1,7 @@
 from typing import Any, List
 from .element import DataElement
 from .primitives.base import PrimitiveOperation
-from .primitives import PrimitiveVocabulary, Bin, Cast, ConvertDate, ConvertUnits, DoNothing, EnumToEnum, FormatNumber, NormalizeBoolean, NormalizeText, Offset, Reduce, Round, Scale, Substitute, Threshold, Truncate
+from .primitives import PrimitiveVocabulary, Bin, Cast, ConvertDate, ConvertUnits, DoNothing, EnumToEnum, FormatNumber, NormalizeBoolean, NormalizeText, Offset, ParseArray, Reduce, Round, Scale, Substitute, Threshold, Truncate
 
 import json
 
@@ -69,6 +69,8 @@ def from_serialization(cls, serialization):
                     primitive = NormalizeText.from_serialization(operation)
                 case PrimitiveVocabulary.OFFSET.value:
                     primitive = Offset.from_serialization(operation)
+                case PrimitiveVocabulary.PARSE_ARRAY.value:
+                    primitive = ParseArray.from_serialization(operation)
                 case PrimitiveVocabulary.REDUCE.value:
                     primitive = Reduce.from_serialization(operation)
                 case PrimitiveVocabulary.ROUND.value:

diff --git a/src/harmonization_framework/primitives/__init__.py b/src/harmonization_framework/primitives/__init__.py
@@ -8,6 +8,7 @@
 from .normalize_boolean import NormalizeBoolean
 from .normalize import NormalizeText
 from .offset import Offset
+from .parse_array import ParseArray
 from .reduce import Reduce
 from .round_decimal import Round
 from .scale import Scale