From f99922436db868315d8dc6d1cdf6dd0de2a199d4 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Fri, 24 Apr 2026 11:24:51 -0400 Subject: [PATCH 1/4] fix codelist operations type inference for empty dataframes --- cdisc_rules_engine/operations/codelist_extensible.py | 6 ++++++ cdisc_rules_engine/operations/codelist_terms.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/cdisc_rules_engine/operations/codelist_extensible.py b/cdisc_rules_engine/operations/codelist_extensible.py index 1e7d91af3..9cad79511 100644 --- a/cdisc_rules_engine/operations/codelist_extensible.py +++ b/cdisc_rules_engine/operations/codelist_extensible.py @@ -26,6 +26,12 @@ def _handle_multiple_versions(self) -> pd.Series: self.params.ct_package_type, unique_ct_versions ) ct_df = self.evaluation_dataset.__class__.from_dict(ct_data) + ct_df = ct_df.astype( + { + "version": str, + "codelist_code": str, + } + ) if self.params.codelist_code in self.evaluation_dataset.columns: is_extensible = self.evaluation_dataset.merge( ct_df.data, diff --git a/cdisc_rules_engine/operations/codelist_terms.py b/cdisc_rules_engine/operations/codelist_terms.py index 39707c3da..225e29311 100644 --- a/cdisc_rules_engine/operations/codelist_terms.py +++ b/cdisc_rules_engine/operations/codelist_terms.py @@ -58,6 +58,12 @@ def _handle_multiple_versions(self) -> pd.Series: self.params.ct_package_type, unique_ct_versions ) ct_df = self.evaluation_dataset.__class__.from_dict(ct_data) + ct_df = ct_df.astype( + { + "version": str, + "codelist_code": str, + } + ) if self.params.codelist_code in self.evaluation_dataset.columns: result = self.evaluation_dataset.merge( ct_df.data, From 25b1b5460e794e1bb0a0e0bf7cf6007cdbf771e9 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Mon, 27 Apr 2026 11:45:36 -0400 Subject: [PATCH 2/4] added unit tests --- .../test_codelist_extensible.py | 26 ++++++++++-- .../test_operations/test_codelist_terms.py | 42 +++++++++++++++++++ 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_operations/test_codelist_extensible.py b/tests/unit/test_operations/test_codelist_extensible.py index 38739ea8d..a59076be4 100644 --- a/tests/unit/test_operations/test_codelist_extensible.py +++ b/tests/unit/test_operations/test_codelist_extensible.py @@ -124,32 +124,44 @@ def test_empty_metadata(operation_params): @mark.parametrize( - "package_type, codelist_code, expected", + "package_type, codelist_code, versions, codelist_codes, expected", [ ( "mock_package", "codelist_code", + ["v1", "v2", "v3"], + ["C1", "C2", "C3"], [True, False, None], ), ( "mock_package", "C1", + ["v1", "v2", "v3"], + ["C1", "C2", "C3"], [True, True, True], ), ( "missing_package", "codelist_code", + ["v1", "v2", "v3"], + ["C1", "C2", "C3"], [None, None, None], ), + ("mock_package", "codelist_code", [], [], []), ], ) def test_multiple_versions( - operation_params, mock_metadata, package_type, codelist_code, expected + operation_params, + mock_metadata, + package_type, + codelist_code, + versions, + codelist_codes, + expected, ): operation_params.ct_package_type = package_type operation_params.ct_version = "version" operation_params.codelist_code = codelist_code - versions = ["v1", "v2", "v3"] library_metadata = LibraryMetadataContainer() for version in versions: @@ -157,7 +169,13 @@ def test_multiple_versions( library_metadata._ct_package_metadata = mock_metadata evaluation_dataset = PandasDataset.from_dict( - {"version": versions, "codelist_code": ["C1", "C2", "C3"]} + {"version": versions, "codelist_code": codelist_codes} + ) + evaluation_dataset = evaluation_dataset.astype( + { + "version": str, + "codelist_code": str, + } ) operation = CodelistExtensible( diff --git a/tests/unit/test_operations/test_codelist_terms.py b/tests/unit/test_operations/test_codelist_terms.py index cf27e7f14..fb6412a0a 100644 --- a/tests/unit/test_operations/test_codelist_terms.py +++ b/tests/unit/test_operations/test_codelist_terms.py @@ -323,3 +323,45 @@ def test_multiple_versions( except RuleExecutionError: result = pd.Series(RuleExecutionError) assert result.equals(pd.Series(expected)) + + +def test_empty_dataset_multiple_versions( + operation_params, + mock_metadata, +): + operation_params.ct_package_type = "mock_package" + operation_params.ct_version = "version" + operation_params.codelist_code = "codelist_code" + operation_params.term_code = "t_code" + operation_params.term_value = None + operation_params.term_pref_term = None + operation_params.returntype = None + + library_metadata = LibraryMetadataContainer() + library_metadata._ct_package_metadata = mock_metadata + + evaluation_dataset = PandasDataset.from_dict( + { + "version": [], + "codelist_code": [], + "t_code": [], + "t_value": [], + "t_pref_term": [], + } + ) + evaluation_dataset = evaluation_dataset.astype( + { + "version": str, + "codelist_code": str, + } + ) + + operation = CodelistTerms( + operation_params, + evaluation_dataset, + MagicMock(), + MagicMock(), + library_metadata, + ) + result = operation._execute_operation() + assert result.tolist() == [] From c933f4d38e0bc1a8d16469b66fb758ae8b662677 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Mon, 27 Apr 2026 17:07:50 -0400 Subject: [PATCH 3/4] Fixed str dtype --- cdisc_rules_engine/operations/codelist_extensible.py | 4 ++-- cdisc_rules_engine/operations/codelist_terms.py | 4 ++-- tests/unit/test_operations/test_codelist_extensible.py | 4 ++-- tests/unit/test_operations/test_codelist_terms.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cdisc_rules_engine/operations/codelist_extensible.py b/cdisc_rules_engine/operations/codelist_extensible.py index 9cad79511..62cd7b56f 100644 --- a/cdisc_rules_engine/operations/codelist_extensible.py +++ b/cdisc_rules_engine/operations/codelist_extensible.py @@ -28,8 +28,8 @@ def _handle_multiple_versions(self) -> pd.Series: ct_df = self.evaluation_dataset.__class__.from_dict(ct_data) ct_df = ct_df.astype( { - "version": str, - "codelist_code": str, + "version": "string", + "codelist_code": "string", } ) if self.params.codelist_code in self.evaluation_dataset.columns: diff --git a/cdisc_rules_engine/operations/codelist_terms.py b/cdisc_rules_engine/operations/codelist_terms.py index 225e29311..90a0a6d18 100644 --- a/cdisc_rules_engine/operations/codelist_terms.py +++ b/cdisc_rules_engine/operations/codelist_terms.py @@ -60,8 +60,8 @@ def _handle_multiple_versions(self) -> pd.Series: ct_df = self.evaluation_dataset.__class__.from_dict(ct_data) ct_df = ct_df.astype( { - "version": str, - "codelist_code": str, + "version": "string", + "codelist_code": "string", } ) if self.params.codelist_code in self.evaluation_dataset.columns: diff --git a/tests/unit/test_operations/test_codelist_extensible.py b/tests/unit/test_operations/test_codelist_extensible.py index a59076be4..0da2a801f 100644 --- a/tests/unit/test_operations/test_codelist_extensible.py +++ b/tests/unit/test_operations/test_codelist_extensible.py @@ -173,8 +173,8 @@ def test_multiple_versions( ) evaluation_dataset = evaluation_dataset.astype( { - "version": str, - "codelist_code": str, + "version": "string", + "codelist_code": "string", } ) diff --git a/tests/unit/test_operations/test_codelist_terms.py b/tests/unit/test_operations/test_codelist_terms.py index fb6412a0a..6b7f07950 100644 --- a/tests/unit/test_operations/test_codelist_terms.py +++ b/tests/unit/test_operations/test_codelist_terms.py @@ -351,8 +351,8 @@ def test_empty_dataset_multiple_versions( ) evaluation_dataset = evaluation_dataset.astype( { - "version": str, - "codelist_code": str, + "version": "string", + "codelist_code": "string", } ) From 5f62f89cf0d31f665cab347277d4dd2e097ed2d9 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Mon, 27 Apr 2026 17:22:29 -0400 Subject: [PATCH 4/4] removed casting from tests --- tests/unit/test_operations/test_codelist_extensible.py | 6 ------ tests/unit/test_operations/test_codelist_terms.py | 6 ------ 2 files changed, 12 deletions(-) diff --git a/tests/unit/test_operations/test_codelist_extensible.py b/tests/unit/test_operations/test_codelist_extensible.py index 0da2a801f..425a00d76 100644 --- a/tests/unit/test_operations/test_codelist_extensible.py +++ b/tests/unit/test_operations/test_codelist_extensible.py @@ -171,12 +171,6 @@ def test_multiple_versions( evaluation_dataset = PandasDataset.from_dict( {"version": versions, "codelist_code": codelist_codes} ) - evaluation_dataset = evaluation_dataset.astype( - { - "version": "string", - "codelist_code": "string", - } - ) operation = CodelistExtensible( operation_params, diff --git a/tests/unit/test_operations/test_codelist_terms.py b/tests/unit/test_operations/test_codelist_terms.py index 6b7f07950..d0fb1811c 100644 --- a/tests/unit/test_operations/test_codelist_terms.py +++ b/tests/unit/test_operations/test_codelist_terms.py @@ -349,12 +349,6 @@ def test_empty_dataset_multiple_versions( "t_pref_term": [], } ) - evaluation_dataset = evaluation_dataset.astype( - { - "version": "string", - "codelist_code": "string", - } - ) operation = CodelistTerms( operation_params,