Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions cdisc_rules_engine/utilities/data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,14 +198,13 @@ def merge_pivot_supp_dataset(
left_dataset: DatasetInterface,
right_dataset: DatasetInterface,
):

static_keys = ["STUDYID", "USUBJID", "APID", "POOLID", "SPDEVID"]
qnam_list = right_dataset["QNAM"].unique()
unique_idvar_values = right_dataset["IDVAR"].unique()
if len(unique_idvar_values) == 1:
right_dataset = DataProcessor.process_supp(right_dataset)
dynamic_key = right_dataset["IDVAR"].iloc[0]
is_blank = pd.isna(dynamic_key) or str(dynamic_key).strip() == ""
is_blank: bool = pd.isna(dynamic_key) or str(dynamic_key).strip() == ""
# Determine the common keys present in both datasets
common_keys = [
key
Expand Down Expand Up @@ -352,6 +351,14 @@ def process_supp(supp_dataset):
columns_to_drop = [
col for col in ["QNAM", "QVAL", "QLABEL"] if col in supp_dataset.columns
]
if "RDOMAIN" in supp_dataset.columns and supp_dataset["RDOMAIN"][0] == "DM":
excluded_columns = list(supp_dataset["QNAM"].unique()) + columns_to_drop
group_cols = [c for c in supp_dataset.columns if c not in excluded_columns]
supp_dataset = PandasDataset(
supp_dataset.data.groupby(group_cols, dropna=False, as_index=False).agg(
lambda x: (x.dropna().iloc[0] if not x.dropna().empty else pd.NA)
)
)
if columns_to_drop:
supp_dataset = supp_dataset.drop(labels=columns_to_drop, axis=1)
return supp_dataset
Expand Down
10 changes: 8 additions & 2 deletions cdisc_rules_engine/utilities/dataset_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@ def preprocess( # noqa
else:
if self._is_split_domain(domain_name):
continue
target_domain_name: str = (
self._dataset_metadata.domain or self._dataset_metadata.name
)
file_infos: list[SDTMDatasetMetadata] = [
item
for item in datasets
Expand All @@ -104,12 +107,15 @@ def preprocess( # noqa
or (
domain_name == "SUPP--"
and (not self._dataset_metadata.is_supp)
and item.rdomain == self._dataset_metadata.domain
and item.rdomain == target_domain_name
)
)
]

if not file_infos:
if not file_infos and not (
(self._dataset_metadata.is_supp and domain_name == "SUPP--")
or self._dataset_metadata.name == "RELREC"
):
raise PreprocessingError(
f"Failed to find related dataset for '{domain_name}' in preprocessor"
)
Expand Down
169 changes: 169 additions & 0 deletions tests/QARegressionTests/test_Issues/test_CoreIssue1345.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import os
import subprocess

import pytest
import json
from conftest import get_python_executable


@pytest.fixture
def generate_report():
    """Run a CORE validation over the CoreIssue1345 resources.

    Executes rule CDISC.SDTMIG.CG0019 via the CLI, locates the JSON report
    it produces, and yields ``(report_path, parsed_report)``.

    The report file is removed during fixture teardown, so it is cleaned up
    even when a consuming test fails an assertion — otherwise stale
    ``CORE-Report-*.json`` files would linger and ``sorted(...)[-1]`` could
    pick one of them up as the "latest" report on a later run.
    """
    command = [
        get_python_executable(),
        "-m",
        "core",
        "validate",
        "-s",
        "sdtmig",
        "-v",
        "3-4",
        "-dv",
        "2-1",
        "-dxp",
        os.path.join(
            "tests",
            "resources",
            "CoreIssue1345",
            "define_msg20_testsupp_core.xml",
        ),
        "-d",
        os.path.join(
            "tests",
            "resources",
            "CoreIssue1345",
        ),
        "-lr",
        os.path.join(
            "tests",
            "resources",
            "CoreIssue1345",
        ),
        "-r",
        "CDISC.SDTMIG.CG0019",
        "-l",
        "error",
        "-ps",
        "1",
        "-of",
        "json",
    ]
    subprocess.run(command, check=True)
    # Report filenames embed a sortable timestamp, so the lexicographically
    # greatest name is the most recently generated report.
    json_files = [
        file
        for file in os.listdir()
        if file.startswith("CORE-Report-") and file.endswith(".json")
    ]
    json_report_path = sorted(json_files)[-1]
    # Use a context manager so the file handle is closed deterministically
    # (the original relied on garbage collection to close it).
    with open(json_report_path) as report_file:
        json_report = json.load(report_file)
    yield json_report_path, json_report
    # Teardown: remove the report even if the consuming test failed before
    # reaching its own cleanup code.
    if os.path.exists(json_report_path):
        os.remove(json_report_path)


@pytest.mark.regression
class TestCoreIssue1345:
    """Regression tests for CORE issue 1345: SUPP-- merging and RELREC handling.

    Both tests consume the ``generate_report`` fixture, which runs rule
    CDISC.SDTMIG.CG0019 against the CoreIssue1345 resources and provides the
    path to, and parsed contents of, the resulting JSON report.
    """

    @staticmethod
    def _dataset_filenames(json_report):
        # Upper-cased dataset filenames listed in the report's Dataset_Details.
        return {
            detail["filename"].upper()
            for detail in json_report.get("Dataset_Details", [])
        }

    @staticmethod
    def _issue_details_for(json_report, dataset_names):
        # Issue_Details entries whose dataset (lower-cased) is in dataset_names.
        return [
            issue
            for issue in json_report.get("Issue_Details", [])
            if issue.get("dataset", "").lower() in dataset_names
        ]

    @staticmethod
    def _issue_summary_for(json_report, dataset_names):
        # Issue_Summary entries whose dataset (lower-cased) is in dataset_names.
        return [
            entry
            for entry in json_report.get("Issue_Summary", [])
            if entry.get("dataset", "").lower() in dataset_names
        ]

    @staticmethod
    def _remove_report(json_report_path):
        # Delete the generated report so later runs do not pick up a stale file.
        if os.path.exists(json_report_path):
            os.remove(json_report_path)

    def test_engine_correctly_merges_datasets_and_flags_row_uniqueness_issues(
        self, generate_report
    ):
        json_report_path, json_report = generate_report
        # try/finally: the original removed the report only after every assert
        # passed, leaking CORE-Report-*.json files whenever the test failed.
        try:
            dataset_filenames = self._dataset_filenames(json_report)

            assert (
                "DM" in dataset_filenames
            ), "DM dataset is missing from Dataset_Details"
            assert (
                "SUPPDM" in dataset_filenames
            ), "SUPPDM dataset is missing from Dataset_Details"

            # DM/SUPPDM must merge cleanly: no issues reported against them.
            dm_related_issues = self._issue_details_for(
                json_report, {"dm.json", "suppdm.json"}
            )
            assert not dm_related_issues, (
                "Found issues related to DM/SUPPDM datasets:\n" f"{dm_related_issues}"
            )

            dm_related_summary = self._issue_summary_for(
                json_report, {"dm.json", "suppdm.json"}
            )
            assert not dm_related_summary, (
                "Found issue summary entries related to DM/SUPPDM:\n"
                f"{dm_related_summary}"
            )

            # EC row-uniqueness violations must still be flagged.
            ec_detail_issues = self._issue_details_for(json_report, {"ec.json"})
            assert (
                ec_detail_issues
            ), "Expected EC-related issues in Issue_Details, but none found"
            assert (
                len(ec_detail_issues) == 2
            ), f"Expected 2 issues for EC dataset, but {len(ec_detail_issues)} found in Issue_Details"

            ec_summary_issues = self._issue_summary_for(json_report, {"ec.json"})
            assert (
                ec_summary_issues
            ), "Expected issues for EC dataset, but none found in Issue_Summary"
        finally:
            self._remove_report(json_report_path)

    def test_engine_correctly_processes_relrec_when_supp_datasets_provided(
        self, generate_report
    ):
        json_report_path, json_report = generate_report
        try:
            dataset_filenames = self._dataset_filenames(json_report)

            assert (
                "DM" in dataset_filenames
            ), "DM dataset is missing from Dataset_Details"
            assert (
                "SUPPDM" in dataset_filenames
            ), "SUPPDM dataset is missing from Dataset_Details"
            assert (
                "EC" in dataset_filenames
            ), "EC dataset is missing from Dataset_Details"
            assert (
                "SUPPEC" in dataset_filenames
            ), "SUPPEC dataset is missing from Dataset_Details"

            # RELREC itself must be processed and checked by the rule.
            assert (
                "RELREC" in dataset_filenames
            ), "RELREC dataset is missing from Dataset_Details"
            relrec_issues = self._issue_details_for(json_report, {"relrec.json"})
            assert (
                len(relrec_issues) == 2
            ), f"Expected 2 issues for RELREC dataset, but {len(relrec_issues)} found"

            # EC must still be processed and contain its issues.
            ec_detail_issues = self._issue_details_for(json_report, {"ec.json"})
            assert (
                len(ec_detail_issues) == 2
            ), f"Expected 2 issues for EC dataset, but {len(ec_detail_issues)} found in Issue_Details"
        finally:
            self._remove_report(json_report_path)
104 changes: 104 additions & 0 deletions tests/resources/CoreIssue1345/CG0019.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Variable: GEN
# Condition:
# Rule: Each record is unique per sponsor defined key variables as documented in the define.xml
# Marcelina Hungria: testing with the CLI and a define.xml that includes a key from a supp dataset (ec/suppec),
# plus dm/suppdm and relrec
# NOTE(review): indentation below reconstructed from a flattened paste — the
# three "Version" keys per standard sit at three different nesting levels
# (Rule Identifier / Reference / Standard); confirm against the original file.
Authorities:
  - Organization: CDISC
    Standards:
      - Name: SDTMIG
        References:
          - Citations:
              - Cited Guidance:
                  Note that the key variables shown in this table are examples
                  only. A sponsor's actual key structure may be different.
                Document: IG v3.4
                Section: Table 3.2.1
              - Cited Guidance:
                  Since the purpose of this column is to aid reviewers in
                  understanding the structure of a dataset, sponsors should list
                  all of the natural keys (see definition below) for the
                  dataset. These keys should define uniqueness for records
                  within a dataset, and may define a record sort order. The
                  identified keys for each dataset should be consistent with the
                  description of the dataset structure as described in the
                  Define-XML document.
                Document: IG v3.4
                Section: 3.2.1.1
            Origin: SDTM and SDTMIG Conformance Rules
            Rule Identifier:
              Id: CG0019
              Version: "1"
            # Reference version (distinct from the rule and standard versions).
            Version: "2.0"
        # Standard version this reference set applies to.
        Version: "3.4"
      - Name: SDTMIG
        References:
          - Citations:
              - Cited Guidance:
                  Table 3.2.1[Note that the key variables shown in this table are
                  examples only. A sponsor's actual key structure may be
                  different.]|3.2.1.1[Since the purpose of this column is to aid
                  reviewers in understanding the structure of a dataset,
                  sponsors should list all of the natural keys (see definition
                  below) for the dataset. These keys should define uniqueness
                  for records within a dataset, and may define a record sort
                  order.]
                Document: IG v3.2
                Section: Table 3.2.1|3.2.1.1
            Origin: SDTM and SDTMIG Conformance Rules
            Rule Identifier:
              Id: CG0019
              Version: "1"
            Version: "2.0"
        Version: "3.2"
      - Name: SDTMIG
        References:
          - Citations:
              - Cited Guidance:
                  Table 3.2.1[Note that the key variables shown in this table are
                  examples only. A sponsor's actual key structure may be
                  different.]||3.2.1.1[Since the purpose of this column is to
                  aid reviewers in understanding the structure of a dataset,
                  sponsors should list all of the natural keys (see definition
                  below) for the dataset. These keys should define uniqueness
                  for records within a dataset, and may define a record sort
                  order.]
                Document: IG v3.3
                Section: Table 3.2.1|3.2.1.1
            Origin: SDTM and SDTMIG Conformance Rules
            Rule Identifier:
              Id: CG0019
              Version: "1"
            Version: "2.0"
        Version: "3.3"
# Flag records whose define.xml key-variable set is not unique in the dataset.
Check:
  all:
    - name: define_dataset_key_sequence
      operator: is_not_unique_set
Core:
  Id: CDISC.SDTMIG.CG0019
  Status: Draft
  Version: "1"
Description: Trigger error if records are not unique as per sponsor defined key
  variables as documented in the define.xml
Executability: Fully Executable
Match Datasets:
  - Is Relationship: true
    Keys:
      - USUBJID
    Name: SUPP--
Outcome:
  Message: Records are not unique as per sponsor defined key variables as
    documented in the define.xml
# define_dataset_key_sequence doesn't appear to grab all variables needed. See define xml plus data included
Output Variables:
  - define_dataset_key_sequence
Rule Type: Dataset Contents Check against Define XML
Scope:
  Classes:
    Include:
      - ALL
  Domains:
    Include:
      - ALL
Sensitivity: Record
Loading
Loading