diff --git a/README.md b/README.md index 4340249dc..febe8e9ae 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,9 @@ This will show the list of validation options. -ss, --substandard TEXT Substandard to validate against. SUBSTANDARD environment variable can be used to pass value. "SDTM", "SEND", "ADaM", or "CDASH" [required for TIG] - -uc, --use-case TEXT Use Case for TIG Validation + -uc, --use-case TEXT Use Case for TIG Custom Domains + When performing a TIG validation with custom domain(s), this must be given to identify the custom domains' use case + in order to determine what rules to validate against them "INDH", "PROD", "NONCLIN", or "ANALYSIS" [required for TIG] USE_CASE environment variable can be used to pass value. diff --git a/cdisc_rules_engine/constants/use_cases.py b/cdisc_rules_engine/constants/use_cases.py new file mode 100644 index 000000000..f8eb35d37 --- /dev/null +++ b/cdisc_rules_engine/constants/use_cases.py @@ -0,0 +1,108 @@ +""" +Constants for use cases and their allowed domains. +""" + +SDTM = "SDTM" +SEND = "SEND" +ADAM = "ADAM" +CDASH = "CDASH" + +INDH = "INDH" +PROD = "PROD" +NONCLIN = "NONCLIN" +ANALYSIS = "ANALYSIS" + +# NOTE: this may need to be expanded after the pilot re: custom domains, other applicable domains, etc. 
+USE_CASE_DOMAINS = { + SDTM: { # only prod and individual health are allowed for sdtm + INDH: [ + "AE", + "CO", + "CM", + "DM", + "DI", + "DU", + "DO", + "DS", + "EG", + "EX", + "EC", + "FA", + "IE", + "LB", + "MH", + "PC", + "PP", + "DA", + "DV", + "QS", + "RELREC", + "RE", + "SC", + "SE", + "SV", + "SU", + "EM", + "TA", + "TE", + "TI", + "TS", + "TV", + "VS", + ], + PROD: ["TO", "PD", "PT", "IT", "IN", "IQ", "ES"], + NONCLIN: [], + ANALYSIS: [], + }, + SEND: { # only nonclin allowed for send + INDH: [], + PROD: [], + NONCLIN: [ + "BW", + "CV", + "CL", + "CO", + "DD", + "DM", + "DI", + "DU", + "DS", + "EG", + "EX", + "FW", + "GT", + "LB", + "MA", + "MI", + "OM", + "PM", + "PK", + "PP", + "POOLDEF", + "RELREC", + "RELREF", + "RE", + "SC", + "SE", + "TA", + "TE", + "TF", + "TX", + "TS", + "VS", + ], + ANALYSIS: [], + }, + ADAM: { # only analysis allowed for adam, ADAM AD-- prefix check is done elsewhere. This is here for completeness. + INDH: [], + PROD: [], + NONCLIN: [], + ANALYSIS: [], + }, + CDASH: { # no conformance rules for CDASH Presently + INDH: [], + PROD: [], + NONCLIN: [], + ANALYSIS: [], + }, +} diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index bc677ef04..9ff27c034 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -222,6 +222,7 @@ def validate_single_dataset( rule, dataset_metadata, self.standard, + self.standard_substandard, self.use_case, ) if is_suitable: diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index 1395947b2..119be5724 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -25,6 +25,7 @@ SUPPLEMENTARY_DOMAINS, ) from cdisc_rules_engine.constants.rule_constants import ALL_KEYWORD +from cdisc_rules_engine.constants.use_cases import USE_CASE_DOMAINS from cdisc_rules_engine.interfaces import ConditionInterface from 
cdisc_rules_engine.models.operation_params import OperationParams from cdisc_rules_engine.models.rule_conditions import AllowedConditionsKeys @@ -47,6 +48,7 @@ from cdisc_rules_engine.interfaces.data_service_interface import ( DataServiceInterface, ) +from cdisc_rules_engine.utilities.sdtm_utilities import is_custom_domain class RuleProcessor: @@ -267,15 +269,43 @@ def rule_applies_to_use_case( self, rule: dict, standard: str, - use_case: str, + standard_substandard: str, + dataset_metadata, + custom_domain_use_case: str, ) -> bool: if standard.lower() != "tig": return True - use_cases = rule.get("use_case") or [] - if not use_cases: + use_cases = ( + [uc.strip() for uc in rule.get("use_case", "").split(",")] + if rule.get("use_case") + else [] + ) + substandard = standard_substandard.upper() + if substandard not in USE_CASE_DOMAINS: + return False + domain_to_check = dataset_metadata.domain + if dataset_metadata.is_supp and dataset_metadata.rdomain: + domain_to_check = dataset_metadata.rdomain + # Handle ADaM datasets with AD prefix + if substandard == "ADAM" and domain_to_check.startswith("AD"): + return "ANALYSIS" in use_cases + + # Standard domain check + allowed_domains = set() + for use in use_cases: + if use in USE_CASE_DOMAINS[substandard]: + allowed_domains.update(USE_CASE_DOMAINS[substandard][use]) + if domain_to_check in allowed_domains: return True - use_cases = [uc.strip() for uc in use_cases.split(",")] - return use_case in use_cases + + domain_is_custom = is_custom_domain(self.library_metadata, domain_to_check) + if not domain_is_custom: + return False + if not custom_domain_use_case: + raise ValueError( + f"Custom domain '{domain_to_check}' requires a use case -uc in validation command but none was provided." 
+ ) + return custom_domain_use_case in use_cases @classmethod def rule_applies_to_entity( @@ -635,7 +665,8 @@ def is_suitable_for_validation( self, rule: dict, dataset_metadata: SDTMDatasetMetadata, - standard, + standard: str, + substandard: str, use_case: str, ) -> Tuple[bool, str]: """Check if rule is suitable and return reason if not""" @@ -653,6 +684,8 @@ def is_suitable_for_validation( if not self.rule_applies_to_use_case( rule, standard, + substandard, + dataset_metadata, use_case, ): reason = ( diff --git a/core.py b/core.py index de6cc7ce3..f6bad6a99 100644 --- a/core.py +++ b/core.py @@ -384,7 +384,7 @@ def load_custom_dotenv_from_data_options(ctx, param, value): default=None, type=click.Choice(["INDH", "PROD", "NONCLIN", "ANALYSIS"], case_sensitive=True), help=( - "CDISC TIG Use Case for scoping a TIG Validation." + "Specifies the CDISC TIG use case for all custom domains in a validation run. " "Any of INDH, PROD, NONCLIN, or ANALYSIS." ), envvar="USE_CASE", @@ -626,10 +626,8 @@ def validate( # noqa cache_path: str = os.path.join(os.path.dirname(__file__), cache) if standard == "tig": - if not substandard or not use_case: - logger.error( - "Standard 'tig' requires both --substandard and --use-case to be specified." 
- ) + if not substandard: + logger.error("Standard 'tig' requires --substandard to be specified.") ctx.exit(2) # Construct ExternalDictionariesContainer: external_dictionaries = ExternalDictionariesContainer( diff --git a/resources/templates/report-template.xlsx b/resources/templates/report-template.xlsx index b27e0fabd..274aef955 100644 Binary files a/resources/templates/report-template.xlsx and b/resources/templates/report-template.xlsx differ diff --git a/tests/unit/test_utilities/test_rule_processor.py b/tests/unit/test_utilities/test_rule_processor.py index 18e73e10b..24ae8492c 100644 --- a/tests/unit/test_utilities/test_rule_processor.py +++ b/tests/unit/test_utilities/test_rule_processor.py @@ -377,47 +377,89 @@ def test_rule_applies_to_class( @pytest.mark.parametrize( - "rule_use_case, use_case, standard, outcome", + "dataset_name, domain, rdomain, rule_use_case, use_case, standard, standard_substandard, outcome, is_custom", [ - # Basic use case tests - user provides "INDH" or "PROD" - ("INDH, PROD", "INDH", "tig", True), - ("INDH, PROD", "PROD", "tig", True), - ("INDH", "INDH", "tig", True), - ("INDH", "INDH", "tig", True), - ("PROD", "PROD", "tig", True), - ("PROD", "INDH", "tig", False), - ("NONCLIN", "NONCLIN", "tig", True), - ("NONCLIN", "INDH", "tig", False), - # Tests for ADaM datasets - ("ANALYSIS", "ANALYSIS", "tig", True), - ("ANALYSIS", "ANALYSIS", "tig", True), - ("ANALYSIS", "INDH", "tig", False), - # Tests for supplementary datasets - ("INDH", "INDH", "tig", True), - ("INDH", "INDH", "tig", True), - ("INDH", "INDH", "tig", True), - ("INDH", "INDH", "tig", True), - ("PROD", "PROD", "tig", True), - # Tests for empty/None use cases in rule (should always return True) - ("", "INDH", "tig", True), - (None, "INDH", "tig", True), - # Tests for non-TIG standard (should always return True) - ("INDH", "INDH", "sdtmig", True), - ("NONCLIN", "NONCLIN", "sendct", True), - # Test case mismatch - ("INDH, PROD", "SAFETY", "tig", False), + # Basic use case 
tests - custom_domain_use_case is irrelevant for standard domains + ("AE", "AE", None, "INDH, PROD", None, "tig", "SDTM", True, False), + ("CM", "CM", None, "INDH", None, "tig", "SDTM", True, False), + ("TS", "TS", None, "INDH", None, "tig", "SDTM", True, False), + ("ES", "ES", None, "PROD", None, "tig", "SDTM", True, False), + ("BW", "BW", None, "NONCLIN", None, "tig", "SEND", True, False), + # Domain not in rule's use case domains + ("ES", "ES", None, "INDH", None, "tig", "SDTM", False, False), + ("BW", "BW", None, "INDH", None, "tig", "SEND", False, False), + # command line use_case is ignored for standard domains + ("ES", "ES", None, "PROD", "INDH", "tig", "SDTM", True, False), + # ADAM tests + ("ADAE", "ADAE", None, "ANALYSIS", None, "tig", "ADAM", True, False), + ("ADAE", "ADAE", None, "INDH", None, "tig", "ADAM", False, False), + # Supp tests + ("SUPPAE", None, "AE", "INDH", None, "tig", "SDTM", True, False), + ("SUPPQS", None, "QS", "INDH", None, "tig", "SDTM", True, False), + ("SUPPEC", None, "EC", "INDH", None, "tig", "SDTM", True, False), + ("SUPP--", None, "AE", "INDH", None, "tig", "SDTM", True, False), + ("SUPPPT", None, "PT", "PROD", None, "tig", "SDTM", True, False), + # Empty/None use cases in rule + ("AE", "AE", None, "", None, "tig", "SDTM", False, False), + ("AE", "AE", None, None, None, "tig", "SDTM", False, False), + # Non-TIG standard + ("AE", "AE", None, "INDH", None, "sdtmig", "SDTM", True, False), + ("BW", "BW", None, "NONCLIN", None, "sendct", "SEND", True, False), + # command line use_case ignored - AE is in INDH domains + ("AE", "AE", None, "INDH, PROD", "SAFETY", "tig", "SDTM", True, False), + # Custom domains (XYZ-prefixed) + ("XY", "XY", None, "INDH", "INDH", "tig", "SDTM", True, True), + ("XY", "XY", None, "INDH", "PROD", "tig", "SDTM", False, True), + ("ZZ", "ZZ", None, "PROD", "PROD", "tig", "SDTM", True, True), ], ) def test_rule_applies_to_use_case( mock_data_service, + dataset_name, + domain, + rdomain, rule_use_case, - 
standard, use_case, + standard, + standard_substandard, outcome, + is_custom, ): processor = RuleProcessor(mock_data_service, InMemoryCacheService()) rule = {"use_case": rule_use_case} - assert processor.rule_applies_to_use_case(rule, standard, use_case) == outcome + dataset_metadata = SDTMDatasetMetadata( + name=dataset_name, + first_record=( + {"DOMAIN": domain, "RDOMAIN": rdomain} if domain or rdomain else {} + ), + ) + + with patch( + "cdisc_rules_engine.utilities.rule_processor.is_custom_domain", + return_value=is_custom, + ): + assert ( + processor.rule_applies_to_use_case( + rule, standard, standard_substandard, dataset_metadata, use_case + ) + == outcome + ) + + +def test_rule_applies_to_use_case_custom_domain_no_use_case_argument_raises( + mock_data_service, +): + processor = RuleProcessor(mock_data_service, InMemoryCacheService()) + rule = {"use_case": "INDH"} + dataset_metadata = SDTMDatasetMetadata(name="XY", first_record={"DOMAIN": "XY"}) + with patch( + "cdisc_rules_engine.utilities.rule_processor.is_custom_domain", + return_value=True, + ): + with pytest.raises(ValueError, match="requires a use case"): + processor.rule_applies_to_use_case( + rule, "tig", "SDTM", dataset_metadata, None + ) @pytest.mark.parametrize("dataset_implementation", [PandasDataset, DaskDataset])