Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,9 @@ This will show the list of validation options.
-ss, --substandard TEXT Substandard to validate against. SUBSTANDARD environment variable can be used to pass value.
"SDTM", "SEND", "ADaM", or "CDASH"
[required for TIG]
-uc, --use-case TEXT Use Case for TIG Validation
-uc, --use-case TEXT Use Case for TIG Custom Domains
When performing a TIG validation with custom domain(s), this must be given to identify the custom domains' use case
in order to determine what rules to validate against them
"INDH", "PROD", "NONCLIN", or "ANALYSIS"
[required for TIG only when the validation includes custom domains]
USE_CASE environment variable can be used to pass value.
Expand Down
108 changes: 108 additions & 0 deletions cdisc_rules_engine/constants/use_cases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""
Constants for use cases and their allowed domains.
"""

# Substandard identifiers: the top-level keys of USE_CASE_DOMAINS.
SDTM = "SDTM"
SEND = "SEND"
ADAM = "ADAM"
CDASH = "CDASH"

# Use-case identifiers: the second-level keys of USE_CASE_DOMAINS.
INDH = "INDH"
PROD = "PROD"
NONCLIN = "NONCLIN"
ANALYSIS = "ANALYSIS"

# Domains listed for SDTM under the individual health (INDH) use case.
_SDTM_INDH_DOMAINS = [
    "AE", "CO", "CM", "DM", "DI", "DU", "DO", "DS", "EG", "EX", "EC",
    "FA", "IE", "LB", "MH", "PC", "PP", "DA", "DV", "QS", "RELREC", "RE",
    "SC", "SE", "SV", "SU", "EM", "TA", "TE", "TI", "TS", "TV", "VS",
]

# Domains listed for SDTM under the product (PROD) use case.
_SDTM_PROD_DOMAINS = ["TO", "PD", "PT", "IT", "IN", "IQ", "ES"]

# Domains listed for SEND under the nonclinical (NONCLIN) use case.
_SEND_NONCLIN_DOMAINS = [
    "BW", "CV", "CL", "CO", "DD", "DM", "DI", "DU", "DS", "EG", "EX",
    "FW", "GT", "LB", "MA", "MI", "OM", "PM", "PK", "PP", "POOLDEF",
    "RELREC", "RELREF", "RE", "SC", "SE", "TA", "TE", "TF", "TX", "TS", "VS",
]

# Mapping of substandard -> use case -> list of domain names.
# Every substandard carries all four use-case keys; an empty list means no
# domains are listed for that combination.
# NOTE: this may need to be expanded after the pilot re: custom domains, other applicable domains, etc.
USE_CASE_DOMAINS = {
    # only prod and individual health are allowed for sdtm
    SDTM: {
        INDH: _SDTM_INDH_DOMAINS,
        PROD: _SDTM_PROD_DOMAINS,
        NONCLIN: [],
        ANALYSIS: [],
    },
    # only nonclin allowed for send
    SEND: {
        INDH: [],
        PROD: [],
        NONCLIN: _SEND_NONCLIN_DOMAINS,
        ANALYSIS: [],
    },
    # only analysis allowed for adam, ADAM AD-- prefix check is done elsewhere.
    # This is here for completeness.
    ADAM: {
        INDH: [],
        PROD: [],
        NONCLIN: [],
        ANALYSIS: [],
    },
    # no conformance rules for CDASH Presently
    CDASH: {
        INDH: [],
        PROD: [],
        NONCLIN: [],
        ANALYSIS: [],
    },
}
1 change: 1 addition & 0 deletions cdisc_rules_engine/rules_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ def validate_single_dataset(
rule,
dataset_metadata,
self.standard,
self.standard_substandard,
self.use_case,
)
if is_suitable:
Expand Down
45 changes: 39 additions & 6 deletions cdisc_rules_engine/utilities/rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
SUPPLEMENTARY_DOMAINS,
)
from cdisc_rules_engine.constants.rule_constants import ALL_KEYWORD
from cdisc_rules_engine.constants.use_cases import USE_CASE_DOMAINS
from cdisc_rules_engine.interfaces import ConditionInterface
from cdisc_rules_engine.models.operation_params import OperationParams
from cdisc_rules_engine.models.rule_conditions import AllowedConditionsKeys
Expand All @@ -47,6 +48,7 @@
from cdisc_rules_engine.interfaces.data_service_interface import (
DataServiceInterface,
)
from cdisc_rules_engine.utilities.sdtm_utilities import is_custom_domain


class RuleProcessor:
Expand Down Expand Up @@ -267,15 +269,43 @@ def rule_applies_to_use_case(
self,
rule: dict,
standard: str,
use_case: str,
standard_substandard: str,
dataset_metadata,
custom_domain_use_case: str,
) -> bool:
if standard.lower() != "tig":
return True
use_cases = rule.get("use_case") or []
if not use_cases:
use_cases = (
[uc.strip() for uc in rule.get("use_case", "").split(",")]
if rule.get("use_case")
else []
)
substandard = standard_substandard.upper()
if substandard not in USE_CASE_DOMAINS:
return False
domain_to_check = dataset_metadata.domain
if dataset_metadata.is_supp and dataset_metadata.rdomain:
domain_to_check = dataset_metadata.rdomain
# Handle ADaM datasets with AD prefix
if substandard == "ADAM" and domain_to_check.startswith("AD"):
return "ANALYSIS" in use_cases

# Standard domain check
allowed_domains = set()
for use in use_cases:
if use in USE_CASE_DOMAINS[substandard]:
allowed_domains.update(USE_CASE_DOMAINS[substandard][use])
if domain_to_check in allowed_domains:
return True
use_cases = [uc.strip() for uc in use_cases.split(",")]
return use_case in use_cases

domain_is_custom = is_custom_domain(self.library_metadata, domain_to_check)
if not domain_is_custom:
return False
if not custom_domain_use_case:
raise ValueError(
f"Custom domain '{domain_to_check}' requires a use case -uc in validation command but none was provided."
)
return custom_domain_use_case in use_cases

@classmethod
def rule_applies_to_entity(
Expand Down Expand Up @@ -635,7 +665,8 @@ def is_suitable_for_validation(
self,
rule: dict,
dataset_metadata: SDTMDatasetMetadata,
standard,
standard: str,
substandard: str,
use_case: str,
) -> Tuple[bool, str]:
"""Check if rule is suitable and return reason if not"""
Expand All @@ -653,6 +684,8 @@ def is_suitable_for_validation(
if not self.rule_applies_to_use_case(
rule,
standard,
substandard,
dataset_metadata,
use_case,
):
reason = (
Expand Down
8 changes: 3 additions & 5 deletions core.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ def load_custom_dotenv_from_data_options(ctx, param, value):
default=None,
type=click.Choice(["INDH", "PROD", "NONCLIN", "ANALYSIS"], case_sensitive=True),
help=(
"CDISC TIG Use Case for scoping a TIG Validation."
"Specifies the CDISC TIG use case for all custom domains in a validation run. "
"Any of INDH, PROD, NONCLIN, or ANALYSIS."
),
envvar="USE_CASE",
Expand Down Expand Up @@ -626,10 +626,8 @@ def validate( # noqa
cache_path: str = os.path.join(os.path.dirname(__file__), cache)

if standard == "tig":
if not substandard or not use_case:
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The readme says that the use case must be given for TIG custom standard. We can keep this check then here and update the error message. Right now the missing use case is only checked deep in the rule execution which can give users unhandled exceptions.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use_case is not required, only substandard is. I will adjust the README.

logger.error(
"Standard 'tig' requires both --substandard and --use-case to be specified."
)
if not substandard:
logger.error("Standard 'tig' requires --substandard to be specified.")
ctx.exit(2)
# Construct ExternalDictionariesContainer:
external_dictionaries = ExternalDictionariesContainer(
Expand Down
Binary file modified resources/templates/report-template.xlsx
Binary file not shown.
102 changes: 72 additions & 30 deletions tests/unit/test_utilities/test_rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,47 +377,89 @@ def test_rule_applies_to_class(


@pytest.mark.parametrize(
"rule_use_case, use_case, standard, outcome",
"dataset_name, domain, rdomain, rule_use_case, use_case, standard, standard_substandard, outcome, is_custom",
[
# Basic use case tests - user provides "INDH" or "PROD"
("INDH, PROD", "INDH", "tig", True),
("INDH, PROD", "PROD", "tig", True),
("INDH", "INDH", "tig", True),
("INDH", "INDH", "tig", True),
("PROD", "PROD", "tig", True),
("PROD", "INDH", "tig", False),
("NONCLIN", "NONCLIN", "tig", True),
("NONCLIN", "INDH", "tig", False),
# Tests for ADaM datasets
("ANALYSIS", "ANALYSIS", "tig", True),
("ANALYSIS", "ANALYSIS", "tig", True),
("ANALYSIS", "INDH", "tig", False),
# Tests for supplementary datasets
("INDH", "INDH", "tig", True),
("INDH", "INDH", "tig", True),
("INDH", "INDH", "tig", True),
("INDH", "INDH", "tig", True),
("PROD", "PROD", "tig", True),
# Tests for empty/None use cases in rule (should always return True)
("", "INDH", "tig", True),
(None, "INDH", "tig", True),
# Tests for non-TIG standard (should always return True)
("INDH", "INDH", "sdtmig", True),
("NONCLIN", "NONCLIN", "sendct", True),
# Test case mismatch
("INDH, PROD", "SAFETY", "tig", False),
# Basic use case tests - custom_domain_use_case is irrelevant for standard domains
("AE", "AE", None, "INDH, PROD", None, "tig", "SDTM", True, False),
("CM", "CM", None, "INDH", None, "tig", "SDTM", True, False),
("TS", "TS", None, "INDH", None, "tig", "SDTM", True, False),
("ES", "ES", None, "PROD", None, "tig", "SDTM", True, False),
("BW", "BW", None, "NONCLIN", None, "tig", "SEND", True, False),
# Domain not in rule's use case domains
("ES", "ES", None, "INDH", None, "tig", "SDTM", False, False),
("BW", "BW", None, "INDH", None, "tig", "SEND", False, False),
# command line use_case is ignored for standard domains
("ES", "ES", None, "PROD", "INDH", "tig", "SDTM", True, False),
# ADAM tests
("ADAE", "ADAE", None, "ANALYSIS", None, "tig", "ADAM", True, False),
("ADAE", "ADAE", None, "INDH", None, "tig", "ADAM", False, False),
# Supp tests
("SUPPAE", None, "AE", "INDH", None, "tig", "SDTM", True, False),
("SUPPQS", None, "QS", "INDH", None, "tig", "SDTM", True, False),
("SUPPEC", None, "EC", "INDH", None, "tig", "SDTM", True, False),
("SUPP--", None, "AE", "INDH", None, "tig", "SDTM", True, False),
("SUPPPT", None, "PT", "PROD", None, "tig", "SDTM", True, False),
# Empty/None use cases in rule
("AE", "AE", None, "", None, "tig", "SDTM", False, False),
("AE", "AE", None, None, None, "tig", "SDTM", False, False),
# Non-TIG standard
("AE", "AE", None, "INDH", None, "sdtmig", "SDTM", True, False),
("BW", "BW", None, "NONCLIN", None, "sendct", "SEND", True, False),
# command line use_case ignored - AE is in INDH domains
("AE", "AE", None, "INDH, PROD", "SAFETY", "tig", "SDTM", True, False),
# Custom domains (XYZ-prefixed)
("XY", "XY", None, "INDH", "INDH", "tig", "SDTM", True, True),
("XY", "XY", None, "INDH", "PROD", "tig", "SDTM", False, True),
("ZZ", "ZZ", None, "PROD", "PROD", "tig", "SDTM", True, True),
],
)
def test_rule_applies_to_use_case(
mock_data_service,
dataset_name,
domain,
rdomain,
rule_use_case,
standard,
use_case,
standard,
standard_substandard,
outcome,
is_custom,
):
processor = RuleProcessor(mock_data_service, InMemoryCacheService())
rule = {"use_case": rule_use_case}
assert processor.rule_applies_to_use_case(rule, standard, use_case) == outcome
dataset_metadata = SDTMDatasetMetadata(
name=dataset_name,
first_record=(
{"DOMAIN": domain, "RDOMAIN": rdomain} if domain or rdomain else {}
),
)

with patch(
"cdisc_rules_engine.utilities.rule_processor.is_custom_domain",
return_value=is_custom,
):
assert (
processor.rule_applies_to_use_case(
rule, standard, standard_substandard, dataset_metadata, use_case
)
== outcome
)


def test_rule_applies_to_use_case_custom_domain_no_use_case_argument_raises(
    mock_data_service,
):
    """A custom domain checked without a command-line use case raises ValueError.

    ``is_custom_domain`` is patched to return True, so the "XY" dataset is
    treated as custom, and ``None`` is passed as the custom-domain use case.
    """
    rule_processor = RuleProcessor(mock_data_service, InMemoryCacheService())
    indh_rule = {"use_case": "INDH"}
    custom_dataset = SDTMDatasetMetadata(name="XY", first_record={"DOMAIN": "XY"})
    # Patch the name as imported into rule_processor, which is where it is looked up.
    treat_domain_as_custom = patch(
        "cdisc_rules_engine.utilities.rule_processor.is_custom_domain",
        return_value=True,
    )
    expect_missing_use_case = pytest.raises(ValueError, match="requires a use case")
    with treat_domain_as_custom, expect_missing_use_case:
        rule_processor.rule_applies_to_use_case(
            indh_rule, "tig", "SDTM", custom_dataset, None
        )


@pytest.mark.parametrize("dataset_implementation", [PandasDataset, DaskDataset])
Expand Down
Loading