From 4ddf1a4293931a16afd0287c694fef0cd6de13b1 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 23 Apr 2026 11:13:47 +0200 Subject: [PATCH 1/7] Add regex support for target sorting in target_is_sorted_by operator --- .../check_operators/dataframe_operators.py | 86 ++++++-- resources/schema/rule/Operator.md | 18 ++ .../test_target_is_sorted_by_regex.py | 185 ++++++++++++++++++ 3 files changed, 277 insertions(+), 12 deletions(-) create mode 100644 tests/unit/test_check_operators/test_target_is_sorted_by_regex.py diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 78e4f5a9f..653a3f3ff 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -1664,12 +1664,12 @@ def _verify_neighbor_consistency( return is_valid - def check_target_ascending_in_sorted_group( + def check_target_ascending_in_sorted_group_with_regex( self, group, target, comparator, ascending, na_pos ): """ Check if target values are in ascending order within a group - already sorted by comparator. + already sorted by comparator. Supports regex extraction. """ is_valid = pd.Series(True, index=group.index) is_numeric_comparator = pd.api.types.is_numeric_dtype(group[comparator]) @@ -1726,6 +1726,17 @@ def check_target_ascending_in_sorted_group( ) return is_valid + def check_target_ascending_in_sorted_group( + self, group, target, comparator, ascending, na_pos + ): + """ + Check if target values are in ascending order within a group + already sorted by comparator. 
+ """ + return self.check_target_ascending_in_sorted_group_with_regex( + group, target, comparator, ascending, na_pos + ) + def check_date_overlaps(self, group, target, comparator): comparator_values = group[comparator].tolist() is_valid = pd.Series(True, index=group.index) @@ -1798,12 +1809,45 @@ def _process_grouped_result( grouped_result = pd.Series(result_list, index=index_list) return grouped_result.reindex(sorted_df.index, fill_value=True) + def _extract_regex_group(self, series: pd.Series, regex_pattern: str) -> pd.Series: + """ + Extract the first capturing group from a regex pattern and convert to numeric if possible. + Handles zero-padded numbers by converting to numeric. + + Args: + series: Pandas series with string values + regex_pattern: Regex pattern with capturing group(s) + + Returns: + Series with extracted and converted values + """ + + def extract_and_convert(value): + if pd.isna(value) or value == "": + return np.nan + + # The pattern is used exactly as received: the YAML loader has already + # resolved escape sequences (a double-quoted "\\d" in the rule arrives here + # as \d), so no further unescaping is needed before calling re.search. + match = re.search(regex_pattern, str(value)) + if match and match.groups(): + extracted = match.group(1) # First capturing group + # Try to convert to numeric to handle both padded and non-padded numbers + try: + return pd.to_numeric(extracted) + except (ValueError, TypeError): + return extracted + return np.nan + + return series.apply(extract_and_convert) + @log_operator_execution @type_operator(FIELD_DATAFRAME) def target_is_sorted_by(self, other_value: dict): target = other_value.get("target") within_columns = self._normalize_grouping_columns(other_value.get("within")) columns = other_value["comparator"] + target_regex = other_value.get("regex") # parameter for regex extraction result = pd.Series([True] * len(self.value), index=self.value.index) @@ -1816,16 +1860,32 @@ def 
target_is_sorted_by(self, other_value: dict): dict.fromkeys([target, comparator, *within_columns]) ) - sorted_df = self.value[selected_columns].sort_values( - by=[*within_columns, target], - ascending=[True] * (len(within_columns) + 1), - ) + # If regex is provided, extract and convert target values + if target_regex: + working_df = self.value[selected_columns].copy() + # Create a temporary column with extracted regex values + working_df[f"{target}_extracted"] = self._extract_regex_group( + working_df[target], target_regex + ) + target_for_sorting = f"{target}_extracted" + # Sort by within columns only, preserve original order within groups + sorted_df = working_df.sort_values( + by=within_columns, + ascending=[True] * len(within_columns), + ) + else: + working_df = self.value[selected_columns] + target_for_sorting = target + sorted_df = working_df.sort_values( + by=[*within_columns, target], + ascending=[True] * (len(within_columns) + 1), + ) grouped_df = sorted_df.groupby(within_columns, sort=False) target_check = grouped_df.apply( - lambda x: self.check_target_ascending_in_sorted_group( - x, target, comparator, ascending, na_pos + lambda x: self.check_target_ascending_in_sorted_group_with_regex( + x, target_for_sorting, comparator, ascending, na_pos ) ) target_check = self._process_grouped_result( @@ -1833,20 +1893,22 @@ def target_is_sorted_by(self, other_value: dict): grouped_df, within_columns, sorted_df, - lambda group: self.check_target_ascending_in_sorted_group( - group, target, comparator, ascending, na_pos + lambda group: self.check_target_ascending_in_sorted_group_with_regex( + group, target_for_sorting, comparator, ascending, na_pos ), ) date_overlap_check = grouped_df.apply( - lambda x: self.check_date_overlaps(x, target, comparator) + lambda x: self.check_date_overlaps(x, target_for_sorting, comparator) ) date_overlap_check = self._process_grouped_result( date_overlap_check, grouped_df, within_columns, sorted_df, - lambda group: 
self.check_date_overlaps(group, target, comparator), + lambda group: self.check_date_overlaps( + group, target_for_sorting, comparator + ), ) combined_check = target_check & date_overlap_check diff --git a/resources/schema/rule/Operator.md b/resources/schema/rule/Operator.md index 2ba4b3015..31605684b 100644 --- a/resources/schema/rule/Operator.md +++ b/resources/schema/rule/Operator.md @@ -1129,6 +1129,8 @@ Complement of `is_ordered_by` True if the values in name are ordered according to the values specified by value in ascending/descending order, grouped by the values in within. Each value entry requires a variable name, a sort_order of asc or desc, and an optional null_position of first or last (defaults to last) which controls where null/empty comparator values are placed in the expected ordering. Within accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 YYYY-MM-DD format. Date value(s) with different precisions that overlap (e.g. 2005-10, 2005-10-3 and 2005-10-08) are all flagged as not sorted as their order cannot be inferred. +Optionally supports a `regex` parameter that extracts a portion of the target value for sorting. The regex must contain at least one capturing group. The first captured group is extracted and converted to numeric if possible, allowing proper sorting of sequence numbers (e.g., "MIDS1", "MIDS2", ..., "MIDS10" with regex `.*?(\d+)$`, written as `".*?(\\d+)$"` in a double-quoted YAML string). This is particularly useful for variables that end with sequence numbers that may or may not be zero-padded. 
+ ```yaml Check: all: @@ -1143,6 +1145,22 @@ Check: null_position: last ``` +Example with regex for extracting sequence numbers: + +```yaml +Check: + all: + - name: MIDS + operator: target_is_sorted_by + regex: ".*?(\\d+)$" # Extract trailing digits, convert to numeric + value: + - name: SMSTDTC + sort_order: asc + within: + - USUBJID + - MIDSTYPE +``` + ### target_is_not_sorted_by Complement of `target_is_sorted_by` diff --git a/tests/unit/test_check_operators/test_target_is_sorted_by_regex.py b/tests/unit/test_check_operators/test_target_is_sorted_by_regex.py new file mode 100644 index 000000000..23b4a3772 --- /dev/null +++ b/tests/unit/test_check_operators/test_target_is_sorted_by_regex.py @@ -0,0 +1,185 @@ +""" +Tests for target_is_sorted_by operator with regex support +""" + +import pytest +import pandas as pd +from cdisc_rules_engine.check_operators.dataframe_operators import DataframeType +from cdisc_rules_engine.models.dataset.pandas_dataset import PandasDataset +from cdisc_rules_engine.models.dataset.dask_dataset import DaskDataset + + +@pytest.mark.parametrize("dataset_class", [PandasDataset, DaskDataset]) +def test_target_is_sorted_by_with_regex_non_padded(dataset_class): + """ + Test target_is_sorted_by with regex extraction for non-zero-padded sequence numbers. 
+ Example: lalala1, lalala2, ..., lalala9, lalala10 + """ + df = dataset_class.from_dict( + { + "USUBJID": ["001", "001", "001", "001", "002", "002", "002"], + "MIDSTYPE": ["A", "A", "A", "A", "B", "B", "B"], + "MIDS": [ + "lalala1", + "lalala2", + "lalala9", + "lalala10", + "test1", + "test2", + "test10", + ], + "SMSTDTC": [ + "2020-01-01", + "2020-01-02", + "2020-01-09", + "2020-01-10", + "2020-02-01", + "2020-02-02", + "2020-02-10", + ], + } + ) + + other_value = { + "target": "MIDS", + "regex": ".*?(\\d+)$", # Non-greedy to correctly extract multi-digit numbers + "within": ["USUBJID", "MIDSTYPE"], + "comparator": [ + {"name": "SMSTDTC", "sort_order": "ASC", "null_position": "first"} + ], + } + + result = DataframeType({"value": df}).target_is_sorted_by(other_value) + # All should be True - sorted correctly by chronological order + assert result.equals(pd.Series([True, True, True, True, True, True, True])) + + +@pytest.mark.parametrize("dataset_class", [PandasDataset, DaskDataset]) +def test_target_is_sorted_by_with_regex_zero_padded(dataset_class): + """ + Test target_is_sorted_by with regex extraction for zero-padded sequence numbers. 
+ Example: lalala01, lalala02, ..., lalala09, lalala10 + """ + df = dataset_class.from_dict( + { + "USUBJID": ["001", "001", "001", "001"], + "MIDSTYPE": ["A", "A", "A", "A"], + "MIDS": ["lalala01", "lalala02", "lalala09", "lalala10"], + "SMSTDTC": [ + "2020-01-01", + "2020-01-02", + "2020-01-09", + "2020-01-10", + ], + } + ) + + other_value = { + "target": "MIDS", + "regex": ".*?(\\d+)$", # Non-greedy to correctly extract multi-digit numbers + "within": ["USUBJID", "MIDSTYPE"], + "comparator": [ + {"name": "SMSTDTC", "sort_order": "ASC", "null_position": "first"} + ], + } + + result = DataframeType({"value": df}).target_is_sorted_by(other_value) + # All should be True - numeric conversion handles zero-padding + assert result.equals(pd.Series([True, True, True, True])) + + +@pytest.mark.parametrize("dataset_class", [PandasDataset, DaskDataset]) +def test_target_is_sorted_by_with_regex_invalid_order(dataset_class): + """ + Test that invalid order is detected even with regex extraction. + """ + df = dataset_class.from_dict( + { + "USUBJID": ["001", "001", "001", "001"], + "MIDSTYPE": ["A", "A", "A", "A"], + "MIDS": ["lalala1", "lalala10", "lalala2", "lalala9"], + "SMSTDTC": [ + "2020-01-01", + "2020-01-02", + "2020-01-09", + "2020-01-10", + ], + } + ) + + other_value = { + "target": "MIDS", + "regex": ".*?(\\d+)$", # Non-greedy to correctly extract multi-digit numbers + "within": ["USUBJID", "MIDSTYPE"], + "comparator": [ + {"name": "SMSTDTC", "sort_order": "ASC", "null_position": "first"} + ], + } + + result = DataframeType({"value": df}).target_is_sorted_by(other_value) + # lalala10 and lalala9 are in wrong positions + # Expected by date: 1, 2, 9, 10 + # Actual: 1, 10, 2, 9 + # The function marks records where neighbor consistency fails + assert result.equals(pd.Series([True, False, True, False])) + + +@pytest.mark.parametrize("dataset_class", [PandasDataset, DaskDataset]) +def test_target_is_sorted_by_with_regex_multiple_groups(dataset_class): + """ + Test regex 
sorting with multiple USUBJID and MIDSTYPE groups. + """ + df = dataset_class.from_dict( + { + "USUBJID": ["001", "001", "001", "002", "002", "002"], + "MIDSTYPE": ["A", "A", "A", "A", "A", "A"], + "MIDS": ["M1", "M2", "M3", "M1", "M2", "M3"], + "SMSTDTC": [ + "2020-01-01", + "2020-01-02", + "2020-01-03", + "2020-02-01", + "2020-02-02", + "2020-02-03", + ], + } + ) + + other_value = { + "target": "MIDS", + "regex": ".*?(\\d+)$", # Non-greedy to correctly extract multi-digit numbers + "within": ["USUBJID", "MIDSTYPE"], + "comparator": [ + {"name": "SMSTDTC", "sort_order": "ASC", "null_position": "first"} + ], + } + + result = DataframeType({"value": df}).target_is_sorted_by(other_value) + assert result.equals(pd.Series([True, True, True, True, True, True])) + + +@pytest.mark.parametrize("dataset_class", [PandasDataset, DaskDataset]) +def test_target_is_sorted_by_without_regex_still_works(dataset_class): + """ + Test that the operator still works without regex (backward compatibility). + """ + df = dataset_class.from_dict( + { + "USUBJID": ["001", "001", "001"], + "SESEQ": [1, 2, 3], + "SESTDTC": [ + "2020-01-01", + "2020-01-02", + "2020-01-03", + ], + } + ) + + other_value = { + "target": "SESEQ", + "within": "USUBJID", + "comparator": [{"name": "SESTDTC", "sort_order": "ASC"}], + } + + result = DataframeType({"value": df}).target_is_sorted_by(other_value) + assert result.equals(pd.Series([True, True, True])) From 74bf86411f79ee41caba6f6e01c39895309e00c4 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 23 Apr 2026 11:20:28 +0200 Subject: [PATCH 2/7] Remove the deprecated function --- .../check_operators/dataframe_operators.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 653a3f3ff..5105b209e 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ 
b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -1726,17 +1726,6 @@ def check_target_ascending_in_sorted_group_with_regex( ) return is_valid - def check_target_ascending_in_sorted_group( - self, group, target, comparator, ascending, na_pos - ): - """ - Check if target values are in ascending order within a group - already sorted by comparator. - """ - return self.check_target_ascending_in_sorted_group_with_regex( - group, target, comparator, ascending, na_pos - ) - def check_date_overlaps(self, group, target, comparator): comparator_values = group[comparator].tolist() is_valid = pd.Series(True, index=group.index) From 3b65d2cd7fceec0d2e49df25308c7c4deebe9fc3 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Fri, 24 Apr 2026 10:25:59 +0200 Subject: [PATCH 3/7] Enhance target_is_sorted_by operator documentation with regex parameter details --- resources/schema/rule-merged/CORE-base.json | 142 +++-- resources/schema/rule-merged/Operations.json | 301 +++++++++-- resources/schema/rule-merged/Operator.json | 489 ++++++++++++++---- .../rule-merged/Organization_CDISC.json | 112 +++- .../rule-merged/Organization_Custom.json | 53 +- .../schema/rule-merged/Organization_FDA.json | 38 +- resources/schema/rule/Operator.md | 19 +- 7 files changed, 923 insertions(+), 231 deletions(-) diff --git a/resources/schema/rule-merged/CORE-base.json b/resources/schema/rule-merged/CORE-base.json index 6fccb4afd..8d2fbe672 100644 --- a/resources/schema/rule-merged/CORE-base.json +++ b/resources/schema/rule-merged/CORE-base.json @@ -9,7 +9,9 @@ "$ref": "#/$defs/CheckItems" } }, - "required": ["all"], + "required": [ + "all" + ], "type": "object" }, { @@ -19,7 +21,9 @@ "$ref": "#/$defs/CheckItems" } }, - "required": ["any"], + "required": [ + "any" + ], "type": "object" }, { @@ -29,7 +33,9 @@ "$ref": "#/$defs/CheckItem" } }, - "required": ["not"], + "required": [ + "not" + ], "type": "object" } ] @@ -127,13 +133,18 @@ "$ref": "#/$defs/Domains" }, "include_split_datasets": { - 
"enum": [true] + "enum": [ + true + ] } }, "type": "object" }, "JoinType": { - "enum": ["inner", "left"], + "enum": [ + "inner", + "left" + ], "type": "string" }, "LeftRightKeys": { @@ -146,7 +157,10 @@ "$ref": "#/$defs/VariableName" } }, - "required": ["Left", "Right"], + "required": [ + "Left", + "Right" + ], "type": "object" }, "PascalCases": { @@ -231,7 +245,10 @@ "type": "string" } }, - "required": ["Document", "Cited Guidance"], + "required": [ + "Document", + "Cited Guidance" + ], "type": "object" }, "type": "array" @@ -240,10 +257,14 @@ "additionalProperties": false, "anyOf": [ { - "required": ["Logical Expression"] + "required": [ + "Logical Expression" + ] }, { - "required": ["Plain Language Expression"] + "required": [ + "Plain Language Expression" + ] } ], "properties": { @@ -257,18 +278,25 @@ "type": "string" } }, - "required": ["Rule"], + "required": [ + "Rule" + ], "type": "object" }, "Plain Language Expression": { "type": "string" }, "Type": { - "enum": ["Failure", "Success"], + "enum": [ + "Failure", + "Success" + ], "type": "string" } }, - "required": ["Type"], + "required": [ + "Type" + ], "type": "object" }, "Origin": { @@ -282,11 +310,18 @@ "type": "string" }, "Relationship": { - "enum": ["Predecessor", "Related", "Successor"], + "enum": [ + "Predecessor", + "Related", + "Successor" + ], "type": "string" } }, - "required": ["Id", "Relationship"], + "required": [ + "Id", + "Relationship" + ], "type": "object" }, "type": "array" @@ -304,7 +339,9 @@ "type": "string" } }, - "required": ["Id"], + "required": [ + "Id" + ], "type": "object" }, "Validator Rule Message": { @@ -314,7 +351,11 @@ "type": "string" } }, - "required": ["Origin", "Rule Identifier", "Version"], + "required": [ + "Origin", + "Rule Identifier", + "Version" + ], "type": "object" }, "minItems": 1, @@ -327,7 +368,11 @@ "type": "string" } }, - "required": ["Name", "References", "Version"], + "required": [ + "Name", + "References", + "Version" + ], "type": "object" }, "minItems": 1, 
@@ -350,7 +395,10 @@ "$ref": "Organization_Custom.json" } ], - "required": ["Organization", "Standards"], + "required": [ + "Organization", + "Standards" + ], "type": "object" }, "minItems": 1, @@ -390,10 +438,15 @@ "const": "Published" } }, - "required": ["Id"] + "required": [ + "Id" + ] } ], - "required": ["Status", "Version"], + "required": [ + "Status", + "Version" + ], "type": "object" }, "Description": { @@ -439,7 +492,9 @@ "type": "string" } }, - "required": ["Name"], + "required": [ + "Name" + ], "type": "object" }, "minItems": 1, @@ -465,7 +520,9 @@ "type": "array" } }, - "required": ["Message"], + "required": [ + "Message" + ], "type": "object" }, "Rule Type": { @@ -483,7 +540,9 @@ "$ref": "#/$defs/Classes" } }, - "required": ["Include"], + "required": [ + "Include" + ], "type": "object" }, { @@ -493,7 +552,9 @@ "$ref": "#/$defs/Classes" } }, - "required": ["Exclude"], + "required": [ + "Exclude" + ], "type": "object" } ] @@ -507,7 +568,9 @@ "$ref": "#/$defs/DataStructures" } }, - "required": ["Include"], + "required": [ + "Include" + ], "type": "object" }, { @@ -517,7 +580,9 @@ "$ref": "#/$defs/DataStructures" } }, - "required": ["Exclude"], + "required": [ + "Exclude" + ], "type": "object" } ] @@ -557,10 +622,14 @@ }, "anyOf": [ { - "required": ["Exclude"] + "required": [ + "Exclude" + ] }, { - "required": ["Include"] + "required": [ + "Include" + ] } ], "type": "object" @@ -582,13 +651,20 @@ }, "oneOf": [ { - "required": ["Classes", "Domains"] + "required": [ + "Classes", + "Domains" + ] }, { - "required": ["Data Structures"] + "required": [ + "Data Structures" + ] }, { - "required": ["Entities"] + "required": [ + "Entities" + ] } ], "type": "object" @@ -623,7 +699,9 @@ } }, "then": { - "required": ["Grouping_Variables"] + "required": [ + "Grouping_Variables" + ] }, "type": "object" } diff --git a/resources/schema/rule-merged/Operations.json b/resources/schema/rule-merged/Operations.json index a93003e72..85058679b 100644 --- 
a/resources/schema/rule-merged/Operations.json +++ b/resources/schema/rule-merged/Operations.json @@ -10,7 +10,10 @@ "markdownDescription": "\nReturns a Series indicating whether a specified codelist is extensible. Used in conjunction with codelist_terms to determine if values outside the codelist are acceptable. From the above example, $extensible will contain a bool if the codelist PKUDUG is extensible in all rows of the column.\n\nIf ct_package_type, version, and codelist_code parameters are provided, it will instead attach a new column containing the extensible value for each combination provided in the source dataset.\n\nFor example, given the current dataset:\n\n```\nid \tcodeSystemVersion \t$codelist_code\n1 \t2024-09-27 \tC201264\n2 \t2024-09-27 \tC201265\n3 \t2023-03-29 \tC127262\n```\n\nand the following operation:\n\n```yaml\n- id: $codelist_extensible\n operator: codelist_extensible\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n```\n\nThis will result in the following dataset:\n\n```\nid \tcodeSystemVersion \t$codelist_code \t$codelist_extensible\n1 \t2024-09-27 \tC201264 \tfalse\n2 \t2024-09-27 \tC201265 \tfalse\n3 \t2023-03-29 \tC127262 \ttrue\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -20,7 +23,10 @@ "markdownDescription": "\nReturns a list of valid codelist/term values. Used for evaluating whether NCI codes, submission values or NCI preferred terms are valid based on controlled terminology. 
Expects three parameters: `codelists` which is a list of the codelist submission value(s) to retrieve, `level` which is the level of data (either \"codelist\" or \"term\") at which to return data from, and `returntype` which is the type of values to return: \"code\" for NCI Code(s), \"value\" for submission value(s), or \"pref_term\" for NCI preferred term(s).\n\n```yaml\n- Check:\n - all:\n - name: PPSTRESU\n operator: is_not_contained_by\n value: $terms\n - name: $extensible\n operator: equal_to\n value: true\n- Operations:\n - id: $terms\n operator: codelist_terms\n codelists:\n - PKUDUG\n level: term\n returntype: value\n - id: $extensible\n codelist: PKUDUG\n operator: codelist_extensible\n```\n\nIf `ct_package_type`, `version`, and `codelist_code` parameters are provided, it will instead attach a new column containing the term for each combination provided in the source dataset. If a column name is provided as:\n\n- `term_code`, it will find term information using the term codes in the specified column.\n- `term_value`, it will find term information using the term submission values in the specified column.\n- `term_pref_term`, it will find term information using the term preferred terms in the specified column.\n\nOnly one of `term_code`, `term_value` or `term_pref_term` can be provided. The term information returned will depend on the value of the `returntype` parameter, as described above. 
If `returntype` is not specified, specifying `term_code` will return the term submission value and specifying either `term_value` or `term_pref_term` will return the term code.\n\nFor example, given the current dataset:\n\n```\nid \tcodeSystemVersion $codelist_code code decode\n1 \t2024-09-27 C201264 C201356 After\n2 \t2024-09-27 C201265 C201352 End to End\n3 \t2023-03-29 C127262 C51282 CLINIC\n```\n\nand the following operations:\n\n```yaml\n- id: $found_term_value\n operator: codelist_terms\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n term_code: code\n- id: $found_term_pref_term\n operator: codelist_terms\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n term_code: code\n returntype: pref_term\n```\n\nThis will result in the following dataset:\n\n```\n id codeSystemVersion $codelist_code code decode $found_term_value $found_term_pref_term\n 1 2024-09-27 C201264 C201356 After After After Timing Type\n 2 2024-09-27 C201265 C201352 End to End End to End End to End\n 3 2023-03-31 C127262 C51282 CLINIC CLINIC Clinic\n```\n\nConversely, if given the same dataset, and the following operations:\n\n```yaml\n- id: $found_term_code1\n operator: codelist_terms\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n term_value: decode\n- id: $found_term_code2\n operator: codelist_terms\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n term_pref_term: decode\n```\n\nThis will result in the following dataset:\n\n```\n id codeSystemVersion $codelist_code code decode $found_term_code1 $found_term_code2\n 1 2024-09-27 C201264 C201356 After C201356\n 2 2024-09-27 C201265 C201352 End to End C201352 C201352\n 3 2023-03-31 C127262 C51282 CLINIC C51282 C51282\n```\n\nNote that `$found_term_code2` is:\n\n- `null` for the first record because \"After\" does not match any NCI preferred term in the C201264 codelist.\n- populated for the third record 
because matching is case-insensitive (i.e., \"CLINIC\" matches \"Clinic\").\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -30,7 +36,11 @@ "markdownDescription": "\nReturns a list of valid extensible codelist term's submission values. Used for evaluating whether submission values are valid based on controlled terminology. Expects the parameter codelists which is a list of the codelist submission value(s) to retrieve. If the codelist argument is [\"All\"] will return all extensible terms for the CT in a list.\n\n```yaml\n{\n \"id\": \"$ext_value\",\n \"codelist\": [\"ALL\"],\n \"operator\": \"define_extensible_codelists\",\n}\n```\n" } }, - "required": ["id", "operator", "codelists"], + "required": [ + "id", + "operator", + "codelists" + ], "type": "object" }, { @@ -40,7 +50,11 @@ "markdownDescription": "\nIf a target variable name is specified, returns the specified metadata in the define for the specified target variable.\n\nInput\n\n```yaml\n- operator: define_variable_metadata\n attribute_name: define_variable_label\n name: LBTESTCD\n id: $LBTESTCD_VARIABLE_LABEL\n```\n\nOutput\n\n```\nLaboratory Test Code\n```\n\nIf no target variable name specified, returns a dictionary containing the specified metadata in the define for all variables.\n\nInput\n\n```yaml\n- operator: define_variable_metadata\n attribute_name: define_variable_label\n id: $VARIABLE_LABEL\n```\n\nOutput\n\n```\n{\n \"STUDYID\": \"Study Identifier\",\n \"USUBJID\": \"Unique Subject Identifier\",\n \"LBTESTCD\": \"Laboratory Test Code\",\n \"...\": \"...\"\n}\n```\n" } }, - "required": ["id", "operator", "attribute_name"], + "required": [ + "id", + "operator", + "attribute_name" + ], "type": "object" }, { @@ -50,7 +64,11 @@ "markdownDescription": "\nGet a distinct list of values for the given name.\n\nIf a group list is specified, the distinct value list will be grouped by the variables within group.\nIf a filter object is provided, 
only values for records that match the filter criteria are included in the distinct values.\nIf `value_is_reference` is set to true, the target column contains the names of other columns, and the operation will check the referenced columns to ensure they exist in the associated dataset before adding them to the distinct list.\n\nIf group is provided, group_aliases may also be provided to assign new grouping variable names so that results grouped by the values in one set of grouping variables can be merged onto a dataset according to the same grouping value(s) stored in different set of grouping variables. When both group and group_aliases are provided, columns are renamed according to corresponding list position (i.e., the 1st column in group is renamed to the 1st column in group_aliases, etc.). If there are more columns listed in group than in group_aliases, only the group columns with corresponding group_aliases columns will be renamed. If there are more columns listed in group_aliases than in group, the extra column names in group_aliases will be ignored. See record_count for an example of the use of group_aliases.\n\n```yaml\nCheck:\n all:\n - name: SSSTRESC\n operator: equal_to\n value: DEAD\n value_is_literal: true\n - name: $ds_dsdecod\n operator: does_not_contain\n value: DEATH\n value_is_literal: true\nOperations:\n - operator: distinct\n domain: DS\n name: DSDECOD\n id: $ds_dsdecod\n group:\n - USUBJID\n filter:\n CAT: \"CATEGORY 1\"\n SCAT: \"SUBCATEGORY A\"\n```\n\n> below, `IDVAR` contains column names, the operation retrieves the value from each column for that row, checks the dataset associated with that column using the CO RDOMAIN. 
Columns that exist are added to the returns the distinct set.\n\n```yaml\nOperations:\n - domain: CO\n id: $rdomain_variables\n name: IDVAR\n operator: distinct\n value_is_reference: true\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -60,7 +78,10 @@ "markdownDescription": "\nChecks whether the domain is in the set of domains within the provided standard.\n\nInput\n\nTarget Domain: XY\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\nOperations:\n - operator: domain_is_custom\n id: $domain_is_custom\n```\n\nOutput\n\n```\ntrue\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -70,7 +91,10 @@ "markdownDescription": "\nChecks whether the related domain (for example, the parent domain of a SUPP or RELREC dataset) is not present in the set of standard domains for the provided standard and version. This is useful for determining whether relationships point to non-standard or custom domains.\n\nInput\n\nTarget Domain: SUPPEX\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\nOperations:\n - operator: related_domain_is_custom\n id: $related_domain_is_custom\n```\n\nOutput\n\n```\ntrue\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -80,7 +104,10 @@ "markdownDescription": "\nReturns the label for the domain the operation is executing on within the provided standard.\n\nInput.\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\nOperations:\n - operator: domain_label\n id: $domain_label\n```\n\nOutput\n\n```\nLaboratory Test Results\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -90,7 +117,11 @@ "markdownDescription": "\nCalculates the number of days between the DTC and RFSTDTC. The Study Day value is incremented by 1 for each date following RFSTDTC. 
Dates prior to RFSTDTC are decreased by 1, with the date preceding RFSTDTC designated as Study Day -1 (there is no Study Day 0). All Study Day values are integers. Thus, to calculate Study Day:\n\n- --DY = (date portion of --DTC) - (date portion of RFSTDTC) + 1 if --DTC is on or after RFSTDTC\n- --DY = (date portion of --DTC) - (date portion of RFSTDTC) if --DTC precedes RFSTDTC\n\nThis algorithm should be used across all domains.\n\n```yaml\nCheck:\n all:\n - name: --DY\n operator: non_empty\n - name: --DTC\n operator: is_complete_date\n - name: RFSTDTC\n operator: is_complete_date\n - name: --DY\n operator: not_equal_to\n value: $dy\nOperations:\n - name: --DTC\n operator: dy\n id: $dy\nMatch Datasets:\n - Name: DM\n Keys:\n - USUBJID\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -100,7 +131,11 @@ "markdownDescription": "\nReturns the requested dataset level metadata value for the current dataset. Possible name values are:\n\n- dataset_size\n- dataset_location\n- dataset_name\n- dataset_label\n- domain\n- is_ap\n- ap_suffix\n\nExample\n\nInput:\n\nTarget domain: LB\n\n```yaml\n- name: dataset_label\n operator: extract_metadata\n id: $dataset_label\n```\n\nOutput:\n\n```\nLaboratory Test Results\n```\n\nExample: ap_suffix\n\nExtracts the domain suffix (characters 3-4) from AP-related domains. 
For example, \"FA\" from \"APFA\" DOMAIN value.\n\nInput:\n\nTarget domain: APFA\n\n```yaml\n- name: ap_suffix\n operator: extract_metadata\n id: $ap_suffix\n```\n\nOutput:\n\n```\nFA\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -110,7 +145,10 @@ "markdownDescription": "\nReturns the expected (\"Core\" = Exp ) variables for the domain in the current standard Variable Metadata for custom domains will pull from the model while non-custom domains will be from the IG and Model.\n\nInput:\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\n- operator: expected_variables\n id: $expected_variables\n```\n\nOutput:\n\n```\n[\"LBCAT\", \"LBORRES\", \"LBORRESU\", \"...\"]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -120,7 +158,13 @@ "markdownDescription": "\nFetches controlled terminology attribute values from CT packages based on row-specific CT package and version references. The operation constructs CT package names based on the standard being validated and the values in the `name` and `version` columns (e.g., SDTMIG \u2192 \"sdtmct-{version}\"). When the `name` column contains \"CDISC\" or \"CDISC CT\", it uses the validation run's standard to determine the package prefix and the version found in the cell of the specified column. 
The operation extracts all codes matching the specified ct_attribute from the package.\n\n**Required Parameters:**\n\n- `ct_attribute`: Attribute to extract - `\"Term CCODE\"`, `\"Codelist CCODE\"`, `\"Term Value\"`, `\"Codelist Value\"`, or `\"Term Preferred Term\"`\n- `name`: Column containing CT reference (e.g., \"TSVCDREF\") - identifies which terminology system is referenced\n- `version`: Column containing CT version (e.g., \"TSVCDVER\")\n\n```yaml\n- id: $VALID_TERM_CODES\n name: TSVCDREF\n operator: get_codelist_attributes\n ct_attribute: Term CCODE\n version: TSVCDVER\n```\n\n**Note:** due to editor not containing the cache, if using this operator in rule editor, you must put the ctpackage versions contained within your data in the library tab for it work properly in editor.\n" } }, - "required": ["id", "operator", "name", "ct_attribute", "version"], + "required": [ + "id", + "operator", + "name", + "ct_attribute", + "version" + ], "type": "object" }, { @@ -130,7 +174,10 @@ "markdownDescription": "\nReturns list of dataset columns in order\n\n```yaml\nCheck:\n all:\n - name: $column_order_from_dataset\n operator: is_not_ordered_by\n value: $column_order_from_library\nOperations:\n - id: $column_order_from_library\n operator: get_column_order_from_library\n - id: $column_order_from_dataset\n operator: get_column_order_from_dataset\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -140,7 +187,10 @@ "markdownDescription": "\nFetches column order for a given domain from the CDISC library. 
The lists with column names are sorted in accordance to \"ordinal\" key of library metadata.\nOptionally Filters variables based on specified metadata criteria.\n\nRule Type: Variable Metadata Check\n\n```yaml\nCheck:\n all:\n - name: variable_name\n operator: is_not_contained_by\n value: $ig_variables\nOperations:\n - id: $ig_variables\n operator: get_column_order_from_library\n key_name: \"role\" # role, core, etc\n key_value: \"Exp\" # Timing, Req, Exp, Perm, etc\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -150,7 +200,10 @@ "markdownDescription": "\nReturns the list of domain for a given class from the CDISC Library Implementation Guide. This operation retrieves all domains that belong to a specified class (e.g., \"TRIAL DESIGN\", \"FINDINGS\", \"EVENTS\") based on the current standard and version. The operation uses the standard and version from the validation context as well as the optional `domain_class` parameter which is the name of the class to filter by (e.g., \"TRIAL DESIGN\", \"FINDINGS\", \"EVENTS\", \"INTERVENTIONS). NOTE: Class names are case-sensitive and should match the Library metadata format. If no `domain_class` parameter is provided, the operation returns all domains across all classes in the Implementation Guide:\n\n```yaml\n- operator: get_library_class_domains\n id: $trial_design_domains\n domain_class: \"TRIAL DESIGN\"\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -160,7 +213,10 @@ "markdownDescription": "\nFetches column order for a given model class from the CDISC library. 
The lists with column names are sorted in accordance to \"ordinal\" key of library metadata.\n\nRule Type: Variable Metadata Check\n\n```yaml\nCheck:\n all:\n - name: variable_name\n operator: is_not_contained_by\n value: $model_variables\nOperations:\n - id: $model_variables\n operator: get_model_column_order\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -170,7 +226,12 @@ "markdownDescription": "\nFetches variable level library model properties filtered by the provided key_name and key_value\n\nExample\n\nInput\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\n- operator: get_model_filtered_variables\n id: $model_filtered_variables\n key_name: \"role\"\n key_value: \"Timing\"\n```\n\nOutput\n\n```\n[\"VISITNUM\", \"VISIT\", \"VISITDY\", \"TAETORD\", \"...\"]\n```\n" } }, - "required": ["id", "operator", "key_name", "key_value"], + "required": [ + "id", + "operator", + "key_name", + "key_value" + ], "type": "object" }, { @@ -180,7 +241,10 @@ "markdownDescription": "\nFetches column order for a given SUPP's parent model class from the CDISC library. The lists with column names are sorted in accordance to \"ordinal\" key of library metadata.\n\n```yaml\nCheck:\n all:\n - operator: is_not_contained_by\n value: $parent_model_variables\nOperations:\n - id: $parent_model_variables\n operator: get_parent_model_column_order\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -190,7 +254,12 @@ "markdownDescription": "\nFilters variables from the dataset based on specified metadata criteria. 
Returns a list of variable names that exist in the dataset and match the filter criteria.\n\n```yaml\n- operator: get_dataset_filtered_variables\n id: $timing_variables\n key_name: \"role\"\n key_value: \"Timing\"\n```\n" } }, - "required": ["id", "operator", "key_name", "key_value"], + "required": [ + "id", + "operator", + "key_name", + "key_value" + ], "type": "object" }, { @@ -200,7 +269,11 @@ "markdownDescription": "\nGenerates a dataframe where each record in the dataframe is the library ig variable metadata corresponding with the variable label found in the column provided in name. The metadata column names are prefixed with the string provided in `id`.\n\nInput\n\nTarget Dataset: SUPPLB\n\nProduct: sdtmig\n\nVersion: 3-4\n\nDataset:\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QLABEL\": [\"Toxicity\", \"Viscosity\", \"Analysis Method\"]\n}\n```\n\nRule:\n\n```yaml\n- operator: label_referenced_variable_metadata\n id: $qlabel_referenced_variable_metadata\n name: \"QLABEL\"\n```\n\nOutput\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QLABEL\": [\"Toxicity\", \"Viscosity\", \"Analysis Method\"],\n \"$qlabel_referenced_variable_metadata_name\": [\"LBTOX\", null, \"LBANMETH\"],\n \"$qlabel_referenced_variable_metadata_role\": [\n \"Variable Qualifier\",\n null,\n \"Record Qualifier\"\n ],\n \"$qlabel_referenced_variable_metadata_ordinal\": [44, null, 38],\n \"$qlabel_referenced_variable_metadata_core\": [\"Req\", \"Req\", \"Req\"],\n \"$qlabel_referenced_variable_metadata_label\": [\"Toxicity\", null, \"Analysis Method\"]\n}\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -210,7 +283,11 @@ "markdownDescription": "\nAllows the creation of a lookup table to take the values from multiple input columns and map them to values in an output 
column. The map parameter contains a list of objects. Each dictionary contains column names as properties that match the column names in the source dataset and an output property that will be returned as a result.\n\nIf map has a single object and output is the only property specified on that object, this will function as a direct assignment.\n\nFor example, given the following current dataset:\n\n```\nid \tparent_entity \tparent_rel\n1 \tTiming \trelativeToFrom\n2 \tSomething \trelativeToFrom\n3 \tTiming \ttype\n```\n\nand the following operation:\n\n```yaml\nOperations:\n - id: $codelist_code\n operator: map\n map:\n - parent_entity: Timing\n parent_rel: type\n output: C201264\n - parent_entity: Timing\n parent_rel: relativeToFrom\n output: C201265\n```\n\nThis will result in the following dataset:\n\n```\nid \tparent_entity \tparent_rel \t$codelist_code\n1 \tTiming \trelativeToFrom \tC201265\n2 \tSomething \trelativeToFrom \tNone\n3 \tTiming \ttype \tC201264\n```\n\nThe following operation:\n\n```yaml\nOperations:\n - id: $codelist_code\n operator: map\n map:\n - output: C201264\n```\n\nWill result in the following dataset:\n\n```\nid \tparent_entity \tparent_rel \t$codelist_code\n1 \tTiming \trelativeToFrom \tC201264\n2 \tSomething \trelativeToFrom \tC201264\n3 \tTiming \ttype \tC201264\n```\n" } }, - "required": ["id", "operator", "map"], + "required": [ + "id", + "operator", + "map" + ], "type": "object" }, { @@ -220,7 +297,11 @@ "markdownDescription": "\nIf no group is provided, returns the max value in name. 
If group is provided, returns the max value in name, within each unique set of the grouping variables.\n\n```yaml\nCheck:\n all:\n - name: \"$max_age\"\n operator: \"greater_than\"\n value: \"MAXAGE\"\nOperations:\n - operator: \"max\"\n domain: \"DM\"\n name: \"AGE\"\n id: \"$max_age\"\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -230,7 +311,11 @@ "markdownDescription": "\nIf no group is provided, returns the max date value in name. If group is provided, returns the max date value in name, within each unique set of the grouping variables.\n\n```yaml\nCheck:\n all:\n - name: USUBJID\n operator: is_contained_by\n value: $ex_usubjid\n - name: RFXENDTC\n operator: not_equal_to\n value: $max_ex_exstdtc\n - name: RFXENDTC\n operator: not_equal_to\n value: $max_ex_exendtc\nOperations:\n - operator: distinct\n domain: EX\n name: USUBJID\n id: $ex_usubjid\n - operator: max_date\n domain: EX\n name: EXSTDTC\n id: $max_ex_exstdtc\n group:\n - USUBJID\n - operator: max_date\n domain: EX\n name: EXENDTC\n id: $max_ex_exendtc\n group:\n - USUBJID\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -240,7 +325,11 @@ "markdownDescription": "\nExample: AAGE > mean(DM.AGE), where AAGE is a fictitious NSV\n\n```yaml\nCheck:\n all:\n - name: \"AAGE\"\n operator: \"greater_than\"\n value: \"$average_age\"\nOperations:\n - operator: \"mean\"\n domain: \"DM\"\n name: \"AGE\"\n id: \"$average_age\"\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -250,7 +339,11 @@ "markdownDescription": "\nIf no group is provided, returns the min value in name. 
If group is provided, returns the min value in name, within each unique set of the grouping variables.\n\n```yaml\nCheck:\n all:\n - name: \"$min_age\"\n operator: \"less_than\"\n value: \"MINAGE\"\nOperations:\n - operator: \"min\"\n domain: \"DM\"\n name: \"AGE\"\n id: \"$min_age\"\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -260,7 +353,11 @@ "markdownDescription": "\nIf no group is provided, returns the min date value in name. If group is provided, returns the min date value in name, within each unique set of the grouping variables.\n\nExample: RFSTDTC is greater than min AE.AESTDTC for the current USUBJID\n\n```yaml\nCheck:\n all:\n - name: \"RFSTDTC\"\n operator: \"date_greater_than\"\n value: \"$ae_aestdtc\"\nOperations:\n - operator: \"min_date\"\n domain: \"AE\"\n name: \"AESTDTC\"\n id: \"$ae_aestdtc\"\n group:\n - USUBJID\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -270,7 +367,12 @@ "markdownDescription": "\nComputes set difference: elements in `name` that are not in `subtract`. Uses [set difference]() semantics (A \u2216 B). Preserves order from the first list. Both `name` and `subtract` must reference other operation results (e.g., `$expected_variables`, `$dataset_variables`). When `subtract` is empty or missing, returns all elements from `name`. 
Can be computed and added to output variables to display missing elements in error results.\n\n```yaml\nOperations:\n - id: $expected_variables\n operator: expected_variables\n - id: $dataset_variables\n operator: get_column_order_from_dataset\n - id: $expected_minus_dataset\n name: $expected_variables\n operator: minus\n subtract: $dataset_variables\n```\n" } }, - "required": ["id", "operator", "name", "subtract"], + "required": [ + "id", + "operator", + "name", + "subtract" + ], "type": "object" }, { @@ -280,7 +382,11 @@ "markdownDescription": "\nGenerates a dataframe where each record in the dataframe is the library ig variable metadata corresponding with the variable name found in the column provided in name. The metadata column names are prefixed with the string provided in `id`.\n\nInput\n\nTarget Dataset: SUPPLB\n\nProduct: sdtmig\n\nVersion: 3-4\n\nDataset:\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QNAM\": [\"Toxicity\", \"LBVISCOS\", \"Analysis Method\"]\n}\n```\n\nRule:\n\n```yaml\n- operator: name_referenced_variable_metadata\n id: $qnam_referenced_variable_metadata\n name: \"QNAM\"\n```\n\nOutput\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QNAM\": [\"LBTOX\", \"LBVISCOS\", \"LBANMETH\"],\n \"$qnam_referenced_variable_metadata_name\": [\"LBTOX\", null, \"LBANMETH\"],\n \"$qnam_referenced_variable_metadata_role\": [\n \"Variable Qualifier\",\n null,\n \"Record Qualifier\"\n ],\n \"$qnam_referenced_variable_metadata_ordinal\": [44, null, 38],\n \"$qnam_referenced_variable_metadata_core\": [\"Req\", \"Req\", \"Req\"],\n \"$qnam_referenced_variable_metadata_label\": [\"Toxicity\", null, \"Analysis Method\"]\n}\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -290,7 +396,10 @@ "markdownDescription": "\nReturns the permissible 
variables (\"Core\" = Perm ) for a given domain and standard Variable Metadata for custom domains will pull from the model while non-custom domains will be from the IG and Model.\n\nInput:\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\n- operator: permissible_variables\n id: $permissible_variables\n```\n\nOutput:\n\n```\n[\"LBGRPID\", \"LBREFID\", \"LBSPID\", \"...\"]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -300,7 +409,10 @@ "markdownDescription": "\nIf no filter or group is provided, returns the number of records in the dataset. If filter is provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter. Filter can have a wildcard `&` that when added to the end of the filter value will look for all instances of that prefix (see 4th example below). If group is provided, returns the number of rows matching each unique set of the grouping variables. These can be static column name(s) or can be derived from other operations like get_dataset_filtered_variables.\n\nIf both filter and group are provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter that also match each unique set of the grouping variables.\n\n**Wildcard Filtering:** Filter values ending with % will match any records where the column value starts with the specified prefix. For example, RACE% will match RACE1, RACE2, RACE3, etc. This is useful for matching related variables with numeric or alphabetic suffixes.\n\n**Regex Transformation:** If regex is provided along with group, the regex pattern will be applied to transform grouping column values before grouping. The regex is only applied to columns where the pattern matches the data type. 
For example, using regex `^\\d{4}-\\d{2}-\\d{2}` on a column containing `2022-01-14T08:00` will extract `2022-01-14` for grouping purposes.\n\nIf group is provided, group_aliases may also be provided to assign new grouping variable names so that results grouped by the values in one set of grouping variables can be merged onto a dataset according to the same grouping value(s) stored in different set of grouping variables. When both group and group_aliases are provided, columns are renamed according to corresponding list position (i.e., the 1st column in group is renamed to the 1st column in group_aliases, etc.). If there are more columns listed in group than in group_aliases, only the group columns with corresponding group_aliases columns will be renamed. If there are more columns listed in group_aliases than in group, the extra column names in group_aliases will be ignored.\n\nExample: return the number of records in a dataset.\n\n```yaml\n- operator: record_count\n id: $records_in_dataset\n```\n\nExample: return the number of records where STUDYID = \"CDISC01\" and FLAGVAR = \"Y\".\n\n```yaml\n- operator: record_count\n id: $flagged_cdisc01_records_in_dataset\n filter:\n STUDYID: \"CDISC01\"\n FLAGVAR: \"Y\"\n```\n\nExample: return the number of records grouped by USUBJID and timing variables, extracting only the date portion from datetime values.\n\n```yaml\n- operator: record_count\n id: $records_per_usubjid_date\n group:\n - USUBJID\n - --TESTCD\n - $TIMING_VARIABLES\n regex: \"^\\d{4}-\\d{2}-\\d{2}\"\n```\n\nExample: return the number of records where QNAM starts with \"RACE\" (matches RACE1, RACE2, RACE3, etc.) 
per USUBJID.\n\n```yaml\n- operator: record_count\n id: $race_records_in_dataset\n filter:\n QNAM: \"RACE&\"\n group:\n - \"USUBJID\"\n```\n\nExample: return the number of records grouped by USUBJID.\n\n```yaml\n- operator: record_count\n id: $records_per_usubjid\n group:\n - USUBJID\n```\n\nExample: return the number of records grouped by USUBJID where FLAGVAR = \"Y\".\n\n```yaml\n- operator: record_count\n id: $flagged_records_per_usubjid\n group:\n - USUBJID\n filter:\n FLAGVAR: \"Y\"\n```\n\nExample: return the number of records grouped by USUBJID and IDVARVAL where QNAM = \"TEST1\" and IDVAR = \"GROUPID\", renaming the IDVARVAL column to GROUPID for subsequent merging.\n\n```yaml\n- operator: record_count\n id: $test1_records_per_usubjid_groupid\n group:\n - USUBJID\n - IDVARVAL\n filter:\n QNAM: \"TEST1\"\n IDVAR: \"GROUPID\"\n group_aliases:\n - USUBJID\n - GROUPID\n```\n\nExample: Group the StudyIdentifier dataset by parent_id and merge the result back to the context dataset StudyVersion using StudyVersion.id == StudyIdentifier.parent_id\n\n```yaml\nScope:\n Entities:\n Include:\n - StudyVersion\nOperations:\n - domain: StudyIdentifier\n filter:\n parent_entity: \"StudyVersion\"\n parent_rel: \"studyIdentifiers\"\n rel_type: \"definition\"\n studyIdentifierScope.organizationType.code: \"C70793\"\n studyIdentifierScope.organizationType.codeSystem: \"http://www.cdisc.org\"\n group:\n - parent_id\n group_aliases:\n - id\n id: $num_sponsor_ids\n operator: record_count\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -310,7 +422,10 @@ "markdownDescription": "\nReturns the required variables ( \"Core\" = Req ) for a given domain and standard Variable Metadata for custom domains will pull from the model while non-custom domains will be from the IG and Model.\n\nInput:\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\n- operator: required_variables\n id: 
$required_variables\n```\n\nOutput:\n\n```\n[\"STUDYID\", \"DOMAIN\", \"USUBJID\", \"LBSEQ\", \"LBTESTCD\", \"LBTEST\"]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -320,7 +435,12 @@ "markdownDescription": "\nSplits a dataset column by a given delimiter\n\n```yaml\nOperations:\n - name: PPSPEC\n delimiter: ;\n id: $ppspec_value\n operator: split_by\n```\n" } }, - "required": ["id", "operator", "delimiter", "name"], + "required": [ + "id", + "operator", + "delimiter", + "name" + ], "type": "object" }, { @@ -330,7 +450,10 @@ "markdownDescription": "\nReturns a list of the domains in the study\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -340,7 +463,10 @@ "markdownDescription": "\nReturns a list of the submitted dataset filenames in all uppercase\n\nex. if TS.xpt, AE.xpt, EC.xpt, and SUPPEC.xpt are submitted -> [TS, AE, EC, SUPPEC] will be returned\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -350,7 +476,10 @@ "markdownDescription": "\nReturns a list of valid SDTM domain names from the standard metadata. 
This can be used to compare extracted suffixes from DOMAIN values or dataset names.\n\nInput\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\nOperations:\n - operator: standard_domains\n id: $valid_domain_names\n```\n\nOutput\n\n```\n[\"AE\", \"CM\", \"DM\", \"FA\", \"LB\", \"QS\", ...]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -360,7 +489,10 @@ "markdownDescription": "\nReturns the valid terminology package dates for a given standard.\n\nGiven a list of terminology packages:\n\n```\n[\n \"sdtmct-2023-10-26\",\n \"sdtmct-2023-12-13\",\n \"adamct-2023-12-13\",\n \"cdashct-2023-05-19\"\n]\n```\n\nand standard: sdtmig\n\nthe operation will return:\n\n```\n[\"2023-10-26\", \"2023-12-13\"]\n```\n\nBy default, the standard is as specified when running validation - as the validation runtime parameter and/or as specified in the rule header - and the list of terminology packages is obtained from the current cache. If required, the default standard may be overridden using the optional ct_package_types parameter. 
For example, given the same list of terminology packages, the following operation:\n\n```yaml\nOperations:\n - operator: valid_codelist_dates\n id: $valid_dates\n ct_package_types:\n - SDTM\n - CDASH\n```\n\nwill return:\n\n```\n[\"2023-05-19\", \"2023-10-26\", \"2023-12-13\"]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -370,7 +502,11 @@ "markdownDescription": "\nReturns true if the version of an external dictionary provided in the define.xml file matches the version parsed from the dictionary files.\n\nInput:\n\n```yaml\nOperations:\n - operator: valid_define_external_dictionary_version\n id: $is_valid_loinc_version\n external_dictionary_type: loinc\n```\n\nOutput:\n\n```\n[true, true, true, true]\n```\n" } }, - "required": ["id", "operator", "external_dictionary_type"], + "required": [ + "id", + "operator", + "external_dictionary_type" + ], "type": "object" }, { @@ -428,7 +564,10 @@ "markdownDescription": "\nDetermines whether the values are valid in the following variables:\n\n- --SOCCD (System Organ Class Code)\n- --HLGTCD (High Level Group Term Code)\n- --HLTCD (High Level Term Code)\n- --PTCD (Preferred Term Code)\n- --LLTCD (Lowest Level Term Code)\n\nInput:\n\n```yaml\nOperations:\n - id: $is_valid_meddra_codes\n operator: valid_meddra_code_references\n```\n\nOutput:\n\n```\n[true, false, true, true]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -438,7 +577,10 @@ "markdownDescription": "\nDetermines whether the values are valid in the following variable pairs:\n\n- --SOCCD, --SOC (System Organ Class Code and Term)\n- --HLGTCD, --HLGT (High Level Group Term Code and Term)\n- --HLTCD, --HLT (High Level Term Code and Term)\n- --PTCD, --DECOD (Preferred Term Code and Dictionary-Derived Term)\n- --LLTCD, --LLT (Lowest Level Term Code and Term)\n\nInput:\n\n```yaml\nOperations:\n - id: $is_valid_meddra_pairs\n operator: 
valid_meddra_code_term_pairs\n```\n\nOutput:\n\n```\n[true, true, false, true, true]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -448,7 +590,10 @@ "markdownDescription": "\nDetermines whether the values are valid in the following variables:\n\n- --SOC (System Organ Class)\n- --HLGT (High Level Group Term)\n- --HLT (High Level Term)\n- --DECOD (Dictionary-Derived Term)\n- --LLT (Lowest Level Term)\n\nInput:\n\n```yaml\nOperations:\n - id: $is_valid_meddra_terms\n operator: valid_meddra_term_references\n```\n\nOutput:\n\n```\n[true, true, false, true, true]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -458,7 +603,11 @@ "markdownDescription": "\nChecks if a reference to whodrug term in name points to the existing code in Atc Text (INA) file.\n\nInput:\n\n```yaml\nOperations:\n - id: $whodrug_refs_valid\n operator: valid_whodrug_references\n```\n\nOutput:\n\n```\n[true, false, true, true]\n```\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -468,7 +617,10 @@ "markdownDescription": "\nReturns a mapping of variable names to the number of times that variable appears in a domain within the study.\n\nInput\n\n```\n{\n \"AE\": [\"STUDYID\", \"DOMAIN\", \"USUBJID\", \"AETERM\", \"AEENDTC\"],\n \"LB\": [\"STUDYID\", \"DOMAIN\", \"USUBJID\", \"LBTESTCD\", \"LBENDTC\"]\n}\n```\n\nOutput\n\n```\n{\n \"STUDYID\": 2,\n \"DOMAIN\": 2,\n \"USUBJID\": 2,\n \"--TERM\": 1,\n \"--TESTCD\": 1,\n \"--ENDTC\": 2\n}\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -478,7 +630,10 @@ "markdownDescription": "\nOperation operates only on original submission datasets regardless of rule type. 
Flags an error if a column exists is in the submission dataset currently being evaluated.\n\nRule Type: Domain Presence Check\n\n```yaml\nCheck:\n all:\n - name: $MIDS_EXISTS\n operator: equal_to\n value: true\n - name: TM\n operator: not_exists\nOperations:\n - id: $MIDS_EXISTS\n name: MIDS\n operator: variable_exists\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -488,7 +643,10 @@ "markdownDescription": "\nReturns true if a variable is missing from the dataset or if all values within the variable are null or empty string. This operation first checks if the target variable exists in the dataset, and if it does exist, evaluates whether all its values are null or empty.\nThe operation supports two sources via the `source` parameter:\n\n- **`submission`** : checks against the raw submission dataset\n- **`evaluation`** (default): checks against the evaluation dataset built based on the rule type\n\n```yaml" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -498,7 +656,10 @@ "markdownDescription": "\nReturns the set of variable names from the library for the given standard. 
This operation extracts all variable names across all domains in the specified standard's library metadata.\n\nInput:\n\nValidation Standard: sdtmig\nValidation Version: 3-4\n\n```yaml\n- operator: variable_names\n id: $all_variables\n```\n\nOutput:\n\n```\n[\"STUDYID\", \"DOMAIN\", \"USUBJID\", \"SUBJID\", \"RFSTDTC\", \"RFENDTC\", \"SITEID\", \"AGE\", \"AGEU\", \"SEX\", \"RACE\", \"ETHNIC\", \"ARMCD\", \"ARM\", \"ACTARMCD\", \"ACTARM\", \"COUNTRY\", \"DMDTC\", \"DMDY\", \"AETERM\", \"AEDECOD\", \"AECAT\", \"AESCAT\", \"AEPRESP\", \"AEBODSYS\", \"AEBDSYCD\", \"AESOC\", \"AESOCCD\", \"AELLT\", \"AELLTCD\", \"AEHLT\", \"AEHLTCD\", \"AEHLGT\", \"AEHLGTCD\", \"AEPTCD\", \"AESTDTC\", \"AEENDTC\", \"AESTDY\", \"AEENDY\", \"AEDUR\", \"AESER\", \"AESEV\", \"AEACN\", \"AEREL\", \"AEOUT\", \"AESCAN\", \"AESCONG\", \"AESDISAB\", \"AESDTH\", \"AESHOSP\", \"AESLIFE\", \"AESOD\", \"AECONTRT\", \"AETOXGR\", \"LBTESTCD\", \"LBTEST\", \"LBCAT\", \"LBSCAT\", \"LBSPEC\", \"LBMETHOD\", \"LBORRES\", \"LBORRESU\", \"LBORNRLO\", \"LBORNRHI\", \"LBSTRESC\", \"LBSTRESN\", \"LBSTRESU\", \"LBSTNRLO\", \"LBSTNRHI\", \"LBNRIND\", \"LBNAM\", \"LBSPEC\", \"LBANTREG\", \"LBFAST\", \"LBDRVFL\", \"LBTOX\", \"LBTOXGR\", \"LBSTDTC\", \"LBENDTC\", \"LBSTDY\", \"LBENDY\", \"LBTPT\", \"LBTPTNUM\", \"LBELTM\", \"LBTPTREF\", \"LBRFTDTC\", \"...\"]\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -508,7 +669,11 @@ "markdownDescription": "\nGiven a variable name, returns a mapping of variable values to the number of times that value appears in the variable within all datasets in the study.\n" } }, - "required": ["id", "operator", "name"], + "required": [ + "id", + "operator", + "name" + ], "type": "object" }, { @@ -518,7 +683,10 @@ "markdownDescription": "\nDetermines whether the values are valid and in the correct hierarchical structure in the following variables:\n\n- --DECOD\n- --CLAS\n- --CLASCD\n\nInput:\n\n```yaml\nOperations:\n - id: 
$valid_whodrug_codes\n operator: whodrug_code_hierarchy\n```\n" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" }, { @@ -528,7 +696,12 @@ "markdownDescription": "\nValidates XHTML fragments in the target column against the specified namespace.\n\n```yaml\nOperations:\n - id: $xhtml_errors\n name: text\n operator: get_xhtml_errors\n namespace: http://www.cdisc.org/ns/usdm/xhtml/v1.0\n```\n\nNote that a local XSD file is required for validation. The file must be stored in the folder indicated by the value of the `LOCAL_XSD_FILE_DIR` default file path and the mapping between the namespace and the local XSD file's `sub-folder/name` must be included in the value of the `LOCAL_XSD_FILE_MAP` default file path.\n" } }, - "required": ["id", "operator", "name", "namespace"], + "required": [ + "id", + "operator", + "name", + "namespace" + ], "type": "object" } ], @@ -591,7 +764,13 @@ "type": "string" }, "dictionary_term_type": { - "enum": ["LLT", "PT", "HLT", "HLGT", "SOC"] + "enum": [ + "LLT", + "PT", + "HLT", + "HLGT", + "SOC" + ] }, "domain": { "anyOf": [ @@ -604,7 +783,9 @@ ] }, "external_dictionary_type": { - "enum": ["meddra"] + "enum": [ + "meddra" + ] }, "filter": { "type": "object" @@ -649,7 +830,10 @@ }, "level": { "type": "string", - "enum": ["codelist", "term"] + "enum": [ + "codelist", + "term" + ] }, "map": { "type": "array", @@ -660,7 +844,9 @@ "type": "string" } }, - "required": ["output"] + "required": [ + "output" + ] } }, "name": { @@ -677,7 +863,11 @@ }, "returntype": { "type": "string", - "enum": ["code", "value", "pref_term"] + "enum": [ + "code", + "value", + "pref_term" + ] }, "source": { "type": "string" @@ -701,6 +891,9 @@ "type": "string" } }, - "required": ["id", "operator"], + "required": [ + "id", + "operator" + ], "type": "object" } diff --git a/resources/schema/rule-merged/Operator.json b/resources/schema/rule-merged/Operator.json index 80c62ac56..a748942e0 100644 --- 
a/resources/schema/rule-merged/Operator.json +++ b/resources/schema/rule-merged/Operator.json @@ -9,7 +9,9 @@ "const": "additional_columns_empty" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -18,7 +20,9 @@ "const": "additional_columns_not_empty" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -28,7 +32,10 @@ "markdownDescription": "\nWill return True if the value in `value` is contained within the collection/iterable in the target column, or if there's an exact match for non-iterable data.\n\nThe operator checks if every value in a column is a list or set. If yes, it compares row-by-row. If any value is blank or a different type (like a string or number), it compares each value against the entire column instead.\n\nExample:\n\n```yaml\n- name: \"--TOXGR\" # Column containing lists like ['GRADE', 'SEVERITY', 'ONSET']\n operator: \"contains\"\n value: \"GRADE\" # True if 'GRADE' is an element in the list\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -38,7 +45,9 @@ "markdownDescription": "\nTrue if all values in `value` are contained within the variable `name`.\n\n> All of ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment') in ACTARM\n\n```yaml\n- name: \"ACTARM\"\n operator: \"contains_all\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n - \"Not Treated\"\n - \"Unplanned Treatment\"\n```\n\nThe operator also supports lists:\n\n```yaml\n- name: \"$spec_codelist\"\n operator: \"contains_all\"\n value: \"$ppspec_value\"\n```\n\nWhere:\n\n| $spec_codelist | $ppspec_value |\n| :-------------------------- | :----------------: |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE1\", \"CODE2\"] |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE2\", \"CODE3\"] |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE1\"] |\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": 
"object" }, { @@ -48,7 +57,10 @@ "markdownDescription": "\nTrue if the value in `value` is contained within the collection/iterable in the target column, performing case-insensitive comparison.\n\nExample:\n\n```yaml\n- name: \"--TOXGR\" # Column containing lists like ['Grade', 'Severity', 'Onset']\n operator: \"contains_case_insensitive\"\n value: \"grade\" # True if 'Grade'/'GRADE'/'grade' exists in the list\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -58,7 +70,10 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified.\n\nThe `date_component` parameter accepts: `\"year\"`, `\"month\"`, `\"day\"`, `\"hour\"`, `\"minute\"`, `\"second\"`, `\"microsecond\"`, or `\"auto\"`.\n\nWhen `date_component: \"auto\"` is used, the operator automatically detects the precision of both dates and compares at the common (less precise) level.\n\n```yaml\n- name: \"AESTDTC\"\n operator: \"date_equal_to\"\n value: \"RFSTDTC\"\n date_component: \"auto\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -68,7 +83,10 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. Supports `date_component: \"auto\"`.\n\n> Year part of BRTHDTC > 2021\n\n```yaml\n- name: \"BRTHDTC\"\n operator: \"date_greater_than\"\n date_component: \"year\"\n value: \"2021\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -78,7 +96,10 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. 
Supports `date_component: \"auto\"`.\n\n> Year part of BRTHDTC >= 2021\n\n```yaml\n- name: \"BRTHDTC\"\n operator: \"date_greater_than_or_equal_to\"\n date_component: \"year\"\n value: \"2021\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -88,7 +109,10 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. Supports `date_component: \"auto\"`.\n\n> AEENDTC < AESTDTC\n\n```yaml\n- name: \"AEENDTC\"\n operator: \"date_less_than\"\n value: \"AESTDTC\"\n```\n\n> SSDTC < all DS.DSSTDTC when SSSTRESC = \"DEAD\"\n\n```yaml\nCheck:\n all:\n - name: \"SSSTRESC\"\n operator: \"equal_to\"\n value: \"DEAD\"\n - name: \"SSDTC\"\n operator: \"date_less_than\"\n value: \"$max_ds_dsstdtc\"\nOperations:\n - operator: \"max_date\"\n domain: \"DS\"\n name: \"DSSTDTC\"\n id: \"$max_ds_dsstdtc\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -98,7 +122,10 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. Supports `date_component: \"auto\"`.\n\n> AEENDTC <= AESTDTC\n\n```yaml\n- name: \"AEENDTC\"\n operator: \"date_less_than_or_equal_to\"\n value: \"AESTDTC\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -108,7 +135,10 @@ "markdownDescription": "\nComplement of `date_equal_to`\n\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. Supports `date_component: \"auto\"`.\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -118,7 +148,10 @@ "markdownDescription": "\nComplement of `contains`. 
Returns True when the value is NOT contained within the target collection.\n\n```yaml\n- name: \"--TOXGR\"\n operator: \"does_not_contain\"\n value: \"GRADE\" # True if 'GRADE' is NOT an element in the list\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -128,7 +161,10 @@ "markdownDescription": "\nComplement of `contains_case_insensitive`. Returns True when the value is NOT contained within the target collection (case-insensitive).\n\nExample:\n\n```yaml\n- name: \"--TOXGR\"\n operator: \"does_not_contain_case_insensitive\"\n value: \"grade\" # True if no case variation of 'grade' exists in the list\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -138,7 +174,11 @@ "markdownDescription": "\nComplement of `equals_string_part`\n" } }, - "required": ["operator", "value", "regex"], + "required": [ + "operator", + "value", + "regex" + ], "type": "object" }, { @@ -148,7 +188,12 @@ "markdownDescription": "\nComplement of `has_next_corresponding_record`\n" } }, - "required": ["operator", "ordering", "value", "within"], + "required": [ + "operator", + "ordering", + "value", + "within" + ], "type": "object" }, { @@ -158,7 +203,9 @@ "markdownDescription": "\nValue presence\n\n> --OCCUR = null\n\n```yaml\n- name: --OCCUR\n operator: empty\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -168,7 +215,10 @@ "markdownDescription": "\n> SEENDTC is not empty when it is not the last record, grouped by USUBJID, sorted by SESTDTC\n\n```yaml\n- name: SEENDTC\n operator: empty_within_except_last_row\n ordering: SESTDTC\n value: USUBJID\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -178,7 +228,10 @@ "markdownDescription": "\nSubstring matching\n\n> DOMAIN ending with 'FOOBAR'\n\n```yaml\n- name: \"DOMAIN\"\n operator: 
\"ends_with\"\n value: \"FOOBAR\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -197,7 +250,10 @@ "type": "boolean" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -216,7 +272,10 @@ "type": "boolean" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -226,7 +285,11 @@ "markdownDescription": "\nChecks that the values in the target column equal the result of parsing the value in the comparison column with a regex\n\n> RDOMAIN equals characters 5 and 6 of SUPP dataset name\n\n```yaml\n- name: RDOMAIN\n operator: equals_string_part\n value: dataset_name\n regex: \".{4}(..).*\"\n```\n" } }, - "required": ["operator", "value", "regex"], + "required": [ + "operator", + "value", + "regex" + ], "type": "object" }, { @@ -236,7 +299,9 @@ "markdownDescription": "\nTrue if the column exists in the current dataframe. 
(Works for datasets and variables)\n\n> --OCCUR is present in dataset\n\n```yaml\n- name: \"--OCCUR\"\n operator: \"exists\"\n```\n\n> Domain SJ exists\n\n```yaml\nRule Type: Domain Presence Check\nCheck:\n all:\n - name: \"SJ\"\n operator: \"exists\"\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -246,7 +311,10 @@ "markdownDescription": "\nValue comparison\n\n> TSVAL > 0\n\n```yaml\n- name: TSVAL\n operator: greater_than\n value: 0\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -256,7 +324,10 @@ "markdownDescription": "\nValue comparison\n\n> TSVAL >= 0\n\n```yaml\n- name: TSVAL\n operator: greater_than_or_equal_to\n value: 1\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -266,7 +337,9 @@ "markdownDescription": "\nComplement of `has_same_values`\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -276,7 +349,9 @@ "markdownDescription": "\nLength comparison\n\n> Check whether variable values has equal length of another variable.\n\n```yaml\n- name: SEENDTC\n operator: has_equal_length\n value: SESTDTC\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -286,7 +361,12 @@ "markdownDescription": "\nEnsures that a value of a variable `name` in one record is equal to the value of another variable `value` in the next corresponding record. The rows are grouped by `within` and ordered by `ordering`.\n\n> SEENDTC is equal to the SESTDTC of the next record within a USUBJID. 
Ordered by SESEQ\n\n```yaml\n- name: SEENDTC\n operator: has_next_corresponding_record\n value: SESTDTC\n within: USUBJID\n ordering: SESEQ\n```\n" } }, - "required": ["operator", "ordering", "value", "within"], + "required": [ + "operator", + "ordering", + "value", + "within" + ], "type": "object" }, { @@ -296,7 +376,9 @@ "markdownDescription": "\nComplement of `has_equal_length`\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -306,7 +388,9 @@ "markdownDescription": "\nTrue if all values in `name` are the same\n\n> Condition: MHCAT ^= null\n> Rule: MHCAT ^= the same value for all records\n\n```yaml\nCheck:\n all:\n - name: MHCAT\n operator: non_empty\n - name: MHCAT\n operator: has_same_values\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -316,7 +400,10 @@ "markdownDescription": "\nDuration ISO-8601 check, returns True if a duration is not in ISO-8601 format. The negative parameter must be specified to indicate if negative durations are either allowed (True) or disallowed (False)\n\n> DURVAR is invalid (negative durations disallowed)\n\n```yaml\n- name: \"DURVAR\"\n operator: \"invalid_duration\"\n negative: False\n```\n" } }, - "required": ["operator", "negative"], + "required": [ + "operator", + "negative" + ], "type": "object" }, { @@ -326,7 +413,9 @@ "markdownDescription": "\nThe operator performs date validation against complete and partial dates with uncertainty in the following order:\n\n1. Attempts to parse using [dateutil.parser.isoparse()](https://dateutil.readthedocs.io/en/stable/parser.html)\n2. If parsing fails and the string contains uncertainty indicators (`/`, `--`, `-:`), validates against an extended ISO 8601 dates regex pattern\n3. 
If parsing succeeds, dates are still validated against the regex pattern.\n\n```yaml\n- name: \"BRTHDTC\"\n operator: \"invalid_date\"\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -336,7 +425,9 @@ "markdownDescription": "\nDate check\n\n> DM.RFSTDTC = complete date\n\n```yaml\n- name: \"RFSTDTC\"\n operator: \"is_complete_date\"\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -346,7 +437,10 @@ "markdownDescription": "\nValue in `name` compared against a list in `value`. The list can have literal values or be a reference to a `$variable`.\n\nThis operator behaves similarly to `contains`. The key distinction: `contains` checks if comparator \u2208 target, while `is_contained_by` checks if target \u2208 comparator.\n\n> ACTARM in ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment')\n\n```yaml\n- name: \"ACTARM\"\n operator: \"is_contained_by\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n - \"Not Treated\"\n - \"Unplanned Treatment\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -356,7 +450,10 @@ "markdownDescription": "\nValue in `name` case insensitive compared against a list in `value`. 
The list can have literal values or be a reference to a `$variable`.\n\n> ACTARM in ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment')\n\n```yaml\n- name: \"ACTARM\"\n operator: \"is_contained_by_case_insensitive\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n - \"Not Treated\"\n - \"Unplanned Treatment\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -366,7 +463,9 @@ "markdownDescription": "\nComplement of `is_complete_date`\n\nDate check\n\n> DM.RFSTDTC ^= complete date\n\n```yaml\n- name: \"RFSTDTC\"\n operator: \"is_incomplete_date\"\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -376,7 +475,10 @@ "markdownDescription": "\nComplement of `is_contained_by`\n\n> ARM not in ('Screen Failure', 'Not Assigned')\n\n```yaml\n- name: \"ARM\"\n operator: \"is_not_contained_by\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -386,7 +488,10 @@ "markdownDescription": "\nComplement of `is_contained_by_case_insensitive`\n\n> ARM not in ('Screen Failure', 'Not Assigned')\n\n```yaml\n- name: \"ARM\"\n operator: \"is_not_contained_by_case_insensitive\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -396,7 +501,10 @@ "markdownDescription": "\nComplement of `is_ordered_by`\n" } }, - "required": ["operator", "order"], + "required": [ + "operator", + "order" + ], "type": "object" }, { @@ -405,7 +513,10 @@ "const": "is_not_ordered_set" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -415,7 +526,10 @@ "markdownDescription": "\nComplement of `is_unique_relationship`\n" } }, - "required": ["operator", "value"], + 
"required": [ + "operator", + "value" + ], "type": "object" }, { @@ -425,7 +539,9 @@ "markdownDescription": "\nComplement of `is_unique_set`.\n\n> --SEQ is not unique within DOMAIN, USUBJID, and --TESTCD\n\n```yaml\n- name: \"--SEQ\"\n operator: is_not_unique_set\n value:\n - \"DOMAIN\"\n - \"USUBJID\"\n - \"--TESTCD\"\n```\n\n> `name` can be a variable containing a list of columns and `value` does not need to be present\n\n```yaml\nRule Type: Dataset Contents Check against Define XML\nCheck:\n all:\n - name: define_dataset_key_sequence # contains list of dataset key columns\n operator: is_not_unique_set\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -435,7 +551,10 @@ "markdownDescription": "\nTrue if the dataset rows are ordered by the values within `name`, given the ordering specified by `order`\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n operator: is_ordered_by\n order: asc\n```\n" } }, - "required": ["operator", "order"], + "required": [ + "operator", + "order" + ], "type": "object" }, { @@ -445,7 +564,10 @@ "markdownDescription": "\nTrue if the dataset rows are in ascending order of the values within `name`, grouped by the values within `value`. Value can either be a single column or multiple.\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n operator: is_ordered_set\n value: USUBJID\n```\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n operator: is_ordered_set\n value:\n - USUBJID\n - \"--TESTCD\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -455,7 +577,10 @@ "markdownDescription": "\nRelationship Integrity Check looking for a 1-1 relationship between name and value. 
Ensures uniqueness of both name and value.\n\n> AETERM and AEDECOD has a 1-to-1 relationship\n\n```yaml\n- name: AETERM\n operator: is_unique_relationship\n value: AEDECOD\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -465,7 +590,10 @@ "markdownDescription": "\nChecks if a variable maintains consistent values within groups defined by one or more grouping variables. Groups records by specified value(s) and validates that the target variable maintains the same value within each unique combination of grouping variables. When inconsistency is detected within a group, the operator attempts to identify a majority value. If one value appears more frequently than all others, only the minority records (those not matching the majority value) are flagged. If no single majority exists \u2014 i.e., two or more values are tied for the highest frequency \u2014 all records in that group are flagged.\n\nSingle grouping variable - true if the values of BGSTRESU differ within USUBJID:\n\nIf a regex parameter is provided, it is applied to the values of the target variable before the consistency check. The first capture group of the regex is used as the normalized value for comparison. 
This can be useful when only part of the value should be considered during comparison (for example, comparing only the date portion of a datetime value).\n\n- regex is optional.\n- The pattern must include at least one capture group(or whole regex will be wrapped to capture group).\n- Only the first capture group is used for comparison.\n- If the pattern does not match a value, the original value is used.\n\n```yaml\n- name: \"BGSTRESU\"\n operator: is_inconsistent_across_dataset\n value: \"USUBJID\"\n```\n\nMultiple grouping variables - true if the values of --STRESU differ within each combination of --TESTCD, --CAT, --SCAT, --SPEC, and --METHOD:\n\n```yaml\n- name: \"--STRESU\"\n operator: is_inconsistent_across_dataset\n value:\n - \"--TESTCD\"\n - \"--CAT\"\n - \"--SCAT\"\n - \"--SPEC\"\n - \"--METHOD\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -475,7 +603,9 @@ "markdownDescription": "\nRelationship Integrity Check\n\n> --SEQ is unique within DOMAIN, USUBJID, and --TESTCD\n\n```yaml\n- name: \"--SEQ\"\n operator: is_unique_set\n value:\n - \"DOMAIN\"\n - \"USUBJID\"\n - \"--TESTCD\"\n```\n\n> `name` can be a variable containing a list of columns and `value` does not need to be present\n\n> The `regex` parameter allows you to extract portions of values using a regex pattern before checking uniqueness.\n\n> Compare date only (YYYY-MM-DD) for uniqueness\n\n```yaml\n- name: \"--REPNUM\"\n operator: is_not_unique_set\n value:\n - \"USUBJID\"\n - \"--TESTCD\"\n - \"$TIMING_VARIABLES\"\n regex: '^\\d{4}-\\d{2}-\\d{2}'\n```\n\n> Compare by first N characters of a string\n\n```yaml\n- name: \"ITEM_ID\"\n operator: is_not_unique_set\n value:\n - \"USUBJID\"\n - \"CATEGORY\"\n regex: \"^.{2}\"\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -485,7 +615,10 @@ "markdownDescription": "\nValue comparison\n\n> TSVAL < 1\n\n```yaml\n- name: TSVAL\n 
operator: less_than\n value: 1\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -495,7 +628,10 @@ "markdownDescription": "\nValue comparison\n\n> TSVAL <= 1\n\n```yaml\n- name: TSVAL\n operator: less_than_or_equal_to\n value: 1\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -505,7 +641,10 @@ "markdownDescription": "\nLength comparison\n\n> SETCD value length > 8\n\n```yaml\n- name: \"SETCD\"\n operator: \"longer_than\"\n value: 8\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -515,7 +654,10 @@ "markdownDescription": "\nLength comparison\n\n> TSVAL value length >= 201\n\n```yaml\n- name: \"TSVAL\"\n operator: \"longer_than_or_equal_to\"\n value: 201\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -525,7 +667,10 @@ "markdownDescription": "\nRegular Expression value matching\n\n- Determine if each string starts with a match of a regular expression. Refer to this pandas documentation: https://pandas.pydata.org/docs/reference/api/pandas.Series.str.match.html\n- To \"search\" for a regex within the entire text, prefix the regex with `.*` and do not use anchors `^` , `$`\n- To do a \"fullmatch\" of a regex with the entire text, suffix the regex with an anchor `$` and do not prefix the regex with `.*`\n- For syntax guide, refer to this Python documentation: [Regular Expression HOWTO](https://docs.python.org/3/howto/regex.html).\n- Suggestion for an on-line regular expression logic. 
tester: https://regex101.com, choose the Python dialect.\n- For regex token visualization, try https://www.debuggex.com.\n\n> --DOSTXT value is non-numeric\n\n```yaml\n- name: --DOSTXT\n operator: matches_regex\n value: ^\\d*\\.?\\d*$\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -535,7 +680,9 @@ "markdownDescription": "\nComplement of `empty`\n\n> --OCCUR ^= null\n\n```yaml\n- name: --OCCUR\n operator: non_empty\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -545,7 +692,10 @@ "markdownDescription": "\nComplement of `empty_within_except_last_row`\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -555,7 +705,9 @@ "markdownDescription": "\nComplement of `contains_all`\n\n> All of ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment') not in ACTARM\n\n```yaml\n- name: \"ACTARM\"\n operator: \"not_contains_all\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n - \"Not Treated\"\n - \"Unplanned Treatment\"\n```\n\nThe operator also supports lists:\n\n```yaml\n- name: \"$spec_codelist\"\n operator: \"not_contains_all\"\n value: \"$ppspec_value\"\n```\n\nWhere:\n\n| $spec_codelist | $ppspec_value |\n| :-------------------------- | :----------------: |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE1\", \"CODE2\"] |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE2\", \"CODE3\"] |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE1\"] |\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -574,7 +726,10 @@ "type": "boolean" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -593,7 +748,10 @@ "type": "boolean" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -603,7 +761,9 @@ "markdownDescription": 
"\nComplement of `exists`\n\n> AEOCCUR not present in dataset\n\n```yaml\n- name: \"AEOCCUR\"\n operator: \"not_exists\"\n```\n\n> Domain SJ does not exist\n\n```yaml\nRule Type: Domain Presence Check\nCheck:\n all:\n - name: \"SJ\"\n operator: \"not_exists\"\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -613,7 +773,10 @@ "markdownDescription": "\nComplement of `matches_regex`\n\n> --TESTCD <= 8 chars and contains only letters, numbers, and underscores and can not start with a number\n\n```yaml\n- name: --TESTCD\n operator: not_matches_regex\n value: ^[A-Z_][A-Z0-9_]{0,7}$\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -623,7 +786,11 @@ "markdownDescription": "\nComplement of `prefix_matches_regex`\n" } }, - "required": ["operator", "prefix", "value"], + "required": [ + "operator", + "prefix", + "value" + ], "type": "object" }, { @@ -633,7 +800,10 @@ "markdownDescription": "\nComplement of `present_on_multiple_rows_within`\n\n```yaml\n- operator: \"not_present_on_multiple_rows_within\"\n name: \"RELID\"\n value: 4 (optional)\n within: \"USUBJID\"\n```\n" } }, - "required": ["operator", "within"], + "required": [ + "operator", + "within" + ], "type": "object" }, { @@ -643,7 +813,11 @@ "markdownDescription": "\nComplement of `suffix_matches_regex`\n\n> QNAM does not end with numbers\n\n```yaml\n- name: \"QNAM\"\n operator: \"not_suffix_matches_regex\"\n suffix: 2\n value: \"\\d\\d\"\n```\n" } }, - "required": ["operator", "suffix", "value"], + "required": [ + "operator", + "suffix", + "value" + ], "type": "object" }, { @@ -653,7 +827,11 @@ "markdownDescription": "\nTrue if the `prefix` number of characters beginning a string in `name` match one of the strings in the list in `value`\n\n> Check if a variable's domain identifier exists in the study\n\n```yaml\n- name: variable_name\n operator: prefix_is_contained_by\n prefix: 2\n value: 
$study_domains\n```\n" } }, - "required": ["operator", "prefix", "value"], + "required": [ + "operator", + "prefix", + "value" + ], "type": "object" }, { @@ -663,7 +841,11 @@ "markdownDescription": "\nTrue if the `prefix` number of characters beginning a string in `name` match the string in `value`\n\n```yaml\n- name: dataset_name\n operator: prefix_equal_to\n prefix: 2\n value: DOMAIN\n```\n" } }, - "required": ["operator", "prefix", "value"], + "required": [ + "operator", + "prefix", + "value" + ], "type": "object" }, { @@ -673,7 +855,11 @@ "markdownDescription": "\nComplement of `prefix_is_contained_by`\n" } }, - "required": ["operator", "prefix", "value"], + "required": [ + "operator", + "prefix", + "value" + ], "type": "object" }, { @@ -683,7 +869,11 @@ "markdownDescription": "\nTrue if the `prefix` number of characters beginning a string in `name` match a regular expression in `value`\n\n```yaml\n- name: DOMAIN\n operator: prefix_matches_regex\n prefix: 2\n value: (AP|ap)\n```\n" } }, - "required": ["operator", "prefix", "value"], + "required": [ + "operator", + "prefix", + "value" + ], "type": "object" }, { @@ -693,7 +883,11 @@ "markdownDescription": "\nComplement of `prefix_equal_to`\n" } }, - "required": ["operator", "prefix", "value"], + "required": [ + "operator", + "prefix", + "value" + ], "type": "object" }, { @@ -703,7 +897,10 @@ "markdownDescription": "\nTrue if the same value of `name` is present on multiple rows, grouped by `within`. A maximum allowed number of occurrences can be specified in the value attribute. In this instance the value: 4 means that an error will be flagged if the same value appears more than 4 times within a USUBJID. 
By default the operator will flag any time a value appears more than once.\n\n```yaml\n- operator: \"present_on_multiple_rows_within\"\n name: \"RELID\"\n value: 4 (optional)\n within: \"USUBJID\"\n```\n" } }, - "required": ["operator", "within"], + "required": [ + "operator", + "within" + ], "type": "object" }, { @@ -713,7 +910,10 @@ "markdownDescription": "\nWill raise an issue if at least one of the values in `name` is the same as one of the values in `value`. See [shares_no_elements_with](#shares_no_elements_with).\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -723,7 +923,10 @@ "markdownDescription": "\nWill raise an issue if exactly one of the values in `name` is the same as one of the values in `value`. See [shares_no_elements_with](#shares_no_elements_with).\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -733,7 +936,10 @@ "markdownDescription": "\nWill raise an issue if the values in `name` do not share any of the values in `value`\n\n> Check if $dataset_variables shares no elements with $timing_variables\n\n```yaml\nRule Type: Dataset Metadata Check # One record per dataset\nCheck:\n - all:\n name: $dataset_variables\n operator: shares_no_elements_with\n value: $timing_variables\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -743,7 +949,10 @@ "markdownDescription": "\nLength comparison\n\n> SETCD value length < 9\n\n```yaml\n- name: \"SETCD\"\n operator: \"shorter_than\"\n value: 9\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -753,7 +962,10 @@ "markdownDescription": "\nLength comparison\n\n> TSVAL value length <= 200\n\n```yaml\n- name: \"TSVAL\"\n operator: \"shorter_than_or_equal_to\"\n value: 201\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", 
+ "value" + ], "type": "object" }, { @@ -763,7 +975,9 @@ "markdownDescription": "\nSplits a string by a separator and checks if both parts have equal length. Generic operator for validating paired data formats where both parts must have the same level of detail or precision.\n\nParameters:\n\n- `separator`: The delimiter to split on (default: \"/\")\n\n> Check that string parts separated by a delimiter have equal length\n\n```yaml\n- name: --DTC\n operator: split_parts_have_equal_length\n separator: \"/\"\n```\n\nUse cases:\n\n- **Date/time intervals**: `2003-12-15T10:00/2003-12-15T10:30` \u2192 True (both 16 characters)\n- **Date ranges**: `2003-12-01/2003-12-10` \u2192 True (both 10 characters)\n- **Version ranges**: `1.2.3/2.0.0` \u2192 True (both 5 characters)\n- **Product codes**: `ABC-123/XYZ-789` \u2192 True (both 7 characters)\n\nInvalid example:\n\n- `2003-12-15T10:00/2003-12-15T10:30:15` \u2192 False (16 vs 19 characters - different precision)\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -773,7 +987,9 @@ "markdownDescription": "\nComplement of `split_parts_have_equal_length`. 
Returns True when parts have unequal lengths (indicates a violation).\n\n```yaml\n- name: --DTC\n operator: split_parts_have_unequal_length\n separator: \"/\"\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -783,7 +999,10 @@ "markdownDescription": "\nSubstring matching\n\n> DOMAIN beginning with 'AP'\n\n```yaml\n- name: \"DOMAIN\"\n operator: \"starts_with\"\n value: \"AP\"\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -793,7 +1012,11 @@ "markdownDescription": "\nTrue if the `suffix` number of characters ending a string in `name` match the string in `value`\n\n```yaml\n- name: dataset_name\n operator: suffix_equal_to\n prefix: 2\n value: DOMAIN\n```\n" } }, - "required": ["operator", "suffix", "value"], + "required": [ + "operator", + "suffix", + "value" + ], "type": "object" }, { @@ -803,7 +1026,11 @@ "markdownDescription": "\nTrue if the `suffix` number of characters ending a string in `name` match one of the strings in the list in `value`\n\n> Check if a supp's parent domain exists in the study\n\n```yaml\n- name: dataset_name\n operator: suffix_is_contained_by\n prefix: 2\n value: $study_domains\n```\n" } }, - "required": ["operator", "suffix", "value"], + "required": [ + "operator", + "suffix", + "value" + ], "type": "object" }, { @@ -813,7 +1040,11 @@ "markdownDescription": "\nComplement of `suffix_is_contained_by`\n" } }, - "required": ["operator", "suffix", "value"], + "required": [ + "operator", + "suffix", + "value" + ], "type": "object" }, { @@ -823,7 +1054,11 @@ "markdownDescription": "\nTrue if the `suffix` number of characters ending a string in `name` match a regular expression in `value`\n\n> QNAM ends with numbers\n\n```yaml\n- name: \"QNAM\"\n operator: \"suffix_matches_regex\"\n suffix: 2\n value: \"\\d\\d\"\n```\n" } }, - "required": ["operator", "suffix", "value"], + "required": [ + "operator", + "suffix", + "value" + ], 
"type": "object" }, { @@ -833,7 +1068,11 @@ "markdownDescription": "\nComplement of `suffix_equal_to`\n" } }, - "required": ["operator", "suffix", "value"], + "required": [ + "operator", + "suffix", + "value" + ], "type": "object" }, { @@ -843,17 +1082,25 @@ "markdownDescription": "\nComplement of `target_is_sorted_by`\n" } }, - "required": ["operator", "value", "within"], + "required": [ + "operator", + "value", + "within" + ], "type": "object" }, { "properties": { "operator": { "const": "target_is_sorted_by", - "markdownDescription": "\nTrue if the values in name are ordered according to the values specified by value in ascending/descending order, grouped by the values in within. Each value entry requires a variable name, a sort_order of asc or desc, and an optional null_position of first or last (defaults to last) which controls where null/empty comparator values are placed in the expected ordering. Within accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 YYYY-MM-DD format. Date value(s) with different precisions that overlap (e.g. 2005-10, 2005-10-3 and 2005-10-08) are all flagged as not sorted as their order cannot be inferred.\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n within:\n - USUBJID\n - MIDSTYPE\n operator: target_is_sorted_by\n value:\n - name: --STDTC\n sort_order: asc\n null_position: last\n```\n" + "markdownDescription": "\nTrue if the values in name are ordered according to the values specified by value\nin ascending/descending order, grouped by the values in within. Each value entry\nrequires a variable name, a sort_order of asc or desc, and an optional\nnull_position of first or last (defaults to last) which controls where null/empty\ncomparator values are placed in the expected ordering. Within accepts either a\nsingle column or an ordered list of columns. Columns can be either number or Char\nDates in ISO8601 YYYY-MM-DD format. 
Date value(s) with different precisions that\noverlap (e.g. 2005-10, 2005-10-3 and 2005-10-08) are all flagged as not sorted as\ntheir order cannot be inferred.\n\nOptionally supports a `regex` parameter that extracts a portion of the target\nvalue for sorting. The regex must contain at least one capturing group. The first\ncaptured group is extracted and converted to numeric if possible, allowing proper\nsorting of sequence numbers (e.g., \"MIDS1\", \"MIDS2\", ..., \"MIDS10\" with regex\n`.*?(\\\\d+)$`). This is particularly useful for variables that end with sequence\nnumbers that may or may not be zero-padded.\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n within:\n - USUBJID\n - MIDSTYPE\n operator: target_is_sorted_by\n value:\n - name: --STDTC\n sort_order: asc\n null_position: last\n```\n\nExample with regex for extracting sequence numbers:\n\n```yaml\nCheck:\n all:\n - name: MIDS\n operator: target_is_sorted_by\n regex: \".*?(\\\\d+)$\" # Extract trailing digits, convert to numeric\n value:\n - name: SMSTDTC\n sort_order: asc\n within:\n - USUBJID\n - MIDSTYPE\n```\n" } }, - "required": ["operator", "value", "within"], + "required": [ + "operator", + "value", + "within" + ], "type": "object" }, { @@ -863,7 +1110,10 @@ "markdownDescription": "\nComplement of `value_has_multiple_references`\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -873,7 +1123,10 @@ "markdownDescription": "\nTrue if the value in `name` has more than one count in the dictionary defined in `value`\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -883,7 +1136,10 @@ "markdownDescription": "\nChecks for inconsistencies in enumerated columns of a DataFrame. Starting with the smallest/largest enumeration of the given variable, returns True if VARIABLE(N+1) is populated but VARIABLE(N) is not populated. Repeats for all variables belonging to the enumeration. 
Note that the initial variable will not have an index (VARIABLE) and the next enumerated variable has index 1 (VARIABLE1).\n\nex: Check if there are inconsistencies in the TSVAL columns (TSVAL, TSVAL1, TSVAL2, etc.)\n\n```yaml\nCheck:\n all:\n - name: \"TSVAL\"\n operator: \"inconsistent_enumerated_columns\"\n```\n" } }, - "required": ["operator", "name"], + "required": [ + "operator", + "name" + ], "type": "object" }, { @@ -893,7 +1149,10 @@ "markdownDescription": "\nChecks if elements in the target list appear in the same relative order in the comparator list.\n\n> Check if dataset column order is a correctly ordered subset of library column order\n\n```yaml\n- name: $column_order_from_dataset\n operator: is_ordered_subset_of\n value: $column_order_from_library\n```\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -903,7 +1162,10 @@ "markdownDescription": "\nComplement of `is_ordered_subset_of`\n" } }, - "required": ["operator", "value"], + "required": [ + "operator", + "value" + ], "type": "object" }, { @@ -913,7 +1175,9 @@ "markdownDescription": "\nValidates that variable labels follow proper title case formatting rules using the titlecase PyPi library. Title case capitalizes the first word and all major words, while keeping articles (a, an, the), conjunctions (and, but, or), and prepositions (in, of, for) in lowercase unless they are the first word. \nNOTE: The titlecase library may produce false positives or false negatives in syntactic edge cases (e.g. hyphenated words, slash-separated terms, uncommon prepositions).\n\n> Check that AELABEL values are in proper title case\n\n```yaml\n- name: AELABEL\n operator: is_title_case\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" }, { @@ -923,13 +1187,18 @@ "markdownDescription": "\nComplement of `is_title_case`. 
Returns True when values are NOT in proper title case.\n\n> Flag AELABEL values that violate title case rules\n\n```yaml\n- name: AELABEL\n operator: is_not_title_case\n```\n" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" } ], "properties": { "comparator": { - "type": ["number", "string"] + "type": [ + "number", + "string" + ] }, "context": { "type": "string" @@ -967,18 +1236,27 @@ "type": "boolean" }, "codelistcheck": { - "enum": ["code", "value"], + "enum": [ + "code", + "value" + ], "type": "string" }, "codelistlevel": { - "enum": ["term", "codelist"], + "enum": [ + "term", + "codelist" + ], "type": "string" }, "operator": { "type": "string" }, "order": { - "enum": ["asc", "dsc"], + "enum": [ + "asc", + "dsc" + ], "type": "string" }, "ordering": { @@ -996,17 +1274,25 @@ "value": { "oneOf": [ { - "type": ["boolean", "number", "string"] + "type": [ + "boolean", + "number", + "string" + ] }, { "items": { - "type": ["number"] + "type": [ + "number" + ] }, "type": "array" }, { "items": { - "type": ["string"] + "type": [ + "string" + ] }, "type": "array" }, @@ -1017,7 +1303,10 @@ "$ref": "Operator.json#/properties/name" }, "null_position": { - "enum": ["first", "last"], + "enum": [ + "first", + "last" + ], "type": "string" }, "order": { @@ -1060,6 +1349,8 @@ "type": "string" } }, - "required": ["operator"], + "required": [ + "operator" + ], "type": "object" } diff --git a/resources/schema/rule-merged/Organization_CDISC.json b/resources/schema/rule-merged/Organization_CDISC.json index db1041921..9aaef8a76 100644 --- a/resources/schema/rule-merged/Organization_CDISC.json +++ b/resources/schema/rule-merged/Organization_CDISC.json @@ -22,7 +22,9 @@ "const": "Failure" } }, - "required": ["Type"], + "required": [ + "Type" + ], "type": "object" }, "Origin": { @@ -38,7 +40,9 @@ "type": "object" }, "Version": { - "enum": ["5.0"] + "enum": [ + "5.0" + ] } }, "type": "object" @@ -47,7 +51,12 @@ "type": "array" }, "Version": { - "enum": 
["1.0", "1.1", "1.2", "1.3"] + "enum": [ + "1.0", + "1.1", + "1.2", + "1.3" + ] } }, "type": "object" @@ -66,7 +75,9 @@ "const": "Success" } }, - "required": ["Type"], + "required": [ + "Type" + ], "type": "object" }, "Origin": { @@ -79,13 +90,19 @@ "type": "string" }, "Version": { - "enum": ["1", "2", "3"] + "enum": [ + "1", + "2", + "3" + ] } }, "type": "object" }, "Version": { - "enum": ["2.0"] + "enum": [ + "2.0" + ] } }, "type": "object" @@ -94,7 +111,11 @@ "type": "array" }, "Version": { - "enum": ["3.2", "3.3", "3.4"] + "enum": [ + "3.2", + "3.3", + "3.4" + ] } }, "type": "object" @@ -113,7 +134,9 @@ "const": "Success" } }, - "required": ["Type"], + "required": [ + "Type" + ], "type": "object" }, "Origin": { @@ -129,7 +152,9 @@ "type": "object" }, "Version": { - "enum": ["5.0"] + "enum": [ + "5.0" + ] } }, "type": "object" @@ -138,7 +163,11 @@ "type": "array" }, "Version": { - "enum": ["3.0", "3.1", "3.1.1"] + "enum": [ + "3.0", + "3.1", + "3.1.1" + ] } }, "type": "object" @@ -157,7 +186,9 @@ "const": "Success" } }, - "required": ["Type"], + "required": [ + "Type" + ], "type": "object" }, "Origin": { @@ -173,7 +204,9 @@ "type": "object" }, "Version": { - "enum": ["5.0"] + "enum": [ + "5.0" + ] } }, "type": "object" @@ -182,7 +215,10 @@ "type": "array" }, "Version": { - "enum": ["1.1", "1.2"] + "enum": [ + "1.1", + "1.2" + ] } }, "type": "object" @@ -201,7 +237,9 @@ "const": "Success" } }, - "required": ["Type"], + "required": [ + "Type" + ], "type": "object" }, "Origin": { @@ -217,7 +255,9 @@ "type": "object" }, "Version": { - "enum": ["5.0"] + "enum": [ + "5.0" + ] } }, "type": "object" @@ -226,7 +266,9 @@ "type": "array" }, "Version": { - "enum": ["1.0"] + "enum": [ + "1.0" + ] } }, "type": "object" @@ -245,7 +287,9 @@ "const": "Success" } }, - "required": ["Type"], + "required": [ + "Type" + ], "type": "object" }, "Origin": { @@ -261,7 +305,9 @@ "type": "object" }, "Version": { - "enum": ["1.0"] + "enum": [ + "1.0" + ] } }, "type": "object" @@ -270,13 
+316,24 @@ "type": "array" }, "Version": { - "enum": ["1.0"] + "enum": [ + "1.0" + ] }, "Substandard": { - "enum": ["SDTM", "SEND", "ADaM", "CDASH"] + "enum": [ + "SDTM", + "SEND", + "ADaM", + "CDASH" + ] } }, - "required": ["Name", "Version", "Substandard"], + "required": [ + "Name", + "Version", + "Substandard" + ], "type": "object" }, { @@ -297,13 +354,17 @@ "type": "string" }, "Version": { - "enum": ["1"] + "enum": [ + "1" + ] } }, "type": "object" }, "Version": { - "enum": ["1.0"] + "enum": [ + "1.0" + ] } }, "type": "object" @@ -312,7 +373,10 @@ "type": "array" }, "Version": { - "enum": ["3.0", "4.0"] + "enum": [ + "3.0", + "4.0" + ] } }, "type": "object" diff --git a/resources/schema/rule-merged/Organization_Custom.json b/resources/schema/rule-merged/Organization_Custom.json index bf5bd7276..c5f591394 100644 --- a/resources/schema/rule-merged/Organization_Custom.json +++ b/resources/schema/rule-merged/Organization_Custom.json @@ -9,7 +9,10 @@ "type": "string", "description": "Name of your custom organization", "not": { - "enum": ["CDISC", "FDA"] + "enum": [ + "CDISC", + "FDA" + ] } }, "Standards": { @@ -45,7 +48,9 @@ "description": "Version of the rule" } }, - "required": ["Id"], + "required": [ + "Id" + ], "type": "object" }, "Version": { @@ -55,7 +60,10 @@ "Criteria": { "properties": { "Type": { - "enum": ["Failure", "Success"], + "enum": [ + "Failure", + "Success" + ], "type": "string" }, "Plain Language Expression": { @@ -70,30 +78,46 @@ "type": "string" } }, - "required": ["Rule"], + "required": [ + "Rule" + ], "type": "object" } }, - "required": ["Type"], + "required": [ + "Type" + ], "anyOf": [ { - "required": ["Logical Expression"] + "required": [ + "Logical Expression" + ] }, { - "required": ["Plain Language Expression"] + "required": [ + "Plain Language Expression" + ] } ], "type": "object" } }, - "required": ["Origin", "Rule Identifier", "Version"], + "required": [ + "Origin", + "Rule Identifier", + "Version" + ], "type": "object" }, "minItems": 
1, "type": "array" } }, - "required": ["Name", "References", "Version"], + "required": [ + "Name", + "References", + "Version" + ], "type": "object" }, "minItems": 1, @@ -141,7 +165,10 @@ }, "OutputType": { "type": "string", - "enum": ["Check", "Listing"], + "enum": [ + "Check", + "Listing" + ], "description": "Output type of the rule validation result" }, "Keywords": { @@ -155,7 +182,11 @@ "additionalProperties": true } }, - "required": ["Organization", "Standards", "Category"], + "required": [ + "Organization", + "Standards", + "Category" + ], "type": "object", "$defs": { "metadata": { diff --git a/resources/schema/rule-merged/Organization_FDA.json b/resources/schema/rule-merged/Organization_FDA.json index b0f7de783..94af54bc4 100644 --- a/resources/schema/rule-merged/Organization_FDA.json +++ b/resources/schema/rule-merged/Organization_FDA.json @@ -41,7 +41,10 @@ } } ], - "required": ["Document", "Cited Guidance"], + "required": [ + "Document", + "Cited Guidance" + ], "type": "object" }, "type": "array" @@ -52,7 +55,9 @@ "const": "Success" } }, - "required": ["Type"], + "required": [ + "Type" + ], "type": "object" }, "Origin": { @@ -68,7 +73,9 @@ "type": "object" }, "Version": { - "enum": ["1.5"] + "enum": [ + "1.5" + ] } }, "type": "object" @@ -84,7 +91,11 @@ "const": "SDTMIG" }, "Version": { - "enum": ["3.2", "3.3", "3.4"] + "enum": [ + "3.2", + "3.3", + "3.4" + ] } }, "type": "object" @@ -95,7 +106,11 @@ "const": "SENDIG" }, "Version": { - "enum": ["3.0", "3.1", "3.1.1"] + "enum": [ + "3.0", + "3.1", + "3.1.1" + ] } }, "type": "object" @@ -106,7 +121,9 @@ "const": "SENDIG-AR" }, "Version": { - "enum": ["1.0"] + "enum": [ + "1.0" + ] } }, "type": "object" @@ -117,7 +134,10 @@ "const": "SENDIG-DART" }, "Version": { - "enum": ["1.1", "1.2"] + "enum": [ + "1.1", + "1.2" + ] } }, "type": "object" @@ -128,7 +148,9 @@ "const": "SENDIG-GENETOX" }, "Version": { - "enum": ["1.0"] + "enum": [ + "1.0" + ] } }, "type": "object" diff --git 
a/resources/schema/rule/Operator.md b/resources/schema/rule/Operator.md index 31605684b..1f6b1078f 100644 --- a/resources/schema/rule/Operator.md +++ b/resources/schema/rule/Operator.md @@ -1127,9 +1127,22 @@ Complement of `is_ordered_by` ### target_is_sorted_by -True if the values in name are ordered according to the values specified by value in ascending/descending order, grouped by the values in within. Each value entry requires a variable name, a sort_order of asc or desc, and an optional null_position of first or last (defaults to last) which controls where null/empty comparator values are placed in the expected ordering. Within accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 YYYY-MM-DD format. Date value(s) with different precisions that overlap (e.g. 2005-10, 2005-10-3 and 2005-10-08) are all flagged as not sorted as their order cannot be inferred. - -Optionally supports a `regex` parameter that extracts a portion of the target value for sorting. The regex must contain at least one capturing group. The first captured group is extracted and converted to numeric if possible, allowing proper sorting of sequence numbers (e.g., "MIDS1", "MIDS2", ..., "MIDS10" with regex `.*?(\\d+)$`). This is particularly useful for variables that end with sequence numbers that may or may not be zero-padded. +True if the values in name are ordered according to the values specified by value +in ascending/descending order, grouped by the values in within. Each value entry +requires a variable name, a sort_order of asc or desc, and an optional +null_position of first or last (defaults to last) which controls where null/empty +comparator values are placed in the expected ordering. Within accepts either a +single column or an ordered list of columns. Columns can be either number or Char +Dates in ISO8601 YYYY-MM-DD format. Date value(s) with different precisions that +overlap (e.g. 
2005-10, 2005-10-3 and 2005-10-08) are all flagged as not sorted as +their order cannot be inferred. + +Optionally supports a `regex` parameter that extracts a portion of the target +value for sorting. The regex must contain at least one capturing group. The first +captured group is extracted and converted to numeric if possible, allowing proper +sorting of sequence numbers (e.g., "MIDS1", "MIDS2", ..., "MIDS10" with regex +`.*?(\\d+)$`). This is particularly useful for variables that end with sequence +numbers that may or may not be zero-padded. ```yaml Check: From b19cb471c3066783f5ee24d965d8662ba2e1c320 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 24 Apr 2026 09:51:51 +0000 Subject: [PATCH 4/7] Update merged schema files with markdown descriptions --- resources/schema/rule-merged/CORE-base.json | 142 ++--- resources/schema/rule-merged/Operations.json | 301 ++--------- resources/schema/rule-merged/Operator.json | 487 ++++-------------- .../rule-merged/Organization_CDISC.json | 112 +--- .../rule-merged/Organization_Custom.json | 53 +- .../schema/rule-merged/Organization_FDA.json | 38 +- 6 files changed, 227 insertions(+), 906 deletions(-) diff --git a/resources/schema/rule-merged/CORE-base.json b/resources/schema/rule-merged/CORE-base.json index 8d2fbe672..6fccb4afd 100644 --- a/resources/schema/rule-merged/CORE-base.json +++ b/resources/schema/rule-merged/CORE-base.json @@ -9,9 +9,7 @@ "$ref": "#/$defs/CheckItems" } }, - "required": [ - "all" - ], + "required": ["all"], "type": "object" }, { @@ -21,9 +19,7 @@ "$ref": "#/$defs/CheckItems" } }, - "required": [ - "any" - ], + "required": ["any"], "type": "object" }, { @@ -33,9 +29,7 @@ "$ref": "#/$defs/CheckItem" } }, - "required": [ - "not" - ], + "required": ["not"], "type": "object" } ] @@ -133,18 +127,13 @@ "$ref": "#/$defs/Domains" }, "include_split_datasets": { - "enum": [ - true - ] + "enum": [true] } }, "type": "object" }, "JoinType": { - "enum": [ - "inner", - "left" - ], + "enum": ["inner", 
"left"], "type": "string" }, "LeftRightKeys": { @@ -157,10 +146,7 @@ "$ref": "#/$defs/VariableName" } }, - "required": [ - "Left", - "Right" - ], + "required": ["Left", "Right"], "type": "object" }, "PascalCases": { @@ -245,10 +231,7 @@ "type": "string" } }, - "required": [ - "Document", - "Cited Guidance" - ], + "required": ["Document", "Cited Guidance"], "type": "object" }, "type": "array" @@ -257,14 +240,10 @@ "additionalProperties": false, "anyOf": [ { - "required": [ - "Logical Expression" - ] + "required": ["Logical Expression"] }, { - "required": [ - "Plain Language Expression" - ] + "required": ["Plain Language Expression"] } ], "properties": { @@ -278,25 +257,18 @@ "type": "string" } }, - "required": [ - "Rule" - ], + "required": ["Rule"], "type": "object" }, "Plain Language Expression": { "type": "string" }, "Type": { - "enum": [ - "Failure", - "Success" - ], + "enum": ["Failure", "Success"], "type": "string" } }, - "required": [ - "Type" - ], + "required": ["Type"], "type": "object" }, "Origin": { @@ -310,18 +282,11 @@ "type": "string" }, "Relationship": { - "enum": [ - "Predecessor", - "Related", - "Successor" - ], + "enum": ["Predecessor", "Related", "Successor"], "type": "string" } }, - "required": [ - "Id", - "Relationship" - ], + "required": ["Id", "Relationship"], "type": "object" }, "type": "array" @@ -339,9 +304,7 @@ "type": "string" } }, - "required": [ - "Id" - ], + "required": ["Id"], "type": "object" }, "Validator Rule Message": { @@ -351,11 +314,7 @@ "type": "string" } }, - "required": [ - "Origin", - "Rule Identifier", - "Version" - ], + "required": ["Origin", "Rule Identifier", "Version"], "type": "object" }, "minItems": 1, @@ -368,11 +327,7 @@ "type": "string" } }, - "required": [ - "Name", - "References", - "Version" - ], + "required": ["Name", "References", "Version"], "type": "object" }, "minItems": 1, @@ -395,10 +350,7 @@ "$ref": "Organization_Custom.json" } ], - "required": [ - "Organization", - "Standards" - ], + "required": 
["Organization", "Standards"], "type": "object" }, "minItems": 1, @@ -438,15 +390,10 @@ "const": "Published" } }, - "required": [ - "Id" - ] + "required": ["Id"] } ], - "required": [ - "Status", - "Version" - ], + "required": ["Status", "Version"], "type": "object" }, "Description": { @@ -492,9 +439,7 @@ "type": "string" } }, - "required": [ - "Name" - ], + "required": ["Name"], "type": "object" }, "minItems": 1, @@ -520,9 +465,7 @@ "type": "array" } }, - "required": [ - "Message" - ], + "required": ["Message"], "type": "object" }, "Rule Type": { @@ -540,9 +483,7 @@ "$ref": "#/$defs/Classes" } }, - "required": [ - "Include" - ], + "required": ["Include"], "type": "object" }, { @@ -552,9 +493,7 @@ "$ref": "#/$defs/Classes" } }, - "required": [ - "Exclude" - ], + "required": ["Exclude"], "type": "object" } ] @@ -568,9 +507,7 @@ "$ref": "#/$defs/DataStructures" } }, - "required": [ - "Include" - ], + "required": ["Include"], "type": "object" }, { @@ -580,9 +517,7 @@ "$ref": "#/$defs/DataStructures" } }, - "required": [ - "Exclude" - ], + "required": ["Exclude"], "type": "object" } ] @@ -622,14 +557,10 @@ }, "anyOf": [ { - "required": [ - "Exclude" - ] + "required": ["Exclude"] }, { - "required": [ - "Include" - ] + "required": ["Include"] } ], "type": "object" @@ -651,20 +582,13 @@ }, "oneOf": [ { - "required": [ - "Classes", - "Domains" - ] + "required": ["Classes", "Domains"] }, { - "required": [ - "Data Structures" - ] + "required": ["Data Structures"] }, { - "required": [ - "Entities" - ] + "required": ["Entities"] } ], "type": "object" @@ -699,9 +623,7 @@ } }, "then": { - "required": [ - "Grouping_Variables" - ] + "required": ["Grouping_Variables"] }, "type": "object" } diff --git a/resources/schema/rule-merged/Operations.json b/resources/schema/rule-merged/Operations.json index 85058679b..a93003e72 100644 --- a/resources/schema/rule-merged/Operations.json +++ b/resources/schema/rule-merged/Operations.json @@ -10,10 +10,7 @@ "markdownDescription": "\nReturns a 
Series indicating whether a specified codelist is extensible. Used in conjunction with codelist_terms to determine if values outside the codelist are acceptable. From the above example, $extensible will contain a bool if the codelist PKUDUG is extensible in all rows of the column.\n\nIf ct_package_type, version, and codelist_code parameters are provided, it will instead attach a new column containing the extensible value for each combination provided in the source dataset.\n\nFor example, given the current dataset:\n\n```\nid \tcodeSystemVersion \t$codelist_code\n1 \t2024-09-27 \tC201264\n2 \t2024-09-27 \tC201265\n3 \t2023-03-29 \tC127262\n```\n\nand the following operation:\n\n```yaml\n- id: $codelist_extensible\n operator: codelist_extensible\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n```\n\nThis will result in the following dataset:\n\n```\nid \tcodeSystemVersion \t$codelist_code \t$codelist_extensible\n1 \t2024-09-27 \tC201264 \tfalse\n2 \t2024-09-27 \tC201265 \tfalse\n3 \t2023-03-29 \tC127262 \ttrue\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -23,10 +20,7 @@ "markdownDescription": "\nReturns a list of valid codelist/term values. Used for evaluating whether NCI codes, submission values or NCI preferred terms are valid based on controlled terminology. 
Expects three parameters: `codelists` which is a list of the codelist submission value(s) to retrieve, `level` which is the level of data (either \"codelist\" or \"term\") at which to return data from, and `returntype` which is the type of values to return: \"code\" for NCI Code(s), \"value\" for submission value(s), or \"pref_term\" for NCI preferred term(s).\n\n```yaml\n- Check:\n - all:\n - name: PPSTRESU\n operator: is_not_contained_by\n value: $terms\n - name: $extensible\n operator: equal_to\n value: true\n- Operations:\n - id: $terms\n operator: codelist_terms\n codelists:\n - PKUDUG\n level: term\n returntype: value\n - id: $extensible\n codelist: PKUDUG\n operator: codelist_extensible\n```\n\nIf `ct_package_type`, `version`, and `codelist_code` parameters are provided, it will instead attach a new column containing the term for each combination provided in the source dataset. If a column name is provided as:\n\n- `term_code`, it will find term information using the term codes in the specified column.\n- `term_value`, it will find term information using the term submission values in the specified column.\n- `term_pref_term`, it will find term information using the term preferred terms in the specified column.\n\nOnly one of `term_code`, `term_value` or `term_pref_term` can be provided. The term information returned will depend on the value of the `returntype` parameter, as described above. 
If `returntype` is not specified, specifying `term_code` will return the term submission value and specifying either `term_value` or `term_pref_term` will return the term code.\n\nFor example, given the current dataset:\n\n```\nid \tcodeSystemVersion $codelist_code code decode\n1 \t2024-09-27 C201264 C201356 After\n2 \t2024-09-27 C201265 C201352 End to End\n3 \t2023-03-29 C127262 C51282 CLINIC\n```\n\nand the following operations:\n\n```yaml\n- id: $found_term_value\n operator: codelist_terms\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n term_code: code\n- id: $found_term_pref_term\n operator: codelist_terms\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n term_code: code\n returntype: pref_term\n```\n\nThis will result in the following dataset:\n\n```\n id codeSystemVersion $codelist_code code decode $found_term_value $found_term_pref_term\n 1 2024-09-27 C201264 C201356 After After After Timing Type\n 2 2024-09-27 C201265 C201352 End to End End to End End to End\n 3 2023-03-31 C127262 C51282 CLINIC CLINIC Clinic\n```\n\nConversely, if given the same dataset, and the following operations:\n\n```yaml\n- id: $found_term_code1\n operator: codelist_terms\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n term_value: decode\n- id: $found_term_code2\n operator: codelist_terms\n ct_package_type: DDF\n version: codeSystemVersion\n codelist_code: $codelist_code\n term_pref_term: decode\n```\n\nThis will result in the following dataset:\n\n```\n id codeSystemVersion $codelist_code code decode $found_term_code1 $found_term_code2\n 1 2024-09-27 C201264 C201356 After C201356\n 2 2024-09-27 C201265 C201352 End to End C201352 C201352\n 3 2023-03-31 C127262 C51282 CLINIC C51282 C51282\n```\n\nNote that `$found_term_code2` is:\n\n- `null` for the first record because \"After\" does not match any NCI preferred term in the C201264 codelist.\n- populated for the third record 
because matching is case-insensitive (i.e., \"CLINIC\" matches \"Clinic\").\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -36,11 +30,7 @@ "markdownDescription": "\nReturns a list of valid extensible codelist term's submission values. Used for evaluating whether submission values are valid based on controlled terminology. Expects the parameter codelists which is a list of the codelist submission value(s) to retrieve. If the codelist argument is [\"All\"] will return all extensible terms for the CT in a list.\n\n```yaml\n{\n \"id\": \"$ext_value\",\n \"codelist\": [\"ALL\"],\n \"operator\": \"define_extensible_codelists\",\n}\n```\n" } }, - "required": [ - "id", - "operator", - "codelists" - ], + "required": ["id", "operator", "codelists"], "type": "object" }, { @@ -50,11 +40,7 @@ "markdownDescription": "\nIf a target variable name is specified, returns the specified metadata in the define for the specified target variable.\n\nInput\n\n```yaml\n- operator: define_variable_metadata\n attribute_name: define_variable_label\n name: LBTESTCD\n id: $LBTESTCD_VARIABLE_LABEL\n```\n\nOutput\n\n```\nLaboratory Test Code\n```\n\nIf no target variable name specified, returns a dictionary containing the specified metadata in the define for all variables.\n\nInput\n\n```yaml\n- operator: define_variable_metadata\n attribute_name: define_variable_label\n id: $VARIABLE_LABEL\n```\n\nOutput\n\n```\n{\n \"STUDYID\": \"Study Identifier\",\n \"USUBJID\": \"Unique Subject Identifier\",\n \"LBTESTCD\": \"Laboratory Test Code\",\n \"...\": \"...\"\n}\n```\n" } }, - "required": [ - "id", - "operator", - "attribute_name" - ], + "required": ["id", "operator", "attribute_name"], "type": "object" }, { @@ -64,11 +50,7 @@ "markdownDescription": "\nGet a distinct list of values for the given name.\n\nIf a group list is specified, the distinct value list will be grouped by the variables within group.\nIf a filter object is provided, 
only values for records that match the filter criteria are included in the distinct values.\nIf `value_is_reference` is set to true, the target column contains the names of other columns, and the operation will check the referenced columns to ensure they exist in the associated dataset before adding them to the distinct list.\n\nIf group is provided, group_aliases may also be provided to assign new grouping variable names so that results grouped by the values in one set of grouping variables can be merged onto a dataset according to the same grouping value(s) stored in different set of grouping variables. When both group and group_aliases are provided, columns are renamed according to corresponding list position (i.e., the 1st column in group is renamed to the 1st column in group_aliases, etc.). If there are more columns listed in group than in group_aliases, only the group columns with corresponding group_aliases columns will be renamed. If there are more columns listed in group_aliases than in group, the extra column names in group_aliases will be ignored. See record_count for an example of the use of group_aliases.\n\n```yaml\nCheck:\n all:\n - name: SSSTRESC\n operator: equal_to\n value: DEAD\n value_is_literal: true\n - name: $ds_dsdecod\n operator: does_not_contain\n value: DEATH\n value_is_literal: true\nOperations:\n - operator: distinct\n domain: DS\n name: DSDECOD\n id: $ds_dsdecod\n group:\n - USUBJID\n filter:\n CAT: \"CATEGORY 1\"\n SCAT: \"SUBCATEGORY A\"\n```\n\n> below, `IDVAR` contains column names, the operation retrieves the value from each column for that row, checks the dataset associated with that column using the CO RDOMAIN. 
Columns that exist are added to the returns the distinct set.\n\n```yaml\nOperations:\n - domain: CO\n id: $rdomain_variables\n name: IDVAR\n operator: distinct\n value_is_reference: true\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -78,10 +60,7 @@ "markdownDescription": "\nChecks whether the domain is in the set of domains within the provided standard.\n\nInput\n\nTarget Domain: XY\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\nOperations:\n - operator: domain_is_custom\n id: $domain_is_custom\n```\n\nOutput\n\n```\ntrue\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -91,10 +70,7 @@ "markdownDescription": "\nChecks whether the related domain (for example, the parent domain of a SUPP or RELREC dataset) is not present in the set of standard domains for the provided standard and version. This is useful for determining whether relationships point to non-standard or custom domains.\n\nInput\n\nTarget Domain: SUPPEX\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\nOperations:\n - operator: related_domain_is_custom\n id: $related_domain_is_custom\n```\n\nOutput\n\n```\ntrue\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -104,10 +80,7 @@ "markdownDescription": "\nReturns the label for the domain the operation is executing on within the provided standard.\n\nInput.\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\nOperations:\n - operator: domain_label\n id: $domain_label\n```\n\nOutput\n\n```\nLaboratory Test Results\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -117,11 +90,7 @@ "markdownDescription": "\nCalculates the number of days between the DTC and RFSTDTC. The Study Day value is incremented by 1 for each date following RFSTDTC. 
Dates prior to RFSTDTC are decreased by 1, with the date preceding RFSTDTC designated as Study Day -1 (there is no Study Day 0). All Study Day values are integers. Thus, to calculate Study Day:\n\n- --DY = (date portion of --DTC) - (date portion of RFSTDTC) + 1 if --DTC is on or after RFSTDTC\n- --DY = (date portion of --DTC) - (date portion of RFSTDTC) if --DTC precedes RFSTDTC\n\nThis algorithm should be used across all domains.\n\n```yaml\nCheck:\n all:\n - name: --DY\n operator: non_empty\n - name: --DTC\n operator: is_complete_date\n - name: RFSTDTC\n operator: is_complete_date\n - name: --DY\n operator: not_equal_to\n value: $dy\nOperations:\n - name: --DTC\n operator: dy\n id: $dy\nMatch Datasets:\n - Name: DM\n Keys:\n - USUBJID\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -131,11 +100,7 @@ "markdownDescription": "\nReturns the requested dataset level metadata value for the current dataset. Possible name values are:\n\n- dataset_size\n- dataset_location\n- dataset_name\n- dataset_label\n- domain\n- is_ap\n- ap_suffix\n\nExample\n\nInput:\n\nTarget domain: LB\n\n```yaml\n- name: dataset_label\n operator: extract_metadata\n id: $dataset_label\n```\n\nOutput:\n\n```\nLaboratory Test Results\n```\n\nExample: ap_suffix\n\nExtracts the domain suffix (characters 3-4) from AP-related domains. 
For example, \"FA\" from \"APFA\" DOMAIN value.\n\nInput:\n\nTarget domain: APFA\n\n```yaml\n- name: ap_suffix\n operator: extract_metadata\n id: $ap_suffix\n```\n\nOutput:\n\n```\nFA\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -145,10 +110,7 @@ "markdownDescription": "\nReturns the expected (\"Core\" = Exp ) variables for the domain in the current standard Variable Metadata for custom domains will pull from the model while non-custom domains will be from the IG and Model.\n\nInput:\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\n- operator: expected_variables\n id: $expected_variables\n```\n\nOutput:\n\n```\n[\"LBCAT\", \"LBORRES\", \"LBORRESU\", \"...\"]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -158,13 +120,7 @@ "markdownDescription": "\nFetches controlled terminology attribute values from CT packages based on row-specific CT package and version references. The operation constructs CT package names based on the standard being validated and the values in the `name` and `version` columns (e.g., SDTMIG \u2192 \"sdtmct-{version}\"). When the `name` column contains \"CDISC\" or \"CDISC CT\", it uses the validation run's standard to determine the package prefix and the version found in the cell of the specified column. 
The operation extracts all codes matching the specified ct_attribute from the package.\n\n**Required Parameters:**\n\n- `ct_attribute`: Attribute to extract - `\"Term CCODE\"`, `\"Codelist CCODE\"`, `\"Term Value\"`, `\"Codelist Value\"`, or `\"Term Preferred Term\"`\n- `name`: Column containing CT reference (e.g., \"TSVCDREF\") - identifies which terminology system is referenced\n- `version`: Column containing CT version (e.g., \"TSVCDVER\")\n\n```yaml\n- id: $VALID_TERM_CODES\n name: TSVCDREF\n operator: get_codelist_attributes\n ct_attribute: Term CCODE\n version: TSVCDVER\n```\n\n**Note:** because the rule editor does not contain the cache, if you use this operator in the rule editor, you must put the ctpackage versions contained within your data in the library tab for it to work properly in the editor.\n" } }, - "required": [ - "id", - "operator", - "name", - "ct_attribute", - "version" - ], + "required": ["id", "operator", "name", "ct_attribute", "version"], "type": "object" }, { @@ -174,10 +130,7 @@ "markdownDescription": "\nReturns the list of dataset columns in order\n\n```yaml\nCheck:\n all:\n - name: $column_order_from_dataset\n operator: is_not_ordered_by\n value: $column_order_from_library\nOperations:\n - id: $column_order_from_library\n operator: get_column_order_from_library\n - id: $column_order_from_dataset\n operator: get_column_order_from_dataset\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -187,10 +140,7 @@ "markdownDescription": "\nFetches column order for a given domain from the CDISC library. 
The lists with column names are sorted in accordance to \"ordinal\" key of library metadata.\nOptionally filters variables based on specified metadata criteria.\n\nRule Type: Variable Metadata Check\n\n```yaml\nCheck:\n all:\n - name: variable_name\n operator: is_not_contained_by\n value: $ig_variables\nOperations:\n - id: $ig_variables\n operator: get_column_order_from_library\n key_name: \"role\" # role, core, etc\n key_value: \"Exp\" # Timing, Req, Exp, Perm, etc\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -200,10 +150,7 @@ "markdownDescription": "\nReturns the list of domains for a given class from the CDISC Library Implementation Guide. This operation retrieves all domains that belong to a specified class (e.g., \"TRIAL DESIGN\", \"FINDINGS\", \"EVENTS\") based on the current standard and version. The operation uses the standard and version from the validation context as well as the optional `domain_class` parameter which is the name of the class to filter by (e.g., \"TRIAL DESIGN\", \"FINDINGS\", \"EVENTS\", \"INTERVENTIONS\"). NOTE: Class names are case-sensitive and should match the Library metadata format. If no `domain_class` parameter is provided, the operation returns all domains across all classes in the Implementation Guide:\n\n```yaml\n- operator: get_library_class_domains\n id: $trial_design_domains\n domain_class: \"TRIAL DESIGN\"\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -213,10 +160,7 @@ "markdownDescription": "\nFetches column order for a given model class from the CDISC library. 
The lists with column names are sorted in accordance to \"ordinal\" key of library metadata.\n\nRule Type: Variable Metadata Check\n\n```yaml\nCheck:\n all:\n - name: variable_name\n operator: is_not_contained_by\n value: $model_variables\nOperations:\n - id: $model_variables\n operator: get_model_column_order\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -226,12 +170,7 @@ "markdownDescription": "\nFetches variable level library model properties filtered by the provided key_name and key_value\n\nExample\n\nInput\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\n- operator: get_model_filtered_variables\n id: $model_filtered_variables\n key_name: \"role\"\n key_value: \"Timing\"\n```\n\nOutput\n\n```\n[\"VISITNUM\", \"VISIT\", \"VISITDY\", \"TAETORD\", \"...\"]\n```\n" } }, - "required": [ - "id", - "operator", - "key_name", - "key_value" - ], + "required": ["id", "operator", "key_name", "key_value"], "type": "object" }, { @@ -241,10 +180,7 @@ "markdownDescription": "\nFetches column order for a given SUPP's parent model class from the CDISC library. The lists with column names are sorted in accordance to \"ordinal\" key of library metadata.\n\n```yaml\nCheck:\n all:\n - operator: is_not_contained_by\n value: $parent_model_variables\nOperations:\n - id: $parent_model_variables\n operator: get_parent_model_column_order\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -254,12 +190,7 @@ "markdownDescription": "\nFilters variables from the dataset based on specified metadata criteria. 
Returns a list of variable names that exist in the dataset and match the filter criteria.\n\n```yaml\n- operator: get_dataset_filtered_variables\n id: $timing_variables\n key_name: \"role\"\n key_value: \"Timing\"\n```\n" } }, - "required": [ - "id", - "operator", - "key_name", - "key_value" - ], + "required": ["id", "operator", "key_name", "key_value"], "type": "object" }, { @@ -269,11 +200,7 @@ "markdownDescription": "\nGenerates a dataframe where each record in the dataframe is the library ig variable metadata corresponding with the variable label found in the column provided in name. The metadata column names are prefixed with the string provided in `id`.\n\nInput\n\nTarget Dataset: SUPPLB\n\nProduct: sdtmig\n\nVersion: 3-4\n\nDataset:\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QLABEL\": [\"Toxicity\", \"Viscosity\", \"Analysis Method\"]\n}\n```\n\nRule:\n\n```yaml\n- operator: label_referenced_variable_metadata\n id: $qlabel_referenced_variable_metadata\n name: \"QLABEL\"\n```\n\nOutput\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QLABEL\": [\"Toxicity\", \"Viscosity\", \"Analysis Method\"],\n \"$qlabel_referenced_variable_metadata_name\": [\"LBTOX\", null, \"LBANMETH\"],\n \"$qlabel_referenced_variable_metadata_role\": [\n \"Variable Qualifier\",\n null,\n \"Record Qualifier\"\n ],\n \"$qlabel_referenced_variable_metadata_ordinal\": [44, null, 38],\n \"$qlabel_referenced_variable_metadata_core\": [\"Req\", \"Req\", \"Req\"],\n \"$qlabel_referenced_variable_metadata_label\": [\"Toxicity\", null, \"Analysis Method\"]\n}\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -283,11 +210,7 @@ "markdownDescription": "\nAllows the creation of a lookup table to take the values from multiple input columns and map them to values in an output 
column. The map parameter contains a list of objects. Each dictionary contains column names as properties that match the column names in the source dataset and an output property that will be returned as a result.\n\nIf map has a single object and output is the only property specified on that object, this will function as a direct assignment.\n\nFor example, given the following current dataset:\n\n```\nid \tparent_entity \tparent_rel\n1 \tTiming \trelativeToFrom\n2 \tSomething \trelativeToFrom\n3 \tTiming \ttype\n```\n\nand the following operation:\n\n```yaml\nOperations:\n - id: $codelist_code\n operator: map\n map:\n - parent_entity: Timing\n parent_rel: type\n output: C201264\n - parent_entity: Timing\n parent_rel: relativeToFrom\n output: C201265\n```\n\nThis will result in the following dataset:\n\n```\nid \tparent_entity \tparent_rel \t$codelist_code\n1 \tTiming \trelativeToFrom \tC201265\n2 \tSomething \trelativeToFrom \tNone\n3 \tTiming \ttype \tC201264\n```\n\nThe following operation:\n\n```yaml\nOperations:\n - id: $codelist_code\n operator: map\n map:\n - output: C201264\n```\n\nWill result in the following dataset:\n\n```\nid \tparent_entity \tparent_rel \t$codelist_code\n1 \tTiming \trelativeToFrom \tC201264\n2 \tSomething \trelativeToFrom \tC201264\n3 \tTiming \ttype \tC201264\n```\n" } }, - "required": [ - "id", - "operator", - "map" - ], + "required": ["id", "operator", "map"], "type": "object" }, { @@ -297,11 +220,7 @@ "markdownDescription": "\nIf no group is provided, returns the max value in name. 
If group is provided, returns the max value in name, within each unique set of the grouping variables.\n\n```yaml\nCheck:\n all:\n - name: \"$max_age\"\n operator: \"greater_than\"\n value: \"MAXAGE\"\nOperations:\n - operator: \"max\"\n domain: \"DM\"\n name: \"AGE\"\n id: \"$max_age\"\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -311,11 +230,7 @@ "markdownDescription": "\nIf no group is provided, returns the max date value in name. If group is provided, returns the max date value in name, within each unique set of the grouping variables.\n\n```yaml\nCheck:\n all:\n - name: USUBJID\n operator: is_contained_by\n value: $ex_usubjid\n - name: RFXENDTC\n operator: not_equal_to\n value: $max_ex_exstdtc\n - name: RFXENDTC\n operator: not_equal_to\n value: $max_ex_exendtc\nOperations:\n - operator: distinct\n domain: EX\n name: USUBJID\n id: $ex_usubjid\n - operator: max_date\n domain: EX\n name: EXSTDTC\n id: $max_ex_exstdtc\n group:\n - USUBJID\n - operator: max_date\n domain: EX\n name: EXENDTC\n id: $max_ex_exendtc\n group:\n - USUBJID\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -325,11 +240,7 @@ "markdownDescription": "\nExample: AAGE > mean(DM.AGE), where AAGE is a fictitious NSV\n\n```yaml\nCheck:\n all:\n - name: \"AAGE\"\n operator: \"greater_than\"\n value: \"$average_age\"\nOperations:\n - operator: \"mean\"\n domain: \"DM\"\n name: \"AGE\"\n id: \"$average_age\"\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -339,11 +250,7 @@ "markdownDescription": "\nIf no group is provided, returns the min value in name. 
If group is provided, returns the min value in name, within each unique set of the grouping variables.\n\n```yaml\nCheck:\n all:\n - name: \"$min_age\"\n operator: \"less_than\"\n value: \"MINAGE\"\nOperations:\n - operator: \"min\"\n domain: \"DM\"\n name: \"AGE\"\n id: \"$min_age\"\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -353,11 +260,7 @@ "markdownDescription": "\nIf no group is provided, returns the min date value in name. If group is provided, returns the min date value in name, within each unique set of the grouping variables.\n\nExample: RFSTDTC is greater than min AE.AESTDTC for the current USUBJID\n\n```yaml\nCheck:\n all:\n - name: \"RFSTDTC\"\n operator: \"date_greater_than\"\n value: \"$ae_aestdtc\"\nOperations:\n - operator: \"min_date\"\n domain: \"AE\"\n name: \"AESTDTC\"\n id: \"$ae_aestdtc\"\n group:\n - USUBJID\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -367,12 +270,7 @@ "markdownDescription": "\nComputes set difference: elements in `name` that are not in `subtract`. Uses set difference semantics (A \u2216 B). Preserves order from the first list. Both `name` and `subtract` must reference other operation results (e.g., `$expected_variables`, `$dataset_variables`). When `subtract` is empty or missing, returns all elements from `name`. 
Can be computed and added to output variables to display missing elements in error results.\n\n```yaml\nOperations:\n - id: $expected_variables\n operator: expected_variables\n - id: $dataset_variables\n operator: get_column_order_from_dataset\n - id: $expected_minus_dataset\n name: $expected_variables\n operator: minus\n subtract: $dataset_variables\n```\n" } }, - "required": [ - "id", - "operator", - "name", - "subtract" - ], + "required": ["id", "operator", "name", "subtract"], "type": "object" }, { @@ -382,11 +280,7 @@ "markdownDescription": "\nGenerates a dataframe where each record in the dataframe is the library ig variable metadata corresponding with the variable name found in the column provided in name. The metadata column names are prefixed with the string provided in `id`.\n\nInput\n\nTarget Dataset: SUPPLB\n\nProduct: sdtmig\n\nVersion: 3-4\n\nDataset:\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QNAM\": [\"Toxicity\", \"LBVISCOS\", \"Analysis Method\"]\n}\n```\n\nRule:\n\n```yaml\n- operator: name_referenced_variable_metadata\n id: $qnam_referenced_variable_metadata\n name: \"QNAM\"\n```\n\nOutput\n\n```\n{\n \"STUDYID\": [\"STUDY1\", \"STUDY1\", \"STUDY1\"],\n \"USUBJID\": [\"SUBJ1\", \"SUBJ1\", \"SUBJ1\"],\n \"QNAM\": [\"LBTOX\", \"LBVISCOS\", \"LBANMETH\"],\n \"$qnam_referenced_variable_metadata_name\": [\"LBTOX\", null, \"LBANMETH\"],\n \"$qnam_referenced_variable_metadata_role\": [\n \"Variable Qualifier\",\n null,\n \"Record Qualifier\"\n ],\n \"$qnam_referenced_variable_metadata_ordinal\": [44, null, 38],\n \"$qnam_referenced_variable_metadata_core\": [\"Req\", \"Req\", \"Req\"],\n \"$qnam_referenced_variable_metadata_label\": [\"Toxicity\", null, \"Analysis Method\"]\n}\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -396,10 +290,7 @@ "markdownDescription": "\nReturns the permissible 
variables (\"Core\" = Perm ) for a given domain and standard Variable Metadata for custom domains will pull from the model while non-custom domains will be from the IG and Model.\n\nInput:\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\n- operator: permissible_variables\n id: $permissible_variables\n```\n\nOutput:\n\n```\n[\"LBGRPID\", \"LBREFID\", \"LBSPID\", \"...\"]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -409,10 +300,7 @@ "markdownDescription": "\nIf no filter or group is provided, returns the number of records in the dataset. If filter is provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter. Filter can have a wildcard `&` that when added to the end of the filter value will look for all instances of that prefix (see 4th example below). If group is provided, returns the number of rows matching each unique set of the grouping variables. These can be static column name(s) or can be derived from other operations like get_dataset_filtered_variables.\n\nIf both filter and group are provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter that also match each unique set of the grouping variables.\n\n**Wildcard Filtering:** Filter values ending with % will match any records where the column value starts with the specified prefix. For example, RACE% will match RACE1, RACE2, RACE3, etc. This is useful for matching related variables with numeric or alphabetic suffixes.\n\n**Regex Transformation:** If regex is provided along with group, the regex pattern will be applied to transform grouping column values before grouping. The regex is only applied to columns where the pattern matches the data type. 
For example, using regex `^\\d{4}-\\d{2}-\\d{2}` on a column containing `2022-01-14T08:00` will extract `2022-01-14` for grouping purposes.\n\nIf group is provided, group_aliases may also be provided to assign new grouping variable names so that results grouped by the values in one set of grouping variables can be merged onto a dataset according to the same grouping value(s) stored in different set of grouping variables. When both group and group_aliases are provided, columns are renamed according to corresponding list position (i.e., the 1st column in group is renamed to the 1st column in group_aliases, etc.). If there are more columns listed in group than in group_aliases, only the group columns with corresponding group_aliases columns will be renamed. If there are more columns listed in group_aliases than in group, the extra column names in group_aliases will be ignored.\n\nExample: return the number of records in a dataset.\n\n```yaml\n- operator: record_count\n id: $records_in_dataset\n```\n\nExample: return the number of records where STUDYID = \"CDISC01\" and FLAGVAR = \"Y\".\n\n```yaml\n- operator: record_count\n id: $flagged_cdisc01_records_in_dataset\n filter:\n STUDYID: \"CDISC01\"\n FLAGVAR: \"Y\"\n```\n\nExample: return the number of records grouped by USUBJID and timing variables, extracting only the date portion from datetime values.\n\n```yaml\n- operator: record_count\n id: $records_per_usubjid_date\n group:\n - USUBJID\n - --TESTCD\n - $TIMING_VARIABLES\n regex: \"^\\d{4}-\\d{2}-\\d{2}\"\n```\n\nExample: return the number of records where QNAM starts with \"RACE\" (matches RACE1, RACE2, RACE3, etc.) 
per USUBJID.\n\n```yaml\n- operator: record_count\n id: $race_records_in_dataset\n filter:\n QNAM: \"RACE&\"\n group:\n - \"USUBJID\"\n```\n\nExample: return the number of records grouped by USUBJID.\n\n```yaml\n- operator: record_count\n id: $records_per_usubjid\n group:\n - USUBJID\n```\n\nExample: return the number of records grouped by USUBJID where FLAGVAR = \"Y\".\n\n```yaml\n- operator: record_count\n id: $flagged_records_per_usubjid\n group:\n - USUBJID\n filter:\n FLAGVAR: \"Y\"\n```\n\nExample: return the number of records grouped by USUBJID and IDVARVAL where QNAM = \"TEST1\" and IDVAR = \"GROUPID\", renaming the IDVARVAL column to GROUPID for subsequent merging.\n\n```yaml\n- operator: record_count\n id: $test1_records_per_usubjid_groupid\n group:\n - USUBJID\n - IDVARVAL\n filter:\n QNAM: \"TEST1\"\n IDVAR: \"GROUPID\"\n group_aliases:\n - USUBJID\n - GROUPID\n```\n\nExample: Group the StudyIdentifier dataset by parent_id and merge the result back to the context dataset StudyVersion using StudyVersion.id == StudyIdentifier.parent_id\n\n```yaml\nScope:\n Entities:\n Include:\n - StudyVersion\nOperations:\n - domain: StudyIdentifier\n filter:\n parent_entity: \"StudyVersion\"\n parent_rel: \"studyIdentifiers\"\n rel_type: \"definition\"\n studyIdentifierScope.organizationType.code: \"C70793\"\n studyIdentifierScope.organizationType.codeSystem: \"http://www.cdisc.org\"\n group:\n - parent_id\n group_aliases:\n - id\n id: $num_sponsor_ids\n operator: record_count\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -422,10 +310,7 @@ "markdownDescription": "\nReturns the required variables ( \"Core\" = Req ) for a given domain and standard Variable Metadata for custom domains will pull from the model while non-custom domains will be from the IG and Model.\n\nInput:\n\nTarget Domain: LB\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\n- operator: required_variables\n id: 
$required_variables\n```\n\nOutput:\n\n```\n[\"STUDYID\", \"DOMAIN\", \"USUBJID\", \"LBSEQ\", \"LBTESTCD\", \"LBTEST\"]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -435,12 +320,7 @@ "markdownDescription": "\nSplits a dataset column by a given delimiter\n\n```yaml\nOperations:\n - name: PPSPEC\n delimiter: ;\n id: $ppspec_value\n operator: split_by\n```\n" } }, - "required": [ - "id", - "operator", - "delimiter", - "name" - ], + "required": ["id", "operator", "delimiter", "name"], "type": "object" }, { @@ -450,10 +330,7 @@ "markdownDescription": "\nReturns a list of the domains in the study\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -463,10 +340,7 @@ "markdownDescription": "\nReturns a list of the submitted dataset filenames in all uppercase\n\nex. if TS.xpt, AE.xpt, EC.xpt, and SUPPEC.xpt are submitted -> [TS, AE, EC, SUPPEC] will be returned\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -476,10 +350,7 @@ "markdownDescription": "\nReturns a list of valid SDTM domain names from the standard metadata. 
This can be used to compare extracted suffixes from DOMAIN values or dataset names.\n\nInput\n\nProduct: sdtmig\n\nVersion: 3-4\n\n```yaml\nOperations:\n - operator: standard_domains\n id: $valid_domain_names\n```\n\nOutput\n\n```\n[\"AE\", \"CM\", \"DM\", \"FA\", \"LB\", \"QS\", ...]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -489,10 +360,7 @@ "markdownDescription": "\nReturns the valid terminology package dates for a given standard.\n\nGiven a list of terminology packages:\n\n```\n[\n \"sdtmct-2023-10-26\",\n \"sdtmct-2023-12-13\",\n \"adamct-2023-12-13\",\n \"cdashct-2023-05-19\"\n]\n```\n\nand standard: sdtmig\n\nthe operation will return:\n\n```\n[\"2023-10-26\", \"2023-12-13\"]\n```\n\nBy default, the standard is as specified when running validation - as the validation runtime parameter and/or as specified in the rule header - and the list of terminology packages is obtained from the current cache. If required, the default standard may be overridden using the optional ct_package_types parameter. 
For example, given the same list of terminology packages, the following operation:\n\n```yaml\nOperations:\n - operator: valid_codelist_dates\n id: $valid_dates\n ct_package_types:\n - SDTM\n - CDASH\n```\n\nwill return:\n\n```\n[\"2023-05-19\", \"2023-10-26\", \"2023-12-13\"]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -502,11 +370,7 @@ "markdownDescription": "\nReturns true if the version of an external dictionary provided in the define.xml file matches the version parsed from the dictionary files.\n\nInput:\n\n```yaml\nOperations:\n - operator: valid_define_external_dictionary_version\n id: $is_valid_loinc_version\n external_dictionary_type: loinc\n```\n\nOutput:\n\n```\n[true, true, true, true]\n```\n" } }, - "required": [ - "id", - "operator", - "external_dictionary_type" - ], + "required": ["id", "operator", "external_dictionary_type"], "type": "object" }, { @@ -564,10 +428,7 @@ "markdownDescription": "\nDetermines whether the values are valid in the following variables:\n\n- --SOCCD (System Organ Class Code)\n- --HLGTCD (High Level Group Term Code)\n- --HLTCD (High Level Term Code)\n- --PTCD (Preferred Term Code)\n- --LLTCD (Lowest Level Term Code)\n\nInput:\n\n```yaml\nOperations:\n - id: $is_valid_meddra_codes\n operator: valid_meddra_code_references\n```\n\nOutput:\n\n```\n[true, false, true, true]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -577,10 +438,7 @@ "markdownDescription": "\nDetermines whether the values are valid in the following variable pairs:\n\n- --SOCCD, --SOC (System Organ Class Code and Term)\n- --HLGTCD, --HLGT (High Level Group Term Code and Term)\n- --HLTCD, --HLT (High Level Term Code and Term)\n- --PTCD, --DECOD (Preferred Term Code and Dictionary-Derived Term)\n- --LLTCD, --LLT (Lowest Level Term Code and Term)\n\nInput:\n\n```yaml\nOperations:\n - id: $is_valid_meddra_pairs\n operator: 
valid_meddra_code_term_pairs\n```\n\nOutput:\n\n```\n[true, true, false, true, true]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -590,10 +448,7 @@ "markdownDescription": "\nDetermines whether the values are valid in the following variables:\n\n- --SOC (System Organ Class)\n- --HLGT (High Level Group Term)\n- --HLT (High Level Term)\n- --DECOD (Dictionary-Derived Term)\n- --LLT (Lowest Level Term)\n\nInput:\n\n```yaml\nOperations:\n - id: $is_valid_meddra_terms\n operator: valid_meddra_term_references\n```\n\nOutput:\n\n```\n[true, true, false, true, true]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -603,11 +458,7 @@ "markdownDescription": "\nChecks if a reference to whodrug term in name points to the existing code in Atc Text (INA) file.\n\nInput:\n\n```yaml\nOperations:\n - id: $whodrug_refs_valid\n operator: valid_whodrug_references\n```\n\nOutput:\n\n```\n[true, false, true, true]\n```\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -617,10 +468,7 @@ "markdownDescription": "\nReturns a mapping of variable names to the number of times that variable appears in a domain within the study.\n\nInput\n\n```\n{\n \"AE\": [\"STUDYID\", \"DOMAIN\", \"USUBJID\", \"AETERM\", \"AEENDTC\"],\n \"LB\": [\"STUDYID\", \"DOMAIN\", \"USUBJID\", \"LBTESTCD\", \"LBENDTC\"]\n}\n```\n\nOutput\n\n```\n{\n \"STUDYID\": 2,\n \"DOMAIN\": 2,\n \"USUBJID\": 2,\n \"--TERM\": 1,\n \"--TESTCD\": 1,\n \"--ENDTC\": 2\n}\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -630,10 +478,7 @@ "markdownDescription": "\nOperation operates only on original submission datasets regardless of rule type. 
Flags an error if a column exists in the submission dataset currently being evaluated.\n\nRule Type: Domain Presence Check\n\n```yaml\nCheck:\n all:\n - name: $MIDS_EXISTS\n operator: equal_to\n value: true\n - name: TM\n operator: not_exists\nOperations:\n - id: $MIDS_EXISTS\n name: MIDS\n operator: variable_exists\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -643,10 +488,7 @@ "markdownDescription": "\nReturns true if a variable is missing from the dataset or if all values within the variable are null or empty string. This operation first checks if the target variable exists in the dataset, and if it does exist, evaluates whether all its values are null or empty.\nThe operation supports two sources via the `source` parameter:\n\n- **`submission`** : checks against the raw submission dataset\n- **`evaluation`** (default): checks against the evaluation dataset built based on the rule type\n\n```yaml" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -656,10 +498,7 @@ "markdownDescription": "\nReturns the set of variable names from the library for the given standard. 
This operation extracts all variable names across all domains in the specified standard's library metadata.\n\nInput:\n\nValidation Standard: sdtmig\nValidation Version: 3-4\n\n```yaml\n- operator: variable_names\n id: $all_variables\n```\n\nOutput:\n\n```\n[\"STUDYID\", \"DOMAIN\", \"USUBJID\", \"SUBJID\", \"RFSTDTC\", \"RFENDTC\", \"SITEID\", \"AGE\", \"AGEU\", \"SEX\", \"RACE\", \"ETHNIC\", \"ARMCD\", \"ARM\", \"ACTARMCD\", \"ACTARM\", \"COUNTRY\", \"DMDTC\", \"DMDY\", \"AETERM\", \"AEDECOD\", \"AECAT\", \"AESCAT\", \"AEPRESP\", \"AEBODSYS\", \"AEBDSYCD\", \"AESOC\", \"AESOCCD\", \"AELLT\", \"AELLTCD\", \"AEHLT\", \"AEHLTCD\", \"AEHLGT\", \"AEHLGTCD\", \"AEPTCD\", \"AESTDTC\", \"AEENDTC\", \"AESTDY\", \"AEENDY\", \"AEDUR\", \"AESER\", \"AESEV\", \"AEACN\", \"AEREL\", \"AEOUT\", \"AESCAN\", \"AESCONG\", \"AESDISAB\", \"AESDTH\", \"AESHOSP\", \"AESLIFE\", \"AESOD\", \"AECONTRT\", \"AETOXGR\", \"LBTESTCD\", \"LBTEST\", \"LBCAT\", \"LBSCAT\", \"LBSPEC\", \"LBMETHOD\", \"LBORRES\", \"LBORRESU\", \"LBORNRLO\", \"LBORNRHI\", \"LBSTRESC\", \"LBSTRESN\", \"LBSTRESU\", \"LBSTNRLO\", \"LBSTNRHI\", \"LBNRIND\", \"LBNAM\", \"LBSPEC\", \"LBANTREG\", \"LBFAST\", \"LBDRVFL\", \"LBTOX\", \"LBTOXGR\", \"LBSTDTC\", \"LBENDTC\", \"LBSTDY\", \"LBENDY\", \"LBTPT\", \"LBTPTNUM\", \"LBELTM\", \"LBTPTREF\", \"LBRFTDTC\", \"...\"]\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -669,11 +508,7 @@ "markdownDescription": "\nGiven a variable name, returns a mapping of variable values to the number of times that value appears in the variable within all datasets in the study.\n" } }, - "required": [ - "id", - "operator", - "name" - ], + "required": ["id", "operator", "name"], "type": "object" }, { @@ -683,10 +518,7 @@ "markdownDescription": "\nDetermines whether the values are valid and in the correct hierarchical structure in the following variables:\n\n- --DECOD\n- --CLAS\n- --CLASCD\n\nInput:\n\n```yaml\nOperations:\n - id: 
$valid_whodrug_codes\n operator: whodrug_code_hierarchy\n```\n" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" }, { @@ -696,12 +528,7 @@ "markdownDescription": "\nValidates XHTML fragments in the target column against the specified namespace.\n\n```yaml\nOperations:\n - id: $xhtml_errors\n name: text\n operator: get_xhtml_errors\n namespace: http://www.cdisc.org/ns/usdm/xhtml/v1.0\n```\n\nNote that a local XSD file is required for validation. The file must be stored in the folder indicated by the value of the `LOCAL_XSD_FILE_DIR` default file path and the mapping between the namespace and the local XSD file's `sub-folder/name` must be included in the value of the `LOCAL_XSD_FILE_MAP` default file path.\n" } }, - "required": [ - "id", - "operator", - "name", - "namespace" - ], + "required": ["id", "operator", "name", "namespace"], "type": "object" } ], @@ -764,13 +591,7 @@ "type": "string" }, "dictionary_term_type": { - "enum": [ - "LLT", - "PT", - "HLT", - "HLGT", - "SOC" - ] + "enum": ["LLT", "PT", "HLT", "HLGT", "SOC"] }, "domain": { "anyOf": [ @@ -783,9 +604,7 @@ ] }, "external_dictionary_type": { - "enum": [ - "meddra" - ] + "enum": ["meddra"] }, "filter": { "type": "object" @@ -830,10 +649,7 @@ }, "level": { "type": "string", - "enum": [ - "codelist", - "term" - ] + "enum": ["codelist", "term"] }, "map": { "type": "array", @@ -844,9 +660,7 @@ "type": "string" } }, - "required": [ - "output" - ] + "required": ["output"] } }, "name": { @@ -863,11 +677,7 @@ }, "returntype": { "type": "string", - "enum": [ - "code", - "value", - "pref_term" - ] + "enum": ["code", "value", "pref_term"] }, "source": { "type": "string" @@ -891,9 +701,6 @@ "type": "string" } }, - "required": [ - "id", - "operator" - ], + "required": ["id", "operator"], "type": "object" } diff --git a/resources/schema/rule-merged/Operator.json b/resources/schema/rule-merged/Operator.json index a748942e0..8d3479ab3 100644 --- 
a/resources/schema/rule-merged/Operator.json +++ b/resources/schema/rule-merged/Operator.json @@ -9,9 +9,7 @@ "const": "additional_columns_empty" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -20,9 +18,7 @@ "const": "additional_columns_not_empty" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -32,10 +28,7 @@ "markdownDescription": "\nWill return True if the value in `value` is contained within the collection/iterable in the target column, or if there's an exact match for non-iterable data.\n\nThe operator checks if every value in a column is a list or set. If yes, it compares row-by-row. If any value is blank or a different type (like a string or number), it compares each value against the entire column instead.\n\nExample:\n\n```yaml\n- name: \"--TOXGR\" # Column containing lists like ['GRADE', 'SEVERITY', 'ONSET']\n operator: \"contains\"\n value: \"GRADE\" # True if 'GRADE' is an element in the list\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -45,9 +38,7 @@ "markdownDescription": "\nTrue if all values in `value` are contained within the variable `name`.\n\n> All of ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment') in ACTARM\n\n```yaml\n- name: \"ACTARM\"\n operator: \"contains_all\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n - \"Not Treated\"\n - \"Unplanned Treatment\"\n```\n\nThe operator also supports lists:\n\n```yaml\n- name: \"$spec_codelist\"\n operator: \"contains_all\"\n value: \"$ppspec_value\"\n```\n\nWhere:\n\n| $spec_codelist | $ppspec_value |\n| :-------------------------- | :----------------: |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE1\", \"CODE2\"] |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE2\", \"CODE3\"] |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE1\"] |\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": 
"object" }, { @@ -57,10 +48,7 @@ "markdownDescription": "\nTrue if the value in `value` is contained within the collection/iterable in the target column, performing case-insensitive comparison.\n\nExample:\n\n```yaml\n- name: \"--TOXGR\" # Column containing lists like ['Grade', 'Severity', 'Onset']\n operator: \"contains_case_insensitive\"\n value: \"grade\" # True if 'Grade'/'GRADE'/'grade' exists in the list\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -70,10 +58,7 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified.\n\nThe `date_component` parameter accepts: `\"year\"`, `\"month\"`, `\"day\"`, `\"hour\"`, `\"minute\"`, `\"second\"`, `\"microsecond\"`, or `\"auto\"`.\n\nWhen `date_component: \"auto\"` is used, the operator automatically detects the precision of both dates and compares at the common (less precise) level.\n\n```yaml\n- name: \"AESTDTC\"\n operator: \"date_equal_to\"\n value: \"RFSTDTC\"\n date_component: \"auto\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -83,10 +68,7 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. Supports `date_component: \"auto\"`.\n\n> Year part of BRTHDTC > 2021\n\n```yaml\n- name: \"BRTHDTC\"\n operator: \"date_greater_than\"\n date_component: \"year\"\n value: \"2021\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -96,10 +78,7 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. 
Supports `date_component: \"auto\"`.\n\n> Year part of BRTHDTC >= 2021\n\n```yaml\n- name: \"BRTHDTC\"\n operator: \"date_greater_than_or_equal_to\"\n date_component: \"year\"\n value: \"2021\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -109,10 +88,7 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. Supports `date_component: \"auto\"`.\n\n> AEENDTC < AESTDTC\n\n```yaml\n- name: \"AEENDTC\"\n operator: \"date_less_than\"\n value: \"AESTDTC\"\n```\n\n> SSDTC < all DS.DSSTDTC when SSSTRESC = \"DEAD\"\n\n```yaml\nCheck:\n all:\n - name: \"SSSTRESC\"\n operator: \"equal_to\"\n value: \"DEAD\"\n - name: \"SSDTC\"\n operator: \"date_less_than\"\n value: \"$max_ds_dsstdtc\"\nOperations:\n - operator: \"max_date\"\n domain: \"DS\"\n name: \"DSSTDTC\"\n id: \"$max_ds_dsstdtc\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -122,10 +98,7 @@ "markdownDescription": "\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. Supports `date_component: \"auto\"`.\n\n> AEENDTC <= AESTDTC\n\n```yaml\n- name: \"AEENDTC\"\n operator: \"date_less_than_or_equal_to\"\n value: \"AESTDTC\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -135,10 +108,7 @@ "markdownDescription": "\nComplement of `date_equal_to`\n\nDate comparison. Compare `name` to `value`. Compares partial dates if `date_component` is specified. Supports `date_component: \"auto\"`.\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -148,10 +118,7 @@ "markdownDescription": "\nComplement of `contains`. 
Returns True when the value is NOT contained within the target collection.\n\n```yaml\n- name: \"--TOXGR\"\n operator: \"does_not_contain\"\n value: \"GRADE\" # True if 'GRADE' is NOT an element in the list\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -161,10 +128,7 @@ "markdownDescription": "\nComplement of `contains_case_insensitive`. Returns True when the value is NOT contained within the target collection (case-insensitive).\n\nExample:\n\n```yaml\n- name: \"--TOXGR\"\n operator: \"does_not_contain_case_insensitive\"\n value: \"grade\" # True if no case variation of 'grade' exists in the list\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -174,11 +138,7 @@ "markdownDescription": "\nComplement of `equals_string_part`\n" } }, - "required": [ - "operator", - "value", - "regex" - ], + "required": ["operator", "value", "regex"], "type": "object" }, { @@ -188,12 +148,7 @@ "markdownDescription": "\nComplement of `has_next_corresponding_record`\n" } }, - "required": [ - "operator", - "ordering", - "value", - "within" - ], + "required": ["operator", "ordering", "value", "within"], "type": "object" }, { @@ -203,9 +158,7 @@ "markdownDescription": "\nValue presence\n\n> --OCCUR = null\n\n```yaml\n- name: --OCCUR\n operator: empty\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -215,10 +168,7 @@ "markdownDescription": "\n> SEENDTC is not empty when it is not the last record, grouped by USUBJID, sorted by SESTDTC\n\n```yaml\n- name: SEENDTC\n operator: empty_within_except_last_row\n ordering: SESTDTC\n value: USUBJID\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -228,10 +178,7 @@ "markdownDescription": "\nSubstring matching\n\n> DOMAIN ending with 'FOOBAR'\n\n```yaml\n- name: \"DOMAIN\"\n operator: 
\"ends_with\"\n value: \"FOOBAR\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -250,10 +197,7 @@ "type": "boolean" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -272,10 +216,7 @@ "type": "boolean" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -285,11 +226,7 @@ "markdownDescription": "\nChecks that the values in the target column equal the result of parsing the value in the comparison column with a regex\n\n> RDOMAIN equals characters 5 and 6 of SUPP dataset name\n\n```yaml\n- name: RDOMAIN\n operator: equals_string_part\n value: dataset_name\n regex: \".{4}(..).*\"\n```\n" } }, - "required": [ - "operator", - "value", - "regex" - ], + "required": ["operator", "value", "regex"], "type": "object" }, { @@ -299,9 +236,7 @@ "markdownDescription": "\nTrue if the column exists in the current dataframe. 
(Works for datasets and variables)\n\n> --OCCUR is present in dataset\n\n```yaml\n- name: \"--OCCUR\"\n operator: \"exists\"\n```\n\n> Domain SJ exists\n\n```yaml\nRule Type: Domain Presence Check\nCheck:\n all:\n - name: \"SJ\"\n operator: \"exists\"\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -311,10 +246,7 @@ "markdownDescription": "\nValue comparison\n\n> TSVAL > 0\n\n```yaml\n- name: TSVAL\n operator: greater_than\n value: 0\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -324,10 +256,7 @@ "markdownDescription": "\nValue comparison\n\n> TSVAL >= 0\n\n```yaml\n- name: TSVAL\n operator: greater_than_or_equal_to\n value: 1\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -337,9 +266,7 @@ "markdownDescription": "\nComplement of `has_same_values`\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -349,9 +276,7 @@ "markdownDescription": "\nLength comparison\n\n> Check whether variable values has equal length of another variable.\n\n```yaml\n- name: SEENDTC\n operator: has_equal_length\n value: SESTDTC\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -361,12 +286,7 @@ "markdownDescription": "\nEnsures that a value of a variable `name` in one record is equal to the value of another variable `value` in the next corresponding record. The rows are grouped by `within` and ordered by `ordering`.\n\n> SEENDTC is equal to the SESTDTC of the next record within a USUBJID. 
Ordered by SESEQ\n\n```yaml\n- name: SEENDTC\n operator: has_next_corresponding_record\n value: SESTDTC\n within: USUBJID\n ordering: SESEQ\n```\n" } }, - "required": [ - "operator", - "ordering", - "value", - "within" - ], + "required": ["operator", "ordering", "value", "within"], "type": "object" }, { @@ -376,9 +296,7 @@ "markdownDescription": "\nComplement of `has_equal_length`\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -388,9 +306,7 @@ "markdownDescription": "\nTrue if all values in `name` are the same\n\n> Condition: MHCAT ^= null\n> Rule: MHCAT ^= the same value for all records\n\n```yaml\nCheck:\n all:\n - name: MHCAT\n operator: non_empty\n - name: MHCAT\n operator: has_same_values\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -400,10 +316,7 @@ "markdownDescription": "\nDuration ISO-8601 check, returns True if a duration is not in ISO-8601 format. The negative parameter must be specified to indicate if negative durations are either allowed (True) or disallowed (False)\n\n> DURVAR is invalid (negative durations disallowed)\n\n```yaml\n- name: \"DURVAR\"\n operator: \"invalid_duration\"\n negative: False\n```\n" } }, - "required": [ - "operator", - "negative" - ], + "required": ["operator", "negative"], "type": "object" }, { @@ -413,9 +326,7 @@ "markdownDescription": "\nThe operator performs date validation against complete and partial dates with uncertainty in the following order:\n\n1. Attempts to parse using [dateutil.parser.isoparse()](https://dateutil.readthedocs.io/en/stable/parser.html)\n2. If parsing fails and the string contains uncertainty indicators (`/`, `--`, `-:`), validates against an extended ISO 8601 dates regex pattern\n3. 
If parsing succeeds, dates are still validated against the regex pattern.\n\n```yaml\n- name: \"BRTHDTC\"\n operator: \"invalid_date\"\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -425,9 +336,7 @@ "markdownDescription": "\nDate check\n\n> DM.RFSTDTC = complete date\n\n```yaml\n- name: \"RFSTDTC\"\n operator: \"is_complete_date\"\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -437,10 +346,7 @@ "markdownDescription": "\nValue in `name` compared against a list in `value`. The list can have literal values or be a reference to a `$variable`.\n\nThis operator behaves similarly to `contains`. The key distinction: `contains` checks if comparator \u2208 target, while `is_contained_by` checks if target \u2208 comparator.\n\n> ACTARM in ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment')\n\n```yaml\n- name: \"ACTARM\"\n operator: \"is_contained_by\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n - \"Not Treated\"\n - \"Unplanned Treatment\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -450,10 +356,7 @@ "markdownDescription": "\nValue in `name` case insensitive compared against a list in `value`. 
The list can have literal values or be a reference to a `$variable`.\n\n> ACTARM in ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment')\n\n```yaml\n- name: \"ACTARM\"\n operator: \"is_contained_by_case_insensitive\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n - \"Not Treated\"\n - \"Unplanned Treatment\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -463,9 +366,7 @@ "markdownDescription": "\nComplement of `is_complete_date`\n\nDate check\n\n> DM.RFSTDTC ^= complete date\n\n```yaml\n- name: \"RFSTDTC\"\n operator: \"is_incomplete_date\"\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -475,10 +376,7 @@ "markdownDescription": "\nComplement of `is_contained_by`\n\n> ARM not in ('Screen Failure', 'Not Assigned')\n\n```yaml\n- name: \"ARM\"\n operator: \"is_not_contained_by\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -488,10 +386,7 @@ "markdownDescription": "\nComplement of `is_contained_by_case_insensitive`\n\n> ARM not in ('Screen Failure', 'Not Assigned')\n\n```yaml\n- name: \"ARM\"\n operator: \"is_not_contained_by_case_insensitive\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -501,10 +396,7 @@ "markdownDescription": "\nComplement of `is_ordered_by`\n" } }, - "required": [ - "operator", - "order" - ], + "required": ["operator", "order"], "type": "object" }, { @@ -513,10 +405,7 @@ "const": "is_not_ordered_set" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -526,10 +415,7 @@ "markdownDescription": "\nComplement of `is_unique_relationship`\n" } }, - "required": [ - "operator", - "value" - ], + 
"required": ["operator", "value"], "type": "object" }, { @@ -539,9 +425,7 @@ "markdownDescription": "\nComplement of `is_unique_set`.\n\n> --SEQ is not unique within DOMAIN, USUBJID, and --TESTCD\n\n```yaml\n- name: \"--SEQ\"\n operator: is_not_unique_set\n value:\n - \"DOMAIN\"\n - \"USUBJID\"\n - \"--TESTCD\"\n```\n\n> `name` can be a variable containing a list of columns and `value` does not need to be present\n\n```yaml\nRule Type: Dataset Contents Check against Define XML\nCheck:\n all:\n - name: define_dataset_key_sequence # contains list of dataset key columns\n operator: is_not_unique_set\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -551,10 +435,7 @@ "markdownDescription": "\nTrue if the dataset rows are ordered by the values within `name`, given the ordering specified by `order`\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n operator: is_ordered_by\n order: asc\n```\n" } }, - "required": [ - "operator", - "order" - ], + "required": ["operator", "order"], "type": "object" }, { @@ -564,10 +445,7 @@ "markdownDescription": "\nTrue if the dataset rows are in ascending order of the values within `name`, grouped by the values within `value`. Value can either be a single column or multiple.\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n operator: is_ordered_set\n value: USUBJID\n```\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n operator: is_ordered_set\n value:\n - USUBJID\n - \"--TESTCD\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -577,10 +455,7 @@ "markdownDescription": "\nRelationship Integrity Check looking for a 1-1 relationship between name and value. 
Ensures uniqueness of both name and value.\n\n> AETERM and AEDECOD has a 1-to-1 relationship\n\n```yaml\n- name: AETERM\n operator: is_unique_relationship\n value: AEDECOD\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -590,10 +465,7 @@ "markdownDescription": "\nChecks if a variable maintains consistent values within groups defined by one or more grouping variables. Groups records by specified value(s) and validates that the target variable maintains the same value within each unique combination of grouping variables. When inconsistency is detected within a group, the operator attempts to identify a majority value. If one value appears more frequently than all others, only the minority records (those not matching the majority value) are flagged. If no single majority exists \u2014 i.e., two or more values are tied for the highest frequency \u2014 all records in that group are flagged.\n\nSingle grouping variable - true if the values of BGSTRESU differ within USUBJID:\n\nIf a regex parameter is provided, it is applied to the values of the target variable before the consistency check. The first capture group of the regex is used as the normalized value for comparison. 
This can be useful when only part of the value should be considered during comparison (for example, comparing only the date portion of a datetime value).\n\n- regex is optional.\n- The pattern must include at least one capture group(or whole regex will be wrapped to capture group).\n- Only the first capture group is used for comparison.\n- If the pattern does not match a value, the original value is used.\n\n```yaml\n- name: \"BGSTRESU\"\n operator: is_inconsistent_across_dataset\n value: \"USUBJID\"\n```\n\nMultiple grouping variables - true if the values of --STRESU differ within each combination of --TESTCD, --CAT, --SCAT, --SPEC, and --METHOD:\n\n```yaml\n- name: \"--STRESU\"\n operator: is_inconsistent_across_dataset\n value:\n - \"--TESTCD\"\n - \"--CAT\"\n - \"--SCAT\"\n - \"--SPEC\"\n - \"--METHOD\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -603,9 +475,7 @@ "markdownDescription": "\nRelationship Integrity Check\n\n> --SEQ is unique within DOMAIN, USUBJID, and --TESTCD\n\n```yaml\n- name: \"--SEQ\"\n operator: is_unique_set\n value:\n - \"DOMAIN\"\n - \"USUBJID\"\n - \"--TESTCD\"\n```\n\n> `name` can be a variable containing a list of columns and `value` does not need to be present\n\n> The `regex` parameter allows you to extract portions of values using a regex pattern before checking uniqueness.\n\n> Compare date only (YYYY-MM-DD) for uniqueness\n\n```yaml\n- name: \"--REPNUM\"\n operator: is_not_unique_set\n value:\n - \"USUBJID\"\n - \"--TESTCD\"\n - \"$TIMING_VARIABLES\"\n regex: '^\\d{4}-\\d{2}-\\d{2}'\n```\n\n> Compare by first N characters of a string\n\n```yaml\n- name: \"ITEM_ID\"\n operator: is_not_unique_set\n value:\n - \"USUBJID\"\n - \"CATEGORY\"\n regex: \"^.{2}\"\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -615,10 +485,7 @@ "markdownDescription": "\nValue comparison\n\n> TSVAL < 1\n\n```yaml\n- name: TSVAL\n 
operator: less_than\n value: 1\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -628,10 +495,7 @@ "markdownDescription": "\nValue comparison\n\n> TSVAL <= 1\n\n```yaml\n- name: TSVAL\n operator: less_than_or_equal_to\n value: 1\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -641,10 +505,7 @@ "markdownDescription": "\nLength comparison\n\n> SETCD value length > 8\n\n```yaml\n- name: \"SETCD\"\n operator: \"longer_than\"\n value: 8\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -654,10 +515,7 @@ "markdownDescription": "\nLength comparison\n\n> TSVAL value length >= 201\n\n```yaml\n- name: \"TSVAL\"\n operator: \"longer_than_or_equal_to\"\n value: 201\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -667,10 +525,7 @@ "markdownDescription": "\nRegular Expression value matching\n\n- Determine if each string starts with a match of a regular expression. Refer to this pandas documentation: https://pandas.pydata.org/docs/reference/api/pandas.Series.str.match.html\n- To \"search\" for a regex within the entire text, prefix the regex with `.*` and do not use anchors `^` , `$`\n- To do a \"fullmatch\" of a regex with the entire text, suffix the regex with an anchor `$` and do not prefix the regex with `.*`\n- For syntax guide, refer to this Python documentation: [Regular Expression HOWTO](https://docs.python.org/3/howto/regex.html).\n- Suggestion for an on-line regular expression logic. 
tester: https://regex101.com, choose the Python dialect.\n- For regex token visualization, try https://www.debuggex.com.\n\n> --DOSTXT value is non-numeric\n\n```yaml\n- name: --DOSTXT\n operator: matches_regex\n value: ^\\d*\\.?\\d*$\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -680,9 +535,7 @@ "markdownDescription": "\nComplement of `empty`\n\n> --OCCUR ^= null\n\n```yaml\n- name: --OCCUR\n operator: non_empty\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -692,10 +545,7 @@ "markdownDescription": "\nComplement of `empty_within_except_last_row`\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -705,9 +555,7 @@ "markdownDescription": "\nComplement of `contains_all`\n\n> All of ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment') not in ACTARM\n\n```yaml\n- name: \"ACTARM\"\n operator: \"not_contains_all\"\n value:\n - \"Screen Failure\"\n - \"Not Assigned\"\n - \"Not Treated\"\n - \"Unplanned Treatment\"\n```\n\nThe operator also supports lists:\n\n```yaml\n- name: \"$spec_codelist\"\n operator: \"not_contains_all\"\n value: \"$ppspec_value\"\n```\n\nWhere:\n\n| $spec_codelist | $ppspec_value |\n| :-------------------------- | :----------------: |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE1\", \"CODE2\"] |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE2\", \"CODE3\"] |\n| [\"CODE1\", \"CODE2\", \"CODE3\"] | [\"CODE1\"] |\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -726,10 +574,7 @@ "type": "boolean" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -748,10 +593,7 @@ "type": "boolean" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -761,9 +603,7 @@ "markdownDescription": 
"\nComplement of `exists`\n\n> AEOCCUR not present in dataset\n\n```yaml\n- name: \"AEOCCUR\"\n operator: \"not_exists\"\n```\n\n> Domain SJ does not exist\n\n```yaml\nRule Type: Domain Presence Check\nCheck:\n all:\n - name: \"SJ\"\n operator: \"not_exists\"\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -773,10 +613,7 @@ "markdownDescription": "\nComplement of `matches_regex`\n\n> --TESTCD <= 8 chars and contains only letters, numbers, and underscores and can not start with a number\n\n```yaml\n- name: --TESTCD\n operator: not_matches_regex\n value: ^[A-Z_][A-Z0-9_]{0,7}$\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -786,11 +623,7 @@ "markdownDescription": "\nComplement of `prefix_matches_regex`\n" } }, - "required": [ - "operator", - "prefix", - "value" - ], + "required": ["operator", "prefix", "value"], "type": "object" }, { @@ -800,10 +633,7 @@ "markdownDescription": "\nComplement of `present_on_multiple_rows_within`\n\n```yaml\n- operator: \"not_present_on_multiple_rows_within\"\n name: \"RELID\"\n value: 4 (optional)\n within: \"USUBJID\"\n```\n" } }, - "required": [ - "operator", - "within" - ], + "required": ["operator", "within"], "type": "object" }, { @@ -813,11 +643,7 @@ "markdownDescription": "\nComplement of `suffix_matches_regex`\n\n> QNAM does not end with numbers\n\n```yaml\n- name: \"QNAM\"\n operator: \"not_suffix_matches_regex\"\n suffix: 2\n value: \"\\d\\d\"\n```\n" } }, - "required": [ - "operator", - "suffix", - "value" - ], + "required": ["operator", "suffix", "value"], "type": "object" }, { @@ -827,11 +653,7 @@ "markdownDescription": "\nTrue if the `prefix` number of characters beginning a string in `name` match one of the strings in the list in `value`\n\n> Check if a variable's domain identifier exists in the study\n\n```yaml\n- name: variable_name\n operator: prefix_is_contained_by\n prefix: 2\n value: 
$study_domains\n```\n" } }, - "required": [ - "operator", - "prefix", - "value" - ], + "required": ["operator", "prefix", "value"], "type": "object" }, { @@ -841,11 +663,7 @@ "markdownDescription": "\nTrue if the `prefix` number of characters beginning a string in `name` match the string in `value`\n\n```yaml\n- name: dataset_name\n operator: prefix_equal_to\n prefix: 2\n value: DOMAIN\n```\n" } }, - "required": [ - "operator", - "prefix", - "value" - ], + "required": ["operator", "prefix", "value"], "type": "object" }, { @@ -855,11 +673,7 @@ "markdownDescription": "\nComplement of `prefix_is_contained_by`\n" } }, - "required": [ - "operator", - "prefix", - "value" - ], + "required": ["operator", "prefix", "value"], "type": "object" }, { @@ -869,11 +683,7 @@ "markdownDescription": "\nTrue if the `prefix` number of characters beginning a string in `name` match a regular expression in `value`\n\n```yaml\n- name: DOMAIN\n operator: prefix_matches_regex\n prefix: 2\n value: (AP|ap)\n```\n" } }, - "required": [ - "operator", - "prefix", - "value" - ], + "required": ["operator", "prefix", "value"], "type": "object" }, { @@ -883,11 +693,7 @@ "markdownDescription": "\nComplement of `prefix_equal_to`\n" } }, - "required": [ - "operator", - "prefix", - "value" - ], + "required": ["operator", "prefix", "value"], "type": "object" }, { @@ -897,10 +703,7 @@ "markdownDescription": "\nTrue if the same value of `name` is present on multiple rows, grouped by `within`. A maximum allowed number of occurrences can be specified in the value attribute. In this instance the value: 4 means that an error will be flagged if the same value appears more than 4 times within a USUBJID. 
By default the operator will flag any time a value appears more than once.\n\n```yaml\n- operator: \"present_on_multiple_rows_within\"\n name: \"RELID\"\n value: 4 (optional)\n within: \"USUBJID\"\n```\n" } }, - "required": [ - "operator", - "within" - ], + "required": ["operator", "within"], "type": "object" }, { @@ -910,10 +713,7 @@ "markdownDescription": "\nWill raise an issue if at least one of the values in `name` is the same as one of the values in `value`. See [shares_no_elements_with](#shares_no_elements_with).\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -923,10 +723,7 @@ "markdownDescription": "\nWill raise an issue if exactly one of the values in `name` is the same as one of the values in `value`. See [shares_no_elements_with](#shares_no_elements_with).\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -936,10 +733,7 @@ "markdownDescription": "\nWill raise an issue if the values in `name` do not share any of the values in `value`\n\n> Check if $dataset_variables shares no elements with $timing_variables\n\n```yaml\nRule Type: Dataset Metadata Check # One record per dataset\nCheck:\n - all:\n name: $dataset_variables\n operator: shares_no_elements_with\n value: $timing_variables\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -949,10 +743,7 @@ "markdownDescription": "\nLength comparison\n\n> SETCD value length < 9\n\n```yaml\n- name: \"SETCD\"\n operator: \"shorter_than\"\n value: 9\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -962,10 +753,7 @@ "markdownDescription": "\nLength comparison\n\n> TSVAL value length <= 200\n\n```yaml\n- name: \"TSVAL\"\n operator: \"shorter_than_or_equal_to\"\n value: 201\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": 
["operator", "value"], "type": "object" }, { @@ -975,9 +763,7 @@ "markdownDescription": "\nSplits a string by a separator and checks if both parts have equal length. Generic operator for validating paired data formats where both parts must have the same level of detail or precision.\n\nParameters:\n\n- `separator`: The delimiter to split on (default: \"/\")\n\n> Check that string parts separated by a delimiter have equal length\n\n```yaml\n- name: --DTC\n operator: split_parts_have_equal_length\n separator: \"/\"\n```\n\nUse cases:\n\n- **Date/time intervals**: `2003-12-15T10:00/2003-12-15T10:30` \u2192 True (both 16 characters)\n- **Date ranges**: `2003-12-01/2003-12-10` \u2192 True (both 10 characters)\n- **Version ranges**: `1.2.3/2.0.0` \u2192 True (both 5 characters)\n- **Product codes**: `ABC-123/XYZ-789` \u2192 True (both 7 characters)\n\nInvalid example:\n\n- `2003-12-15T10:00/2003-12-15T10:30:15` \u2192 False (16 vs 19 characters - different precision)\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -987,9 +773,7 @@ "markdownDescription": "\nComplement of `split_parts_have_equal_length`. 
Returns True when parts have unequal lengths (indicates a violation).\n\n```yaml\n- name: --DTC\n operator: split_parts_have_unequal_length\n separator: \"/\"\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -999,10 +783,7 @@ "markdownDescription": "\nSubstring matching\n\n> DOMAIN beginning with 'AP'\n\n```yaml\n- name: \"DOMAIN\"\n operator: \"starts_with\"\n value: \"AP\"\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -1012,11 +793,7 @@ "markdownDescription": "\nTrue if the `suffix` number of characters ending a string in `name` match the string in `value`\n\n```yaml\n- name: dataset_name\n operator: suffix_equal_to\n prefix: 2\n value: DOMAIN\n```\n" } }, - "required": [ - "operator", - "suffix", - "value" - ], + "required": ["operator", "suffix", "value"], "type": "object" }, { @@ -1026,11 +803,7 @@ "markdownDescription": "\nTrue if the `suffix` number of characters ending a string in `name` match one of the strings in the list in `value`\n\n> Check if a supp's parent domain exists in the study\n\n```yaml\n- name: dataset_name\n operator: suffix_is_contained_by\n prefix: 2\n value: $study_domains\n```\n" } }, - "required": [ - "operator", - "suffix", - "value" - ], + "required": ["operator", "suffix", "value"], "type": "object" }, { @@ -1040,11 +813,7 @@ "markdownDescription": "\nComplement of `suffix_is_contained_by`\n" } }, - "required": [ - "operator", - "suffix", - "value" - ], + "required": ["operator", "suffix", "value"], "type": "object" }, { @@ -1054,11 +823,7 @@ "markdownDescription": "\nTrue if the `suffix` number of characters ending a string in `name` match a regular expression in `value`\n\n> QNAM ends with numbers\n\n```yaml\n- name: \"QNAM\"\n operator: \"suffix_matches_regex\"\n suffix: 2\n value: \"\\d\\d\"\n```\n" } }, - "required": [ - "operator", - "suffix", - "value" - ], + "required": ["operator", "suffix", "value"], 
"type": "object" }, { @@ -1068,11 +833,7 @@ "markdownDescription": "\nComplement of `suffix_equal_to`\n" } }, - "required": [ - "operator", - "suffix", - "value" - ], + "required": ["operator", "suffix", "value"], "type": "object" }, { @@ -1082,11 +843,7 @@ "markdownDescription": "\nComplement of `target_is_sorted_by`\n" } }, - "required": [ - "operator", - "value", - "within" - ], + "required": ["operator", "value", "within"], "type": "object" }, { @@ -1096,11 +853,7 @@ "markdownDescription": "\nTrue if the values in name are ordered according to the values specified by value\nin ascending/descending order, grouped by the values in within. Each value entry\nrequires a variable name, a sort_order of asc or desc, and an optional\nnull_position of first or last (defaults to last) which controls where null/empty\ncomparator values are placed in the expected ordering. Within accepts either a\nsingle column or an ordered list of columns. Columns can be either number or Char\nDates in ISO8601 YYYY-MM-DD format. Date value(s) with different precisions that\noverlap (e.g. 2005-10, 2005-10-3 and 2005-10-08) are all flagged as not sorted as\ntheir order cannot be inferred.\n\nOptionally supports a `regex` parameter that extracts a portion of the target\nvalue for sorting. The regex must contain at least one capturing group. The first\ncaptured group is extracted and converted to numeric if possible, allowing proper\nsorting of sequence numbers (e.g., \"MIDS1\", \"MIDS2\", ..., \"MIDS10\" with regex\n`.*?(\\\\d+)$`). 
This is particularly useful for variables that end with sequence\nnumbers that may or may not be zero-padded.\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n within:\n - USUBJID\n - MIDSTYPE\n operator: target_is_sorted_by\n value:\n - name: --STDTC\n sort_order: asc\n null_position: last\n```\n\nExample with regex for extracting sequence numbers:\n\n```yaml\nCheck:\n all:\n - name: MIDS\n operator: target_is_sorted_by\n regex: \".*?(\\\\d+)$\" # Extract trailing digits, convert to numeric\n value:\n - name: SMSTDTC\n sort_order: asc\n within:\n - USUBJID\n - MIDSTYPE\n```\n" } }, - "required": [ - "operator", - "value", - "within" - ], + "required": ["operator", "value", "within"], "type": "object" }, { @@ -1110,10 +863,7 @@ "markdownDescription": "\nComplement of `value_has_multiple_references`\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -1123,10 +873,7 @@ "markdownDescription": "\nTrue if the value in `name` has more than one count in the dictionary defined in `value`\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -1136,10 +883,7 @@ "markdownDescription": "\nChecks for inconsistencies in enumerated columns of a DataFrame. Starting with the smallest/largest enumeration of the given variable, returns True if VARIABLE(N+1) is populated but VARIABLE(N) is not populated. Repeats for all variables belonging to the enumeration. 
Note that the initial variable will not have an index (VARIABLE) and the next enumerated variable has index 1 (VARIABLE1).\n\nex: Check if there are inconsistencies in the TSVAL columns (TSVAL, TSVAL1, TSVAL2, etc.)\n\n```yaml\nCheck:\n all:\n - name: \"TSVAL\"\n operator: \"inconsistent_enumerated_columns\"\n```\n" } }, - "required": [ - "operator", - "name" - ], + "required": ["operator", "name"], "type": "object" }, { @@ -1149,10 +893,7 @@ "markdownDescription": "\nChecks if elements in the target list appear in the same relative order in the comparator list.\n\n> Check if dataset column order is a correctly ordered subset of library column order\n\n```yaml\n- name: $column_order_from_dataset\n operator: is_ordered_subset_of\n value: $column_order_from_library\n```\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -1162,10 +903,7 @@ "markdownDescription": "\nComplement of `is_ordered_subset_of`\n" } }, - "required": [ - "operator", - "value" - ], + "required": ["operator", "value"], "type": "object" }, { @@ -1175,9 +913,7 @@ "markdownDescription": "\nValidates that variable labels follow proper title case formatting rules using the titlecase PyPi library. Title case capitalizes the first word and all major words, while keeping articles (a, an, the), conjunctions (and, but, or), and prepositions (in, of, for) in lowercase unless they are the first word. \nNOTE: The titlecase library may produce false positives or false negatives in syntactic edge cases (e.g. hyphenated words, slash-separated terms, uncommon prepositions).\n\n> Check that AELABEL values are in proper title case\n\n```yaml\n- name: AELABEL\n operator: is_title_case\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" }, { @@ -1187,18 +923,13 @@ "markdownDescription": "\nComplement of `is_title_case`. 
Returns True when values are NOT in proper title case.\n\n> Flag AELABEL values that violate title case rules\n\n```yaml\n- name: AELABEL\n operator: is_not_title_case\n```\n" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" } ], "properties": { "comparator": { - "type": [ - "number", - "string" - ] + "type": ["number", "string"] }, "context": { "type": "string" @@ -1236,27 +967,18 @@ "type": "boolean" }, "codelistcheck": { - "enum": [ - "code", - "value" - ], + "enum": ["code", "value"], "type": "string" }, "codelistlevel": { - "enum": [ - "term", - "codelist" - ], + "enum": ["term", "codelist"], "type": "string" }, "operator": { "type": "string" }, "order": { - "enum": [ - "asc", - "dsc" - ], + "enum": ["asc", "dsc"], "type": "string" }, "ordering": { @@ -1274,25 +996,17 @@ "value": { "oneOf": [ { - "type": [ - "boolean", - "number", - "string" - ] + "type": ["boolean", "number", "string"] }, { "items": { - "type": [ - "number" - ] + "type": ["number"] }, "type": "array" }, { "items": { - "type": [ - "string" - ] + "type": ["string"] }, "type": "array" }, @@ -1303,10 +1017,7 @@ "$ref": "Operator.json#/properties/name" }, "null_position": { - "enum": [ - "first", - "last" - ], + "enum": ["first", "last"], "type": "string" }, "order": { @@ -1349,8 +1060,6 @@ "type": "string" } }, - "required": [ - "operator" - ], + "required": ["operator"], "type": "object" } diff --git a/resources/schema/rule-merged/Organization_CDISC.json b/resources/schema/rule-merged/Organization_CDISC.json index 9aaef8a76..db1041921 100644 --- a/resources/schema/rule-merged/Organization_CDISC.json +++ b/resources/schema/rule-merged/Organization_CDISC.json @@ -22,9 +22,7 @@ "const": "Failure" } }, - "required": [ - "Type" - ], + "required": ["Type"], "type": "object" }, "Origin": { @@ -40,9 +38,7 @@ "type": "object" }, "Version": { - "enum": [ - "5.0" - ] + "enum": ["5.0"] } }, "type": "object" @@ -51,12 +47,7 @@ "type": "array" }, "Version": { - "enum": [ - 
"1.0", - "1.1", - "1.2", - "1.3" - ] + "enum": ["1.0", "1.1", "1.2", "1.3"] } }, "type": "object" @@ -75,9 +66,7 @@ "const": "Success" } }, - "required": [ - "Type" - ], + "required": ["Type"], "type": "object" }, "Origin": { @@ -90,19 +79,13 @@ "type": "string" }, "Version": { - "enum": [ - "1", - "2", - "3" - ] + "enum": ["1", "2", "3"] } }, "type": "object" }, "Version": { - "enum": [ - "2.0" - ] + "enum": ["2.0"] } }, "type": "object" @@ -111,11 +94,7 @@ "type": "array" }, "Version": { - "enum": [ - "3.2", - "3.3", - "3.4" - ] + "enum": ["3.2", "3.3", "3.4"] } }, "type": "object" @@ -134,9 +113,7 @@ "const": "Success" } }, - "required": [ - "Type" - ], + "required": ["Type"], "type": "object" }, "Origin": { @@ -152,9 +129,7 @@ "type": "object" }, "Version": { - "enum": [ - "5.0" - ] + "enum": ["5.0"] } }, "type": "object" @@ -163,11 +138,7 @@ "type": "array" }, "Version": { - "enum": [ - "3.0", - "3.1", - "3.1.1" - ] + "enum": ["3.0", "3.1", "3.1.1"] } }, "type": "object" @@ -186,9 +157,7 @@ "const": "Success" } }, - "required": [ - "Type" - ], + "required": ["Type"], "type": "object" }, "Origin": { @@ -204,9 +173,7 @@ "type": "object" }, "Version": { - "enum": [ - "5.0" - ] + "enum": ["5.0"] } }, "type": "object" @@ -215,10 +182,7 @@ "type": "array" }, "Version": { - "enum": [ - "1.1", - "1.2" - ] + "enum": ["1.1", "1.2"] } }, "type": "object" @@ -237,9 +201,7 @@ "const": "Success" } }, - "required": [ - "Type" - ], + "required": ["Type"], "type": "object" }, "Origin": { @@ -255,9 +217,7 @@ "type": "object" }, "Version": { - "enum": [ - "5.0" - ] + "enum": ["5.0"] } }, "type": "object" @@ -266,9 +226,7 @@ "type": "array" }, "Version": { - "enum": [ - "1.0" - ] + "enum": ["1.0"] } }, "type": "object" @@ -287,9 +245,7 @@ "const": "Success" } }, - "required": [ - "Type" - ], + "required": ["Type"], "type": "object" }, "Origin": { @@ -305,9 +261,7 @@ "type": "object" }, "Version": { - "enum": [ - "1.0" - ] + "enum": ["1.0"] } }, "type": "object" @@ -316,24 +270,13 
@@ "type": "array" }, "Version": { - "enum": [ - "1.0" - ] + "enum": ["1.0"] }, "Substandard": { - "enum": [ - "SDTM", - "SEND", - "ADaM", - "CDASH" - ] + "enum": ["SDTM", "SEND", "ADaM", "CDASH"] } }, - "required": [ - "Name", - "Version", - "Substandard" - ], + "required": ["Name", "Version", "Substandard"], "type": "object" }, { @@ -354,17 +297,13 @@ "type": "string" }, "Version": { - "enum": [ - "1" - ] + "enum": ["1"] } }, "type": "object" }, "Version": { - "enum": [ - "1.0" - ] + "enum": ["1.0"] } }, "type": "object" @@ -373,10 +312,7 @@ "type": "array" }, "Version": { - "enum": [ - "3.0", - "4.0" - ] + "enum": ["3.0", "4.0"] } }, "type": "object" diff --git a/resources/schema/rule-merged/Organization_Custom.json b/resources/schema/rule-merged/Organization_Custom.json index c5f591394..bf5bd7276 100644 --- a/resources/schema/rule-merged/Organization_Custom.json +++ b/resources/schema/rule-merged/Organization_Custom.json @@ -9,10 +9,7 @@ "type": "string", "description": "Name of your custom organization", "not": { - "enum": [ - "CDISC", - "FDA" - ] + "enum": ["CDISC", "FDA"] } }, "Standards": { @@ -48,9 +45,7 @@ "description": "Version of the rule" } }, - "required": [ - "Id" - ], + "required": ["Id"], "type": "object" }, "Version": { @@ -60,10 +55,7 @@ "Criteria": { "properties": { "Type": { - "enum": [ - "Failure", - "Success" - ], + "enum": ["Failure", "Success"], "type": "string" }, "Plain Language Expression": { @@ -78,46 +70,30 @@ "type": "string" } }, - "required": [ - "Rule" - ], + "required": ["Rule"], "type": "object" } }, - "required": [ - "Type" - ], + "required": ["Type"], "anyOf": [ { - "required": [ - "Logical Expression" - ] + "required": ["Logical Expression"] }, { - "required": [ - "Plain Language Expression" - ] + "required": ["Plain Language Expression"] } ], "type": "object" } }, - "required": [ - "Origin", - "Rule Identifier", - "Version" - ], + "required": ["Origin", "Rule Identifier", "Version"], "type": "object" }, "minItems": 1, 
"type": "array" } }, - "required": [ - "Name", - "References", - "Version" - ], + "required": ["Name", "References", "Version"], "type": "object" }, "minItems": 1, @@ -165,10 +141,7 @@ }, "OutputType": { "type": "string", - "enum": [ - "Check", - "Listing" - ], + "enum": ["Check", "Listing"], "description": "Output type of the rule validation result" }, "Keywords": { @@ -182,11 +155,7 @@ "additionalProperties": true } }, - "required": [ - "Organization", - "Standards", - "Category" - ], + "required": ["Organization", "Standards", "Category"], "type": "object", "$defs": { "metadata": { diff --git a/resources/schema/rule-merged/Organization_FDA.json b/resources/schema/rule-merged/Organization_FDA.json index 94af54bc4..b0f7de783 100644 --- a/resources/schema/rule-merged/Organization_FDA.json +++ b/resources/schema/rule-merged/Organization_FDA.json @@ -41,10 +41,7 @@ } } ], - "required": [ - "Document", - "Cited Guidance" - ], + "required": ["Document", "Cited Guidance"], "type": "object" }, "type": "array" @@ -55,9 +52,7 @@ "const": "Success" } }, - "required": [ - "Type" - ], + "required": ["Type"], "type": "object" }, "Origin": { @@ -73,9 +68,7 @@ "type": "object" }, "Version": { - "enum": [ - "1.5" - ] + "enum": ["1.5"] } }, "type": "object" @@ -91,11 +84,7 @@ "const": "SDTMIG" }, "Version": { - "enum": [ - "3.2", - "3.3", - "3.4" - ] + "enum": ["3.2", "3.3", "3.4"] } }, "type": "object" @@ -106,11 +95,7 @@ "const": "SENDIG" }, "Version": { - "enum": [ - "3.0", - "3.1", - "3.1.1" - ] + "enum": ["3.0", "3.1", "3.1.1"] } }, "type": "object" @@ -121,9 +106,7 @@ "const": "SENDIG-AR" }, "Version": { - "enum": [ - "1.0" - ] + "enum": ["1.0"] } }, "type": "object" @@ -134,10 +117,7 @@ "const": "SENDIG-DART" }, "Version": { - "enum": [ - "1.1", - "1.2" - ] + "enum": ["1.1", "1.2"] } }, "type": "object" @@ -148,9 +128,7 @@ "const": "SENDIG-GENETOX" }, "Version": { - "enum": [ - "1.0" - ] + "enum": ["1.0"] } }, "type": "object" From 579fd86ba15565ccc45bdfcc5f55c1326956f043 
Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Fri, 24 Apr 2026 12:28:03 +0200 Subject: [PATCH 5/7] Fix formatting of regex comment in target_is_sorted_by operator documentation --- resources/schema/rule/Operator.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/schema/rule/Operator.md b/resources/schema/rule/Operator.md index 1f6b1078f..4685cac0f 100644 --- a/resources/schema/rule/Operator.md +++ b/resources/schema/rule/Operator.md @@ -1165,7 +1165,7 @@ Check: all: - name: MIDS operator: target_is_sorted_by - regex: ".*?(\\d+)$" # Extract trailing digits, convert to numeric + regex: ".*?(\\d+)$" # Extract trailing digits, convert to numeric value: - name: SMSTDTC sort_order: asc From 68e9141c473c207165834bdb31f89a3de96fa16c Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 24 Apr 2026 10:31:21 +0000 Subject: [PATCH 6/7] Update merged schema files with markdown descriptions --- resources/schema/rule-merged/Operator.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/schema/rule-merged/Operator.json b/resources/schema/rule-merged/Operator.json index 8d3479ab3..1c1f13a64 100644 --- a/resources/schema/rule-merged/Operator.json +++ b/resources/schema/rule-merged/Operator.json @@ -850,7 +850,7 @@ "properties": { "operator": { "const": "target_is_sorted_by", - "markdownDescription": "\nTrue if the values in name are ordered according to the values specified by value\nin ascending/descending order, grouped by the values in within. Each value entry\nrequires a variable name, a sort_order of asc or desc, and an optional\nnull_position of first or last (defaults to last) which controls where null/empty\ncomparator values are placed in the expected ordering. Within accepts either a\nsingle column or an ordered list of columns. Columns can be either number or Char\nDates in ISO8601 YYYY-MM-DD format. Date value(s) with different precisions that\noverlap (e.g. 
2005-10, 2005-10-3 and 2005-10-08) are all flagged as not sorted as\ntheir order cannot be inferred.\n\nOptionally supports a `regex` parameter that extracts a portion of the target\nvalue for sorting. The regex must contain at least one capturing group. The first\ncaptured group is extracted and converted to numeric if possible, allowing proper\nsorting of sequence numbers (e.g., \"MIDS1\", \"MIDS2\", ..., \"MIDS10\" with regex\n`.*?(\\\\d+)$`). This is particularly useful for variables that end with sequence\nnumbers that may or may not be zero-padded.\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n within:\n - USUBJID\n - MIDSTYPE\n operator: target_is_sorted_by\n value:\n - name: --STDTC\n sort_order: asc\n null_position: last\n```\n\nExample with regex for extracting sequence numbers:\n\n```yaml\nCheck:\n all:\n - name: MIDS\n operator: target_is_sorted_by\n regex: \".*?(\\\\d+)$\" # Extract trailing digits, convert to numeric\n value:\n - name: SMSTDTC\n sort_order: asc\n within:\n - USUBJID\n - MIDSTYPE\n```\n" + "markdownDescription": "\nTrue if the values in name are ordered according to the values specified by value\nin ascending/descending order, grouped by the values in within. Each value entry\nrequires a variable name, a sort_order of asc or desc, and an optional\nnull_position of first or last (defaults to last) which controls where null/empty\ncomparator values are placed in the expected ordering. Within accepts either a\nsingle column or an ordered list of columns. Columns can be either number or Char\nDates in ISO8601 YYYY-MM-DD format. Date value(s) with different precisions that\noverlap (e.g. 2005-10, 2005-10-3 and 2005-10-08) are all flagged as not sorted as\ntheir order cannot be inferred.\n\nOptionally supports a `regex` parameter that extracts a portion of the target\nvalue for sorting. The regex must contain at least one capturing group. 
The first\ncaptured group is extracted and converted to numeric if possible, allowing proper\nsorting of sequence numbers (e.g., \"MIDS1\", \"MIDS2\", ..., \"MIDS10\" with regex\n`.*?(\\\\d+)$`). This is particularly useful for variables that end with sequence\nnumbers that may or may not be zero-padded.\n\n```yaml\nCheck:\n all:\n - name: --SEQ\n within:\n - USUBJID\n - MIDSTYPE\n operator: target_is_sorted_by\n value:\n - name: --STDTC\n sort_order: asc\n null_position: last\n```\n\nExample with regex for extracting sequence numbers:\n\n```yaml\nCheck:\n all:\n - name: MIDS\n operator: target_is_sorted_by\n regex: \".*?(\\\\d+)$\" # Extract trailing digits, convert to numeric\n value:\n - name: SMSTDTC\n sort_order: asc\n within:\n - USUBJID\n - MIDSTYPE\n```\n" } }, "required": ["operator", "value", "within"], From 6715e04d92ac590230f96cc1a67788e051027843 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Tue, 5 May 2026 13:11:47 +0200 Subject: [PATCH 7/7] Enhance target sorting in target_is_sorted_by operator to include extracted target values --- .../check_operators/dataframe_operators.py | 6 +++--- .../test_target_is_sorted_by_regex.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 5105b209e..5625b5a81 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -1857,10 +1857,10 @@ def target_is_sorted_by(self, other_value: dict): working_df[target], target_regex ) target_for_sorting = f"{target}_extracted" - # Sort by within columns only, preserve original order within groups + # Sort by within columns AND extracted target sorted_df = working_df.sort_values( - by=within_columns, - ascending=[True] * len(within_columns), + by=[*within_columns, target_for_sorting], + ascending=[True] * (len(within_columns) + 1), ) else: working_df 
= self.value[selected_columns] diff --git a/tests/unit/test_check_operators/test_target_is_sorted_by_regex.py b/tests/unit/test_check_operators/test_target_is_sorted_by_regex.py index 23b4a3772..0ff2989af 100644 --- a/tests/unit/test_check_operators/test_target_is_sorted_by_regex.py +++ b/tests/unit/test_check_operators/test_target_is_sorted_by_regex.py @@ -117,11 +117,11 @@ def test_target_is_sorted_by_with_regex_invalid_order(dataset_class): } result = DataframeType({"value": df}).target_is_sorted_by(other_value) - # lalala10 and lalala9 are in wrong positions - # Expected by date: 1, 2, 9, 10 - # Actual: 1, 10, 2, 9 - # The function marks records where neighbor consistency fails - assert result.equals(pd.Series([True, False, True, False])) + # After sorting by extracted MIDS (1, 2, 9, 10), dates should be: + # MIDS=1 (2020-01-01) -> MIDS=2 (should be 2020-01-02) -> MIDS=9 (should be 2020-01-09) -> MIDS=10 (should be 2020-01-10) + # Actual dates: 2020-01-01, 2020-01-09, 2020-01-10, 2020-01-02 + # Only MIDS=1 is in correct chronological position + assert result.equals(pd.Series([True, False, False, False])) @pytest.mark.parametrize("dataset_class", [PandasDataset, DaskDataset])