From fd3ac0c43cd22069b3c1df4d7a207f28ef5aeadb Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Tue, 19 May 2026 14:57:44 -0400 Subject: [PATCH 1/3] define --- core.py | 7 +++++++ env.example | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/core.py b/core.py index f6bad6a99..299bd2759 100644 --- a/core.py +++ b/core.py @@ -600,6 +600,13 @@ def validate( # noqa load_dotenv(dotenv_path) validate_dataset_files_exist(dataset_path, logger, ctx) + if define_xml_path and dotenv_path and os.getenv("DEFINE_XML"): + dxp = Path(define_xml_path) + if not dxp.is_absolute() and dxp.parent == Path("."): + resolved = Path(dotenv_path).parent / define_xml_path + if resolved.is_file(): + define_xml_path = str(resolved) + if not custom_standard: standard = standard.lower() supported_standards = StandardTypes.values() diff --git a/env.example b/env.example index 3e87d175c..6787efaef 100644 --- a/env.example +++ b/env.example @@ -2,7 +2,7 @@ CDISC_LIBRARY_API_KEY=your_api_key_here DATASET_SIZE_THRESHOLD=10485760 # max dataset size in bytes to force dask implementation MAX_REPORT_ROWS = 10 # integer for maximum number of issues per excel sheet (plus headers) in result report. Defaults to 10000. MAX_ERRORS_PER_RULE = (10, True) # Tuple for maximum number of errors to report per rule during a validation run. Also has a per dataset flag described as second bool value in readme. example value -DEFINE_XML = define.xml path +DEFINE_XML = define.xml path--can be absolute or the name of the define.xml if it is located in the same directory as the .env CT = controlled terminology package PRODUCT= standard VERSION= version, denoted with a dash i.e. 
3-4 From 076e0212132fa2885731a9430e07c1195c578440 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Tue, 19 May 2026 15:44:09 -0400 Subject: [PATCH 2/3] example --- .env.example | 45 +++++++++++++++++++++++++++++++++++++++++++++ env.example | 10 ---------- 2 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 .env.example delete mode 100644 env.example diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..4eb77b5c7 --- /dev/null +++ b/.env.example @@ -0,0 +1,45 @@ +# ────────────────────────────────────────────── +# CDISC Rules Engine — Environment Configuration +# ────────────────────────────────────────────── +# Copy this file to .env and fill in your values. + +# Required for CORE command line execution (not used by cdisc-open-rules) +CDISC_LIBRARY_API_KEY=your_api_key_here + +# Maximum dataset size in bytes before forcing Dask implementation +DATASET_SIZE_THRESHOLD=10485760 + +# Maximum number of issues per Excel sheet in the result report (default: 10000) +MAX_REPORT_ROWS=10 + +# Maximum errors to report per rule during a validation run. +# Format: (max_errors, per_dataset_flag) +# max_errors — integer cap on reported errors +# per_dataset_flag — if True, the cap applies per dataset; if False, across all datasets +MAX_ERRORS_PER_RULE=(10, True) + +# ────────────────────────────────────────────── +# Study Configuration +# ────────────────────────────────────────────── + +# Path to define.xml — absolute path, or filename if located in the same directory as .env +DEFINE_XML=define.xml + +# Controlled terminology package +# see ./resources/cache for package names +# Format: <standard>ct-YYYY-MM-DD (e.g. sdtmct-2024-03-29) +CT= + +# Standard product (e.g. sdtmig, adamig, sendig) +PRODUCT= + +# Standard version, denoted with a dash (e.g. 
3-4) +VERSION= + +# TIG substandard +# SDTM, SEND, ADaM, or CDASH +SUBSTANDARD= + +# TIG use case +# INDH, PROD, NONCLIN, or ANALYSIS +USE_CASE= \ No newline at end of file diff --git a/env.example b/env.example deleted file mode 100644 index 6787efaef..000000000 --- a/env.example +++ /dev/null @@ -1,10 +0,0 @@ -CDISC_LIBRARY_API_KEY=your_api_key_here -DATASET_SIZE_THRESHOLD=10485760 # max dataset size in bytes to force dask implementation -MAX_REPORT_ROWS = 10 # integer for maximum number of issues per excel sheet (plus headers) in result report. Defaults to 10000. -MAX_ERRORS_PER_RULE = (10, True) # Tuple for maximum number of errors to report per rule during a validation run. Also has a per dataset flag described as second bool value in readme. example value -DEFINE_XML = define.xml path--can be absolute or the name of the define.xml if it is located in the same directory as the .env -CT = controlled terminology package -PRODUCT= standard -VERSION= version, denoted with a dash i.e. 3-4 -SUBSTANDARD= TIG substandard -USE_CASE= TIG use case \ No newline at end of file From a61ad6361256e06ea3d4ed88dde3dd95a8d7c52a Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Tue, 19 May 2026 16:03:35 -0400 Subject: [PATCH 3/3] missed reference --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index febe8e9ae..1322b7445 100644 --- a/README.md +++ b/README.md @@ -385,7 +385,7 @@ core.exe validate -rest -of -config -commands --- -**Note:** Setting `DATASET_SIZE_THRESHOLD=0` tells the engine to use Dask processing for all datasets regardless of size, size threshold defaults to 1/4 of available RAM so datasets larger than this will use Dask. See env.example to see what the CLI .env file should look like +**Note:** Setting `DATASET_SIZE_THRESHOLD=0` tells the engine to use Dask processing for all datasets regardless of size, size threshold defaults to 1/4 of available RAM so datasets larger than this will use Dask. 
See `.env.example` for an example of what the CLI `.env` file should look like. ### Updating the Cache (`update-cache`)