diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..4eb77b5c7 --- /dev/null +++ b/.env.example @@ -0,0 +1,45 @@ +# ────────────────────────────────────────────── +# CDISC Rules Engine — Environment Configuration +# ────────────────────────────────────────────── +# Copy this file to .env and fill in your values. + +# Required for CORE command line execution (not used by cdisc-open-rules) +CDISC_LIBRARY_API_KEY=your_api_key_here + +# Maximum dataset size in bytes before forcing Dask implementation +DATASET_SIZE_THRESHOLD=10485760 + +# Maximum number of issues per Excel sheet in the result report (default: 10000) +MAX_REPORT_ROWS=10 + +# Maximum errors to report per rule during a validation run. +# Format: (max_errors, per_dataset_flag) +# max_errors — integer cap on reported errors +# per_dataset_flag — if True, the cap applies per dataset; if False, across all datasets +MAX_ERRORS_PER_RULE=(10, True) + +# ────────────────────────────────────────────── +# Study Configuration +# ────────────────────────────────────────────── + +# Path to define.xml — absolute path, or filename if located in the same directory as .env +DEFINE_XML=define.xml + +# Controlled terminology package +# See ./resources/cache for package names +# Format: <standard>ct-YYYY-MM-DD (e.g. sdtmct-2024-03-29) +CT= + +# Standard product (e.g. sdtmig, adamig, sendig) +PRODUCT= + +# Standard version, denoted with a dash (e.g. 3-4) +VERSION= + +# TIG substandard +# SDTM, SEND, ADaM, or CDASH +SUBSTANDARD= + +# TIG use case +# INDH, PROD, NONCLIN, or ANALYSIS +USE_CASE= \ No newline at end of file diff --git a/README.md b/README.md index febe8e9ae..1322b7445 100644 --- a/README.md +++ b/README.md @@ -385,7 +385,7 @@ core.exe validate -rest -of -config -commands --- -**Note:** Setting `DATASET_SIZE_THRESHOLD=0` tells the engine to use Dask processing for all datasets regardless of size, size threshold defaults to 1/4 of available RAM so datasets larger than this will use Dask. 
See env.example to see what the CLI .env file should look like +**Note:** Setting `DATASET_SIZE_THRESHOLD=0` tells the engine to use Dask processing for all datasets regardless of size. The size threshold defaults to 1/4 of available RAM, so datasets larger than this will use Dask. See `.env.example` for what the CLI `.env` file should look like. ### Updating the Cache (`update-cache`) diff --git a/core.py b/core.py index f6bad6a99..299bd2759 100644 --- a/core.py +++ b/core.py @@ -600,6 +600,13 @@ def validate( # noqa load_dotenv(dotenv_path) validate_dataset_files_exist(dataset_path, logger, ctx) + if define_xml_path and dotenv_path and os.getenv("DEFINE_XML"): + dxp = Path(define_xml_path) + if not dxp.is_absolute() and dxp.parent == Path("."): + resolved = Path(dotenv_path).parent / define_xml_path + if resolved.is_file(): + define_xml_path = str(resolved) + if not custom_standard: standard = standard.lower() supported_standards = StandardTypes.values() diff --git a/env.example b/env.example deleted file mode 100644 index 3e87d175c..000000000 --- a/env.example +++ /dev/null @@ -1,10 +0,0 @@ -CDISC_LIBRARY_API_KEY=your_api_key_here -DATASET_SIZE_THRESHOLD=10485760 # max dataset size in bytes to force dask implementation -MAX_REPORT_ROWS = 10 # integer for maximum number of issues per excel sheet (plus headers) in result report. Defaults to 10000. -MAX_ERRORS_PER_RULE = (10, True) # Tuple for maximum number of errors to report per rule during a validation run. Also has a per dataset flag described as second bool value in readme. example value -DEFINE_XML = define.xml path -CT = controlled terminology package -PRODUCT= standard -VERSION= version, denoted with a dash i.e. 3-4 -SUBSTANDARD= TIG substandard -USE_CASE= TIG use case \ No newline at end of file