diff --git a/.github/ISSUE_TEMPLATE/1-rule-issue.yml b/.github/ISSUE_TEMPLATE/1-rule-issue.yml new file mode 100644 index 000000000..a5307fc8f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1-rule-issue.yml @@ -0,0 +1,48 @@ +name: New Rule or Existing Rule Bug +description: A request for a new feature or enhancement related to a specific rule +projects: + - cdisc-org/19 +body: + - type: dropdown + id: request_type + attributes: + label: Request Type + options: + - Bug with Existing Rule + - New Rule Issue + validations: + required: true + - type: dropdown + id: standard + attributes: + label: Standard + options: + - SDTMIG + - SENDIG + - ADaM + - TIG + - USDM + - FDA Business Rules + validations: + required: true + - type: input + id: rule_id + attributes: + label: Rule ID + description: "For bugs: provide the CORE ID (e.g. CORE-000123). For new rules: provide the standard rule ID (e.g. CG0001)." + placeholder: CORE-000123 or CG0001 + validations: + required: true + - type: textarea + id: description + attributes: + label: Description + description: Describe the bug or new rule request in detail. + validations: + required: true + - type: textarea + id: test_data + attributes: + label: Test Data + description: "If reporting a bug, attach your test data files here (CSV, Excel, etc.). You can drag and drop multiple files directly into this field." + placeholder: Drag and drop files here, or describe the test data inline. 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/2-other-feature.yml b/.github/ISSUE_TEMPLATE/2-other-feature.yml new file mode 100644 index 000000000..e828b0e22 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-other-feature.yml @@ -0,0 +1,9 @@ +name: Non-rule-related Request +description: A request for a new feature or enhancement not related to a specific rule +projects: + - cdisc-org/19 +type: Feature +body: + - type: textarea + attributes: + label: Feature Description diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..ec4bb386b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false \ No newline at end of file diff --git a/.github/scripts/generate_mappings.py b/.github/scripts/generate_mappings.py index 880d47dc8..0a1fea5d8 100644 --- a/.github/scripts/generate_mappings.py +++ b/.github/scripts/generate_mappings.py @@ -24,11 +24,14 @@ from collections import defaultdict from pathlib import Path -try: - import yaml -except ImportError: - print("PyYAML is required. Install with: pip install pyyaml") - sys.exit(1) + +def get_yaml(): + try: + from ruamel.yaml import YAML + return YAML + except ImportError: + print("ruamel.yaml is required. 
Install with: pip install ruamel.yaml") + sys.exit(1) # --------------------------------------------------------------------------- @@ -129,11 +132,13 @@ def build_standard_rows(rule_files: list[Path]) -> dict[str, list[dict]]: """ accumulator: dict[str, dict[str, dict]] = defaultdict(dict) - + yaml = get_yaml()() + yaml.preserve_quotes = True + yaml.default_flow_style = False for rule_file in rule_files: raw = rule_file.read_text(encoding="utf-8") try: - data = yaml.safe_load(raw) + data = yaml.load(raw) except Exception as exc: print(f" [WARN] Could not parse {rule_file}: {exc}") continue @@ -240,4 +245,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/.github/scripts/publish.py b/.github/scripts/publish.py new file mode 100644 index 000000000..a4e53a0fa --- /dev/null +++ b/.github/scripts/publish.py @@ -0,0 +1,85 @@ +import csv +import re +import sys +from pathlib import Path +import argparse + +CORE_PATTERN = re.compile(r"^CORE-(\d{6})$") +PUBLISHED_DIR = Path("Published") + + +def get_yaml(): + try: + from ruamel.yaml import YAML + return YAML + except ImportError: + print("ruamel.yaml is required. 
def get_next_core_id(mappings_dir: Path, algorithm="max", reserved=()):
    """Return the next free CORE-ID, formatted as ``CORE-NNNNNN``.

    IDs already in use are read from the ``CORE-ID`` column of every
    ``*_mapping.csv`` file in *mappings_dir* and combined with *reserved*.

    Args:
        mappings_dir: Directory holding the ``*_mapping.csv`` files.
        algorithm: ``"min"`` returns the lowest unused positive number
            (fills gaps); any other value returns highest-used + 1.
        reserved: Extra numeric IDs to treat as taken, e.g. IDs handed out
            earlier in the same run that the mapping files do not list yet.

    Returns:
        The new identifier, zero-padded to six digits.
    """
    # A set, not a list: the same CORE-ID can be listed in several mapping
    # files, and duplicates made the old "min" scan stop on an ID in use.
    taken = {int(number) for number in reserved}

    for file in mappings_dir.glob("*_mapping.csv"):
        with open(file, newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                # DictReader fills missing trailing cells with None.
                core = (row.get("CORE-ID") or "").strip()
                match = CORE_PATTERN.match(core)
                if match:
                    taken.add(int(match.group(1)))

    if algorithm == "min":
        next_id = 1
        while next_id in taken:
            next_id += 1
    else:
        next_id = max(taken, default=0) + 1

    return f"CORE-{next_id:06d}"


def update_rule_yaml(core_id: str, rule_path: Path):
    """Write *core_id* and the ``Published`` status into a rule file.

    The file is loaded and re-dumped with ruamel.yaml and overwritten in place.

    Args:
        core_id: Identifier to store under ``Core.Id``.
        rule_path: Path of the rule YAML file to modify.

    Raises:
        ValueError: If the file is empty (contains no YAML document).
    """
    yaml = get_yaml()()
    with open(rule_path, encoding="utf-8") as f:
        doc = yaml.load(f)
    if doc is None:
        raise ValueError(f"{rule_path} is empty; cannot assign {core_id}")
    # Covers both a missing "Core" key and an explicit "Core:" with no value.
    if doc.get("Core") is None:
        doc["Core"] = {}
    doc["Core"]["Id"] = core_id
    doc["Core"]["Status"] = "Published"
    with open(rule_path, "w", encoding="utf-8") as f:
        yaml.dump(doc, f)


def main():
    """Assign CORE-IDs to new rule directories and move them to Published/.

    Each directory named in ``--new-dirs`` that contains a ``rule.yaml`` gets
    the next free CORE-ID, has its rule file updated, and is renamed to
    ``Published/<CORE-ID>``. Directories without a ``rule.yaml`` are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--new-dirs", required=True, help="Space-separated rule directories to publish")
    parser.add_argument(
        "--algorithm", choices=["min", "max"], default="max", help="CORE-ID assignment algorithm"
    )
    args = parser.parse_args()

    mappings_dir = Path("mappings")
    PUBLISHED_DIR.mkdir(exist_ok=True)

    # The mapping CSVs are not rewritten inside this loop, so IDs assigned
    # here must be remembered or every new rule would get the same one.
    # Directories already in Published/ are reserved too, so a stale mapping
    # file cannot lead to a rename onto an existing rule.
    reserved = set()
    for entry in PUBLISHED_DIR.iterdir():
        match = CORE_PATTERN.match(entry.name)
        if match:
            reserved.add(int(match.group(1)))

    for rule_dir in args.new_dirs.split():
        rule_path = Path(rule_dir) / "rule.yaml"
        if not rule_path.exists():
            print(f"[SKIP] No rule.yaml found in {rule_dir}")
            continue

        core_id = get_next_core_id(mappings_dir, args.algorithm, reserved)
        reserved.add(int(core_id.rsplit("-", 1)[1]))

        update_rule_yaml(core_id, rule_path)

        new_path = PUBLISHED_DIR / core_id
        Path(rule_dir).rename(new_path)
        print(f"[OK] {rule_dir} -> {new_path} ({core_id})")


if __name__ == "__main__":
    main()
def load_schema(source: str) -> dict:
    """Load a JSON schema from an http(s) URL or a local file path.

    Args:
        source: URL starting with ``http://`` or ``https://``; anything else
            is treated as a filesystem path to a UTF-8 JSON file.

    Returns:
        The parsed JSON document.
    """
    if source.startswith(("http://", "https://")):
        with urllib.request.urlopen(source, timeout=30) as resp:  # noqa: S310
            return json.loads(resp.read())
    return json.loads(Path(source).read_text(encoding="utf-8"))


def _path_sort_key(err) -> list:
    """Build a sort key from an error's path that never compares int to str.

    List indices keep numeric order, mapping keys are ordered by their string
    form, and indices sort before keys.
    """
    return [
        (0, part, "") if isinstance(part, int) else (1, 0, str(part))
        for part in err.path
    ]


def validate_file(path: Path, validator: "Draft202012Validator") -> list[str]:
    """Validate one YAML file against the schema held by *validator*.

    Args:
        path: YAML file to check.
        validator: Validator whose ``iter_errors`` yields the schema errors.

    Returns:
        Human-readable error strings; an empty list means the file is valid.
        Read failures, YAML parse failures and empty documents are each
        reported as a single error string instead of raising.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        return [f"Could not read file: {exc}"]

    try:
        doc = yaml.safe_load(text)
    except yaml.YAMLError as exc:
        return [f"YAML parse error: {exc}"]

    if doc is None:
        return ["File is empty or contains only comments."]

    # Raw path elements can mix ints and strs (YAML allows non-string keys),
    # which a plain list comparison cannot order.
    errors = sorted(validator.iter_errors(doc), key=_path_sort_key)
    return [f"  [{' > '.join(str(p) for p in err.path) or '/'}] {err.message}" for err in errors]


def github_annotation(level: str, file: str, msg: str) -> str:
    """Produce a GitHub Actions workflow-command annotation line.

    Args:
        level: Annotation level used as the command name, e.g. ``error``.
        file: Path the annotation is attached to.
        msg: Annotation text; may span several lines.

    Returns:
        A string of the form ``::<level> file=<file>::<msg>`` with both
        *file* and *msg* escaped.
    """
    def _escape_data(text: str) -> str:
        # "%" is replaced first so the escapes added afterwards survive.
        return text.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")

    # Property values also need ":" and "," escaped, because those
    # characters delimit the property list of a workflow command.
    file = _escape_data(file).replace(":", "%3A").replace(",", "%2C")
    return f"::{level} file={file}::{_escape_data(msg)}"


def main() -> int:
    """Validate every rule file named on the command line.

    ``sys.argv[1]`` is the schema URL or path; the remaining arguments are
    rule YAML files. Files that do not exist are reported with a warning and
    left out of the totals. A markdown report is written to
    ``schema_validation_report.md`` in the current directory.

    Returns:
        0 when every checked file is valid; 1 on a usage error, a schema
        load failure, or at least one invalid file.
    """
    if len(sys.argv) < 3:
        print(f"Usage: {sys.argv[0]} SCHEMA_URL_OR_PATH RULE_FILE [RULE_FILE ...]")
        return 1

    schema_source = sys.argv[1]
    rule_files = [Path(p) for p in sys.argv[2:]]

    print(f"Loading schema from: {schema_source}")
    try:
        schema = load_schema(schema_source)
    except Exception as exc:
        print(f"ERROR: Failed to load schema — {exc}")
        return 1

    validator = Draft202012Validator(schema)

    total = 0
    failed = 0
    report_lines: list[str] = []

    for rule_path in rule_files:
        if not rule_path.exists():
            print(f"WARNING: File not found — {rule_path}")
            continue

        total += 1
        errors = validate_file(rule_path, validator)

        if not errors:
            print(f"✅ {rule_path}")
            report_lines.append(f"### ✅ `{rule_path}`\n")
            continue

        failed += 1
        print(github_annotation("error", str(rule_path), f"Schema validation failed ({len(errors)} error(s))"))
        print(f"❌ {rule_path}")
        for err in errors:
            print(err)
        report_lines.append(f"### ❌ `{rule_path}`\n")
        report_lines.append("```\n" + "\n".join(errors) + "\n```\n")

    # Markdown report (consumed by the workflow).
    report_path = Path("schema_validation_report.md")
    with report_path.open("w", encoding="utf-8") as fh:
        fh.write("# Schema Validation Report\n\n")
        fh.write(f"**Schema:** `{schema_source}`\n\n")
        fh.write(f"**Files checked:** {total} | **Failed:** {failed}\n\n")
        fh.writelines(report_lines)

    print(f"\nSummary: {total - failed}/{total} file(s) passed schema validation.")

    return 1 if failed else 0


if __name__ == "__main__":
    sys.exit(main())
+ + - name: Setup Python + if: steps.newdirs.outputs.HAS_NEW_DIRS == 'true' + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + if: steps.newdirs.outputs.HAS_NEW_DIRS == 'true' + run: | + pip install ruamel.yaml + + - name: Run publish script + if: steps.newdirs.outputs.HAS_NEW_DIRS == 'true' + run: | + python .github/scripts/publish.py \ + --new-dirs "${{ steps.newdirs.outputs.NEW_DIRS }}" \ + --algorithm min + + - name: Regenerate mappings + if: steps.newdirs.outputs.HAS_NEW_DIRS == 'true' + run: | + python .github/scripts/generate_mappings.py . + + - name: Commit changes + if: steps.newdirs.outputs.HAS_NEW_DIRS == 'true' + run: | + git config user.name "github-actions" + git config user.email "github-actions@github.com" + git add . + if git diff --cached --quiet; then + echo "Nothing to commit" + else + git commit -m "Auto-publish new rules" + fi + + - name: Push + if: steps.newdirs.outputs.HAS_NEW_DIRS == 'true' + run: | + git push \ No newline at end of file diff --git a/.github/workflows/validate-yaml-format.yml b/.github/workflows/validate-yaml-format.yml new file mode 100644 index 000000000..7a7693d17 --- /dev/null +++ b/.github/workflows/validate-yaml-format.yml @@ -0,0 +1,86 @@ +name: Validate YAML Formatting +on: + pull_request: + paths: + - 'Unpublished/**/rule.yml' + - 'Published/**/rule.yml' + types: [opened, synchronize, reopened] + workflow_dispatch: {} +jobs: + check-yaml-format: + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Install pyyaml + run: pip install pyyaml + - name: Detect changed rule.yml files + id: changed-files + run: | + if [ "${{ github.event_name }}" = "pull_request" ]; then + FILES=$(git diff --name-only origin/${{ github.base_ref }}...HEAD \ + | grep -E 
'^(Published|Unpublished)/.*/rule\.yml$' || true) + else + FILES=$(git diff --name-only HEAD~1 HEAD \ + | grep -E '^(Published|Unpublished)/.*/rule\.yml$' || true) + fi + if [ -z "$FILES" ]; then + echo "No rule.yml files changed." + echo "has_files=false" >> $GITHUB_OUTPUT + else + echo "has_files=true" >> $GITHUB_OUTPUT + echo "$FILES" > /tmp/changed_rule_files.txt + fi + - name: Check YAML sorting and formatting + id: format-check + if: steps.changed-files.outputs.has_files == 'true' + run: | + FILES=$(cat /tmp/changed_rule_files.txt | tr '\n' ' ') + python scripts/sort_yaml.py --check $FILES + continue-on-error: true + - name: Post format check result to PR + if: always() && github.event_name == 'pull_request' && steps.changed-files.outputs.has_files == 'true' + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const outcome = '${{ steps.format-check.outcome }}'; + const marker = ''; + let body = marker + '\n'; + if (outcome === 'success') { + body += '## \u2705 YAML Format Check Passed\n\nAll changed `rule.yml` files are correctly sorted and formatted.'; + } else { + body += '## \u274c YAML Format Check Failed\n\n'; + body += 'One or more `rule.yml` files are not correctly sorted/formatted alphabetically by key.\n\n'; + body += 'Run the following command locally to fix them:\n\n```bash\npython scripts/sort_yaml.py\n```\n\nThen commit and push.'; + } + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, repo: context.repo.repo, + issue_number: context.issue.number, + }); + const existing = comments.find(c => c.user.type === 'Bot' && c.body.includes(marker)); + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, repo: context.repo.repo, + comment_id: existing.id, body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, repo: context.repo.repo, + issue_number: context.issue.number, body, + }); + } + - 
name: Fail if format check failed + if: steps.format-check.outcome == 'failure' + run: | + echo "YAML format check failed. Run 'python scripts/sort_yaml.py' to fix." + exit 1 diff --git a/.github/workflows/validate-yaml-schema.yml b/.github/workflows/validate-yaml-schema.yml new file mode 100644 index 000000000..253766b26 --- /dev/null +++ b/.github/workflows/validate-yaml-schema.yml @@ -0,0 +1,194 @@ +name: Validate YAML against Schema + +on: + pull_request: + paths: + - 'Published/**/rule.yml' + - 'Unpublished/**/rule.yml' + types: [opened, synchronize, reopened] + workflow_dispatch: + inputs: + paths_override: + description: > + Space-separated list of rule YAML files to validate + (e.g. "Published/CORE-000001/rule.yml"). Leave blank to validate + all changed rule files detected from the last commit. + required: false + default: '' + +# Only one run per PR branch at a time; cancel superseded runs. +concurrency: + group: schema-validate-${{ github.ref }} + cancel-in-progress: true + +jobs: + validate-schema: + name: Validate rule YAML against JSON Schema + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + + env: + # draft/2020-12 schema — switch to rule-merged if you need $defs inlined + SCHEMA_URL: >- + https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule-merged/CORE-base.json + + steps: + # ----------------------------------------------------------------------- + # 1. Checkout + # ----------------------------------------------------------------------- + - name: Checkout repository + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + # ----------------------------------------------------------------------- + # 2. Set up Python + # ----------------------------------------------------------------------- + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + # ----------------------------------------------------------------------- + # 3. 
Install validation dependencies + # jsonschema 4.x ships full draft/2020-12 support out of the box. + # ----------------------------------------------------------------------- + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install "jsonschema[format-nongpl]>=4.18" "PyYAML>=6.0" + + # ----------------------------------------------------------------------- + # 4. Detect changed rule YAML files + # ----------------------------------------------------------------------- + - name: Detect changed rule files + id: changed + run: | + if [ -n "${{ github.event.inputs.paths_override }}" ]; then + # Manual override + FILES="${{ github.event.inputs.paths_override }}" + elif [ "${{ github.event_name }}" = "pull_request" ]; then + FILES=$(git diff --name-only \ + "origin/${{ github.base_ref }}...HEAD" \ + -- 'Published/**/rule.yml' 'Unpublished/**/rule.yml' \ + | tr '\n' ' ') + else + # workflow_dispatch without override — validate all rule files + FILES=$(find Published Unpublished -name "rule.yml" | tr '\n' ' ') + fi + + echo "files=$FILES" >> "$GITHUB_OUTPUT" + echo "Detected rule files: $FILES" + + if [ -z "$FILES" ]; then + echo "no_files=true" >> "$GITHUB_OUTPUT" + else + echo "no_files=false" >> "$GITHUB_OUTPUT" + fi + + # ----------------------------------------------------------------------- + # 5. Run schema validation + # ----------------------------------------------------------------------- + - name: Validate YAML against schema + id: validate + if: steps.changed.outputs.no_files == 'false' + continue-on-error: true + run: | + python .github/scripts/validate_yaml_schema.py \ + "${{ env.SCHEMA_URL }}" \ + ${{ steps.changed.outputs.files }} + + - name: No rule files changed + if: steps.changed.outputs.no_files == 'true' + run: | + echo "No rule YAML files were changed — nothing to validate." + echo "## Schema Validation" >> "$GITHUB_STEP_SUMMARY" + echo "No \`rule.yml\` files were changed in this PR." 
>> "$GITHUB_STEP_SUMMARY" + + # ----------------------------------------------------------------------- + # 6. Upload report artifact + # ----------------------------------------------------------------------- + - name: Upload schema validation report + if: always() && steps.changed.outputs.no_files == 'false' + uses: actions/upload-artifact@v7 + with: + name: schema-validation-report + path: schema_validation_report.md + if-no-files-found: warn + + # ----------------------------------------------------------------------- + # 7. Post report as PR comment + # ----------------------------------------------------------------------- + - name: Post report to PR + if: > + always() && + github.event_name == 'pull_request' && + steps.changed.outputs.no_files == 'false' + uses: actions/github-script@v9 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const fs = require('fs'); + const reportPath = 'schema_validation_report.md'; + const runUrl = `${context.payload.repository.html_url}/actions/runs/${context.runId}`; + + let body = '## Schema Validation Results\n\n'; + + if (fs.existsSync(reportPath)) { + const content = fs.readFileSync(reportPath, 'utf8'); + body += '
\nClick to expand\n\n'; + body += content; + body += '\n
\n'; + } else { + body += '_No schema validation report was generated._\n'; + } + + body += `\n[View workflow run](${runUrl})`; + + const marker = 'Schema Validation Results'; + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + + const existing = comments.find( + c => c.user.type === 'Bot' && c.body.includes(marker) + ); + + if (existing) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: existing.id, + body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body, + }); + } + + # ----------------------------------------------------------------------- + # 8. Write report to workflow summary + # ----------------------------------------------------------------------- + - name: Write report to workflow summary + if: always() && steps.changed.outputs.no_files == 'false' + run: | + [ -f schema_validation_report.md ] \ + && cat schema_validation_report.md >> "$GITHUB_STEP_SUMMARY" \ + || true + + # ----------------------------------------------------------------------- + # 9. Fail the job if validation found errors + # ----------------------------------------------------------------------- + - name: Check validation outcome + if: steps.validate.outcome == 'failure' + run: | + echo "::error::Schema validation failed — see the report above." 
+ exit 1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..ef3757b1d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: + - repo: local + hooks: + - id: sort-yaml-rules + name: Sort and format rule YAML files + language: python + entry: python scripts/sort_yaml.py --check + types: [yaml] + files: ".*/rule\\.yml$" + additional_dependencies: [pyyaml] + pass_filenames: true diff --git a/.vscode/extensions.json b/.vscode/extensions.json index c27790ce9..fe086e4f5 100644 --- a/.vscode/extensions.json +++ b/.vscode/extensions.json @@ -2,6 +2,7 @@ "recommendations": [ "redhat.vscode-yaml", "GrapeCity.gc-excelviewer", - "mechatroner.rainbow-csv" + "mechatroner.rainbow-csv", + "emeraldwalk.runonsave" ] } \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 7a234b51f..cc469d2b4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,14 @@ { "yaml.schemas": { "https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule-merged/CORE-base.json": "rule.yml" + }, + "emeraldwalk.runonsave": { + "commands": [ + { + "match": "rule\\.yml$", + "isAsync": false, + "cmd": "${workspaceFolder}/venv/bin/python ${workspaceFolder}/scripts/sort_yaml.py ${file}" + } + ] } } \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 000000000..092145b12 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,28 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "Sort & Format rule.yml", + "type": "shell", + "command": "${workspaceFolder}/venv/bin/python", + "args": ["${workspaceFolder}/scripts/sort_yaml.py", "${file}"], + "presentation": { + "reveal": "silent", + "panel": "shared" + }, + "problemMatcher": [] + }, + { + "label": "Sort & Format ALL rule.yml files", + "type": "shell", + "command": "${workspaceFolder}/venv/bin/python", + "args": 
["${workspaceFolder}/scripts/sort_yaml.py"], + "presentation": { + "reveal": "always", + "panel": "shared" + }, + "problemMatcher": [] + } + ] +} + diff --git a/README.md b/README.md index 3e1131c96..b82af8dc6 100644 --- a/README.md +++ b/README.md @@ -258,7 +258,17 @@ Unpublished/ - Keep an eye on the PR to make sure the automated checks pass, as well as to respond to any comments from reviewers. - If you need to make further changes, simply checkout your branch (`git checkout `), make your changes, and commit and push them — the PR will automatically update and re-run validation. -16) GitHub will automatically validate your changes when a PR is opened. If you did not include a results.csv, the check will fail — run the rule locally and push the generated results.csv to resolve it. If a difference between your results.csv and the engine output is detected, the check will also fail — re-run the rule locally, verify the results look correct, and push the updated results.csv. If the check continues to fail after updating, flag it for the Engineers Team in the PR comments. +16) GitHub will automatically validate your changes when a PR is opened. If you did not include a results.csv, the check will fail — run the rule locally and push the generated results.csv to resolve it. If a difference between your results.csv and the engine output is detected, the check will also fail — re-run the rule locally, verify the results look correct, and push the updated results.csv. If the check continues to fail after updating, flag it for the Engineers Team in the PR comments. + + **Rule Schema Validation** will run and post a comment on the PR showing whether your `rule.yaml` is valid. If the schema check fails, the comment will show the specific validation error — for example, the image below shows a failure caused by an empty `check all:` condition in the rule. The comment will automatically update when you push new code and the action re-runs. 
+ + ![Schema validation failure example](docs/files/schema.png) + + **Test Data Validation** will also run and post a comment showing the results of running the rule against your test data. If the check fails, the comment will indicate why — for example, the image below shows a failure caused by a missing `.env` file in the test data. + + ![Test data validation failure example](docs/files/validation.png) + +Once your rule and test data pass these checks, the PR can be merged. **Approval - Merge PR** diff --git a/docs/check_operator.md b/docs/check_operator.md index a67342192..073e97382 100644 --- a/docs/check_operator.md +++ b/docs/check_operator.md @@ -1,3 +1,3 @@ # Check Operator -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/Operator.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/Operator.md ":include") diff --git a/docs/check_parameter.md b/docs/check_parameter.md index d152f6e80..94cb65bdd 100644 --- a/docs/check_parameter.md +++ b/docs/check_parameter.md @@ -1,3 +1,3 @@ # Check Parameters -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/check_parameter.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/check_parameter.md ":include") diff --git a/docs/customrules.md b/docs/customrules.md index afd801458..31ab6058a 100644 --- a/docs/customrules.md +++ b/docs/customrules.md @@ -1 +1 @@ -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/customrules.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/customrules.md ":include") diff --git a/docs/exdictionary.md b/docs/exdictionary.md index 0dac4ca08..fd4e4d6f3 100644 --- a/docs/exdictionary.md +++ b/docs/exdictionary.md @@ -1 +1 @@ 
-[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/ExDictionary.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/ExDictionary.md ":include") diff --git a/docs/executability.md b/docs/executability.md index afe860353..4f908dac8 100644 --- a/docs/executability.md +++ b/docs/executability.md @@ -1 +1 @@ -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/Executability.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/Executability.md ":include") diff --git a/docs/files/schema.png b/docs/files/schema.png new file mode 100644 index 000000000..fcdddc30b Binary files /dev/null and b/docs/files/schema.png differ diff --git a/docs/files/validation.png b/docs/files/validation.png new file mode 100644 index 000000000..41163223a Binary files /dev/null and b/docs/files/validation.png differ diff --git a/docs/jsonata_functions.md b/docs/jsonata_functions.md index 2b142d50b..899abb783 100644 --- a/docs/jsonata_functions.md +++ b/docs/jsonata_functions.md @@ -1 +1 @@ -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/JSONata_Functions.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/JSONata_Functions.md ":include") diff --git a/docs/metadata_variables.md b/docs/metadata_variables.md index 8e23b8a35..c5d82b99f 100644 --- a/docs/metadata_variables.md +++ b/docs/metadata_variables.md @@ -1 +1 @@ -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/MetaVariables.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/MetaVariables.md ":include") diff --git a/docs/operations.md b/docs/operations.md index 6e6116e7e..063bea63d 100644 --- 
a/docs/operations.md +++ b/docs/operations.md @@ -1 +1 @@ -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/Operations.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/Operations.md ":include") diff --git a/docs/rule_type.md b/docs/rule_type.md index 5d7cb6f93..c88d6477c 100644 --- a/docs/rule_type.md +++ b/docs/rule_type.md @@ -1 +1 @@ -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/Rule_Type.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/Rule_Type.md ":include") diff --git a/docs/sensitivity.md b/docs/sensitivity.md index ca993db30..aeb7cb7d1 100644 --- a/docs/sensitivity.md +++ b/docs/sensitivity.md @@ -1 +1 @@ -[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/main/resources/schema/rule/Sensitivity.md ":include") +[](https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule/Sensitivity.md ":include") diff --git a/engine b/engine index b086dca0a..d6686aa2f 160000 --- a/engine +++ b/engine @@ -1 +1 @@ -Subproject commit b086dca0a6a8a5f628d65d2fa03486beb6ad68a1 +Subproject commit d6686aa2f2b8302d031d7114b45d618aa67043b1 diff --git a/scripts/sort_yaml.py b/scripts/sort_yaml.py new file mode 100644 index 000000000..b174e0aa4 --- /dev/null +++ b/scripts/sort_yaml.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +""" +Sort and format rule YAML files alphabetically and recursively by key name. + +This matches the auto-format/auto-sort behavior of the CDISC conformance rules editor. 
class _SortedDumper(yaml.Dumper):
    """Private Dumper subclass on which this script registers its representers."""


def _str_representer(dumper: yaml.Dumper, data: str):
    """Represent a string, using literal block style for multi-line text.

    Strings containing a newline are emitted with ``style="|"``. All other
    strings go to ``represent_scalar`` with no explicit style, so this
    function itself applies no special quoting.
    """
    if "\n" in data:
        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
    return dumper.represent_scalar("tag:yaml.org,2002:str", data)


_SortedDumper.add_representer(str, _str_representer)


def sort_recursive(obj):
    """Return a copy of *obj* with all dict keys sorted, recursively.

    Keys are ordered by their ``str()`` form so mixed key types still sort.
    Lists keep their element order, but their items are processed too.
    Any other value is returned unchanged.
    """
    if isinstance(obj, dict):
        return {k: sort_recursive(obj[k]) for k in sorted(obj.keys(), key=str)}
    if isinstance(obj, list):
        return [sort_recursive(item) for item in obj]
    return obj


def canonical(content: str) -> str:
    """Return the canonical (sorted + formatted) form of a YAML string.

    Content that parses to ``None`` (empty or comments only) is returned
    unchanged. Parse errors from ``yaml.safe_load`` propagate to the caller.
    """
    data = yaml.safe_load(content)
    if data is None:
        return content
    sorted_data = sort_recursive(data)
    return yaml.dump(
        sorted_data,
        Dumper=_SortedDumper,
        default_flow_style=False,
        allow_unicode=True,
        indent=2,
        sort_keys=False,  # keys were already sorted by sort_recursive
        width=100,
    )


def find_rule_files(root: Path) -> list[Path]:
    """Find every ``rule.yml`` under ``Published/`` and ``Unpublished/``.

    Folders that do not exist under *root* are ignored. The result is sorted.
    """
    files = []
    for folder in ("Published", "Unpublished"):
        folder_path = root / folder
        if folder_path.exists():
            files.extend(folder_path.rglob("rule.yml"))
    return sorted(files)


def process_files(files: list[Path], check_mode: bool) -> int:
    """Format the given files in place, or only check them.

    Args:
        files: Rule YAML files to process.
        check_mode: When true, files are never modified; files whose content
            differs from the canonical form are only reported.

    Returns:
        1 if any file could not be read, parsed or written, or if
        *check_mode* is on and at least one file needs formatting;
        otherwise 0.
    """
    changed = []
    errors = []

    for path in files:
        try:
            original = path.read_text(encoding="utf-8")
            formatted = canonical(original)
        except Exception as exc:
            errors.append(f"  {path}: {exc}")
            continue

        if original == formatted:
            continue

        if not check_mode:
            # A write failure on one file is recorded and the remaining
            # files are still processed.
            try:
                path.write_text(formatted, encoding="utf-8")
            except OSError as exc:
                errors.append(f"  {path}: {exc}")
                continue
            print(f"  Formatted: {path}")
        changed.append(path)

    # Printed before the error summary so a broken file does not hide the
    # list of files that still need formatting.
    if check_mode and changed:
        print(
            "\nThe following rule.yml files are not correctly sorted/formatted:\n",
            file=sys.stderr,
        )
        for p in changed:
            print(f"  {p}", file=sys.stderr)
        print(
            "\nRun `python scripts/sort_yaml.py` to fix them automatically.",
            file=sys.stderr,
        )

    if errors:
        print("\nERROR: Failed to process the following files:", file=sys.stderr)
        for e in errors:
            print(e, file=sys.stderr)
        return 1

    if check_mode:
        if changed:
            return 1
        print("All rule.yml files are correctly sorted and formatted.")
    elif not changed:
        print("All rule.yml files are already correctly sorted and formatted.")

    return 0
Defaults to all rule.yml files under Published/ and Unpublished/.", + ) + args = parser.parse_args() + + repo_root = Path(__file__).resolve().parent.parent + + if args.files: + files = [p.resolve() for p in args.files] + else: + files = find_rule_files(repo_root) + + if not files: + print("No rule.yml files found.") + return 0 + + mode = "Checking" if args.check else "Formatting" + print(f"{mode} {len(files)} rule.yml file(s)...") + + sys.exit(process_files(files, check_mode=args.check)) + + +if __name__ == "__main__": + main() + + diff --git a/setup/bash_setup.sh b/setup/bash_setup.sh index 2ab01bb2f..c69282c30 100644 --- a/setup/bash_setup.sh +++ b/setup/bash_setup.sh @@ -145,4 +145,15 @@ fi VENV_PYTHON=$(which python) +echo "Installing pre-commit..." +pip install pre-commit --index-url https://pypi.org/simple/ --quiet 2>/dev/null || \ + pip install pre-commit --quiet 2>/dev/null || true +if command -v pre-commit >/dev/null 2>&1; then + pre-commit install + echo "Pre-commit hook installed." +else + echo "Warning: pre-commit not found on PATH after install; skipping hook setup." + echo "You can install it manually with: pip install pre-commit && pre-commit install" +fi + echo "Setup completed successfully!" diff --git a/setup/windows_setup.bat b/setup/windows_setup.bat index e87cdef15..77d0ce5be 100644 --- a/setup/windows_setup.bat +++ b/setup/windows_setup.bat @@ -177,6 +177,18 @@ if !errorlevel! neq 0 ( exit /b 1 ) +echo. +echo Installing pre-commit... +python -m pip install pre-commit --index-url https://pypi.org/simple/ --quiet 2>nul || python -m pip install pre-commit --quiet 2>nul +where pre-commit >nul 2>&1 +if !errorlevel! equ 0 ( + pre-commit install + echo Pre-commit hook installed. +) else ( + echo Warning: pre-commit not found on PATH; skipping hook setup. + echo You can install it manually with: pip install pre-commit ^&^& pre-commit install +) + echo. echo Setup completed successfully! pause \ No newline at end of file