diff --git a/.github/workflows/validate-yaml-format.yml b/.github/workflows/validate-yaml-format.yml
new file mode 100644
index 000000000..7a7693d17
--- /dev/null
+++ b/.github/workflows/validate-yaml-format.yml
@@ -0,0 +1,91 @@
+name: Validate YAML Formatting
+on:
+  pull_request:
+    paths:
+      - 'Unpublished/**/rule.yml'
+      - 'Published/**/rule.yml'
+    types: [opened, synchronize, reopened]
+  workflow_dispatch: {}
+jobs:
+  check-yaml-format:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Set up Python 3.12
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      - name: Install pyyaml
+        run: pip install pyyaml
+      - name: Detect changed rule.yml files
+        id: changed-files
+        run: |
+          # --diff-filter=d skips deleted files, which the checker could not read.
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            FILES=$(git diff --name-only --diff-filter=d origin/${{ github.base_ref }}...HEAD \
+              | grep -E '^(Published|Unpublished)/.*/rule\.yml$' || true)
+          else
+            FILES=$(git diff --name-only --diff-filter=d HEAD~1 HEAD \
+              | grep -E '^(Published|Unpublished)/.*/rule\.yml$' || true)
+          fi
+          if [ -z "$FILES" ]; then
+            echo "No rule.yml files changed."
+            echo "has_files=false" >> "$GITHUB_OUTPUT"
+          else
+            echo "has_files=true" >> "$GITHUB_OUTPUT"
+            echo "$FILES" > /tmp/changed_rule_files.txt
+          fi
+      - name: Check YAML sorting and formatting
+        id: format-check
+        if: steps.changed-files.outputs.has_files == 'true'
+        run: |
+          # Read one path per line so file names containing spaces stay intact.
+          mapfile -t FILES < /tmp/changed_rule_files.txt
+          python scripts/sort_yaml.py --check "${FILES[@]}"
+        # Keep going so the result can be posted to the PR before the job is failed.
+        continue-on-error: true
+      - name: Post format check result to PR
+        if: always() && github.event_name == 'pull_request' && steps.changed-files.outputs.has_files == 'true'
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const outcome = '${{ steps.format-check.outcome }}';
+            // Hidden HTML marker used to find this workflow's own comment again.
+            const marker = '<!-- yaml-format-check -->';
+            let body = marker + '\n';
+            if (outcome === 'success') {
+              body += '## \u2705 YAML Format Check Passed\n\nAll changed `rule.yml` files are correctly sorted and formatted.';
+            } else {
+              body += '## \u274c YAML Format Check Failed\n\n';
+              body += 'One or more `rule.yml` files are not correctly sorted/formatted alphabetically by key.\n\n';
+              body += 'Run the following command locally to fix them:\n\n```bash\npython scripts/sort_yaml.py\n```\n\nThen commit and push.';
+            }
+            // Paginate: a single listComments call only returns the first page.
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner, repo: context.repo.repo,
+              issue_number: context.issue.number,
+            });
+            const existing = comments.find(c => c.user.type === 'Bot' && c.body.includes(marker));
+            if (existing) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner, repo: context.repo.repo,
+                comment_id: existing.id, body,
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner, repo: context.repo.repo,
+                issue_number: context.issue.number, body,
+              });
+            }
+      - name: Fail if format check failed
+        if: steps.format-check.outcome == 'failure'
+        run: |
+          echo "YAML format check failed. Run 'python scripts/sort_yaml.py' to fix."
+          exit 1
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..ef3757b1d
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,11 @@
+repos:
+  - repo: local
+    hooks:
+      - id: sort-yaml-rules
+        name: Sort and format rule YAML files
+        language: python
+        entry: python scripts/sort_yaml.py --check
+        types: [yaml]
+        files: ".*/rule\\.yml$"
+        additional_dependencies: [pyyaml]
+        pass_filenames: true
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
index c27790ce9..fe086e4f5 100644
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -2,6 +2,7 @@
   "recommendations": [
     "redhat.vscode-yaml",
     "GrapeCity.gc-excelviewer",
-    "mechatroner.rainbow-csv"
+    "mechatroner.rainbow-csv",
+    "emeraldwalk.runonsave"
   ]
 }
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 7a234b51f..cc469d2b4 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,5 +1,14 @@
 {
   "yaml.schemas": {
     "https://raw.githubusercontent.com/cdisc-org/cdisc-rules-engine/refs/heads/main/resources/schema/rule-merged/CORE-base.json": "rule.yml"
+  },
+  "emeraldwalk.runonsave": {
+    "commands": [
+      {
+        "match": "rule\\.yml$",
+        "isAsync": false,
+        "cmd": "${workspaceFolder}/venv/bin/python ${workspaceFolder}/scripts/sort_yaml.py ${file}"
+      }
+    ]
   }
 }
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
new file mode 100644
index 000000000..092145b12
--- /dev/null
+++ b/.vscode/tasks.json
@@ -0,0 +1,27 @@
+{
+  "version": "2.0.0",
+  "tasks": [
+    {
+      "label": "Sort & Format rule.yml",
+      "type": "shell",
+      "command": "${workspaceFolder}/venv/bin/python",
+      "args": ["${workspaceFolder}/scripts/sort_yaml.py", "${file}"],
+      "presentation": {
+        "reveal": "silent",
+        "panel": "shared"
+      },
+      "problemMatcher": []
+    },
+    {
+      "label": "Sort & Format ALL rule.yml files",
+      "type": "shell",
+      "command": "${workspaceFolder}/venv/bin/python",
+      "args": ["${workspaceFolder}/scripts/sort_yaml.py"],
+      "presentation": {
+        "reveal": "always",
+        "panel": "shared"
+      },
+      "problemMatcher": []
+    }
+  ]
+}
diff --git a/scripts/sort_yaml.py b/scripts/sort_yaml.py
new file mode 100644
index 000000000..b174e0aa4
--- /dev/null
+++ b/scripts/sort_yaml.py
@@ -0,0 +1,179 @@
+#!/usr/bin/env python3
+"""
+Sort and format rule YAML files alphabetically and recursively by key name.
+
+This matches the auto-format/auto-sort behavior of the CDISC conformance rules editor.
+
+Usage:
+    # Format files in-place (default: all rule.yml under Published/ and Unpublished/)
+    python scripts/sort_yaml.py
+
+    # Format specific files
+    python scripts/sort_yaml.py path/to/rule.yml another/rule.yml
+
+    # Check mode: exit with code 1 if any file is not formatted correctly
+    python scripts/sort_yaml.py --check [files...]
+"""
+
+import sys
+import argparse
+from pathlib import Path
+
+try:
+    import yaml
+except ImportError:
+    print("ERROR: pyyaml is not installed. Run: pip install pyyaml", file=sys.stderr)
+    sys.exit(1)
+
+
+# ---------------------------------------------------------------------------
+# Custom YAML Dumper
+# ---------------------------------------------------------------------------
+
+class _SortedDumper(yaml.Dumper):
+    """YAML Dumper that produces consistent, human-readable output."""
+    pass
+
+
+def _str_representer(dumper: yaml.Dumper, data: str):
+    """Represent strings: use literal block style for multi-line, plain otherwise.
+    Strings that look like YAML scalars (booleans, numbers) are quoted.
+    """
+    if "\n" in data:
+        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data)
+
+
+_SortedDumper.add_representer(str, _str_representer)
+
+
+# ---------------------------------------------------------------------------
+# Core logic
+# ---------------------------------------------------------------------------
+
+def sort_recursive(obj):
+    """Recursively sort dict keys alphabetically. Lists are preserved as-is."""
+    if isinstance(obj, dict):
+        return {k: sort_recursive(obj[k]) for k in sorted(obj.keys(), key=str)}
+    if isinstance(obj, list):
+        return [sort_recursive(item) for item in obj]
+    return obj
+
+
+def canonical(content: str) -> str:
+    """Return the canonical (sorted + formatted) representation of a YAML string.
+
+    The content is round-tripped through PyYAML, so YAML comments are not
+    preserved. Documents that load as ``None`` are returned unchanged.
+    """
+    data = yaml.safe_load(content)
+    if data is None:
+        return content
+    sorted_data = sort_recursive(data)
+    return yaml.dump(
+        sorted_data,
+        Dumper=_SortedDumper,
+        default_flow_style=False,
+        allow_unicode=True,
+        indent=2,
+        sort_keys=False,  # we already sorted manually
+        width=100,
+    )
+
+
+def find_rule_files(root: Path) -> list[Path]:
+    """Find all rule.yml files under Published/ and Unpublished/."""
+    files = []
+    for folder in ("Published", "Unpublished"):
+        folder_path = root / folder
+        if folder_path.exists():
+            files.extend(folder_path.rglob("rule.yml"))
+    return sorted(files)
+
+
+def process_files(files: list[Path], check_mode: bool) -> int:
+    """Format (or check) the given files. Returns exit code."""
+    changed = []
+    errors = []
+
+    for path in files:
+        try:
+            original = path.read_text(encoding="utf-8")
+            formatted = canonical(original)
+        except Exception as exc:
+            errors.append(f" {path}: {exc}")
+            continue
+
+        if original != formatted:
+            changed.append(path)
+            if not check_mode:
+                path.write_text(formatted, encoding="utf-8")
+                print(f" Formatted: {path}")
+
+    if errors:
+        print("\nERROR: Failed to process the following files:", file=sys.stderr)
+        for e in errors:
+            print(e, file=sys.stderr)
+        return 1
+
+    if check_mode:
+        if changed:
+            print(
+                "\nThe following rule.yml files are not correctly sorted/formatted:\n",
+                file=sys.stderr,
+            )
+            for p in changed:
+                print(f" {p}", file=sys.stderr)
+            print(
+                "\nRun `python scripts/sort_yaml.py` to fix them automatically.",
+                file=sys.stderr,
+            )
+            return 1
+        else:
+            print("All rule.yml files are correctly sorted and formatted.")
+    else:
+        if not changed:
+            print("All rule.yml files are already correctly sorted and formatted.")
+
+    return 0
+
+
+# ---------------------------------------------------------------------------
+# Entry point
+# ---------------------------------------------------------------------------
+
+def main() -> int:
+    """Parse command-line arguments, run the formatter and return the exit code."""
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Check mode: exit 1 if any file needs formatting, without modifying files.",
+    )
+    parser.add_argument(
+        "files",
+        nargs="*",
+        type=Path,
+        help="rule.yml files to process. Defaults to all rule.yml files under Published/ and Unpublished/.",
+    )
+    args = parser.parse_args()
+
+    repo_root = Path(__file__).resolve().parent.parent
+
+    if args.files:
+        files = [p.resolve() for p in args.files]
+    else:
+        files = find_rule_files(repo_root)
+
+    if not files:
+        print("No rule.yml files found.")
+        return 0
+
+    mode = "Checking" if args.check else "Formatting"
+    print(f"{mode} {len(files)} rule.yml file(s)...")
+
+    return process_files(files, check_mode=args.check)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/setup/bash_setup.sh b/setup/bash_setup.sh
index 2ab01bb2f..c69282c30 100644
--- a/setup/bash_setup.sh
+++ b/setup/bash_setup.sh
@@ -145,4 +145,15 @@ fi
 
 VENV_PYTHON=$(which python)
 
+echo "Installing pre-commit..."
+pip install pre-commit --index-url https://pypi.org/simple/ --quiet 2>/dev/null || \
+    pip install pre-commit --quiet 2>/dev/null || true
+if command -v pre-commit >/dev/null 2>&1; then
+    pre-commit install
+    echo "Pre-commit hook installed."
+else
+    echo "Warning: pre-commit not found on PATH after install; skipping hook setup."
+    echo "You can install it manually with: pip install pre-commit && pre-commit install"
+fi
+
 echo "Setup completed successfully!"
diff --git a/setup/windows_setup.bat b/setup/windows_setup.bat
index e87cdef15..77d0ce5be 100644
--- a/setup/windows_setup.bat
+++ b/setup/windows_setup.bat
@@ -177,6 +177,18 @@ if !errorlevel! neq 0 (
     exit /b 1
 )
 
+echo.
+echo Installing pre-commit...
+python -m pip install pre-commit --index-url https://pypi.org/simple/ --quiet 2>nul || python -m pip install pre-commit --quiet 2>nul
+where pre-commit >nul 2>&1
+if !errorlevel! equ 0 (
+    pre-commit install
+    echo Pre-commit hook installed.
+) else (
+    echo Warning: pre-commit not found on PATH; skipping hook setup.
+    echo You can install it manually with: pip install pre-commit ^&^& pre-commit install
+)
+
 echo.
 echo Setup completed successfully!
 pause
\ No newline at end of file