From 85b7b3cb4bc7ba697180a470365677995ce2fc01 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Tue, 21 Apr 2026 20:07:54 +0200 Subject: [PATCH 01/13] WIP: action to check engine against published rules --- .../workflows/validate-published-rules.yml | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 .github/workflows/validate-published-rules.yml diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml new file mode 100644 index 000000000..0a4598995 --- /dev/null +++ b/.github/workflows/validate-published-rules.yml @@ -0,0 +1,190 @@ +# ============================================================================== +# This workflow: +# 1. Checks out cdisc-rules-engine (the engine itself) +# 2. Checks out cdisc-open-rules (rules + test data) into ./open-rules/ +# 3. Installs engine Python dependencies +# 4. Iterates every Published/ rule from cdisc-open-rules +# 5. Runs the engine against each test case +# 6. Compares output with committed results.csv baseline +# 7. Publishes a Markdown report to Job Summary and as an artifact +# ============================================================================== +name: Validate Published Rules + +on: + push: + branches: [ main ] + workflow_dispatch: + inputs: + rules_ref: + description: 'Branch/tag/SHA of cdisc-open-rules to validate against' + required: false + default: 'main' + engine_ref: + description: 'Branch/tag/SHA of cdisc-rules-engine to use' + required: false + default: 'main' + +jobs: + validate-published-rules: + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + # ----------------------------------------------------------------------- + # 1. Checkout cdisc-rules-engine + # ----------------------------------------------------------------------- + - name: Checkout cdisc-rules-engine + uses: actions/checkout@v6 + with: + repository: cdisc-org/cdisc-rules-engine + ref: ${{ inputs.engine_ref || github.sha }} + path: engine + token: ${{ secrets.GITHUB_TOKEN }} + + # ----------------------------------------------------------------------- + # 2. Checkout cdisc-open-rules (rules + test data + helper scripts) + # ----------------------------------------------------------------------- + - name: Checkout cdisc-open-rules + uses: actions/checkout@v6 + with: + repository: cdisc-org/cdisc-open-rules + ref: ${{ inputs.rules_ref || 'main' }} + path: open-rules + # If cdisc-open-rules is private, add a PAT secret: + # token: ${{ secrets.CDISC_OPEN_RULES_TOKEN }} + + # ----------------------------------------------------------------------- + # 3. Set up Python + # ----------------------------------------------------------------------- + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + # ----------------------------------------------------------------------- + # 4. Install engine dependencies + # ----------------------------------------------------------------------- + - name: Install engine dependencies + run: | + python -m venv venv + ./venv/bin/pip install --upgrade pip + ./venv/bin/pip install -r engine/requirements.txt + + # ----------------------------------------------------------------------- + # 5. Run validation for every Published rule + # ----------------------------------------------------------------------- + - name: Run validation for all Published rules + id: validate + continue-on-error: true + run: | + chmod +x open-rules/.github/scripts/run_validation.sh + + PYTHON_CMD="$(pwd)/venv/bin/python" + ENGINE_DIR="$(pwd)/engine" + RULES_ROOT="$(pwd)/open-rules" + PUBLISHED_DIR="$RULES_ROOT/Published" + SCRIPTS_DIR="$RULES_ROOT/.github/scripts" + SUMMARY_REPORT="$(pwd)/validation_report.md" + + OVERALL_EXIT=0 + RULE_PASS=0 + RULE_FAIL=0 + + mapfile -t RULE_DIRS < <(find "$PUBLISHED_DIR" -mindepth 1 -maxdepth 1 -type d | sort) + + if [ ${#RULE_DIRS[@]} -eq 0 ]; then + echo "::warning::No rule directories found under Published/" + exit 0 + fi + + echo "Found ${#RULE_DIRS[@]} rule(s) under Published/" + + { + echo "# Published Rules Validation Report" + echo "" + } > "$SUMMARY_REPORT" + + for RULE_DIR in "${RULE_DIRS[@]}"; do + RULE_ID=$(basename "$RULE_DIR") + RULE_REL_PATH="Published/$RULE_ID" + + RULE_YML=$(find "$RULE_DIR" -maxdepth 1 -name "*.yml" | head -1) + if [ -z "$RULE_YML" ]; then + echo "::warning::Skipping $RULE_ID — no .yml file found" + continue + fi + + echo "========================================" + echo " Validating $RULE_ID" + echo "========================================" + + RULE_EXIT=0 + # Pass ENGINE_DIR explicitly so run_validation.sh knows where core.py is + ENGINE_DIR_OVERRIDE="$ENGINE_DIR" \ + bash "$SCRIPTS_DIR/run_validation.sh" \ + "$RULE_REL_PATH" \ + "$PYTHON_CMD" \ + "$RULES_ROOT" \ + || RULE_EXIT=$? + + if [ -f "$RULES_ROOT/validation_report.md" ]; then + cat "$RULES_ROOT/validation_report.md" >> "$SUMMARY_REPORT" + echo -e "\n---\n" >> "$SUMMARY_REPORT" + rm -f "$RULES_ROOT/validation_report.md" + fi + + if [ $RULE_EXIT -eq 0 ]; then + RULE_PASS=$((RULE_PASS + 1)) + echo " → $RULE_ID: PASSED" + else + RULE_FAIL=$((RULE_FAIL + 1)) + OVERALL_EXIT=1 + echo " → $RULE_ID: FAILED" + fi + done + + # Insert summary totals after the H1 heading + { + echo "**Total:** $((RULE_PASS + RULE_FAIL)) | ✅ Passed: $RULE_PASS | ❌ Failed: $RULE_FAIL" + echo "" + echo "---" + echo "" + } > /tmp/totals.md + head -2 "$SUMMARY_REPORT" > /tmp/final_report.md + cat /tmp/totals.md >> /tmp/final_report.md + tail -n +3 "$SUMMARY_REPORT" >> /tmp/final_report.md + mv /tmp/final_report.md "$SUMMARY_REPORT" + + exit $OVERALL_EXIT + + # ----------------------------------------------------------------------- + # 6. Upload report + results.json as artifacts + # ----------------------------------------------------------------------- + - name: Upload validation artifacts + if: always() + uses: actions/upload-artifact@v6 + with: + name: published-rules-validation-${{ github.run_id }} + path: | + open-rules/Published/**/results/results.json + validation_report.md + if-no-files-found: warn + + # ----------------------------------------------------------------------- + # 7. Write report to GitHub Actions Job Summary + # ----------------------------------------------------------------------- + - name: Write report to workflow summary + if: always() + run: | + [ -f validation_report.md ] && cat validation_report.md >> $GITHUB_STEP_SUMMARY || true + + # ----------------------------------------------------------------------- + # 8. Fail the job if any rule failed + # ----------------------------------------------------------------------- + - name: Check overall status + if: steps.validate.outcome == 'failure' + run: | + echo "One or more published rules failed validation — see the report above." + exit 1 + From 44da428c602e89bb0f3ef753ed347f4c5f91fc7a Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 22 Apr 2026 15:43:14 +0200 Subject: [PATCH 02/13] added workflow options to test it --- .github/workflows/validate-published-rules.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 0a4598995..60bb54bfc 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -11,8 +11,11 @@ name: Validate Published Rules on: + pull_request: push: - branches: [ main ] + branches: + - main + - 798-test-against-published workflow_dispatch: inputs: rules_ref: From 77d07d75979d901c59b20f2ae161858b6732c4f4 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 22 Apr 2026 15:58:10 +0200 Subject: [PATCH 03/13] debug step --- .github/workflows/validate-published-rules.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 60bb54bfc..5c89ea32c 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -57,6 +57,20 @@ jobs: # If cdisc-open-rules is private, add a PAT secret: # token: ${{ secrets.CDISC_OPEN_RULES_TOKEN }} + # ----------------------------------------------------------------------- + # 2b. Debug — verify directory layout + # ----------------------------------------------------------------------- + - name: Debug — list workspace layout + run: | + echo "=== Workspace root ===" + ls -la + echo "=== open-rules/ ===" + ls -la open-rules/ || echo "open-rules/ NOT FOUND" + echo "=== open-rules/Published/ (first 10) ===" + ls open-rules/Published/ 2>/dev/null | head -10 || echo "Published/ NOT FOUND" + echo "=== engine/ ===" + ls engine/ | head -10 || echo "engine/ NOT FOUND" + # ----------------------------------------------------------------------- # 3. Set up Python # ----------------------------------------------------------------------- From f0273b7d6db83eced7b92749000eb4e3e6fc2215 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 22 Apr 2026 16:07:38 +0200 Subject: [PATCH 04/13] set rules_2 as default branch for open-rules --- .github/workflows/validate-published-rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 5c89ea32c..a1eaa6d32 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -21,7 +21,7 @@ on: rules_ref: description: 'Branch/tag/SHA of cdisc-open-rules to validate against' required: false - default: 'main' + default: 'rules_2' engine_ref: description: 'Branch/tag/SHA of cdisc-rules-engine to use' required: false From 17715926b026aed802a62327520ca17cc17b1d6d Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 22 Apr 2026 17:18:30 +0200 Subject: [PATCH 05/13] set rules_2 as default branch for open-rules --- .github/workflows/validate-published-rules.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index a1eaa6d32..a561e6aad 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -21,7 +21,7 @@ on: rules_ref: description: 'Branch/tag/SHA of cdisc-open-rules to validate against' required: false - default: 'rules_2' + default: 'main' engine_ref: description: 'Branch/tag/SHA of cdisc-rules-engine to use' required: false @@ -52,7 +52,7 @@ jobs: uses: actions/checkout@v6 with: repository: cdisc-org/cdisc-open-rules - ref: ${{ inputs.rules_ref || 'main' }} + ref: ${{ inputs.rules_ref || 'rules_2' }} path: open-rules # If cdisc-open-rules is private, add a PAT secret: # token: ${{ secrets.CDISC_OPEN_RULES_TOKEN }} From 57ce680a8c5d9afd2fc379a58bbd3b228a0c9861 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 30 Apr 2026 13:41:00 +0200 Subject: [PATCH 06/13] report adjustments --- .../workflows/validate-published-rules.yml | 130 +++++++++++++----- 1 file changed, 95 insertions(+), 35 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index a561e6aad..0ffd9687b 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -11,7 +11,6 @@ name: Validate Published Rules on: - pull_request: push: branches: - main @@ -22,10 +21,6 @@ on: description: 'Branch/tag/SHA of cdisc-open-rules to validate against' required: false default: 'main' - engine_ref: - description: 'Branch/tag/SHA of cdisc-rules-engine to use' - required: false - default: 'main' jobs: validate-published-rules: @@ -41,7 +36,6 @@ jobs: uses: actions/checkout@v6 with: repository: cdisc-org/cdisc-rules-engine - ref: ${{ inputs.engine_ref || github.sha }} path: engine token: ${{ secrets.GITHUB_TOKEN }} @@ -102,7 +96,9 @@ jobs: RULES_ROOT="$(pwd)/open-rules" PUBLISHED_DIR="$RULES_ROOT/Published" SCRIPTS_DIR="$RULES_ROOT/.github/scripts" - SUMMARY_REPORT="$(pwd)/validation_report.md" + + SUMMARY_TABLE="$(pwd)/summary_table.md" + DETAIL_REPORT="$(pwd)/detail_report.md" OVERALL_EXIT=0 RULE_PASS=0 @@ -117,14 +113,22 @@ jobs: echo "Found ${#RULE_DIRS[@]} rule(s) under Published/" + # -- Initialise summary table + { + echo "# Published Rules Validation — Summary" + echo "" + echo "| Rule | Type | Number | Execution | Expected | Got | Match |" + echo "|------|------|--------|-----------|----------|-----|-------|" + } > "$SUMMARY_TABLE" + + # -- Initialise detail report { - echo "# Published Rules Validation Report" + echo "# Published Rules Validation — Failure Details" echo "" - } > "$SUMMARY_REPORT" + } > "$DETAIL_REPORT" for RULE_DIR in "${RULE_DIRS[@]}"; do RULE_ID=$(basename "$RULE_DIR") - RULE_REL_PATH="Published/$RULE_ID" RULE_YML=$(find "$RULE_DIR" -maxdepth 1 -name "*.yml" | head -1) if [ -z "$RULE_YML" ]; then @@ -137,21 +141,85 @@ jobs: echo "========================================" RULE_EXIT=0 - # Pass ENGINE_DIR explicitly so run_validation.sh knows where core.py is ENGINE_DIR_OVERRIDE="$ENGINE_DIR" \ bash "$SCRIPTS_DIR/run_validation.sh" \ - "$RULE_REL_PATH" \ + "Published/$RULE_ID" \ "$PYTHON_CMD" \ "$RULES_ROOT" \ || RULE_EXIT=$? - if [ -f "$RULES_ROOT/validation_report.md" ]; then - cat "$RULES_ROOT/validation_report.md" >> "$SUMMARY_REPORT" - echo -e "\n---\n" >> "$SUMMARY_REPORT" - rm -f "$RULES_ROOT/validation_report.md" + # -- Parse per-test-case results produced by run_validation.sh + CASE_RESULTS="$RULES_ROOT/case_results.jsonl" + RULE_ROW_FAILED=0 + + if [ -f "$CASE_RESULTS" ]; then + while IFS= read -r line; do + # Parse all fields in a single python3 call using shlex.quote + # to safely produce shell variable assignments + eval "$(echo "$line" | python3 -c " + import sys, json, shlex + d = json.load(sys.stdin) + for k, v in [ + ('CASE_RULE', d['rule']), + ('CASE_TYPE', d['type']), + ('CASE_NUM', str(d['num'])), + ('EXEC_OK', '✅' if d['exec'] else '❌'), + ('EXPECTED', str(d.get('expected', ''))), + ('GOT', str(d.get('got', ''))), + ('MATCH', '✅' if d.get('match') else '❌'), + ('DIFF_FILE', str(d.get('diff', ''))), + ('STDERR_FILE', str(d.get('stderr', ''))), + ]: + print(k + '=' + shlex.quote(v)) + ")" + + echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" + + # Collect detail only for failures + if [[ "$EXEC_OK" == "❌" || "$MATCH" == "❌" ]]; then + RULE_ROW_FAILED=1 + { + echo "## $CASE_RULE — $CASE_TYPE / $CASE_NUM" + if [[ "$EXEC_OK" == "❌" ]]; then + echo "**Execution failed.**" + if [ -f "$STDERR_FILE" ]; then + echo '```' + cat "$STDERR_FILE" + echo '```' + fi + else + echo "**Expected:** $EXPECTED **Got:** $GOT" + if [ -n "$DIFF_FILE" ] && [ -f "$DIFF_FILE" ]; then + echo '```diff' + cat "$DIFF_FILE" + echo '```' + fi + fi + echo "" + } >> "$DETAIL_REPORT" + fi + done < "$CASE_RESULTS" + rm -f "$CASE_RESULTS" + else + # write a single aggregate row + EXEC_OK=$( [ $RULE_EXIT -eq 0 ] && echo "✅" || echo "❌" ) + echo "| $RULE_ID | — | — | $EXEC_OK | — | — | — |" >> "$SUMMARY_TABLE" + if [ $RULE_EXIT -ne 0 ]; then + RULE_ROW_FAILED=1 + # Append whatever markdown run_validation.sh produced + if [ -f "$RULES_ROOT/validation_report.md" ]; then + { + echo "## $RULE_ID" + cat "$RULES_ROOT/validation_report.md" + echo "" + } >> "$DETAIL_REPORT" + fi + fi fi - if [ $RULE_EXIT -eq 0 ]; then + rm -f "$RULES_ROOT/validation_report.md" + + if [ $RULE_ROW_FAILED -eq 0 ] && [ $RULE_EXIT -eq 0 ]; then RULE_PASS=$((RULE_PASS + 1)) echo " → $RULE_ID: PASSED" else @@ -161,22 +229,14 @@ jobs: fi done - # Insert summary totals after the H1 heading - { - echo "**Total:** $((RULE_PASS + RULE_FAIL)) | ✅ Passed: $RULE_PASS | ❌ Failed: $RULE_FAIL" - echo "" - echo "---" - echo "" - } > /tmp/totals.md - head -2 "$SUMMARY_REPORT" > /tmp/final_report.md - cat /tmp/totals.md >> /tmp/final_report.md - tail -n +3 "$SUMMARY_REPORT" >> /tmp/final_report.md - mv /tmp/final_report.md "$SUMMARY_REPORT" + # -- Insert totals line into summary table + TOTALS="**Total:** $((RULE_PASS + RULE_FAIL)) | ✅ Passed: $RULE_PASS | ❌ Failed: $RULE_FAIL" + sed -i "2s|^|$TOTALS\n\n|" "$SUMMARY_TABLE" exit $OVERALL_EXIT # ----------------------------------------------------------------------- - # 6. Upload report + results.json as artifacts + # 6. Upload both reports + raw results as artifacts # ----------------------------------------------------------------------- - name: Upload validation artifacts if: always() @@ -185,16 +245,17 @@ jobs: name: published-rules-validation-${{ github.run_id }} path: | open-rules/Published/**/results/results.json - validation_report.md + summary_table.md + detail_report.md if-no-files-found: warn # ----------------------------------------------------------------------- - # 7. Write report to GitHub Actions Job Summary + # 7. Write ONLY the summary table to GitHub Actions Job Summary # ----------------------------------------------------------------------- - - name: Write report to workflow summary + - name: Write summary table to workflow summary if: always() run: | - [ -f validation_report.md ] && cat validation_report.md >> $GITHUB_STEP_SUMMARY || true + [ -f summary_table.md ] && cat summary_table.md >> $GITHUB_STEP_SUMMARY || true # ----------------------------------------------------------------------- # 8. Fail the job if any rule failed @@ -202,6 +263,5 @@ jobs: - name: Check overall status if: steps.validate.outcome == 'failure' run: | - echo "One or more published rules failed validation — see the report above." + echo "One or more published rules failed validation — see the artifacts for detail_report.md." exit 1 - From 37551aff92c3da20510d54e4b040fdcd63c8c972 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 30 Apr 2026 13:53:45 +0200 Subject: [PATCH 07/13] indentation fix --- .../workflows/validate-published-rules.yml | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 0ffd9687b..473c98d88 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -156,22 +156,23 @@ jobs: while IFS= read -r line; do # Parse all fields in a single python3 call using shlex.quote # to safely produce shell variable assignments - eval "$(echo "$line" | python3 -c " - import sys, json, shlex - d = json.load(sys.stdin) - for k, v in [ - ('CASE_RULE', d['rule']), - ('CASE_TYPE', d['type']), - ('CASE_NUM', str(d['num'])), - ('EXEC_OK', '✅' if d['exec'] else '❌'), - ('EXPECTED', str(d.get('expected', ''))), - ('GOT', str(d.get('got', ''))), - ('MATCH', '✅' if d.get('match') else '❌'), - ('DIFF_FILE', str(d.get('diff', ''))), - ('STDERR_FILE', str(d.get('stderr', ''))), - ]: - print(k + '=' + shlex.quote(v)) - ")" + eval "$(echo "$line" | python3 <<'PY' + import sys, json, shlex + d = json.load(sys.stdin) + for k, v in [ + ('CASE_RULE', d['rule']), + ('CASE_TYPE', d['type']), + ('CASE_NUM', str(d['num'])), + ('EXEC_OK', '✅' if d['exec'] else '❌'), + ('EXPECTED', str(d.get('expected', ''))), + ('GOT', str(d.get('got', ''))), + ('MATCH', '✅' if d.get('match') else '❌'), + ('DIFF_FILE', str(d.get('diff', ''))), + ('STDERR_FILE', str(d.get('stderr', ''))), + ]: + print(k + '=' + shlex.quote(v)) + PY + )" echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" From 118685eb47dd86b55bbd03c72983dbe4c69daac5 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 30 Apr 2026 13:59:00 +0200 Subject: [PATCH 08/13] indentation fix(2) --- .github/workflows/validate-published-rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 473c98d88..9e39eeabf 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -171,7 +171,7 @@ jobs: ('STDERR_FILE', str(d.get('stderr', ''))), ]: print(k + '=' + shlex.quote(v)) - PY + 'PY' )" echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" From e9b1a69d7d625907107db9a52935b7d4d960942c Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 30 Apr 2026 14:07:15 +0200 Subject: [PATCH 09/13] indentation fix(3) -- heredoc in tmp file --- .../workflows/validate-published-rules.yml | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 9e39eeabf..f230a7758 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -91,6 +91,24 @@ jobs: run: | chmod +x open-rules/.github/scripts/run_validation.sh + # Write the JSON-line parser once; called once per test case in the loop below + cat > /tmp/parse_case.py << 'PYEOF' + import sys, json, shlex + d = json.load(sys.stdin) + for k, v in [ + ('CASE_RULE', d['rule']), + ('CASE_TYPE', d['type']), + ('CASE_NUM', str(d['num'])), + ('EXEC_OK', '\u2705' if d['exec'] else '\u274c'), + ('EXPECTED', str(d.get('expected', ''))), + ('GOT', str(d.get('got', ''))), + ('MATCH', '\u2705' if d.get('match') else '\u274c'), + ('DIFF_FILE', str(d.get('diff', ''))), + ('STDERR_FILE', str(d.get('stderr', ''))), + ]: + print(k + '=' + shlex.quote(v)) + PYEOF + PYTHON_CMD="$(pwd)/venv/bin/python" ENGINE_DIR="$(pwd)/engine" RULES_ROOT="$(pwd)/open-rules" @@ -154,25 +172,8 @@ jobs: if [ -f "$CASE_RESULTS" ]; then while IFS= read -r line; do - # Parse all fields in a single python3 call using shlex.quote - # to safely produce shell variable assignments - eval "$(echo "$line" | python3 <<'PY' - import sys, json, shlex - d = json.load(sys.stdin) - for k, v in [ - ('CASE_RULE', d['rule']), - ('CASE_TYPE', d['type']), - ('CASE_NUM', str(d['num'])), - ('EXEC_OK', '✅' if d['exec'] else '❌'), - ('EXPECTED', str(d.get('expected', ''))), - ('GOT', str(d.get('got', ''))), - ('MATCH', '✅' if d.get('match') else '❌'), - ('DIFF_FILE', str(d.get('diff', ''))), - ('STDERR_FILE', str(d.get('stderr', ''))), - ]: - print(k + '=' + shlex.quote(v)) - 'PY' - )" + # Parse all fields in a single python3 call — script written once above + eval "$(echo "$line" | python3 /tmp/parse_case.py)" echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" From 7ad65fe21db0a97c6b8f48b2c5d570fd4687fa6e Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 11 May 2026 14:23:48 +0200 Subject: [PATCH 10/13] moved validation logic to python script --- .../workflows/validate-published-rules.yml | 150 +------ scripts/validate_published_rules.py | 373 ++++++++++++++++++ 2 files changed, 378 insertions(+), 145 deletions(-) create mode 100644 scripts/validate_published_rules.py diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index f230a7758..162eb2f05 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -91,151 +91,11 @@ jobs: run: | chmod +x open-rules/.github/scripts/run_validation.sh - # Write the JSON-line parser once; called once per test case in the loop below - cat > /tmp/parse_case.py << 'PYEOF' - import sys, json, shlex - d = json.load(sys.stdin) - for k, v in [ - ('CASE_RULE', d['rule']), - ('CASE_TYPE', d['type']), - ('CASE_NUM', str(d['num'])), - ('EXEC_OK', '\u2705' if d['exec'] else '\u274c'), - ('EXPECTED', str(d.get('expected', ''))), - ('GOT', str(d.get('got', ''))), - ('MATCH', '\u2705' if d.get('match') else '\u274c'), - ('DIFF_FILE', str(d.get('diff', ''))), - ('STDERR_FILE', str(d.get('stderr', ''))), - ]: - print(k + '=' + shlex.quote(v)) - PYEOF - - PYTHON_CMD="$(pwd)/venv/bin/python" - ENGINE_DIR="$(pwd)/engine" - RULES_ROOT="$(pwd)/open-rules" - PUBLISHED_DIR="$RULES_ROOT/Published" - SCRIPTS_DIR="$RULES_ROOT/.github/scripts" - - SUMMARY_TABLE="$(pwd)/summary_table.md" - DETAIL_REPORT="$(pwd)/detail_report.md" - - OVERALL_EXIT=0 - RULE_PASS=0 - RULE_FAIL=0 - - mapfile -t RULE_DIRS < <(find "$PUBLISHED_DIR" -mindepth 1 -maxdepth 1 -type d | sort) - - if [ ${#RULE_DIRS[@]} -eq 0 ]; then - echo "::warning::No rule directories found under Published/" - exit 0 - fi - - echo "Found ${#RULE_DIRS[@]} rule(s) under Published/" - - # -- Initialise summary table - { - echo "# Published Rules Validation — Summary" - echo "" - echo "| Rule | Type | Number | Execution | Expected | Got | Match |" - echo "|------|------|--------|-----------|----------|-----|-------|" - } > "$SUMMARY_TABLE" - - # -- Initialise detail report - { - echo "# Published Rules Validation — Failure Details" - echo "" - } > "$DETAIL_REPORT" - - for RULE_DIR in "${RULE_DIRS[@]}"; do - RULE_ID=$(basename "$RULE_DIR") - - RULE_YML=$(find "$RULE_DIR" -maxdepth 1 -name "*.yml" | head -1) - if [ -z "$RULE_YML" ]; then - echo "::warning::Skipping $RULE_ID — no .yml file found" - continue - fi - - echo "========================================" - echo " Validating $RULE_ID" - echo "========================================" - - RULE_EXIT=0 - ENGINE_DIR_OVERRIDE="$ENGINE_DIR" \ - bash "$SCRIPTS_DIR/run_validation.sh" \ - "Published/$RULE_ID" \ - "$PYTHON_CMD" \ - "$RULES_ROOT" \ - || RULE_EXIT=$? - - # -- Parse per-test-case results produced by run_validation.sh - CASE_RESULTS="$RULES_ROOT/case_results.jsonl" - RULE_ROW_FAILED=0 - - if [ -f "$CASE_RESULTS" ]; then - while IFS= read -r line; do - # Parse all fields in a single python3 call — script written once above - eval "$(echo "$line" | python3 /tmp/parse_case.py)" - - echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" - - # Collect detail only for failures - if [[ "$EXEC_OK" == "❌" || "$MATCH" == "❌" ]]; then - RULE_ROW_FAILED=1 - { - echo "## $CASE_RULE — $CASE_TYPE / $CASE_NUM" - if [[ "$EXEC_OK" == "❌" ]]; then - echo "**Execution failed.**" - if [ -f "$STDERR_FILE" ]; then - echo '```' - cat "$STDERR_FILE" - echo '```' - fi - else - echo "**Expected:** $EXPECTED **Got:** $GOT" - if [ -n "$DIFF_FILE" ] && [ -f "$DIFF_FILE" ]; then - echo '```diff' - cat "$DIFF_FILE" - echo '```' - fi - fi - echo "" - } >> "$DETAIL_REPORT" - fi - done < "$CASE_RESULTS" - rm -f "$CASE_RESULTS" - else - # write a single aggregate row - EXEC_OK=$( [ $RULE_EXIT -eq 0 ] && echo "✅" || echo "❌" ) - echo "| $RULE_ID | — | — | $EXEC_OK | — | — | — |" >> "$SUMMARY_TABLE" - if [ $RULE_EXIT -ne 0 ]; then - RULE_ROW_FAILED=1 - # Append whatever markdown run_validation.sh produced - if [ -f "$RULES_ROOT/validation_report.md" ]; then - { - echo "## $RULE_ID" - cat "$RULES_ROOT/validation_report.md" - echo "" - } >> "$DETAIL_REPORT" - fi - fi - fi - - rm -f "$RULES_ROOT/validation_report.md" - - if [ $RULE_ROW_FAILED -eq 0 ] && [ $RULE_EXIT -eq 0 ]; then - RULE_PASS=$((RULE_PASS + 1)) - echo " → $RULE_ID: PASSED" - else - RULE_FAIL=$((RULE_FAIL + 1)) - OVERALL_EXIT=1 - echo " → $RULE_ID: FAILED" - fi - done - - # -- Insert totals line into summary table - TOTALS="**Total:** $((RULE_PASS + RULE_FAIL)) | ✅ Passed: $RULE_PASS | ❌ Failed: $RULE_FAIL" - sed -i "2s|^|$TOTALS\n\n|" "$SUMMARY_TABLE" - - exit $OVERALL_EXIT + ./venv/bin/python engine/scripts/validate_published_rules.py \ + --rules-root "$(pwd)/open-rules" \ + --engine-dir "$(pwd)/engine" \ + --python-cmd "$(pwd)/venv/bin/python" \ + --output-dir "$(pwd)" # ----------------------------------------------------------------------- # 6. Upload both reports + raw results as artifacts diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py new file mode 100644 index 000000000..adb47fba4 --- /dev/null +++ b/scripts/validate_published_rules.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python3 +""" +validate_published_rules.py + +Validates every Published rule from cdisc-open-rules against cdisc-rules-engine. +Intended to be called from the CI workflow (validate-published-rules.yml) instead +of the large inline bash block. + +Outputs: + /summary_table.md — per-test-case results table + /detail_report.md — failure details (stderr / diffs) + +Exit code: + 0 — all rules passed + 1 — one or more rules failed +""" + +import argparse +import json +import os +import subprocess +import sys + +SUMMARY_HEADERS = ["Rule", "Type", "Number", "Execution", "Expected", "Got", "Match"] +CHECKMARK = "\u2705" +CROSS = "\u274c" + + +# --------------------------------------------------------------------------- +# Markdown helpers +# --------------------------------------------------------------------------- + + +def create_md_table(table_name, headers, records, property_getter=None): + """ + Create a Markdown table with the given headers and records. + + Args: + table_name: The title of the table + headers: List of column headers + records: List of records to include in the table + property_getter: Optional function to extract properties from records. + If None, assumes records are dictionaries. + Returns: + String containing the formatted Markdown table + """ + title = f"### {table_name}" + header = "| " + " | ".join(headers) + " |" + underline = "| " + " | ".join(["---" for _ in headers]) + " |" + + if property_getter is None: + + def property_getter(record, prop): + return str(record.get(prop, "")) + + values = "\n".join( + "| " + " | ".join([property_getter(record, prop) for prop in headers]) + " |" + for record in records + ) + + return f"{title}\n\n{header}\n{underline}\n{values}" + + +def _parse_case_result(line: str) -> dict: + """ + Parse one JSON line from case_results.jsonl produced by run_validation.sh. + + Returns a flat dict with both display-ready values (keyed by SUMMARY_HEADERS) + and private fields (prefixed with '_') needed to build failure detail sections. + """ + d = json.loads(line) + exec_ok = bool(d["exec"]) + match_ok = bool(d.get("match", False)) + return { + # Display fields — keys match SUMMARY_HEADERS exactly + "Rule": d["rule"], + "Type": d["type"], + "Number": str(d["num"]), + "Execution": CHECKMARK if exec_ok else CROSS, + "Expected": str(d.get("expected", "")), + "Got": str(d.get("got", "")), + "Match": CHECKMARK if match_ok else CROSS, + # Private fields used when generating failure detail + "_exec_ok": exec_ok, + "_match_ok": match_ok, + "_diff_file": d.get("diff", "") or "", + "_stderr_file": d.get("stderr", "") or "", + } + + +def _build_failure_detail(record: dict) -> str: + """Return a Markdown section describing one failing test case.""" + lines = [f"## {record['Rule']} \u2014 {record['Type']} / {record['Number']}\n"] + if not record["_exec_ok"]: + lines.append("**Execution failed.**\n") + stderr_file = record["_stderr_file"] + if stderr_file and os.path.isfile(stderr_file): + lines.append("```") + with open(stderr_file) as fh: + lines.append(fh.read()) + lines.append("```") + else: + lines.append(f"**Expected:** {record['Expected']} **Got:** {record['Got']}\n") + diff_file = record["_diff_file"] + if diff_file and os.path.isfile(diff_file): + lines.append("```diff") + with open(diff_file) as fh: + lines.append(fh.read()) + lines.append("```") + lines.append("") + return "\n".join(lines) + + +def _run_rule_validation( + rule_id: str, + scripts_dir: str, + rules_root: str, + engine_dir: str, + python_cmd: str, +) -> int: + """ + Invoke run_validation.sh for a single rule directory and return its exit code. + Output is streamed directly to stdout/stderr so CI logs remain readable. + """ + env = os.environ.copy() + env["ENGINE_DIR_OVERRIDE"] = engine_dir + + result = subprocess.run( + [ + "bash", + os.path.join(scripts_dir, "run_validation.sh"), + f"Published/{rule_id}", + python_cmd, + rules_root, + ], + env=env, + ) + return result.returncode + + +def _process_case_results(case_results_path: str) -> tuple[list[dict], list[str], bool]: + """ + Read and remove case_results.jsonl, returning: + - summary rows (public fields only) + - failure detail sections + - whether any row failed + """ + summary_rows: list[dict] = [] + details: list[str] = [] + any_failed = False + + with open(case_results_path) as fh: + raw_lines = fh.readlines() + os.remove(case_results_path) + + for raw_line in raw_lines: + raw_line = raw_line.strip() + if not raw_line: + continue + record = _parse_case_result(raw_line) + summary_rows.append({k: v for k, v in record.items() if not k.startswith("_")}) + if not record["_exec_ok"] or not record["_match_ok"]: + any_failed = True + details.append(_build_failure_detail(record)) + + return summary_rows, details, any_failed + + +def _aggregate_row( + rule_id: str, rule_exit: int, rules_root: str +) -> tuple[dict, str | None]: + """ + Build a single-row summary entry and optional failure detail + for a rule that produced no per-case JSONL output. + """ + exec_ok = rule_exit == 0 + row = { + "Rule": rule_id, + "Type": "\u2014", + "Number": "\u2014", + "Execution": CHECKMARK if exec_ok else CROSS, + "Expected": "\u2014", + "Got": "\u2014", + "Match": "\u2014", + } + if exec_ok: + return row, None + + detail = f"## {rule_id}\n\n" + report_file = os.path.join(rules_root, "validation_report.md") + if os.path.isfile(report_file): + with open(report_file) as fh: + detail += fh.read() + detail += "\n" + return row, detail + + +def _validate_one_rule( + rule_id: str, + scripts_dir: str, + rules_root: str, + engine_dir: str, + python_cmd: str, +) -> tuple[list[dict], list[str], bool]: + """ + Run validation for a single rule and return summary rows, failure details, + and whether the rule passed. + """ + print("=" * 40) + print(f" Validating {rule_id}") + print("=" * 40) + + rule_exit = _run_rule_validation( + rule_id, scripts_dir, rules_root, engine_dir, python_cmd + ) + + case_results_path = os.path.join(rules_root, "case_results.jsonl") + + if os.path.isfile(case_results_path): + summary_rows, details, row_failed = _process_case_results(case_results_path) + passed = not row_failed and rule_exit == 0 + else: + row, detail = _aggregate_row(rule_id, rule_exit, rules_root) + summary_rows = [row] + details = [detail] if detail is not None else [] + passed = rule_exit == 0 + + # Clean up any leftover report file + report_file = os.path.join(rules_root, "validation_report.md") + if os.path.isfile(report_file): + os.remove(report_file) + + return summary_rows, details, passed + + +def _write_reports( + summary_records: list[dict], + failure_details: list[str], + rule_pass: int, + rule_fail: int, + output_dir: str, +) -> None: + """Render and write summary_table.md and detail_report.md.""" + total = rule_pass + rule_fail + totals_line = ( + f"**Total:** {total} | " + f"{CHECKMARK} Passed: {rule_pass} | " + f"{CROSS} Failed: {rule_fail}" + ) + summary_md = ( + "# Published Rules Validation \u2014 Summary\n\n" + f"{totals_line}\n\n" + + create_md_table("Results", SUMMARY_HEADERS, summary_records) + ) + detail_body = "\n".join(failure_details) if failure_details else "_No failures._\n" + detail_md = f"# Published Rules Validation \u2014 Failure Details\n\n{detail_body}" + + os.makedirs(output_dir, exist_ok=True) + summary_path = os.path.join(output_dir, "summary_table.md") + detail_path = os.path.join(output_dir, "detail_report.md") + + with open(summary_path, "w", encoding="utf-8") as fh: + fh.write(summary_md) + with open(detail_path, "w", encoding="utf-8") as fh: + fh.write(detail_md) + + print(f"\nSummary written to : {summary_path}") + print(f"Details written to : {detail_path}") + + +def validate_all_rules( + rules_root: str, + engine_dir: str, + python_cmd: str, + output_dir: str, +) -> bool: + """ + Iterate every directory under Published/, run the validation shell script, + parse results, and write the two report files. + + Returns True if any rule failed, False if all passed. + """ + published_dir = os.path.join(rules_root, "Published") + scripts_dir = os.path.join(rules_root, ".github", "scripts") + + if not os.path.isdir(published_dir): + print(f"WARNING: Published/ not found under {rules_root}", file=sys.stderr) + return False + + rule_ids = sorted( + entry + for entry in os.listdir(published_dir) + if os.path.isdir(os.path.join(published_dir, entry)) + ) + + if not rule_ids: + print("WARNING: No rule directories found under Published/", file=sys.stderr) + return False + + print(f"Found {len(rule_ids)} rule(s) under Published/") + + summary_records: list[dict] = [] + failure_details: list[str] = [] + rule_pass = rule_fail = 0 + + for rule_id in rule_ids: + rule_dir = os.path.join(published_dir, rule_id) + yml_files = [f for f in os.listdir(rule_dir) if f.endswith(".yml")] + if not yml_files: + print( + f"WARNING: Skipping {rule_id} \u2014 no .yml file found", + file=sys.stderr, + ) + continue + + rows, details, passed = _validate_one_rule( + rule_id, scripts_dir, rules_root, engine_dir, python_cmd + ) + summary_records.extend(rows) + failure_details.extend(details) + + if passed: + rule_pass += 1 + print(f" \u2192 {rule_id}: PASSED") + else: + rule_fail += 1 + print(f" \u2192 {rule_id}: FAILED") + + _write_reports(summary_records, failure_details, rule_pass, rule_fail, output_dir) + return rule_fail > 0 + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Validate all Published rules from cdisc-open-rules." + ) + parser.add_argument( + "--rules-root", + required=True, + help="Absolute path to the cdisc-open-rules checkout (contains Published/).", + ) + parser.add_argument( + "--engine-dir", + required=True, + help="Absolute path to the cdisc-rules-engine checkout.", + ) + parser.add_argument( + "--python-cmd", + required=True, + help="Python executable passed through to run_validation.sh.", + ) + parser.add_argument( + "--output-dir", + default=".", + help="Directory where summary_table.md and detail_report.md are written (default: cwd).", + ) + return parser.parse_args() + + +if __name__ == "__main__": + _args = _parse_args() + _any_failed = validate_all_rules( + rules_root=os.path.abspath(_args.rules_root), + engine_dir=os.path.abspath(_args.engine_dir), + python_cmd=_args.python_cmd, + output_dir=os.path.abspath(_args.output_dir), + ) + sys.exit(1 if _any_failed else 0) From 0df39d29e4c4af0c36c69ae8834bb7d4354c0cd7 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 11 May 2026 14:27:41 +0200 Subject: [PATCH 11/13] removed trigger on feature branch push event --- .github/workflows/validate-published-rules.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 162eb2f05..2241af67a 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -14,13 +14,12 @@ on: push: branches: - main - - 798-test-against-published workflow_dispatch: inputs: rules_ref: - description: 'Branch/tag/SHA of cdisc-open-rules to validate against' + description: "Branch/tag/SHA of cdisc-open-rules to validate against" required: false - default: 'main' + default: "main" jobs: validate-published-rules: @@ -71,7 +70,7 @@ jobs: - name: Set up Python 3.12 uses: actions/setup-python@v6 with: - python-version: '3.12' + python-version: "3.12" # ----------------------------------------------------------------------- # 4. Install engine dependencies From d1179a3a293ac5093fa3b0d517b1fe751497cc60 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Tue, 12 May 2026 11:59:02 +0200 Subject: [PATCH 12/13] fix action --- .github/workflows/validate-published-rules.yml | 4 +--- scripts/validate_published_rules.py | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 2241af67a..92450d8ed 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -45,10 +45,8 @@ jobs: uses: actions/checkout@v6 with: repository: cdisc-org/cdisc-open-rules - ref: ${{ inputs.rules_ref || 'rules_2' }} + ref: ${{ inputs.rules_ref }} path: open-rules - # If cdisc-open-rules is private, add a PAT secret: - # token: ${{ secrets.CDISC_OPEN_RULES_TOKEN }} # ----------------------------------------------------------------------- # 2b. Debug — verify directory layout diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index adb47fba4..6510dd2cd 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -284,8 +284,8 @@ def validate_all_rules( scripts_dir = os.path.join(rules_root, ".github", "scripts") if not os.path.isdir(published_dir): - print(f"WARNING: Published/ not found under {rules_root}", file=sys.stderr) - return False + print(f"ERROR: Published/ not found under {rules_root}", file=sys.stderr) + return True rule_ids = sorted( entry @@ -294,8 +294,8 @@ def validate_all_rules( ) if not rule_ids: - print("WARNING: No rule directories found under Published/", file=sys.stderr) - return False + print("ERROR: No rule directories found under Published/", file=sys.stderr) + return True print(f"Found {len(rule_ids)} rule(s) under Published/") From b05b140444f0ab1ec642b424bbc2e53e50ba0437 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 21 May 2026 10:08:56 +0200 Subject: [PATCH 13/13] fixed naming in report --- scripts/validate_published_rules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 6510dd2cd..7c10f2e3f 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -78,7 +78,7 @@ def _parse_case_result(line: str) -> dict: "Number": str(d["num"]), "Execution": CHECKMARK if exec_ok else CROSS, "Expected": str(d.get("expected", "")), - "Got": str(d.get("got", "")), + "Actual": str(d.get("actual", "")), "Match": CHECKMARK if match_ok else CROSS, # Private fields used when generating failure detail "_exec_ok": exec_ok, @@ -180,7 +180,7 @@ def _aggregate_row( "Number": "\u2014", "Execution": CHECKMARK if exec_ok else CROSS, "Expected": "\u2014", - "Got": "\u2014", + "Actual": "\u2014", "Match": "\u2014", } if exec_ok: