cdisc-org · alexfurmenkov · Mar 5, 2026 · Mar 5, 2026 · Mar 6, 2026 · Mar 20, 2026
diff --git a/.github/scripts/convert_results.py b/.github/scripts/convert_results.py
@@ -25,7 +25,7 @@ def detect_standard(data: dict) -> str:
     return data.get("Conformance_Details", {}).get("Standard", "").upper()
 
 
-def convert_nonusdm(issue_details: list) -> tuple[list[str], list[tuple]]:
+def convert_non_usdm(issue_details: list) -> tuple[list[str], list[tuple]]:
     header = ["Dataset", "Record", "Variable", "Value"]
     rows = []
     for issue in issue_details:
@@ -65,7 +65,7 @@ def convert(json_path: str, csv_path: str) -> None:
     if standard == "USDM":
         header, rows = convert_usdm(issue_details)
     else:
-        header, rows = convert_nonusdm(issue_details)
+        header, rows = convert_non_usdm(issue_details)
 
     with open(csv_path, "w", newline="") as f:
         writer = csv.writer(f)

diff --git a/.github/scripts/run_validation.sh b/.github/scripts/run_validation.sh
@@ -1,7 +1,9 @@
 #!/usr/bin/env bash
 # run_validation.sh — iterates all positive/ and negative/ test cases for a rule,
 # runs the CORE engine against each, converts JSON output to results.csv,
-# diffs against any committed results.csv, and writes a markdown report.
+# diffs against any committed results.csv, and writes two outputs:
+#   - $REPO_ROOT/validation_report.md  (detailed markdown, legacy/fallback)
+#   - $REPO_ROOT/case_results.jsonl    (one JSON line per test case for the summary table)
 #
 # Usage:
 #   bash .github/scripts/run_validation.sh <rule_rel_path> <python_cmd> <repo_root>
@@ -18,9 +20,11 @@ REPO_ROOT="${3:?repo_root required}"
 
 RULE_ID=$(basename "$RULE_REL_PATH")
 RULE_DIR="$REPO_ROOT/$RULE_REL_PATH"
-ENGINE_DIR="$REPO_ROOT/engine"
+# Allow caller to override engine location (e.g. when called from rules-engine repo)
+ENGINE_DIR="${ENGINE_DIR_OVERRIDE:-$REPO_ROOT/engine}"
 SCRIPTS_DIR="$REPO_ROOT/.github/scripts"
 REPORT_FILE="$REPO_ROOT/validation_report.md"
+JSONL_FILE="$REPO_ROOT/case_results.jsonl"
 
 # ---------------------------------------------------------------------------
 # Locate the rule YAML
@@ -48,27 +52,60 @@ TOTAL_CASES=0
 PASSED_CASES=0
 FAILED_CASES=0
 
+# ---------------------------------------------------------------------------
+# Helper: append one JSON line to $JSONL_FILE; reads RULE_ID, TEST_TYPE, CASE_ID.
+# Values go via env vars (no shell-quoting issues); runs under $PYTHON_CMD.
+# Args: exec(true|false)  expected  got  match(true|false)  diff_path  stderr_path
+# ---------------------------------------------------------------------------
+emit_result() {
+  R_RULE="$RULE_ID"   \
+  R_TYPE="$TEST_TYPE" \
+  R_NUM="$CASE_ID"    \
+  R_EXEC="$1"         \
+  R_EXPECTED="$2"     \
+  R_GOT="$3"          \
+  R_MATCH="$4"        \
+  R_DIFF="$5"         \
+  R_STDERR="$6"       \
+  $PYTHON_CMD -c "
+import json, os
+e = os.environ
+print(json.dumps({
+  'rule':     e['R_RULE'],
+  'type':     e['R_TYPE'],
+  'num':      e['R_NUM'],
+  'exec':     e['R_EXEC'] == 'true',
+  'expected': e['R_EXPECTED'],
+  'got':      e['R_GOT'],
+  'match':    e['R_MATCH'] == 'true',
+  'diff':     e['R_DIFF'],
+  'stderr':   e['R_STDERR'],
+}))" >> "$JSONL_FILE"
+}
+
 # ---------------------------------------------------------------------------
 # Iterate test types and cases
 # ---------------------------------------------------------------------------
 for TEST_TYPE in positive negative; do
   TYPE_DIR="$RULE_DIR/$TEST_TYPE"
   [ -d "$TYPE_DIR" ] || continue
 
-  echo "" >> "$REPORT_FILE"
-  echo "## $TEST_TYPE" >> "$REPORT_FILE"
-  echo "" >> "$REPORT_FILE"
+  {
+    echo ""
+    echo "## $TEST_TYPE"
+    echo ""
+  } >> "$REPORT_FILE"
 
-  for CASE_DIR in $(find "$TYPE_DIR" -mindepth 1 -maxdepth 1 -type d | sort); do
+  while IFS= read -r -d '' CASE_DIR; do
     CASE_ID=$(basename "$CASE_DIR")
     DATA_DIR="$CASE_DIR/data"
     RESULTS_DIR="$CASE_DIR/results"
     CASE_LABEL="$TEST_TYPE/$CASE_ID"
 
-    TOTAL_CASES=$((TOTAL_CASES + 1))
     echo ""
     echo "--- Processing $RULE_ID / $CASE_LABEL ---"
 
+    # -- Skip cases that are structurally incomplete (no jsonl entry emitted)
     if [ ! -d "$DATA_DIR" ]; then
       echo "::warning::Missing data/ directory for $CASE_LABEL — skipping"
       echo "### \`$CASE_LABEL\` — ⚠️ Skipped (no data/ directory)" >> "$REPORT_FILE"
@@ -86,6 +123,10 @@ for TEST_TYPE in positive negative; do
       continue
     fi
     echo "  .env: $ENV_FILE"
+
+    TOTAL_CASES=$((TOTAL_CASES + 1))
+
+    # -- Missing committed baseline
     if [ ! -f "$RESULTS_DIR/results.csv" ]; then
       echo "  ERROR: no committed results.csv found for $CASE_LABEL"
       {
@@ -94,11 +135,16 @@ for TEST_TYPE in positive negative; do
         echo "No \`results.csv\` was found for this test case. Run the rule locally before opening a PR and commit the generated \`results.csv\`."
         echo ""
       } >> "$REPORT_FILE"
+      emit_result "false" "" "" "false" "" ""
       FAILED_CASES=$((FAILED_CASES + 1))
       OVERALL_SUCCESS=false
       continue
     fi
 
+    # Back up committed results.csv before the engine run
+    cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/results.csv.committed"
+    COMMITTED_RESULTS="$RESULTS_DIR/results.csv.committed"
+
     ENGINE_ARGS=(
       "-lr"  "$RULE_YML"
       "-d"   "$DATA_DIR"
@@ -110,10 +156,6 @@ for TEST_TYPE in positive negative; do
 
     echo "  Command: python core.py validate ${ENGINE_ARGS[*]}"
 
-    # Back up committed results.csv before the engine run
-    cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/results.csv.committed"
-    COMMITTED_RESULTS="$RESULTS_DIR/results.csv.committed"
-
     # Run the engine
     ENGINE_LOG="/tmp/engine_${TEST_TYPE}_${CASE_ID}.txt"
     ENGINE_EXIT=0
@@ -133,6 +175,7 @@ for TEST_TYPE in positive negative; do
         echo "</details>"
         echo ""
       } >> "$REPORT_FILE"
+      emit_result "false" "" "" "false" "" "$ENGINE_LOG"
       FAILED_CASES=$((FAILED_CASES + 1))
       OVERALL_SUCCESS=false
       mv "$COMMITTED_RESULTS" "$RESULTS_DIR/results.csv"
@@ -147,7 +190,7 @@ for TEST_TYPE in positive negative; do
       2>&1 | tee -a "$ENGINE_LOG" || CONVERT_EXIT=$?
 
     if [ $CONVERT_EXIT -ne 0 ]; then
-      echo "  ERROR: failed to convert results.json to results.csv"
+      echo "  ERROR: failed to convert results.json to CSV"
       {
         echo "### \`$CASE_LABEL\` — ❌ Conversion error"
         echo ""
@@ -159,12 +202,17 @@ for TEST_TYPE in positive negative; do
         echo "</details>"
         echo ""
       } >> "$REPORT_FILE"
+      emit_result "false" "" "" "false" "" "$ENGINE_LOG"
       FAILED_CASES=$((FAILED_CASES + 1))
       OVERALL_SUCCESS=false
       mv "$COMMITTED_RESULTS" "$RESULTS_DIR/results.csv"
       continue
     fi
 
+    # -- Diff
+    EXPECTED_COUNT=$(( $(wc -l < "$COMMITTED_RESULTS") - 1 ))
+    GOT_COUNT=$(( $(wc -l < "$GENERATED_CSV") - 1 ))
+
     DIFF_LOG="/tmp/diff_${TEST_TYPE}_${CASE_ID}.txt"
     DIFF_EXIT=0
     $PYTHON_CMD "$SCRIPTS_DIR/diff_results.py" \
@@ -177,6 +225,7 @@ for TEST_TYPE in positive negative; do
         echo "### \`$CASE_LABEL\` — ✅ Results match committed baseline"
         echo ""
       } >> "$REPORT_FILE"
+      emit_result "true" "$EXPECTED_COUNT" "$GOT_COUNT" "true" "" ""
       PASSED_CASES=$((PASSED_CASES + 1))
     else
       echo "  FAILED — committed results do not match engine output"
@@ -191,24 +240,15 @@ for TEST_TYPE in positive negative; do
         echo "</details>"
         echo ""
       } >> "$REPORT_FILE"
+      emit_result "true" "$EXPECTED_COUNT" "$GOT_COUNT" "false" "$DIFF_LOG" ""
       FAILED_CASES=$((FAILED_CASES + 1))
       OVERALL_SUCCESS=false
     fi
+
     mv "$COMMITTED_RESULTS" "$RESULTS_DIR/results.csv"
-    if [ -s "$ENGINE_LOG" ]; then
-      {
-        echo "<details><summary>Engine output for \`$CASE_LABEL\`</summary>"
-        echo ""
-        echo '```'
-        cat "$ENGINE_LOG"
-        echo '```'
-        echo "</details>"
-        echo ""
-      } >> "$REPORT_FILE"
-    fi
 
-  done   # cases
-done     # test types
+  done < <(find "$TYPE_DIR" -mindepth 1 -maxdepth 1 -type d -print0 | sort -z)
+done
 
 # ---------------------------------------------------------------------------
 # Summary