From 517c3b0315a909a38ac7a329dd0c1ffda74df77f Mon Sep 17 00:00:00 2001
From: Michael Pawliszyn
Date: Wed, 25 Feb 2026 22:10:42 -0500
Subject: [PATCH] feat: add check-tree-quality.sh with tests

Shell script to validate structural quality of a review-tree.md file.
Takes a tree file and a diff file list. Outputs pass/fail per check.

Checks: HEAD SHA, Revision, Description Verification section,
top-level node count (default 7, configurable), file coverage
(every diff file mapped), variation structure (warn if no repeats).

Fixes from review:
- C1: gensub() replaced with POSIX match()/RSTART for macOS awk
- C2: variation pattern matches {variation comment} not just {variation}
- I1: grep -qxF for exact file path matching (no substring false positives)
- BSD sed: all sed patterns use [[:space:]] not \s
- Indent calculation: wc -c newline offset corrected

21 bats tests covering:
- Valid tree passes, unmapped file detected, threshold (default +
  configurable + boundary at 7), missing HEAD/Revision/DescVerification,
  variation warn (no repeats, {variation comment}, exit 0 on warn),
  no warnings on valid sample, exact path matching, empty file list,
  multiple failures reported together

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scripts/check-tree-quality.sh              | 186 ++++++++
 tests/scripts/test-check-tree-quality.bats | 402 +++++++++++++++++++++
 2 files changed, 588 insertions(+)
 create mode 100755 scripts/check-tree-quality.sh
 create mode 100755 tests/scripts/test-check-tree-quality.bats

diff --git a/scripts/check-tree-quality.sh b/scripts/check-tree-quality.sh
new file mode 100755
index 0000000..34460e9
--- /dev/null
+++ b/scripts/check-tree-quality.sh
@@ -0,0 +1,186 @@
+#!/usr/bin/env bash
+#
+# check-tree-quality.sh -- Validate structural quality of a review-tree.md file.
+#
+# Usage: check-tree-quality.sh <tree-file> <file-list> [--max-top-level N]
+#
+# Arguments:
+#   tree-file           Path to the review-tree.md file
+#   file-list           Path to a text file with one file path per line (from PR diff)
+#   --max-top-level N   Maximum allowed top-level nodes (default: 7, Miller's 7±2)
+#
+# Checks performed:
+#   1. HEAD SHA present in header
+#   2. Revision field present in header
+#   3. Description Verification section exists
+#   4. Top-level node count <= threshold
+#   5. Every file in the diff file list appears in at least one tree node
+#   6. Variation nodes have at least one {repeat} child (warning, not failure)
+#
+# Output: One line per check (PASS/FAIL/WARN + reason). Exit 0 if all pass,
+# exit 1 if any check fails. Warnings do not cause failure.
+#
+# Exit codes:
+#   0 -- all checks pass (warnings are OK)
+#   1 -- at least one check failed, or invalid arguments
+
+set -euo pipefail
+
+USAGE="Usage: check-tree-quality.sh <tree-file> <file-list> [--max-top-level N]"
+
+# --- Parse arguments ---
+
+if [ $# -lt 2 ]; then
+  echo "$USAGE" >&2
+  exit 1
+fi
+
+TREE_FILE="$1"
+FILE_LIST="$2"
+shift 2
+
+MAX_TOP_LEVEL=7
+
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --max-top-level)
+      # Check for the value explicitly: under `set -u` a bare "$2" would
+      # abort with an opaque "unbound variable" message instead.
+      if [ $# -lt 2 ]; then
+        echo "Error: --max-top-level requires a value" >&2
+        exit 1
+      fi
+      MAX_TOP_LEVEL="$2"
+      shift 2
+      ;;
+    *)
+      echo "Error: unknown option '$1'" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# --- Input validation ---
+
+# The threshold feeds an integer comparison in check 4, so reject anything
+# that is not a plain non-negative integer before running any check.
+case "$MAX_TOP_LEVEL" in
+  '' | *[!0-9]*)
+    echo "Error: --max-top-level must be a non-negative integer: $MAX_TOP_LEVEL" >&2
+    exit 1
+    ;;
+esac
+
+if [ ! -f "$TREE_FILE" ]; then
+  echo "Error: tree file not found: $TREE_FILE" >&2
+  exit 1
+fi
+
+if [ ! -f "$FILE_LIST" ]; then
+  echo "Error: file list not found: $FILE_LIST" >&2
+  exit 1
+fi
+
+# --- Run checks ---
+
+FAILED=0
+
+# Check 1: HEAD SHA
+if grep -q -- '| HEAD' "$TREE_FILE"; then
+  echo "PASS: HEAD SHA present"
+else
+  echo "FAIL: HEAD SHA missing from header"
+  FAILED=1
+fi
+
+# Check 2: Revision field
+if grep -q -- '| Revision' "$TREE_FILE"; then
+  echo "PASS: Revision field present"
+else
+  echo "FAIL: Revision field missing from header"
+  FAILED=1
+fi
+
+# Check 3: Description Verification section
+if grep -q -- '^## Description Verification' "$TREE_FILE"; then
+  echo "PASS: Description Verification section present"
+else
+  echo "FAIL: Description Verification section missing"
+  FAILED=1
+fi
+
+# Check 4: Top-level node count
+# grep -c exits 1 on a zero count; `|| true` keeps `set -e` from aborting.
+TOP_LEVEL=$(grep -cE '^- \[(pending|reviewed|accepted)\] [0-9]' "$TREE_FILE" || true)
+if [ "$TOP_LEVEL" -le "$MAX_TOP_LEVEL" ]; then
+  echo "PASS: top-level concepts: $TOP_LEVEL (max: $MAX_TOP_LEVEL)"
+else
+  echo "FAIL: top-level concepts: $TOP_LEVEL exceeds max of $MAX_TOP_LEVEL"
+  FAILED=1
+fi
+
+# Check 5: File coverage -- every diff file appears in at least one node
+# Extract all file paths from the tree (strip line ranges and change counts).
+# POSIX [[:space:]] is used because \s is not part of portable ERE.
+TREE_FILES=$(grep -E '^[[:space:]]+- .+ L[0-9]' "$TREE_FILE" \
+  | sed -E -e 's/^[[:space:]]+- //' -e 's/ L[0-9]+.*//' \
+  | sort -u || true)
+
+UNMAPPED=""
+# `|| [ -n "$file" ]` keeps a final line that has no trailing newline.
+while IFS= read -r file || [ -n "$file" ]; do
+  [ -z "$file" ] && continue
+  # A here-string avoids `echo | grep -q`, which can die from SIGPIPE under
+  # pipefail when grep exits early and would then be misread as "no match".
+  if ! grep -qxF -- "$file" <<< "$TREE_FILES"; then
+    UNMAPPED="${UNMAPPED}  ${file}"$'\n'
+  fi
+done < "$FILE_LIST"
+
+if [ -z "$UNMAPPED" ]; then
+  echo "PASS: file coverage complete"
+else
+  echo "FAIL: unmapped files in diff:"
+  # %s prints each path verbatim (no backslash-escape interpretation).
+  printf '%s' "$UNMAPPED"
+  FAILED=1
+fi
+
+# Check 6: Variation structure (warning only)
+# For each {variation} node, check if any child has {repeat}
+# Pattern matches {variation}, {variation comment}, etc.
+VARIATION_LINES=$(grep -nE '\{[^}]*variation' "$TREE_FILE" || true)
+NODE_ID_RE='\] ([0-9]+(\.[0-9]+)*)\.'
+if [ -n "$VARIATION_LINES" ]; then
+  while IFS=: read -r line_num line_text; do
+    # Indentation of this node = number of leading spaces.
+    # Assumes 2-space indent per level (per format spec)
+    leading="${line_text%%[! ]*}"
+    child_indent=$(( ${#leading} + 2 ))
+    # Scan the lines after this node. The indentation rule runs before the
+    # {repeat} rule so that a {repeat} tag on a sibling or shallower node
+    # ends the scan instead of being counted as a child.
+    has_repeat=$(awk -v start="$line_num" -v ci="$child_indent" '
+      NR <= start { next }
+      /^[[:space:]]*- \[/ {
+        match($0, /[^ ]/); cur = RSTART - 1
+        if (cur < ci) exit
+      }
+      /\{[^}]*repeat/ { print "yes"; exit }
+    ' "$TREE_FILE" || true)
+    if [ -z "$has_repeat" ]; then
+      # Fall back to the line number when the line carries no "N.N." id;
+      # a warning-only check must never abort the script.
+      if [[ "$line_text" =~ $NODE_ID_RE ]]; then
+        node_id="${BASH_REMATCH[1]}"
+      else
+        node_id="(line $line_num)"
+      fi
+      echo "WARN: variation node $node_id has no {repeat} children"
+    fi
+  done <<< "$VARIATION_LINES"
+fi
+
+# --- Exit ---
+
+exit "$FAILED"
diff --git a/tests/scripts/test-check-tree-quality.bats b/tests/scripts/test-check-tree-quality.bats
new file mode 100755
index 0000000..513f52d
--- /dev/null
+++ b/tests/scripts/test-check-tree-quality.bats
@@ -0,0 +1,402 @@
+#!/usr/bin/env bats
+
+# Tests for scripts/check-tree-quality.sh
+# TDD: write tests first, then implement the script.
+
+REPO_ROOT="$(cd "$BATS_TEST_DIRNAME/../.."
&& pwd)" +SAMPLE="$REPO_ROOT/tests/formats/sample-tree-hawksbury.md" +SCRIPT="$REPO_ROOT/scripts/check-tree-quality.sh" + +setup() { + cp "$SAMPLE" "$BATS_TEST_TMPDIR/tree.md" + # Build the file list from the sample tree itself (every path it references), so all diff files start out mapped + grep -E '^\s+- .+ L[0-9]' "$SAMPLE" | sed -E 's/^[[:space:]]+-[[:space:]]//' | sed -E 's/ L[0-9]+.*//' | sort -u > "$BATS_TEST_TMPDIR/files.txt" +} + +# --- Pass on valid tree --- + +@test "passes on valid Hawksbury sample tree" { + run "$SCRIPT" "$BATS_TEST_TMPDIR/tree.md" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -eq 0 ] + [[ "$output" == *"PASS"* ]] +} + +# --- File coverage --- + +@test "fails when diff file is not mapped to any node" { + # Add an unmapped file to the file list + echo "src/main/java/com/hawksbury/orphan/UnmappedHandler.java" >> "$BATS_TEST_TMPDIR/files.txt" + run "$SCRIPT" "$BATS_TEST_TMPDIR/tree.md" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -ne 0 ] + [[ "$output" == *"UnmappedHandler.java"* ]] + [[ "$output" == *"unmapped"* ]] +} + +@test "passes when all diff files are in tree" { + run "$SCRIPT" "$BATS_TEST_TMPDIR/tree.md" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -eq 0 ] +} + +# --- Top-level node count --- + +@test "passes with 5 top-level nodes (under 7 threshold)" { + run "$SCRIPT" "$BATS_TEST_TMPDIR/tree.md" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -eq 0 ] +} + +@test "fails when top-level nodes exceed threshold" { + local tmpfile="$BATS_TEST_TMPDIR/big-tree.md" + cat > "$tmpfile" << 'EOF' +# Review Tree: Test + +| Field | Value | +|-------------|-------| +| PR | test/test#1 | +| HEAD | abc123 | +| Revision | 1 | +| Tree Built | 2026-02-25T10:00:00Z | +| Updated | 2026-02-25T10:00:00Z | + +## Tree + +- [pending] 1. Concept one +- [pending] 2. Concept two +- [pending] 3. Concept three +- [pending] 4. Concept four +- [pending] 5. Concept five +- [pending] 6. Concept six +- [pending] 7. Concept seven +- [pending] 8. 
Concept eight + +## Description Verification + +| # | Claim | Status | Evidence | + +## Coverage + +Total files in diff: 0 +Files mapped to tree: 0 +Unmapped files: none +EOF + echo -n "" > "$BATS_TEST_TMPDIR/empty-files.txt" + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/empty-files.txt" + [ "$status" -ne 0 ] + [[ "$output" == *"top-level"* ]] + [[ "$output" == *"8"* ]] +} + +@test "configurable threshold via --max-top-level" { + local tmpfile="$BATS_TEST_TMPDIR/big-tree.md" + cat > "$tmpfile" << 'EOF' +# Review Tree: Test + +| Field | Value | +|-------------|-------| +| PR | test/test#1 | +| HEAD | abc123 | +| Revision | 1 | +| Tree Built | 2026-02-25T10:00:00Z | +| Updated | 2026-02-25T10:00:00Z | + +## Tree + +- [pending] 1. Concept one +- [pending] 2. Concept two +- [pending] 3. Concept three +- [pending] 4. Concept four +- [pending] 5. Concept five +- [pending] 6. Concept six +- [pending] 7. Concept seven +- [pending] 8. Concept eight + +## Description Verification + +| # | Claim | Status | Evidence | + +## Coverage + +Total files in diff: 0 +Files mapped to tree: 0 +Unmapped files: none +EOF + echo -n "" > "$BATS_TEST_TMPDIR/empty-files.txt" + # The tree above has 8 top-level nodes; with the threshold raised to 10 it should pass + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/empty-files.txt" --max-top-level 10 + [ "$status" -eq 0 ] +} + +# --- HEAD SHA --- + +@test "fails when HEAD SHA is missing" { + local tmpfile="$BATS_TEST_TMPDIR/tree.md" + sed -E '/^\| HEAD/d' "$SAMPLE" > "$tmpfile" + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -ne 0 ] + [[ "$output" == *"HEAD"* ]] +} + +# --- Description Verification --- + +@test "fails when Description Verification section is missing" { + local tmpfile="$BATS_TEST_TMPDIR/tree.md" + sed '/^## Description Verification/,/^## Coverage/{ /^## Coverage/!d; }' "$SAMPLE" > "$tmpfile" + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -ne 0 ] + [[ "$output" == *"Description Verification"* ]] +} + +# --- Variation structure --- + 
+@test "warns when variation node has no repeat children" { + local tmpfile="$BATS_TEST_TMPDIR/tree.md" + cat > "$tmpfile" << 'EOF' +# Review Tree: Test + +| Field | Value | +|-------------|-------| +| PR | test/test#1 | +| HEAD | abc123 | +| Revision | 1 | +| Tree Built | 2026-02-25T10:00:00Z | +| Updated | 2026-02-25T10:00:00Z | + +## Tree + +- [reviewed] 1. Guard mechanism {variation} + - [reviewed] 1.1. Example handler + files: + - src/Handler.java L1-10 (+10/-0) + - [reviewed] 1.2. Another handler + files: + - src/Other.java L1-10 (+10/-0) + +## Description Verification + +| # | Claim | Status | Evidence | + +## Coverage + +Total files in diff: 2 +Files mapped to tree: 2 +Unmapped files: none +EOF + cat > "$BATS_TEST_TMPDIR/var-files.txt" << 'FILELIST' +src/Handler.java +src/Other.java +FILELIST + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/var-files.txt" + [ "$status" -eq 0 ] + [[ "$output" == *"WARN"* ]] +} + +# --- Revision field --- + +@test "fails when Revision field is missing" { + local tmpfile="$BATS_TEST_TMPDIR/tree.md" + sed -E '/^\| Revision/d' "$SAMPLE" > "$tmpfile" + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -ne 0 ] + [[ "$output" == *"Revision"* ]] +} + +# --- Input validation --- + +@test "rejects missing arguments" { + run "$SCRIPT" + [ "$status" -ne 0 ] +} + +@test "rejects non-existent tree file" { + run "$SCRIPT" "/tmp/nonexistent.md" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -ne 0 ] +} + +@test "rejects non-existent file list" { + run "$SCRIPT" "$BATS_TEST_TMPDIR/tree.md" "/tmp/nonexistent.txt" + [ "$status" -ne 0 ] +} + +# --- Output format --- + +@test "output lists all checks performed" { + run "$SCRIPT" "$BATS_TEST_TMPDIR/tree.md" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -eq 0 ] + [[ "$output" == *"HEAD SHA"* ]] + [[ "$output" == *"Revision"* ]] + [[ "$output" == *"top-level"* ]] + [[ "$output" == *"file coverage"* ]] + [[ "$output" == *"Description Verification"* ]] +} + +@test "no warnings on valid 
Hawksbury sample" { + run "$SCRIPT" "$BATS_TEST_TMPDIR/tree.md" "$BATS_TEST_TMPDIR/files.txt" + [ "$status" -eq 0 ] + [[ "$output" != *"WARN"* ]] +} + +# --- Regression tests --- + +@test "variation check catches {variation comment} nodes" { + local tmpfile="$BATS_TEST_TMPDIR/var-comment.md" + cat > "$tmpfile" << 'EOF' +# Review Tree: Test + +| Field | Value | +|-------------|-------| +| PR | test/test#1 | +| HEAD | abc123 | +| Revision | 1 | +| Tree Built | 2026-02-25T10:00:00Z | +| Updated | 2026-02-25T10:00:00Z | + +## Tree + +- [accepted] 1. Pattern group {variation comment} + - [accepted] 1.1. Example + files: + - src/A.java L1-5 (+5/-0) + - [accepted] 1.2. Not a repeat + files: + - src/B.java L1-5 (+5/-0) + +## Description Verification + +| # | Claim | Status | Evidence | + +## Coverage + +Total files in diff: 2 +Files mapped to tree: 2 +Unmapped files: none +EOF + cat > "$BATS_TEST_TMPDIR/vc-files.txt" << 'FILELIST' +src/A.java +src/B.java +FILELIST + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/vc-files.txt" + [ "$status" -eq 0 ] + # Should WARN about node 1 having no {repeat} children + [[ "$output" == *"WARN"* ]] + [[ "$output" == *"variation node 1"* ]] +} + +@test "file coverage requires exact path match" { + local tmpfile="$BATS_TEST_TMPDIR/tree.md" + # tmpfile is the sample copy written by setup(); a bare file name must not match a longer path -- should fail (assumes the sample has no bare "Handler.java" entry -- TODO confirm) + echo "Handler.java" > "$BATS_TEST_TMPDIR/short-files.txt" + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/short-files.txt" + [ "$status" -ne 0 ] + [[ "$output" == *"Handler.java"* ]] + [[ "$output" == *"unmapped"* ]] +} + +@test "passes with exactly max-top-level nodes (boundary)" { + local tmpfile="$BATS_TEST_TMPDIR/seven.md" + cat > "$tmpfile" << 'EOF' +# Review Tree: Test + +| Field | Value | +|-------------|-------| +| PR | test/test#1 | +| HEAD | abc123 | +| Revision | 1 | +| Tree Built | 2026-02-25T10:00:00Z | +| Updated | 2026-02-25T10:00:00Z | + +## Tree + +- [pending] 1. One +- [pending] 2. Two +- [pending] 3. Three +- [pending] 4. 
Four +- [pending] 5. Five +- [pending] 6. Six +- [pending] 7. Seven + +## Description Verification + +| # | Claim | Status | Evidence | + +## Coverage + +Total files in diff: 0 +Files mapped to tree: 0 +Unmapped files: none +EOF + echo -n "" > "$BATS_TEST_TMPDIR/empty-files.txt" + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/empty-files.txt" + [ "$status" -eq 0 ] + [[ "$output" == *"top-level concepts: 7"* ]] +} + +@test "passes with empty file list" { + echo -n "" > "$BATS_TEST_TMPDIR/empty-files.txt" + run "$SCRIPT" "$BATS_TEST_TMPDIR/tree.md" "$BATS_TEST_TMPDIR/empty-files.txt" + [ "$status" -eq 0 ] +} + +@test "reports all failures not just the first" { + local tmpfile="$BATS_TEST_TMPDIR/broken.md" + cat > "$tmpfile" << 'EOF' +# Review Tree: Test + +| Field | Value | +|-------------|-------| +| PR | test/test#1 | +| Tree Built | 2026-02-25T10:00:00Z | +| Updated | 2026-02-25T10:00:00Z | + +## Tree + +- [pending] 1. Only concept + +## Coverage + +Total files in diff: 0 +Files mapped to tree: 0 +Unmapped files: none +EOF + echo -n "" > "$BATS_TEST_TMPDIR/empty-files.txt" + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/empty-files.txt" + [ "$status" -ne 0 ] + # Should report all three missing: HEAD, Revision, Description Verification + [[ "$output" == *"HEAD SHA"* ]] + [[ "$output" == *"Revision"* ]] + [[ "$output" == *"Description Verification"* ]] +} + +@test "variation warning does not cause exit failure" { + local tmpfile="$BATS_TEST_TMPDIR/var-warn.md" + cat > "$tmpfile" << 'EOF' +# Review Tree: Test + +| Field | Value | +|-------------|-------| +| PR | test/test#1 | +| HEAD | abc123 | +| Revision | 1 | +| Tree Built | 2026-02-25T10:00:00Z | +| Updated | 2026-02-25T10:00:00Z | + +## Tree + +- [reviewed] 1. Guard mechanism {variation} + - [reviewed] 1.1. 
Example handler + files: + - src/Handler.java L1-10 (+10/-0) + +## Description Verification + +| # | Claim | Status | Evidence | + +## Coverage + +Total files in diff: 1 +Files mapped to tree: 1 +Unmapped files: none +EOF + echo "src/Handler.java" > "$BATS_TEST_TMPDIR/vw-files.txt" + run "$SCRIPT" "$tmpfile" "$BATS_TEST_TMPDIR/vw-files.txt" + [ "$status" -eq 0 ] + [[ "$output" == *"WARN"* ]] +}