# Patch summary (reviewer preamble; everything from the first "diff --git" header on is unchanged).
#
# .github/workflows/vale-autofix.yml
#   - BROKEN and BROKEN_ANCHORS are now read from the last "Found <N>" match in
#     /tmp/anchor-check-output.txt instead of counting lines of /tmp/anchor-errors.txt.
#   - The summary step now keys on anchor-check-output.txt being non-empty and reads
#     anchor-errors.txt with errors suppressed (2>/dev/null || true).
#
# scripts/check-anchors.sh
#   - Adds list_anchors(): calls load_headings, then emits each word of HEADING_CACHE[$file]
#     as "#<slug>" joined by a separator.
#   - check_file now considers every line containing "](" (previously only lines containing "#"),
#     skips hrefs that start with a URI scheme, reports a missing target when its path ends in
#     .md/.mdx, and for a missing anchor prints the trimmed source line plus an "Available:" list.
#   - --staged mode also selects .mdx files; messages say "link(s)" instead of "anchor link(s)".
#
# NOTE(review): the `|| echo "0"` fallback only runs when the grep|grep|tail pipeline returns
#   non-zero. Without pipefail that status is tail's, so the variable may end up empty rather
#   than 0 when no "Found <N>" line exists — confirm the shell options of these workflow steps.
# NOTE(review): the loop variable `slug` in list_anchors is not declared local.
# NOTE(review): "โ€”" and "ยท" look like mis-decoded "—" and "·" — confirm against the real files.
diff --git a/.github/workflows/vale-autofix.yml b/.github/workflows/vale-autofix.yml index 80e5f8fcea..032be3061a 100644 --- a/.github/workflows/vale-autofix.yml +++ b/.github/workflows/vale-autofix.yml @@ -247,7 +247,7 @@ jobs: ./scripts/check-anchors.sh "${FILES[@]}" > /tmp/anchor-check-output.txt 2>&1 || BROKEN=1 if [ "$BROKEN" -eq 1 ]; then grep '^\s' /tmp/anchor-check-output.txt > /tmp/anchor-errors.txt || true - BROKEN=$(wc -l < /tmp/anchor-errors.txt | tr -d ' ') + BROKEN=$(grep -oE 'Found [0-9]+' /tmp/anchor-check-output.txt | grep -oE '[0-9]+' | tail -1 || echo "0") echo "Broken anchors found:" cat /tmp/anchor-check-output.txt fi @@ -355,9 +355,9 @@ jobs: # Collect anchor check results BROKEN_ANCHORS=0 ANCHOR_BODY="" - if [ -f /tmp/anchor-errors.txt ] && [ -s /tmp/anchor-errors.txt ]; then - BROKEN_ANCHORS=$(wc -l < /tmp/anchor-errors.txt | tr -d ' ') - ANCHOR_BODY=$(cat /tmp/anchor-errors.txt) + if [ -f /tmp/anchor-check-output.txt ] && [ -s /tmp/anchor-check-output.txt ]; then + BROKEN_ANCHORS=$(grep -oE 'Found [0-9]+' /tmp/anchor-check-output.txt | grep -oE '[0-9]+' | tail -1 || echo "0") + ANCHOR_BODY=$(cat /tmp/anchor-errors.txt 2>/dev/null || true) fi # Build the summary comment diff --git a/scripts/check-anchors.sh b/scripts/check-anchors.sh index 2f1467bc3e..ee5b842662 100755 --- a/scripts/check-anchors.sh +++ b/scripts/check-anchors.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash # check-anchors.sh โ€” Validate that #anchor hrefs in markdown files resolve to real headings # Usage: -# check-anchors.sh --staged check staged .md docs/ files (pre-commit mode) +# check-anchors.sh --staged check staged .md/.mdx docs/ files (pre-commit mode) # check-anchors.sh [file...] 
check specific files -# Exits 1 if any broken anchors are found +# Exits 1 if any broken links are found set -euo pipefail @@ -51,6 +51,16 @@ anchor_exists() { [[ "${HEADING_CACHE[$file]}" == *" $anchor "* ]] } +list_anchors() { + local file="$1" + load_headings "$file" + local slugs="${HEADING_CACHE[$file]:-}" result="" + for slug in $slugs; do + result="${result:+$result ยท }#$slug" + done + printf '%s' "$result" +} + check_file() { local source_file="$1" local abs_source source_dir @@ -70,20 +80,25 @@ check_file() { continue fi $in_fence && continue - [[ "$line" == *"#"* ]] || continue + [[ "$line" == *"]("* ]] || continue local rest="$line" while [[ "$rest" =~ $link_re ]]; do local href="${BASH_REMATCH[1]}" rest="${rest#*]($href)}" - [[ "$href" == *"#"* ]] || continue # Strip optional link title: path#anchor "title" -> path#anchor href="$(sed -E "s/[[:space:]]+[\"'][^\"']*[\"']$//" <<< "$href")" + # Skip external URLs and special schemes (http://, mailto:, etc.) + [[ "$href" =~ ^[a-zA-Z][a-zA-Z0-9+.-]*: ]] && continue + local path="${href%%#*}" - local anchor="${href#*#}" - [[ -n "$anchor" ]] || continue + local anchor="" + [[ "$href" == *"#"* ]] && anchor="${href#*#}" + + # Nothing to check โ€” empty href + [[ -z "$path" && -z "$anchor" ]] && continue local target_file if [[ -z "$path" ]]; then @@ -95,12 +110,29 @@ check_file() { fi target_file="$(realpath -m "$target_file" 2>/dev/null || printf '%s' "$target_file")" - # Skip if target doesn't exist โ€” broken links are Docusaurus's job - [[ -f "$target_file" ]] || continue + local trimmed_line="${line#"${line%%[![:space:]]*}"}" + + # Check if target file exists; report broken links to markdown files + if [[ -n "$path" ]] && ! 
[[ -f "$target_file" ]]; then + if [[ "$target_file" =~ \.(md|mdx)$ ]]; then + printf ' %s:%d\n' "${source_file#$REPO_ROOT/}" "$line_num" + printf ' %s\n' "$trimmed_line" + printf ' %s not found\n' "${target_file#$REPO_ROOT/}" + printf '\n' + (( ERRORS++ )) || true + fi + continue + fi - if ! anchor_exists "$target_file" "$anchor"; then - printf ' %s:%d -> #%s not found in %s\n' \ - "${source_file#$REPO_ROOT/}" "$line_num" "$anchor" "${target_file#$REPO_ROOT/}" + # Check anchor if present + if [[ -n "$anchor" ]] && ! anchor_exists "$target_file" "$anchor"; then + local available + available="$(list_anchors "$target_file")" + printf ' %s:%d\n' "${source_file#$REPO_ROOT/}" "$line_num" + printf ' %s\n' "$trimmed_line" + printf ' #%s not found in %s\n' "$anchor" "${target_file#$REPO_ROOT/}" + [[ -n "$available" ]] && printf ' Available: %s\n' "$available" + printf '\n' (( ERRORS++ )) || true fi done @@ -113,7 +145,7 @@ if [[ "${1:-}" == "--staged" ]]; then while IFS= read -r f; do [[ -f "$REPO_ROOT/$f" ]] && FILES+=("$REPO_ROOT/$f") done < <(git -C "$REPO_ROOT" diff --cached --name-only --diff-filter=ACM \ - | grep -E '\.md$' | grep '^docs/' || true) + | grep -E '\.mdx?$' | grep '^docs/' || true) else for f in "$@"; do [[ -f "$f" ]] && FILES+=("$f") @@ -124,13 +156,13 @@ if [[ ${#FILES[@]} -eq 0 ]]; then exit 0 fi -printf 'Checking anchor links in %d file(s)...\n' "${#FILES[@]}" +printf 'Checking links in %d file(s)...\n' "${#FILES[@]}" for file in "${FILES[@]}"; do check_file "$file" done if [[ "$ERRORS" -gt 0 ]]; then - printf '\nFound %d broken anchor link(s).\n' "$ERRORS" + printf '\nFound %d broken link(s).\n' "$ERRORS" exit 1 fi