Skip to content

Commit 832d935

Browse files
Copilot and dannywillems committed
Phase 4: Update CI script to support hyperlink format
Updated .github/scripts/check-ocaml-refs.sh to parse and validate both:

- New hyperlink format: /// OCaml: <https://github.com/MinaProtocol/mina/blob/COMMIT/path#L1-L10>
- Legacy multi-line format (for backward compatibility)

The script now:

- Extracts commit hash, file path, and line ranges from URL fragments
- Validates file existence and line ranges for both formats
- Supports auto-update mode to refresh commit hashes in URLs
- Maintains backward compatibility during the transition period

Updated documentation to reflect that the script now fully supports the new format.

Co-authored-by: dannywillems <6018454+dannywillems@users.noreply.github.com>
1 parent 7a552c0 commit 832d935

File tree

2 files changed

+157
-37
lines changed

2 files changed

+157
-37
lines changed

.github/scripts/check-ocaml-refs.sh

Lines changed: 155 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#!/usr/bin/env bash
22
# Script to validate OCaml reference comments in Rust code
33
# Usage: ./.github/scripts/check-ocaml-refs.sh [--repo REPO_URL] [--branch BRANCH] [--update]
4+
#
5+
# Supports two formats:
6+
# 1. New hyperlink format: /// OCaml: <https://github.com/MinaProtocol/mina/blob/COMMIT/path#L1-L10>
7+
# 2. Legacy multi-line format (deprecated): /// OCaml reference: path L:1-10
48

59
set -euo pipefail
610

@@ -60,9 +64,11 @@ fi
6064

6165
echo "Current OCaml commit: ${CURRENT_COMMIT}"
6266

63-
# Find all Rust files with OCaml references
67+
# Find all Rust files with OCaml references (both formats)
6468
cd "${RUST_ROOT}"
65-
RUST_FILES=$(git grep -l -E "^/// OCaml reference:" "*.rs" "**/*.rs" || true)
69+
RUST_FILES_OLD=$(git grep -l -E "^/// OCaml reference:" "*.rs" "**/*.rs" 2>/dev/null || true)
70+
RUST_FILES_NEW=$(git grep -l -E "^/// OCaml: <https://github.com/MinaProtocol/mina/blob/" "*.rs" "**/*.rs" 2>/dev/null || true)
71+
RUST_FILES=$(echo -e "${RUST_FILES_OLD}\n${RUST_FILES_NEW}" | sort -u | grep -v '^$' || true)
6672

6773
if [ -z "$RUST_FILES" ]; then
6874
echo "No OCaml references found in Rust code"
@@ -79,28 +85,131 @@ echo "========================"
7985

8086
# Process each file
8187
echo "$RUST_FILES" | while IFS= read -r rust_file; do
82-
# Extract OCaml reference comments from the file
88+
# Process new hyperlink format: /// OCaml: <URL>
89+
grep -n "^/// OCaml: <https://github.com/MinaProtocol/mina/blob/" "$rust_file" 2>/dev/null | while IFS=: read -r line_num line_content; do
90+
# Extract URL from angle brackets
91+
URL=$(echo "$line_content" | sed -n 's/.*<\(https:\/\/github\.com\/MinaProtocol\/mina\/blob\/[^>]*\)>.*/\1/p')
92+
93+
if [ -z "$URL" ]; then
94+
echo "INVALID|${rust_file}|LINE:${line_num}|MALFORMED_URL" >> "$RESULTS_FILE"
95+
echo "❌ INVALID: ${rust_file}:${line_num}"
96+
echo " Malformed OCaml reference URL"
97+
continue
98+
fi
99+
100+
# Parse URL: https://github.com/MinaProtocol/mina/blob/COMMIT/path#L1-L10
101+
# Pattern: blob/COMMIT/PATH with optional #L1-L10
102+
if [[ "$URL" =~ blob/([a-f0-9]+)/([^#]+)(#L([0-9]+)(-L([0-9]+))?)? ]]; then
103+
COMMIT="${BASH_REMATCH[1]}"
104+
OCAML_PATH="${BASH_REMATCH[2]}"
105+
START_LINE="${BASH_REMATCH[4]}"
106+
END_LINE="${BASH_REMATCH[6]}"
107+
108+
# If only start line is specified, set end line to same
109+
if [ -n "$START_LINE" ] && [ -z "$END_LINE" ]; then
110+
END_LINE="$START_LINE"
111+
fi
112+
113+
LINE_RANGE=""
114+
if [ -n "$START_LINE" ]; then
115+
LINE_RANGE="${START_LINE}-${END_LINE}"
116+
fi
117+
else
118+
echo "INVALID|${rust_file}|LINE:${line_num}|INVALID_URL_FORMAT" >> "$RESULTS_FILE"
119+
echo "❌ INVALID: ${rust_file}:${line_num}"
120+
echo " URL does not match expected format: $URL"
121+
continue
122+
fi
123+
124+
# Fetch the OCaml file from the current branch
125+
CURRENT_FILE="${TEMP_DIR}/current_${rust_file//\//_}_${OCAML_PATH//\//_}"
126+
CURRENT_URL="https://raw.githubusercontent.com/${GITHUB_OWNER}/${GITHUB_REPO}/${OCAML_BRANCH}/${OCAML_PATH}"
127+
128+
if ! curl -sf "$CURRENT_URL" -o "$CURRENT_FILE"; then
129+
echo "INVALID|${rust_file}|${OCAML_PATH}|FILE_NOT_FOUND" >> "$RESULTS_FILE"
130+
echo "❌ INVALID: ${rust_file}:${line_num}"
131+
echo " OCaml file not found: ${OCAML_PATH}"
132+
else
133+
# Validate line range if specified
134+
RANGE_VALID=true
135+
if [ -n "$LINE_RANGE" ]; then
136+
FILE_LINES=$(wc -l < "$CURRENT_FILE")
137+
138+
if [ "$END_LINE" -gt "$FILE_LINES" ]; then
139+
echo "INVALID|${rust_file}|${OCAML_PATH}|LINE_RANGE_EXCEEDED|L:${LINE_RANGE}|${FILE_LINES}" >> "$RESULTS_FILE"
140+
echo "❌ INVALID: ${rust_file}:${line_num}"
141+
echo " Line range L:${LINE_RANGE} exceeds file length (${FILE_LINES} lines): ${OCAML_PATH}"
142+
RANGE_VALID=false
143+
fi
144+
fi
145+
146+
if [ "$RANGE_VALID" = "true" ]; then
147+
# Verify that the code at the referenced commit matches the current branch
148+
CODE_MATCHES=true
149+
if [ -n "$LINE_RANGE" ]; then
150+
# Fetch the file from the referenced commit
151+
COMMIT_FILE="${TEMP_DIR}/commit_${rust_file//\//_}_${OCAML_PATH//\//_}"
152+
COMMIT_URL="https://raw.githubusercontent.com/${GITHUB_OWNER}/${GITHUB_REPO}/${COMMIT}/${OCAML_PATH}"
153+
154+
if ! curl -sf "$COMMIT_URL" -o "$COMMIT_FILE"; then
155+
echo "INVALID|${rust_file}|${OCAML_PATH}|COMMIT_NOT_FOUND|${COMMIT}" >> "$RESULTS_FILE"
156+
echo "❌ INVALID: ${rust_file}:${line_num}"
157+
echo " Referenced commit does not exist: ${COMMIT}"
158+
CODE_MATCHES=false
159+
else
160+
# Extract the specific line ranges from both files and compare
161+
CURRENT_LINES=$(sed -n "${START_LINE},${END_LINE}p" "$CURRENT_FILE")
162+
COMMIT_LINES=$(sed -n "${START_LINE},${END_LINE}p" "$COMMIT_FILE")
163+
164+
if [ "$CURRENT_LINES" != "$COMMIT_LINES" ]; then
165+
echo "INVALID|${rust_file}|${OCAML_PATH}|CODE_MISMATCH|${COMMIT}" >> "$RESULTS_FILE"
166+
echo "❌ INVALID: ${rust_file}:${line_num}"
167+
echo " Code at L:${LINE_RANGE} differs between commit ${COMMIT} and current branch"
168+
echo " Referenced: https://github.com/${GITHUB_OWNER}/${GITHUB_REPO}/blob/${COMMIT}/${OCAML_PATH}#L${START_LINE}-L${END_LINE}"
169+
echo " Current: https://github.com/${GITHUB_OWNER}/${GITHUB_REPO}/blob/${OCAML_BRANCH}/${OCAML_PATH}#L${START_LINE}-L${END_LINE}"
170+
CODE_MATCHES=false
171+
fi
172+
fi
173+
fi
174+
175+
if [ "$CODE_MATCHES" = "true" ]; then
176+
# Check if commit is stale
177+
if [ "$COMMIT" != "$CURRENT_COMMIT" ]; then
178+
echo "STALE|${rust_file}|${line_num}|${OCAML_PATH}|${COMMIT}|${LINE_RANGE}" >> "$RESULTS_FILE"
179+
echo "✓ VALID: ${rust_file}:${line_num} -> ${OCAML_PATH} L:${LINE_RANGE}"
180+
echo " ⚠ STALE COMMIT: ${COMMIT} (current: ${CURRENT_COMMIT})"
181+
else
182+
echo "VALID|${rust_file}|${line_num}|${OCAML_PATH}|${LINE_RANGE}" >> "$RESULTS_FILE"
183+
echo "✓ VALID: ${rust_file}:${line_num} -> ${OCAML_PATH} L:${LINE_RANGE}"
184+
fi
185+
fi
186+
fi
187+
fi
188+
done
189+
190+
# Process legacy multi-line format (for backward compatibility)
83191
awk '
84192
/^\/\/\/ OCaml reference:/ {
193+
line_num = NR
85194
ref = $0
86195
getline
87196
if ($0 ~ /^\/\/\/ Commit:/) {
88197
commit = $0
89198
getline
90199
if ($0 ~ /^\/\/\/ Last verified:/) {
91200
verified = $0
92-
print ref
201+
print line_num "|" ref
93202
print commit
94203
print verified
95204
print "---"
96205
}
97206
}
98207
}
99208
' "$rust_file" | while IFS= read -r line; do
100-
if [[ "$line" == "/// OCaml reference:"* ]]; then
209+
if [[ "$line" == *"|/// OCaml reference:"* ]]; then
101210
# Extract file path and line range
102-
# Format: src/lib/mina_base/transaction_status.ml L:9-113
103-
FULL_REF="${line#/// OCaml reference: }"
211+
LINE_NUM=$(echo "$line" | cut -d'|' -f1)
212+
FULL_REF="${line#*|/// OCaml reference: }"
104213
OCAML_PATH="${FULL_REF%% L:*}"
105214
LINE_RANGE=$(echo "$FULL_REF" | grep -o 'L:[0-9-]*' | sed 's/L://' || echo "")
106215

@@ -110,28 +219,25 @@ echo "$RUST_FILES" | while IFS= read -r rust_file; do
110219
read -r _separator
111220

112221
COMMIT="${commit_line#/// Commit: }"
113-
# LAST_VERIFIED could be extracted from _verified_line if needed for future validation
114222

115223
# Fetch the OCaml file from the current branch
116-
CURRENT_FILE="${TEMP_DIR}/current_${rust_file//\//_}_${OCAML_PATH//\//_}"
224+
CURRENT_FILE="${TEMP_DIR}/current_legacy_${rust_file//\//_}_${OCAML_PATH//\//_}"
117225
CURRENT_URL="https://raw.githubusercontent.com/${GITHUB_OWNER}/${GITHUB_REPO}/${OCAML_BRANCH}/${OCAML_PATH}"
118226

119227
if ! curl -sf "$CURRENT_URL" -o "$CURRENT_FILE"; then
120-
echo "INVALID|${rust_file}|${OCAML_PATH}|FILE_NOT_FOUND" >> "$RESULTS_FILE"
121-
echo "❌ INVALID: ${rust_file}"
228+
echo "INVALID|${rust_file}|${OCAML_PATH}|FILE_NOT_FOUND|LEGACY_FORMAT" >> "$RESULTS_FILE"
229+
echo "❌ INVALID: ${rust_file}:${LINE_NUM} (LEGACY FORMAT)"
122230
echo " OCaml file not found: ${OCAML_PATH}"
123231
else
124232
# Validate line range if specified
125233
RANGE_VALID=true
126234
if [ -n "$LINE_RANGE" ]; then
127235
FILE_LINES=$(wc -l < "$CURRENT_FILE")
128-
# START_LINE is not currently used but could be useful for validation
129-
# START_LINE=$(echo "$LINE_RANGE" | cut -d'-' -f1)
130236
END_LINE=$(echo "$LINE_RANGE" | cut -d'-' -f2)
131237

132238
if [ "$END_LINE" -gt "$FILE_LINES" ]; then
133-
echo "INVALID|${rust_file}|${OCAML_PATH}|LINE_RANGE_EXCEEDED|L:${LINE_RANGE}|${FILE_LINES}" >> "$RESULTS_FILE"
134-
echo "❌ INVALID: ${rust_file}"
239+
echo "INVALID|${rust_file}|${OCAML_PATH}|LINE_RANGE_EXCEEDED|L:${LINE_RANGE}|${FILE_LINES}|LEGACY_FORMAT" >> "$RESULTS_FILE"
240+
echo "❌ INVALID: ${rust_file}:${LINE_NUM} (LEGACY FORMAT)"
135241
echo " Line range L:${LINE_RANGE} exceeds file length (${FILE_LINES} lines): ${OCAML_PATH}"
136242
RANGE_VALID=false
137243
fi
@@ -145,12 +251,12 @@ echo "$RUST_FILES" | while IFS= read -r rust_file; do
145251
END_LINE=$(echo "$LINE_RANGE" | cut -d'-' -f2)
146252

147253
# Fetch the file from the referenced commit
148-
COMMIT_FILE="${TEMP_DIR}/commit_${rust_file//\//_}_${OCAML_PATH//\//_}"
254+
COMMIT_FILE="${TEMP_DIR}/commit_legacy_${rust_file//\//_}_${OCAML_PATH//\//_}"
149255
COMMIT_URL="https://raw.githubusercontent.com/${GITHUB_OWNER}/${GITHUB_REPO}/${COMMIT}/${OCAML_PATH}"
150256

151257
if ! curl -sf "$COMMIT_URL" -o "$COMMIT_FILE"; then
152-
echo "INVALID|${rust_file}|${OCAML_PATH}|COMMIT_NOT_FOUND|${COMMIT}" >> "$RESULTS_FILE"
153-
echo "❌ INVALID: ${rust_file}"
258+
echo "INVALID|${rust_file}|${OCAML_PATH}|COMMIT_NOT_FOUND|${COMMIT}|LEGACY_FORMAT" >> "$RESULTS_FILE"
259+
echo "❌ INVALID: ${rust_file}:${LINE_NUM} (LEGACY FORMAT)"
154260
echo " Referenced commit does not exist: ${COMMIT}"
155261
CODE_MATCHES=false
156262
else
@@ -159,8 +265,8 @@ echo "$RUST_FILES" | while IFS= read -r rust_file; do
159265
COMMIT_LINES=$(sed -n "${START_LINE},${END_LINE}p" "$COMMIT_FILE")
160266

161267
if [ "$CURRENT_LINES" != "$COMMIT_LINES" ]; then
162-
echo "INVALID|${rust_file}|${OCAML_PATH}|CODE_MISMATCH|${COMMIT}" >> "$RESULTS_FILE"
163-
echo "❌ INVALID: ${rust_file}"
268+
echo "INVALID|${rust_file}|${OCAML_PATH}|CODE_MISMATCH|${COMMIT}|LEGACY_FORMAT" >> "$RESULTS_FILE"
269+
echo "❌ INVALID: ${rust_file}:${LINE_NUM} (LEGACY FORMAT)"
164270
echo " Code at L:${LINE_RANGE} differs between commit ${COMMIT} and current branch"
165271
echo " Referenced: https://github.com/${GITHUB_OWNER}/${GITHUB_REPO}/blob/${COMMIT}/${OCAML_PATH}#L${START_LINE}-L${END_LINE}"
166272
echo " Current: https://github.com/${GITHUB_OWNER}/${GITHUB_REPO}/blob/${OCAML_BRANCH}/${OCAML_PATH}#L${START_LINE}-L${END_LINE}"
@@ -172,12 +278,12 @@ echo "$RUST_FILES" | while IFS= read -r rust_file; do
172278
if [ "$CODE_MATCHES" = "true" ]; then
173279
# Check if commit is stale
174280
if [ "$COMMIT" != "$CURRENT_COMMIT" ]; then
175-
echo "STALE|${rust_file}|${OCAML_PATH}|${COMMIT}|${LINE_RANGE}" >> "$RESULTS_FILE"
176-
echo "✓ VALID: ${rust_file} -> ${OCAML_PATH} L:${LINE_RANGE}"
281+
echo "STALE|${rust_file}|${LINE_NUM}|${OCAML_PATH}|${COMMIT}|${LINE_RANGE}|LEGACY_FORMAT" >> "$RESULTS_FILE"
282+
echo "✓ VALID: ${rust_file}:${LINE_NUM} (LEGACY FORMAT) -> ${OCAML_PATH} L:${LINE_RANGE}"
177283
echo " ⚠ STALE COMMIT: ${COMMIT} (current: ${CURRENT_COMMIT})"
178284
else
179-
echo "VALID|${rust_file}|${OCAML_PATH}|${LINE_RANGE}" >> "$RESULTS_FILE"
180-
echo "✓ VALID: ${rust_file} -> ${OCAML_PATH} L:${LINE_RANGE}"
285+
echo "VALID|${rust_file}|${LINE_NUM}|${OCAML_PATH}|${LINE_RANGE}|LEGACY_FORMAT" >> "$RESULTS_FILE"
286+
echo "✓ VALID: ${rust_file}:${LINE_NUM} (LEGACY FORMAT) -> ${OCAML_PATH} L:${LINE_RANGE}"
181287
fi
182288
fi
183289
fi
@@ -202,13 +308,33 @@ echo "Stale commits: ${STALE_COMMITS}"
202308

203309
if [ "$UPDATE_MODE" = "true" ] && [ "${STALE_COMMITS}" -gt 0 ]; then
204310
echo ""
205-
echo "Updating stale commit hashes and verification dates..."
311+
echo "Updating stale commit hashes..."
312+
313+
# Update new hyperlink format
314+
grep "^STALE|" "$RESULTS_FILE" | grep -v "LEGACY_FORMAT" | while IFS='|' read -r _status rust_file line_num ocaml_path old_commit line_range _rest; do
315+
echo "Updating ${rust_file}:${line_num}..."
316+
317+
# Build new URL
318+
NEW_URL="https://github.com/${GITHUB_OWNER}/${GITHUB_REPO}/blob/${CURRENT_COMMIT}/${ocaml_path}"
319+
if [ -n "$line_range" ] && [ "$line_range" != "" ]; then
320+
START_LINE=$(echo "$line_range" | cut -d'-' -f1)
321+
END_LINE=$(echo "$line_range" | cut -d'-' -f2)
322+
NEW_URL="${NEW_URL}#L${START_LINE}-L${END_LINE}"
323+
fi
324+
325+
# Use sed to replace the URL at the specific line
326+
# We need to escape special characters in the URL for sed
327+
OLD_COMMIT_ESCAPED=$(echo "$old_commit" | sed 's/[\/&]/\\&/g')
328+
CURRENT_COMMIT_ESCAPED=$(echo "$CURRENT_COMMIT" | sed 's/[\/&]/\\&/g')
329+
330+
sed -i "${line_num}s/blob\/${OLD_COMMIT_ESCAPED}\//blob\/${CURRENT_COMMIT_ESCAPED}\//" "${RUST_ROOT}/${rust_file}"
331+
done
206332

207-
CURRENT_DATE=$(date +%Y-%m-%d)
333+
# Update legacy multi-line format (for backward compatibility during transition)
334+
grep "^STALE|" "$RESULTS_FILE" | grep "LEGACY_FORMAT" | while IFS='|' read -r _status rust_file line_num ocaml_path old_commit line_range _legacy; do
335+
echo "Updating legacy format in ${rust_file}:${line_num}..."
208336

209-
# Update each file with stale commits
210-
grep "^STALE|" "$RESULTS_FILE" | while IFS='|' read -r _status rust_file ocaml_path _old_commit _line_range; do
211-
echo "Updating ${rust_file}..."
337+
CURRENT_DATE=$(date +%Y-%m-%d)
212338

213339
# Find and replace the old commit with the new one
214340
sed -i.bak \

website/docs/developers/ocaml-reference-tracking.md

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,8 @@ https://github.com/MinaProtocol/mina/blob/<commit-hash>/<path>#L<start>-L<end>
5858
## Validation script
5959

6060
The `.github/scripts/check-ocaml-refs.sh` script validates all OCaml references.
61-
62-
:::note
63-
64-
The validation script is being updated to support the new hyperlink format. In
65-
the meantime, newly added references using the hyperlink format will not be
66-
automatically validated by CI.
67-
68-
:::
61+
It supports both the new hyperlink format and the legacy multi-line format
62+
(for backward compatibility during the transition period).
6963

7064
```bash
7165
# Validate against compatible branch (default)

0 commit comments

Comments
 (0)