11#! /usr/bin/env bash
22# Script to validate OCaml reference comments in Rust code
33# Usage: ./.github/scripts/check-ocaml-refs.sh [--repo REPO_URL] [--branch BRANCH] [--update]
4+ #
5+ # Supports two formats:
6+ # 1. New hyperlink format: /// OCaml: <https://github.com/MinaProtocol/mina/blob/COMMIT/path#L1-L10>
7+ # 2. Legacy multi-line format (deprecated): /// OCaml reference: path L:1-10
48
59set -euo pipefail
610
6064
6165echo " Current OCaml commit: ${CURRENT_COMMIT} "
6266
63- # Find all Rust files with OCaml references
67+ # Find all Rust files with OCaml references (both formats)
6468cd " ${RUST_ROOT} "
65- RUST_FILES=$( git grep -l -E " ^/// OCaml reference:" " *.rs" " **/*.rs" || true)
# Collect the files for each reference style separately, then merge them into
# one de-duplicated, newline-separated list with blank entries removed.
#   RUST_FILES_OLD - files containing the legacy "/// OCaml reference:" form
#   RUST_FILES_NEW - files containing the "/// OCaml: <https://...>" form
# The "|| true" guards are intentional: git grep (and the final grep -v) exit
# non-zero when nothing matches, which must not abort the script under
# "set -euo pipefail"; the empty result is handled by the check that follows.
RUST_FILES_OLD=$(git grep -l -E "^/// OCaml reference:" "*.rs" "**/*.rs" 2>/dev/null || true)
RUST_FILES_NEW=$(git grep -l -E "^/// OCaml: <https://github.com/MinaProtocol/mina/blob/" "*.rs" "**/*.rs" 2>/dev/null || true)
# printf '%s\n' is used instead of "echo -e" so that backslashes in a path are
# passed through literally rather than being interpreted as escape sequences.
RUST_FILES=$(printf '%s\n%s\n' "$RUST_FILES_OLD" "$RUST_FILES_NEW" | sort -u | grep -v '^$' || true)
6672
6773if [ -z " $RUST_FILES " ]; then
6874 echo " No OCaml references found in Rust code"
@@ -79,28 +85,131 @@ echo "========================"
7985
8086# Process each file
8187echo " $RUST_FILES " | while IFS= read -r rust_file; do
82- # Extract OCaml reference comments from the file
88+ # Process new hyperlink format: /// OCaml: <URL>
89+ grep -n " ^/// OCaml: <https://github.com/MinaProtocol/mina/blob/" " $rust_file " 2> /dev/null | while IFS=: read -r line_num line_content; do
90+ # Extract URL from angle brackets
91+ URL=$( echo " $line_content " | sed -n ' s/.*<\(https:\/\/github\.com\/MinaProtocol\/mina\/blob\/[^>]*\)>.*/\1/p' )
92+
93+ if [ -z " $URL " ]; then
94+ echo " INVALID|${rust_file} |LINE:${line_num} |MALFORMED_URL" >> " $RESULTS_FILE "
95+ echo " ❌ INVALID: ${rust_file} :${line_num} "
96+ echo " Malformed OCaml reference URL"
97+ continue
98+ fi
99+
100+ # Parse URL: https://github.com/MinaProtocol/mina/blob/COMMIT/path#L1-L10
101+ # Pattern: blob/COMMIT/PATH with optional #L1-L10
102+ if [[ " $URL " =~ blob/([a-f0-9]+)/([^# ]+)(#L([0-9]+)(-L([0-9]+))?)? ]]; then
103+ COMMIT= " ${BASH_REMATCH[1]} "
104+ OCAML_PATH= " ${BASH_REMATCH[2]} "
105+ START_LINE= " ${BASH_REMATCH[4]} "
106+ END_LINE= " ${BASH_REMATCH[6]} "
107+
108+ # If only start line is specified, set end line to same
109+ if [ -n " $START_LINE " ] && [ -z " $END_LINE " ]; then
110+ END_LINE=" $START_LINE "
111+ fi
112+
113+ LINE_RANGE= " "
114+ if [ -n " $START_LINE " ]; then
115+ LINE_RANGE=" ${START_LINE} -${END_LINE} "
116+ fi
117+ else
118+ echo " INVALID|${rust_file} |LINE:${line_num} |INVALID_URL_FORMAT" >> " $RESULTS_FILE "
119+ echo " ❌ INVALID: ${rust_file} :${line_num} "
120+ echo " URL does not match expected format: $URL "
121+ continue
122+ fi
123+
124+ # Fetch the OCaml file from the current branch
125+ CURRENT_FILE= " ${TEMP_DIR} /current_${rust_file// \/ / _} _${OCAML_PATH// \/ / _} "
126+ CURRENT_URL= " https://raw.githubusercontent.com/${GITHUB_OWNER} /${GITHUB_REPO} /${OCAML_BRANCH} /${OCAML_PATH} "
127+
128+ if ! curl -sf " $CURRENT_URL " -o " $CURRENT_FILE " ; then
129+ echo " INVALID|${rust_file} |${OCAML_PATH} |FILE_NOT_FOUND" >> " $RESULTS_FILE "
130+ echo " ❌ INVALID: ${rust_file} :${line_num} "
131+ echo " OCaml file not found: ${OCAML_PATH} "
132+ else
133+ # Validate line range if specified
134+ RANGE_VALID=true
135+ if [ -n " $LINE_RANGE " ]; then
136+ FILE_LINES=$( wc -l < " $CURRENT_FILE " )
137+
138+ if [ " $END_LINE " -gt " $FILE_LINES " ]; then
139+ echo " INVALID|${rust_file} |${OCAML_PATH} |LINE_RANGE_EXCEEDED|L:${LINE_RANGE} |${FILE_LINES} " >> " $RESULTS_FILE "
140+ echo " ❌ INVALID: ${rust_file} :${line_num} "
141+ echo " Line range L:${LINE_RANGE} exceeds file length (${FILE_LINES} lines): ${OCAML_PATH} "
142+ RANGE_VALID=false
143+ fi
144+ fi
145+
146+ if [ " $RANGE_VALID " = " true" ]; then
147+ # Verify that the code at the referenced commit matches the current branch
148+ CODE_MATCHES=true
149+ if [ -n " $LINE_RANGE " ]; then
150+ # Fetch the file from the referenced commit
151+ COMMIT_FILE=" ${TEMP_DIR} /commit_${rust_file// \/ / _} _${OCAML_PATH// \/ / _} "
152+ COMMIT_URL=" https://raw.githubusercontent.com/${GITHUB_OWNER} /${GITHUB_REPO} /${COMMIT} /${OCAML_PATH} "
153+
154+ if ! curl -sf " $COMMIT_URL " -o " $COMMIT_FILE " ; then
155+ echo " INVALID|${rust_file} |${OCAML_PATH} |COMMIT_NOT_FOUND|${COMMIT} " >> " $RESULTS_FILE "
156+ echo " ❌ INVALID: ${rust_file} :${line_num} "
157+ echo " Referenced commit does not exist: ${COMMIT} "
158+ CODE_MATCHES=false
159+ else
160+ # Extract the specific line ranges from both files and compare
161+ CURRENT_LINES=$( sed -n " ${START_LINE} ,${END_LINE} p" " $CURRENT_FILE " )
162+ COMMIT_LINES=$( sed -n " ${START_LINE} ,${END_LINE} p" " $COMMIT_FILE " )
163+
164+ if [ " $CURRENT_LINES " != " $COMMIT_LINES " ]; then
165+ echo " INVALID|${rust_file} |${OCAML_PATH} |CODE_MISMATCH|${COMMIT} " >> " $RESULTS_FILE "
166+ echo " ❌ INVALID: ${rust_file} :${line_num} "
167+ echo " Code at L:${LINE_RANGE} differs between commit ${COMMIT} and current branch"
168+ echo " Referenced: https://github.com/${GITHUB_OWNER} /${GITHUB_REPO} /blob/${COMMIT} /${OCAML_PATH} #L${START_LINE} -L${END_LINE} "
169+ echo " Current: https://github.com/${GITHUB_OWNER} /${GITHUB_REPO} /blob/${OCAML_BRANCH} /${OCAML_PATH} #L${START_LINE} -L${END_LINE} "
170+ CODE_MATCHES=false
171+ fi
172+ fi
173+ fi
174+
175+ if [ " $CODE_MATCHES " = " true" ]; then
176+ # Check if commit is stale
177+ if [ " $COMMIT " != " $CURRENT_COMMIT " ]; then
178+ echo " STALE|${rust_file} |${line_num} |${OCAML_PATH} |${COMMIT} |${LINE_RANGE} " >> " $RESULTS_FILE "
179+ echo " ✓ VALID: ${rust_file} :${line_num} -> ${OCAML_PATH} L:${LINE_RANGE} "
180+ echo " ⚠ STALE COMMIT: ${COMMIT} (current: ${CURRENT_COMMIT} )"
181+ else
182+ echo " VALID|${rust_file} |${line_num} |${OCAML_PATH} |${LINE_RANGE} " >> " $RESULTS_FILE "
183+ echo " ✓ VALID: ${rust_file} :${line_num} -> ${OCAML_PATH} L:${LINE_RANGE} "
184+ fi
185+ fi
186+ fi
187+ fi
188+ done
189+
190+ # Process legacy multi-line format (for backward compatibility)
83191 awk '
84192 /^\/\/\/ OCaml reference:/ {
193+ line_num = NR
85194 ref = $0
86195 getline
87196 if ($0 ~ /^\/\/\/ Commit:/) {
88197 commit = $0
89198 getline
90199 if ($0 ~ /^\/\/\/ Last verified:/) {
91200 verified = $0
92- print ref
201+ print line_num "|" ref
93202 print commit
94203 print verified
95204 print "---"
96205 }
97206 }
98207 }
99208 ' " $rust_file " | while IFS= read -r line; do
100- if [[ " $line " == " /// OCaml reference:" * ]]; then
209+ if [[ " $line " == * " | /// OCaml reference:" * ]]; then
101210 # Extract file path and line range
102- # Format: src/lib/mina_base/transaction_status.ml L:9-113
103- FULL_REF=" ${line#/// OCaml reference: } "
211+ LINE_NUM= $( echo " $line " | cut -d ' | ' -f1 )
212+ FULL_REF=" ${line#* | /// OCaml reference: } "
104213 OCAML_PATH=" ${FULL_REF%% L:* } "
105214 LINE_RANGE=$( echo " $FULL_REF " | grep -o ' L:[0-9-]*' | sed ' s/L://' || echo " " )
106215
@@ -110,28 +219,25 @@ echo "$RUST_FILES" | while IFS= read -r rust_file; do
110219 read -r _separator
111220
112221 COMMIT=" ${commit_line#/// Commit: } "
113- # LAST_VERIFIED could be extracted from _verified_line if needed for future validation
114222
115223 # Fetch the OCaml file from the current branch
116- CURRENT_FILE=" ${TEMP_DIR} /current_ ${rust_file// \/ / _} _${OCAML_PATH// \/ / _} "
224+ CURRENT_FILE=" ${TEMP_DIR} /current_legacy_ ${rust_file// \/ / _} _${OCAML_PATH// \/ / _} "
117225 CURRENT_URL=" https://raw.githubusercontent.com/${GITHUB_OWNER} /${GITHUB_REPO} /${OCAML_BRANCH} /${OCAML_PATH} "
118226
119227 if ! curl -sf " $CURRENT_URL " -o " $CURRENT_FILE " ; then
120- echo " INVALID|${rust_file} |${OCAML_PATH} |FILE_NOT_FOUND" >> " $RESULTS_FILE "
121- echo " ❌ INVALID: ${rust_file} "
228+ echo " INVALID|${rust_file} |${OCAML_PATH} |FILE_NOT_FOUND|LEGACY_FORMAT " >> " $RESULTS_FILE "
229+ echo " ❌ INVALID: ${rust_file} : ${LINE_NUM} (LEGACY FORMAT) "
122230 echo " OCaml file not found: ${OCAML_PATH} "
123231 else
124232 # Validate line range if specified
125233 RANGE_VALID=true
126234 if [ -n " $LINE_RANGE " ]; then
127235 FILE_LINES=$( wc -l < " $CURRENT_FILE " )
128- # START_LINE is not currently used but could be useful for validation
129- # START_LINE=$(echo "$LINE_RANGE" | cut -d'-' -f1)
130236 END_LINE=$( echo " $LINE_RANGE " | cut -d' -' -f2)
131237
132238 if [ " $END_LINE " -gt " $FILE_LINES " ]; then
133- echo " INVALID|${rust_file} |${OCAML_PATH} |LINE_RANGE_EXCEEDED|L:${LINE_RANGE} |${FILE_LINES} " >> " $RESULTS_FILE "
134- echo " ❌ INVALID: ${rust_file} "
239+ echo " INVALID|${rust_file} |${OCAML_PATH} |LINE_RANGE_EXCEEDED|L:${LINE_RANGE} |${FILE_LINES} |LEGACY_FORMAT " >> " $RESULTS_FILE "
240+ echo " ❌ INVALID: ${rust_file} : ${LINE_NUM} (LEGACY FORMAT) "
135241 echo " Line range L:${LINE_RANGE} exceeds file length (${FILE_LINES} lines): ${OCAML_PATH} "
136242 RANGE_VALID=false
137243 fi
@@ -145,12 +251,12 @@ echo "$RUST_FILES" | while IFS= read -r rust_file; do
145251 END_LINE=$( echo " $LINE_RANGE " | cut -d' -' -f2)
146252
147253 # Fetch the file from the referenced commit
148- COMMIT_FILE=" ${TEMP_DIR} /commit_ ${rust_file// \/ / _} _${OCAML_PATH// \/ / _} "
254+ COMMIT_FILE=" ${TEMP_DIR} /commit_legacy_ ${rust_file// \/ / _} _${OCAML_PATH// \/ / _} "
149255 COMMIT_URL=" https://raw.githubusercontent.com/${GITHUB_OWNER} /${GITHUB_REPO} /${COMMIT} /${OCAML_PATH} "
150256
151257 if ! curl -sf " $COMMIT_URL " -o " $COMMIT_FILE " ; then
152- echo " INVALID|${rust_file} |${OCAML_PATH} |COMMIT_NOT_FOUND|${COMMIT} " >> " $RESULTS_FILE "
153- echo " ❌ INVALID: ${rust_file} "
258+ echo " INVALID|${rust_file} |${OCAML_PATH} |COMMIT_NOT_FOUND|${COMMIT} |LEGACY_FORMAT " >> " $RESULTS_FILE "
259+ echo " ❌ INVALID: ${rust_file} : ${LINE_NUM} (LEGACY FORMAT) "
154260 echo " Referenced commit does not exist: ${COMMIT} "
155261 CODE_MATCHES=false
156262 else
@@ -159,8 +265,8 @@ echo "$RUST_FILES" | while IFS= read -r rust_file; do
159265 COMMIT_LINES=$( sed -n " ${START_LINE} ,${END_LINE} p" " $COMMIT_FILE " )
160266
161267 if [ " $CURRENT_LINES " != " $COMMIT_LINES " ]; then
162- echo " INVALID|${rust_file} |${OCAML_PATH} |CODE_MISMATCH|${COMMIT} " >> " $RESULTS_FILE "
163- echo " ❌ INVALID: ${rust_file} "
268+ echo " INVALID|${rust_file} |${OCAML_PATH} |CODE_MISMATCH|${COMMIT} |LEGACY_FORMAT " >> " $RESULTS_FILE "
269+ echo " ❌ INVALID: ${rust_file} : ${LINE_NUM} (LEGACY FORMAT) "
164270 echo " Code at L:${LINE_RANGE} differs between commit ${COMMIT} and current branch"
165271 echo " Referenced: https://github.com/${GITHUB_OWNER} /${GITHUB_REPO} /blob/${COMMIT} /${OCAML_PATH} #L${START_LINE} -L${END_LINE} "
166272 echo " Current: https://github.com/${GITHUB_OWNER} /${GITHUB_REPO} /blob/${OCAML_BRANCH} /${OCAML_PATH} #L${START_LINE} -L${END_LINE} "
@@ -172,12 +278,12 @@ echo "$RUST_FILES" | while IFS= read -r rust_file; do
172278 if [ " $CODE_MATCHES " = " true" ]; then
173279 # Check if commit is stale
174280 if [ " $COMMIT " != " $CURRENT_COMMIT " ]; then
175- echo " STALE|${rust_file} |${OCAML_PATH} |${COMMIT} |${LINE_RANGE} " >> " $RESULTS_FILE "
176- echo " ✓ VALID: ${rust_file} -> ${OCAML_PATH} L:${LINE_RANGE} "
281+ echo " STALE|${rust_file} |${LINE_NUM} | ${ OCAML_PATH} |${COMMIT} |${LINE_RANGE} |LEGACY_FORMAT " >> " $RESULTS_FILE "
282+ echo " ✓ VALID: ${rust_file} : ${LINE_NUM} (LEGACY FORMAT) -> ${OCAML_PATH} L:${LINE_RANGE} "
177283 echo " ⚠ STALE COMMIT: ${COMMIT} (current: ${CURRENT_COMMIT} )"
178284 else
179- echo " VALID|${rust_file} |${OCAML_PATH} |${LINE_RANGE} " >> " $RESULTS_FILE "
180- echo " ✓ VALID: ${rust_file} -> ${OCAML_PATH} L:${LINE_RANGE} "
285+ echo " VALID|${rust_file} |${LINE_NUM} | ${ OCAML_PATH} |${LINE_RANGE} |LEGACY_FORMAT " >> " $RESULTS_FILE "
286+ echo " ✓ VALID: ${rust_file} : ${LINE_NUM} (LEGACY FORMAT) -> ${OCAML_PATH} L:${LINE_RANGE} "
181287 fi
182288 fi
183289 fi
@@ -202,13 +308,33 @@ echo "Stale commits: ${STALE_COMMITS}"
202308
203309if [ " $UPDATE_MODE " = " true" ] && [ " ${STALE_COMMITS} " -gt 0 ]; then
204310 echo " "
205- echo " Updating stale commit hashes and verification dates..."
311+ echo " Updating stale commit hashes..."
312+
313+ # Update new hyperlink format
314+ grep " ^STALE|" " $RESULTS_FILE " | grep -v " LEGACY_FORMAT" | while IFS=' |' read -r _status rust_file line_num ocaml_path old_commit line_range _rest; do
315+ echo " Updating ${rust_file} :${line_num} ..."
316+
317+ # Build new URL
318+ NEW_URL=" https://github.com/${GITHUB_OWNER} /${GITHUB_REPO} /blob/${CURRENT_COMMIT} /${ocaml_path} "
319+ if [ -n " $line_range " ] && [ " $line_range " != " " ]; then
320+ START_LINE=$( echo " $line_range " | cut -d' -' -f1)
321+ END_LINE=$( echo " $line_range " | cut -d' -' -f2)
322+ NEW_URL=" ${NEW_URL} #L${START_LINE} -L${END_LINE} "
323+ fi
324+
325+ # Use sed to replace the URL at the specific line
326+ # We need to escape special characters in the URL for sed
327+ OLD_COMMIT_ESCAPED=$( echo " $old_commit " | sed ' s/[\/&]/\\&/g' )
328+ CURRENT_COMMIT_ESCAPED=$( echo " $CURRENT_COMMIT " | sed ' s/[\/&]/\\&/g' )
329+
330+ sed -i " ${line_num} s/blob\/${OLD_COMMIT_ESCAPED} \//blob\/${CURRENT_COMMIT_ESCAPED} \//" " ${RUST_ROOT} /${rust_file} "
331+ done
206332
207- CURRENT_DATE=$( date +%Y-%m-%d)
333+ # Update legacy multi-line format (for backward compatibility during transition)
334+ grep " ^STALE|" " $RESULTS_FILE " | grep " LEGACY_FORMAT" | while IFS=' |' read -r _status rust_file line_num ocaml_path old_commit line_range _legacy; do
335+ echo " Updating legacy format in ${rust_file} :${line_num} ..."
208336
209- # Update each file with stale commits
210- grep " ^STALE|" " $RESULTS_FILE " | while IFS=' |' read -r _status rust_file ocaml_path _old_commit _line_range; do
211- echo " Updating ${rust_file} ..."
337+ CURRENT_DATE=$( date +%Y-%m-%d)
212338
213339 # Find and replace the old commit with the new one
214340 sed -i.bak \
0 commit comments