Skip to content

Commit 56fe3ad

Browse files
hyperpolymathclaude
andcommitted
tooling(codemod): instrumented parser + #{ record-migration codemod
Branch-only tooling for affinescript#218 (do NOT merge into main — the Codemod_hook instrumentation is for the migration sweep only). lib/codemod_hook.ml records every record-literal LBRACE byte offset; parser.mly ExprRecord productions call Codemod_hook.note; codemod/ exe inserts '#' at exactly those offsets (`{`->`#{`, `Foo {`->`Foo #{`). Safety: refuses to rewrite unless every recorded offset is really '{'. Hardened to catch per-file parser exceptions so one bad file can't abort the sweep. Known limitation (recorded on #218): Affinescript.Parse.parse_file hits a pre-existing Match_failure (parse.ml:49) on stdlib + ~95/261 .affine that the 257 AOT harness parses fine — fix that parse-API bug (next) before the stdlib sweep. Refs #218 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent a45b021 commit 56fe3ad

5 files changed

Lines changed: 95 additions & 4 deletions

File tree

codemod/codemod.ml

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
(* #{ record-syntax codemod (affinescript#218). Parses each .affine with
2+
the instrumented origin/main grammar, which records the byte offset of
3+
every record-literal LBRACE in Affinescript.Codemod_hook.brace_offsets.
4+
We then insert '#' immediately before each such '{', so `{` -> `#{` and
5+
`Foo {` -> `Foo #{`. Only true record literals are touched.
6+
7+
Usage:
8+
codemod --check FILE... # report offsets, write nothing
9+
codemod FILE... # rewrite in place (only if it parses &
10+
# every offset really points at '{') *)
11+
12+
(* [read_file path] returns the entire contents of [path] as a string.
   The file is opened in binary mode so that byte offsets into the result
   match byte offsets in the file (no newline translation).

   The channel is closed on every exit path, including when the read
   itself raises, so a failing file does not leak a descriptor.

   Raises [Sys_error] if the file cannot be opened, and [End_of_file] if
   fewer bytes than the reported length can be read. *)
let read_file path =
  let ic = open_in_bin path in
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () -> really_input_string ic (in_channel_length ic))
17+
18+
(* [write_file path s] replaces the contents of [path] with [s], creating
   the file if needed. Binary mode is used so the bytes of [s] are written
   verbatim (no newline translation).

   The explicit [close_out] inside the protected body flushes the channel
   and lets a flush failure surface as [Sys_error]. The [close_out_noerr]
   in [finally] then guarantees the descriptor is released even when
   [output_string] or that flush raised; it does nothing on an
   already-closed channel.

   Raises [Sys_error] if the file cannot be opened or written. *)
let write_file path s =
  let oc = open_out_bin path in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc s;
      close_out oc)
21+
22+
(* [insert_hashes src offsets_desc] returns [src] with a '#' inserted
   immediately before each byte offset listed in [offsets_desc].

   [offsets_desc] must be strictly descending, duplicate-free, and every
   offset must lie in [0, String.length src); [process] validates this
   before calling. The splice is done in one left-to-right pass through a
   [Buffer], so the cost is linear in the size of the file rather than
   rebuilding the whole string once per offset. *)
let insert_hashes src offsets_desc =
  let buf = Buffer.create (String.length src + List.length offsets_desc) in
  let copied_upto =
    List.fold_left
      (fun from o ->
        (* Copy the untouched text up to the brace, then the marker; the
           brace itself is copied as part of the next segment. *)
        Buffer.add_substring buf src from (o - from);
        Buffer.add_char buf '#';
        o)
      0
      (List.rev offsets_desc)
  in
  Buffer.add_substring buf src copied_upto (String.length src - copied_upto);
  Buffer.contents buf

(* [process ~check path] handles one source file and returns [true] if it
   was processed, [false] if it was skipped.

   Steps:
   - reset the shared offset recorder, then parse [path] with the
     instrumented parser, which records record-literal brace offsets in
     [Affinescript.Codemod_hook.brace_offsets];
   - re-read the file and refuse to touch it unless every recorded offset
     is in range and really points at '{';
   - with [~check:true], only report the offsets (descending) and write
     nothing; otherwise insert '#' before each recorded brace and rewrite
     the file in place.

   Every per-file failure (parser exception, unreadable or unwritable
   file) is reported on stderr as a SKIP and turned into [false], so one
   bad file cannot abort a multi-file sweep. *)
let process ~check path =
  Affinescript.Codemod_hook.brace_offsets := [];
  match
    (* Deliberate catch-all: the parser is known to raise (e.g.
       Match_failure) on some inputs, and the sweep must carry on. *)
    (try Affinescript.Parse.parse_file path
     with e -> Error (Printexc.to_string e, Affinescript.Span.dummy))
  with
  | Error (msg, _) ->
    Printf.eprintf "SKIP %s -- parse error: %s\n" path msg;
    false
  | Ok _ ->
    let offsets =
      !Affinescript.Codemod_hook.brace_offsets
      |> List.sort_uniq (fun a b -> compare b a) (* descending, deduped *)
    in
    (match read_file path with
     | exception ((Sys_error _ | End_of_file) as e) ->
       Printf.eprintf "SKIP %s -- cannot read: %s\n" path
         (Printexc.to_string e);
       false
     | src ->
       let len = String.length src in
       let bad =
         List.find_opt
           (fun o -> o < 0 || o >= len || src.[o] <> '{')
           offsets
       in
       (match bad with
        | Some o ->
          Printf.eprintf
            "SKIP %s -- offset %d is not '{'; not rewriting\n" path o;
          false
        | None ->
          if check then begin
            Printf.printf "%s: %d record literal(s) at %s\n" path
              (List.length offsets)
              (String.concat "," (List.map string_of_int offsets));
            true
          end else if offsets = [] then begin
            Printf.printf "OK %s -- no record literals\n" path;
            true
          end else
            (match write_file path (insert_hashes src offsets) with
             | exception Sys_error msg ->
               Printf.eprintf "SKIP %s -- cannot write: %s\n" path msg;
               false
             | () ->
               Printf.printf "REWROTE %s -- %d record literal(s)\n"
                 path (List.length offsets);
               true)))
67+
68+
(* Entry point. An optional leading "--check" selects report-only mode;
   every remaining argument is a file to process, in order. A summary of
   processed vs. skipped files is printed on stderr at the end. *)
let () =
  let check, files =
    match List.tl (Array.to_list Sys.argv) with
    | "--check" :: paths -> (true, paths)
    | paths -> (false, paths)
  in
  (* Thread the two counters through a left fold instead of mutating refs;
     files are still visited in command-line order. *)
  let processed, skipped =
    List.fold_left
      (fun (done_, skipped) file ->
        if process ~check file then (done_ + 1, skipped)
        else (done_, skipped + 1))
      (0, 0) files
  in
  Printf.eprintf "done: %d processed, %d skipped\n" processed skipped

codemod/dune

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
(executable
2+
(name codemod)
3+
(libraries affinescript))

lib/codemod_hook.ml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
(* CODEMOD INSTRUMENTATION — branch stage-c/codemod only, never merged
2+
(affinescript#218). The instrumented parser calls [note] with the
3+
byte position of every record-literal LBRACE it matches; the #{
4+
migration codemod reads [brace_offsets] to insert '#' there. *)
5+
6+
(* Accumulator of recorded offsets, most recent first. It is global
   mutable state: consumers are expected to reset it to [[]] before each
   parse and read it afterwards. *)
let brace_offsets : int list ref = ref []

(* [note p] pushes the [pos_cnum] of [p] onto [brace_offsets]. Duplicates
   are not filtered here.
   NOTE(review): [pos_cnum] is a byte offset only if the lexer counts
   bytes -- confirm against the lexer in use. *)
let note ({ Lexing.pos_cnum; _ } : Lexing.position) =
  brace_offsets := pos_cnum :: !brace_offsets

lib/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
(modes byte native)
55
(modules
66
ast
7+
codemod_hook
78
borrow
89
c_codegen
910
cafe_face

lib/parser.mly

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -771,8 +771,8 @@ expr_primary:
771771
/* Struct literal: `Point { x: v, y: w }`. Must come before the plain
772772
upper_ident production so Menhir shifts LBRACE rather than reducing
773773
upper_ident to ExprVar when the next token is LBRACE. */
774-
| _ty = upper_ident LBRACE b = expr_record_body RBRACE
775-
{ ExprRecord { er_fields = fst b; er_spread = snd b } }
774+
| _ty = upper_ident l = LBRACE b = expr_record_body RBRACE
775+
{ Codemod_hook.note $startpos(l); ignore l; ExprRecord { er_fields = fst b; er_spread = snd b } }
776776
| name = upper_ident { ExprVar (mk_ident name $startpos $endpos) }
777777
| ty = upper_ident COLONCOLON variant = upper_ident
778778
{ ExprVariant (mk_ident ty $startpos(ty) $endpos(ty),
@@ -791,8 +791,8 @@ expr_primary:
791791
avoid the LALR(1) greedy-separator conflict that arises when a ROW_VAR
792792
spread like `..record` follows a COMMA that `separated_list` has already
793793
consumed expecting another record_field. */
794-
| LBRACE b = expr_record_body RBRACE
795-
{ ExprRecord { er_fields = fst b; er_spread = snd b } }
794+
| l = LBRACE b = expr_record_body RBRACE
795+
{ Codemod_hook.note $startpos(l); ignore l; ExprRecord { er_fields = fst b; er_spread = snd b } }
796796

797797
/* Block */
798798
| blk = block { ExprBlock blk }

0 commit comments

Comments
 (0)