Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions fyi/semgrep-grammars/src/semgrep-elixir/grammar.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ module.exports = grammar(base_grammar, {
name: 'elixir',

// Conflicts declared by this grammar, appended to the ones inherited from
// the base grammar (`previous`).
conflicts: ($, previous) => previous.concat([
// '$...VAR' is valid both as an _expression (positional) and inside a
// pair (keyword). Either resolution yields ParamEllipsis in the
// Generic AST, so the choice is harmless; we just need to declare
// the conflict explicitly. See the comment on `semgrep_ellipsis`.
[$._expression, $.pair],
]),

/*
Expand Down Expand Up @@ -48,6 +53,12 @@ module.exports = grammar(base_grammar, {

// Semgrep metavariable: '$' followed by an uppercase letter or '_', then
// any run of uppercase letters, digits, or '_' (e.g. $X, $FOO_1).
// Wrapped in token() so it is lexed as a single unit.
_semgrep_metavariable: $ => token(/\$[A-Z_][A-Z_0-9]*/),

// Semgrep ellipsis-metavariable: '$...VAR' matches a variadic
// sequence and binds it to a metavariable (e.g. $...ARGS).
// Tokenized as a single unit so the leading '$' isn't separated
// from '...VAR'.
// The name after '$...' must start with an uppercase letter or '_' and may
// continue with uppercase letters, digits, or '_'.
_semgrep_ellipsis_metavariable: $ => token(/\$\.\.\.[A-Z_][A-Z_0-9]*/),

// Ellipsis
// No need for extensions to _expressions for ellipsis because
// Elixir already uses "..." as valid identifiers
Expand All @@ -62,6 +73,7 @@ module.exports = grammar(base_grammar, {
return choice(
previous,
$.semgrep_ellipsis,
$._semgrep_ellipsis_metavariable,
);
},

Expand All @@ -79,6 +91,7 @@ module.exports = grammar(base_grammar, {
// Extends the inherited _expression rule: keeps every alternative of the
// previous choice (`previous.members`) and adds two Semgrep-only
// alternatives, the deep ellipsis and the '$...VAR' ellipsis-metavariable.
_expression: ($, previous) => choice(
...previous.members,
$.deep_ellipsis,
$._semgrep_ellipsis_metavariable,
),

// The actual ellipsis rules
Expand Down
2 changes: 1 addition & 1 deletion fyi/tree-sitter-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
tree-sitter 0.22.6 (d521f0a0791d94f4442cf9be08322f6aabce20d6)
tree-sitter 0.24.4
35 changes: 16 additions & 19 deletions fyi/versions
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,25 @@ Last change in file:
Format with commas
---
File: semgrep-grammars/src/semgrep-elixir/grammar.js
Git repo name: ocaml-tree-sitter-semgrep
Latest commit in repo: b9665cd85a2537da15cdc86f81b83722bc14af67
Git repo name: agent-a1b1f87efc493b5a7
Latest commit in repo: 041b61e542d4bcefbb375268dc5aaaf0e98403f5
Last change in file:
commit b52bcb7fcd6caecacf5146d934d45afb5ecd5dfb
Author: Amarin (Um) Phaosawasdi <amchiclet@users.noreply.github.com>
Date: Wed Jan 31 17:46:55 2024 +0900
commit 041b61e542d4bcefbb375268dc5aaaf0e98403f5
Author: brandonspark <brandon@semgrep.com>
Date: Wed Apr 29 17:28:58 2026 -0700

elixir: support metavariable atoms (#474)
fix(elixir): add $...VAR ellipsis-metavariable support

Elixir has atoms that are pretty much identifiers that start with a colon, e.g. `:foo`.
Adds a `_semgrep_ellipsis_metavariable` token (`$\.\.\.[A-Z_][A-Z_0-9]*`)
and wires it into `_expression` and `pair` so it parses in every position
where users naturally reach for it: call args, def/fn parameters, list and
tuple elements, do-block bodies, and keyword-argument positions. Declares
the resulting `_expression` vs `pair` conflict explicitly (either resolution
becomes ParamEllipsis in the Generic AST). Adds corpus tests covering all
six positions from LANG-493.

Metavars can match expressions, but not atoms
* `$ATOM` matches `:foo`
* `:$ATOM` doesn't parse
LANG-501 (`?`/`!` suffix on metavariables) is deferred — root cause is in
the upstream tree-sitter-elixir scanner.cc, not in grammar.js.

This PR allows metavars as atoms so `:$ATOM` matches `:foo`.

Also fixed some indentation in some previous code.

Test plan: added test in https://github.com/semgrep/semgrep-proprietary/pull/1274

### Security

- [x] Change has no security implications (otherwise, ping the security team)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
47 changes: 28 additions & 19 deletions lib/Boilerplate.ml
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ let map_float_ (env : env) (tok : CST.float_) =
(* Maps an imm_tok_pat_0db2d54 leaf (a token for the pattern noted inline)
   by handing it to the shared [token] helper with the current [env]. *)
let map_imm_tok_pat_0db2d54 (env : env) (tok : CST.imm_tok_pat_0db2d54) =
(* pattern [a-z] *) token env tok

(* Maps a semgrep_metavariable leaf token by delegating to the shared
   [token] helper with the current [env]. *)
let map_semgrep_metavariable (env : env) (tok : CST.semgrep_metavariable) =
(* semgrep_metavariable *) token env tok
(* Maps a semgrep_ellipsis_metavariable leaf token by delegating to the
   shared [token] helper with the current [env]. *)
let map_semgrep_ellipsis_metavariable (env : env) (tok : CST.semgrep_ellipsis_metavariable) =
(* semgrep_ellipsis_metavariable *) token env tok

(* Maps a quoted_content_single leaf token by delegating to the shared
   [token] helper with the current [env]. *)
let map_quoted_content_single (env : env) (tok : CST.quoted_content_single) =
(* quoted_content_single *) token env tok
Expand Down Expand Up @@ -131,6 +131,9 @@ let map_boolean (env : env) (x : CST.boolean) =
(* Maps a quoted_content_double leaf token by delegating to the shared
   [token] helper with the current [env]. *)
let map_quoted_content_double (env : env) (tok : CST.quoted_content_double) =
(* quoted_content_double *) token env tok

(* Maps a semgrep_metavariable leaf token by delegating to the shared
   [token] helper with the current [env]. *)
let map_semgrep_metavariable (env : env) (tok : CST.semgrep_metavariable) =
(* semgrep_metavariable *) token env tok

(* Maps a quoted_content_i_bar leaf token by delegating to the shared
   [token] helper with the current [env]. *)
let map_quoted_content_i_bar (env : env) (tok : CST.quoted_content_i_bar) =
(* quoted_content_i_bar *) token env tok

Expand Down Expand Up @@ -249,23 +252,6 @@ let map_quoted_curly (env : env) ((v1, v2, v3, v4) : CST.quoted_curly) =
let v4 = (* "}" *) token env v4 in
R.Tuple [v1; v2; v3; v4]

(* Converts a CST identifier into a tree of [R.Case] nodes, one per matched
   variant level, each labelled with the variant's name. Plain-name payloads
   go through [map_pat_cf9c6c3]; token payloads go through [token]. *)
let map_identifier (env : env) (x : CST.identifier) =
  let tagged label payload = R.Case (label, payload) in
  match x with
  | `Choice_pat_cf9c6c3 (`Pat_cf9c6c3 pat) ->
      tagged "Choice_pat_cf9c6c3"
        (tagged "Pat_cf9c6c3" (map_pat_cf9c6c3 env pat))
  | `Choice_pat_cf9c6c3 (`DOTDOTDOT dots) ->
      (* "..." *)
      tagged "Choice_pat_cf9c6c3" (tagged "DOTDOTDOT" (token env dots))
  | `Semg_meta metavar ->
      (* semgrep_metavariable *)
      tagged "Semg_meta" (token env metavar)

let map_quoted_single (env : env) ((v1, v2, v3, v4) : CST.quoted_single) =
let v1 = (* "'" *) token env v1 in
let v2 =
Expand Down Expand Up @@ -409,6 +395,23 @@ let map_quoted_double (env : env) ((v1, v2, v3, v4) : CST.quoted_double) =
let v4 = (* "\"" *) token env v4 in
R.Tuple [v1; v2; v3; v4]

(* Converts a CST identifier into a tree of [R.Case] nodes, one per matched
   variant level, each labelled with the variant's name. Plain-name payloads
   go through [map_pat_cf9c6c3]; token payloads go through [token]. *)
let map_identifier (env : env) (x : CST.identifier) =
  let tagged label payload = R.Case (label, payload) in
  match x with
  | `Choice_pat_cf9c6c3 (`Pat_cf9c6c3 pat) ->
      tagged "Choice_pat_cf9c6c3"
        (tagged "Pat_cf9c6c3" (map_pat_cf9c6c3 env pat))
  | `Choice_pat_cf9c6c3 (`DOTDOTDOT dots) ->
      (* "..." *)
      tagged "Choice_pat_cf9c6c3" (tagged "DOTDOTDOT" (token env dots))
  | `Semg_meta metavar ->
      (* semgrep_metavariable *)
      tagged "Semg_meta" (token env metavar)

let map_quoted_heredoc_single (env : env) ((v1, v2, v3, v4) : CST.quoted_heredoc_single) =
let v1 = (* "'''" *) token env v1 in
let v2 =
Expand Down Expand Up @@ -1563,6 +1566,9 @@ and map_expression (env : env) (x : CST.expression) =
let v3 = (* "...>" *) token env v3 in
R.Tuple [v1; v2; v3]
)
| `Semg_ellips_meta tok -> R.Case ("Semg_ellips_meta",
(* semgrep_ellipsis_metavariable *) token env tok
)
)

and map_items_with_trailing_separator (env : env) (v1 : CST.items_with_trailing_separator) =
Expand Down Expand Up @@ -1673,6 +1679,9 @@ and map_pair (env : env) (x : CST.pair) =
| `Semg_ellips tok -> R.Case ("Semg_ellips",
(* "..." *) token env tok
)
| `Semg_ellips_meta tok -> R.Case ("Semg_ellips_meta",
(* semgrep_ellipsis_metavariable *) token env tok
)
)

and map_quoted_i_angle (env : env) ((v1, v2, v3, v4) : CST.quoted_i_angle) =
Expand Down
22 changes: 13 additions & 9 deletions lib/CST.ml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ type float_ = Token.t

(* Leaf node types: each of these is represented directly by a Token.t. *)
type imm_tok_pat_0db2d54 = Token.t (* pattern [a-z] *)

(* Token for a Semgrep metavariable. *)
type semgrep_metavariable = Token.t
(* Token for a Semgrep ellipsis-metavariable. *)
type semgrep_ellipsis_metavariable = Token.t

(* Token for the content of a single-quoted literal (going by the name;
   the grammar rule itself is not visible here). *)
type quoted_content_single = Token.t

Expand Down Expand Up @@ -82,6 +82,8 @@ type boolean = [

(* Leaf node types: each of these is represented directly by a Token.t. *)
type quoted_content_double = Token.t

(* Token for a Semgrep metavariable. *)
type semgrep_metavariable = Token.t

type quoted_content_i_bar = Token.t

type quoted_content_i_curly = Token.t
Expand Down Expand Up @@ -129,14 +131,6 @@ type quoted_curly = (
* Token.t (* "}" *)
)

(* An identifier is one of:
   - a name matching pattern cf9c6c3,
   - the literal "..." token,
   - a Semgrep metavariable token. *)
type identifier = [
`Choice_pat_cf9c6c3 of [
`Pat_cf9c6c3 of pat_cf9c6c3
| `DOTDOTDOT of Token.t (* "..." *)
]
| `Semg_meta of semgrep_metavariable (*tok*)
]

type quoted_single = (
Token.t (* "'" *)
* quoted_content_single (*tok*) option
Expand Down Expand Up @@ -185,6 +179,14 @@ type quoted_double = (
* Token.t (* "\"" *)
)

(* An identifier is one of:
   - a name matching pattern cf9c6c3,
   - the literal "..." token,
   - a Semgrep metavariable token. *)
type identifier = [
`Choice_pat_cf9c6c3 of [
`Pat_cf9c6c3 of pat_cf9c6c3
| `DOTDOTDOT of Token.t (* "..." *)
]
| `Semg_meta of semgrep_metavariable (*tok*)
]

type quoted_heredoc_single = (
Token.t (* "'''" *)
* quoted_content_heredoc_single (*tok*) option
Expand Down Expand Up @@ -616,6 +618,7 @@ and expression = [
| `Deep_ellips of (
Token.t (* "<..." *) * expression * Token.t (* "...>" *)
)
| `Semg_ellips_meta of semgrep_ellipsis_metavariable (*tok*)
]

and items_with_trailing_separator = [
Expand Down Expand Up @@ -657,6 +660,7 @@ and local_call_with_parentheses = (
and pair = [
`Kw_exp of (keyword * expression)
| `Semg_ellips of Token.t (* "..." *)
| `Semg_ellips_meta of semgrep_ellipsis_metavariable (*tok*)
]

and quoted_i_angle = (
Expand Down
60 changes: 38 additions & 22 deletions lib/Parse.ml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ let children_regexps : (string * Run.exp option) list = [
Token (Literal "false");
|];
);
"semgrep_metavariable", None;
"semgrep_ellipsis_metavariable", None;
"imm_tok_pat_8f9e87e", None;
"imm_tok_pat_b250293", None;
"quoted_atom_start", None;
Expand All @@ -60,6 +60,7 @@ let children_regexps : (string * Run.exp option) list = [
"quoted_content_double", None;
"imm_tok_lpar", None;
"imm_tok_pat_5eb9c21", None;
"semgrep_metavariable", None;
"quoted_content_slash", None;
"keyword_", None;
"quoted_content_i_bar", None;
Expand Down Expand Up @@ -87,13 +88,6 @@ let children_regexps : (string * Run.exp option) list = [
"quoted_content_i_heredoc_double", None;
"atom_", None;
"newline_before_binary_operator", None;
"metavariable_atom",
Some (
Seq [
Token (Literal ":");
Token (Name "semgrep_metavariable");
];
);
"operator_identifier",
Some (
Alt [|
Expand Down Expand Up @@ -151,6 +145,13 @@ let children_regexps : (string * Run.exp option) list = [
Token (Literal "->");
|];
);
"metavariable_atom",
Some (
Seq [
Token (Literal ":");
Token (Name "semgrep_metavariable");
];
);
"terminator",
Some (
Alt [|
Expand Down Expand Up @@ -992,6 +993,7 @@ let children_regexps : (string * Run.exp option) list = [
Token (Name "access_call");
Token (Name "anonymous_function");
Token (Name "deep_ellipsis");
Token (Name "semgrep_ellipsis_metavariable");
|];
);
"interpolation",
Expand Down Expand Up @@ -1142,6 +1144,7 @@ let children_regexps : (string * Run.exp option) list = [
Token (Name "expression");
];
Token (Name "semgrep_ellipsis");
Token (Name "semgrep_ellipsis_metavariable");
|];
);
"quoted_atom",
Expand Down Expand Up @@ -1732,7 +1735,7 @@ let trans_boolean ((kind, body) : mt) : CST.boolean =
)
| Leaf _ -> assert false

let trans_semgrep_metavariable ((kind, body) : mt) : CST.semgrep_metavariable =
(* Translates a matched-tree node into a semgrep_ellipsis_metavariable.
   The node must be a leaf, whose token is returned unchanged; a node with
   children is treated as impossible and trips [assert false]. *)
let trans_semgrep_ellipsis_metavariable ((kind, body) : mt) : CST.semgrep_ellipsis_metavariable =
match body with
| Leaf v -> v
| Children _ -> assert false
Expand Down Expand Up @@ -1812,6 +1815,11 @@ let trans_imm_tok_pat_5eb9c21 ((kind, body) : mt) : CST.imm_tok_pat_5eb9c21 =
| Leaf v -> v
| Children _ -> assert false

(* Translates a matched-tree node into a semgrep_metavariable.
   Only a leaf is acceptable: its token is returned as-is. A node carrying
   children is treated as impossible and trips [assert false]. *)
let trans_semgrep_metavariable ((kind, body) : mt) : CST.semgrep_metavariable =
  match body with
  | Children _ -> assert false
  | Leaf tok -> tok

let trans_quoted_content_slash ((kind, body) : mt) : CST.quoted_content_slash =
match body with
| Leaf v -> v
Expand Down Expand Up @@ -1947,19 +1955,6 @@ let trans_newline_before_binary_operator ((kind, body) : mt) : CST.newline_befor
| Leaf v -> v
| Children _ -> assert false

(* Translates a matched-tree node into a metavariable_atom: a pair made of
   the ":" token and the semgrep_metavariable token that follows it.
   The node must hold exactly a two-element [Seq]; every other shape is
   treated as impossible and trips [assert false]. *)
let trans_metavariable_atom ((kind, body) : mt) : CST.metavariable_atom =
  match body with
  | Children (Seq [colon; metavar]) ->
      (
        Run.trans_token (Run.matcher_token colon),
        trans_semgrep_metavariable (Run.matcher_token metavar)
      )
  | Children _ | Leaf _ -> assert false

let trans_operator_identifier ((kind, body) : mt) : CST.operator_identifier =
match body with
| Children v ->
Expand Down Expand Up @@ -2174,6 +2169,19 @@ let trans_operator_identifier ((kind, body) : mt) : CST.operator_identifier =
)
| Leaf _ -> assert false

(* Translates a matched-tree node into a metavariable_atom: a pair made of
   the ":" token and the semgrep_metavariable token that follows it.
   The node must hold exactly a two-element [Seq]; every other shape is
   treated as impossible and trips [assert false]. *)
let trans_metavariable_atom ((kind, body) : mt) : CST.metavariable_atom =
  match body with
  | Children (Seq [colon; metavar]) ->
      (
        Run.trans_token (Run.matcher_token colon),
        trans_semgrep_metavariable (Run.matcher_token metavar)
      )
  | Children _ | Leaf _ -> assert false

let trans_terminator ((kind, body) : mt) : CST.terminator =
match body with
| Children v ->
Expand Down Expand Up @@ -4109,6 +4117,10 @@ and trans_expression ((kind, body) : mt) : CST.expression =
`Deep_ellips (
trans_deep_ellipsis (Run.matcher_token v)
)
| Alt (24, v) ->
`Semg_ellips_meta (
trans_semgrep_ellipsis_metavariable (Run.matcher_token v)
)
| _ -> assert false
)
| Leaf _ -> assert false
Expand Down Expand Up @@ -4411,6 +4423,10 @@ and trans_pair ((kind, body) : mt) : CST.pair =
`Semg_ellips (
trans_semgrep_ellipsis (Run.matcher_token v)
)
| Alt (2, v) ->
`Semg_ellips_meta (
trans_semgrep_ellipsis_metavariable (Run.matcher_token v)
)
| _ -> assert false
)
| Leaf _ -> assert false
Expand Down
Loading