Skip to content

Commit 679c0fd

Browse files
tobymao and izeigerman
authored and committed
fix: better downstream and identifier handling (#3757)
1 parent ad66273 commit 679c0fd

File tree

2 files changed

+61
-21
lines changed

2 files changed

+61
-21
lines changed

sqlmesh/core/selector.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
from sqlglot import exp
88
from sqlglot.errors import ParseError
9-
from sqlglot.tokens import Token, Tokenizer, TokenType
10-
from sqlglot.dialects.dialect import DialectType
9+
from sqlglot.tokens import Token, TokenType, Tokenizer as BaseTokenizer
10+
from sqlglot.dialects.dialect import Dialect, DialectType
1111
from sqlglot.helper import seq_get
1212

1313
from sqlmesh.core.dialect import normalize_model_name
@@ -230,20 +230,25 @@ def evaluate(node: exp.Expression) -> t.Set[str]:
230230
return evaluate(node)
231231

232232

233-
class SelectorTokenizer(Tokenizer):
234-
SINGLE_TOKENS = {
235-
"(": TokenType.L_PAREN,
236-
")": TokenType.R_PAREN,
237-
"&": TokenType.AMP,
238-
"|": TokenType.PIPE,
239-
"^": TokenType.CARET,
240-
"+": TokenType.PLUS,
241-
"*": TokenType.STAR,
242-
":": TokenType.COLON,
243-
}
233+
class SelectorDialect(Dialect):
234+
IDENTIFIERS_CAN_START_WITH_DIGIT = True
244235

245-
KEYWORDS = {}
246-
IDENTIFIERS: t.List[str | t.Tuple[str, str]] = []
236+
class Tokenizer(BaseTokenizer):
237+
SINGLE_TOKENS = {
238+
"(": TokenType.L_PAREN,
239+
")": TokenType.R_PAREN,
240+
"&": TokenType.AMP,
241+
"|": TokenType.PIPE,
242+
"^": TokenType.CARET,
243+
"+": TokenType.PLUS,
244+
"*": TokenType.STAR,
245+
":": TokenType.COLON,
246+
}
247+
248+
KEYWORDS = {}
249+
IDENTIFIERS = ["\\"] # there are no identifiers but need to put something here
250+
IDENTIFIER_START = ""
251+
IDENTIFIER_END = ""
247252

248253

249254
class Git(exp.Expression):
@@ -259,7 +264,7 @@ class Direction(exp.Expression):
259264

260265

261266
def parse(selector: str, dialect: DialectType = None) -> exp.Expression:
262-
tokens = SelectorTokenizer().tokenize(selector)
267+
tokens = SelectorDialect().tokenize(selector)
263268
i = 0
264269

265270
def _curr() -> t.Optional[Token]:
@@ -304,29 +309,32 @@ def _parse_kind(kind: str) -> bool:
304309

305310
def _parse_var() -> exp.Expression:
306311
upstream = _match(TokenType.PLUS)
312+
downstream = None
307313
tag = _parse_kind("tag")
308314
git = False if tag else _parse_kind("git")
309315
lstar = "*" if _match(TokenType.STAR) else ""
310316
directions = {}
311317

312-
if _match(TokenType.VAR):
318+
if _match(TokenType.VAR) or _match(TokenType.NUMBER):
313319
name = _prev().text
314320
rstar = "*" if _match(TokenType.STAR) else ""
315321
downstream = _match(TokenType.PLUS)
316322
this: exp.Expression = exp.Var(this=f"{lstar}{name}{rstar}")
317323

318-
if upstream:
319-
directions["up"] = True
320-
if downstream:
321-
directions["down"] = True
322324
elif _match(TokenType.L_PAREN):
323325
this = exp.Paren(this=_parse_conjunction())
326+
downstream = _match(TokenType.PLUS)
324327
_match(TokenType.R_PAREN, True)
325328
elif lstar:
326329
this = exp.var("*")
327330
else:
328331
raise ParseError(_error("Expected model name."))
329332

333+
if upstream:
334+
directions["up"] = True
335+
if downstream:
336+
directions["down"] = True
337+
330338
if tag:
331339
this = Tag(this=this)
332340
if git:

tests/core/test_selector.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,38 @@ def test_select_models_missing_env(mocker: MockerFixture, make_snapshot):
512512
["model* & ^(tag:tag1 | tag:tag2)"],
513513
{'"model3"'},
514514
),
515+
(
516+
[
517+
("model1", "tag1", None),
518+
("model2", "tag2", {"model1"}),
519+
("model3", "tag3", {"model1"}),
520+
],
521+
["(model1*)+"],
522+
{'"model1"', '"model2"', '"model3"'},
523+
),
524+
(
525+
[
526+
("model1", "tag1", None),
527+
("model2", "tag2", {"model1"}),
528+
("model3", "tag3", {"model2"}),
529+
],
530+
["+(+model2*+)+"],
531+
{'"model1"', '"model2"', '"model3"'},
532+
),
533+
(
534+
[
535+
("model1", "tag1", None),
536+
("model2", "tag2", {"model1"}),
537+
("model3", "tag3", {"model1"}),
538+
],
539+
["(model* & ^*1)+"],
540+
{'"model2"', '"model3"'},
541+
),
542+
(
543+
[("model2", "tag1", None), ("model2_1", "tag2", None), ("model2_2", "tag3", None)],
544+
["*2_*"],
545+
{'"model2_1"', '"model2_2"'},
546+
),
515547
],
516548
)
517549
def test_expand_model_selections(

0 commit comments

Comments (0)