Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions crates/squawk_parser/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,6 @@ pub(crate) enum Event {
kind: SyntaxKind,
n_raw_tokens: u8,
},
/// When we parse `foo.0.0` or `foo. 0. 0` the lexer will hand us a numeric literal
/// instead of an integer literal followed by a dot as the lexer has no contextual knowledge.
/// This event instructs whatever consumes the events to split the numeric literal into
/// the corresponding parts.
NumericSplitHack {
ends_in_dot: bool,
},
Error {
msg: String,
},
Expand Down Expand Up @@ -159,11 +152,6 @@ pub(super) fn process(mut events: Vec<Event>) -> Output {
Event::Token { kind, n_raw_tokens } => {
res.token(kind, n_raw_tokens);
}
Event::NumericSplitHack { ends_in_dot } => {
res.numeric_split_hack(ends_in_dot);
let ev = mem::replace(&mut events[i + 1], Event::tombstone());
assert!(matches!(ev, Event::Finish), "{ev:?}");
}
Event::Error { msg } => res.error(msg),
}
}
Expand Down
31 changes: 11 additions & 20 deletions crates/squawk_parser/src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1463,7 +1463,7 @@ fn postfix_expr(
BETWEEN_KW => between_expr(p),
L_PAREN if allow_calls => call_expr_args(p, lhs),
L_BRACK => index_expr(p, lhs),
DOT => match postfix_dot_expr(p, lhs, allow_calls) {
DOT => match postfix_dot_expr(p, lhs) {
Ok(it) => it,
Err(it) => {
lhs = it;
Expand Down Expand Up @@ -2264,8 +2264,8 @@ fn index_expr(p: &mut Parser<'_>, lhs: CompletedMarker) -> CompletedMarker {
m.complete(p, INDEX_EXPR)
}

fn name_ref_or_index(p: &mut Parser<'_>) {
assert!(p.at(IDENT) || p.at_ts(TYPE_KEYWORDS) || p.at_ts(ALL_KEYWORDS) || p.at(INT_NUMBER));
fn field_name(p: &mut Parser<'_>) {
assert!(p.at(IDENT) || p.at_ts(TYPE_KEYWORDS) || p.at_ts(ALL_KEYWORDS));
let m = p.start();
if !opt_ident(p) {
p.bump_any();
Expand All @@ -2276,42 +2276,33 @@ fn name_ref_or_index(p: &mut Parser<'_>) {
fn field_expr(
p: &mut Parser<'_>,
lhs: Option<CompletedMarker>,
allow_calls: bool,
) -> Result<CompletedMarker, CompletedMarker> {
assert!(p.at(DOT));
let m = match lhs {
Some(lhs) => lhs.precede(p),
None => p.start(),
};
p.bump(DOT);
if p.at(IDENT) || p.at_ts(TYPE_KEYWORDS) || p.at(INT_NUMBER) || p.at_ts(ALL_KEYWORDS) {
name_ref_or_index(p);
} else if p.at(NUMERIC_NUMBER) {
return match p.split_numeric(m) {
(true, m) => {
let lhs = m.complete(p, FIELD_EXPR);
postfix_dot_expr(p, lhs, allow_calls)
}
(false, m) => Ok(m.complete(p, FIELD_EXPR)),
};
if p.at(IDENT) || p.at_ts(TYPE_KEYWORDS) || p.at_ts(ALL_KEYWORDS) {
field_name(p);
} else if p.at(INT_NUMBER) || p.at(NUMERIC_NUMBER) {
// Unlike Rust, we can't have a number as a field, so we just report an
// error.
p.err_and_bump("expected field name");
} else if p.eat(STAR) || opt_operator(p) {
//
} else {
p.error(format!(
"expected field name or number, got {:?}",
p.current()
));
p.error(format!("expected field name, got {:?}", p.current()));
}
Ok(m.complete(p, FIELD_EXPR))
}

fn postfix_dot_expr(
p: &mut Parser<'_>,
lhs: CompletedMarker,
allow_calls: bool,
) -> Result<CompletedMarker, CompletedMarker> {
assert!(p.at(DOT));
field_expr(p, Some(lhs), allow_calls).map(|cm| {
field_expr(p, Some(lhs)).map(|cm| {
if p.at_ts(STRING_FIRST) {
// wrap our previous expression in a type
// TODO: can we unify types & exprs?
Expand Down
32 changes: 0 additions & 32 deletions crates/squawk_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,38 +533,6 @@ impl<'t> Parser<'t> {
self.do_bump(kind, 1);
}

/// Consumes the `NUMERIC_NUMBER` at the current position and queues an
/// `Event::NumericSplitHack` for it, so that the literal can be split into
/// its parts by whatever consumes the events.
///
/// Returns `(ends_in_dot, marker)`:
/// - `ends_in_dot` is `true` when the current token is *not* marked joint in
///   the input (NOTE(review): jointness is presumably set for numeric tokens
///   that carry a fractional part — confirm against the lexer shortcut).
/// - `marker` is the marker the caller should go on to complete. It is the
///   marker that was passed in when `ends_in_dot` is `true`, and a freshly
///   started one otherwise.
///
/// # Panics
///
/// Panics if the parser is not at a `NUMERIC_NUMBER`, or if the event stored
/// at `marker.pos` is not an `Event::Start`.
pub(crate) fn split_numeric(&mut self, mut marker: Marker) -> (bool, Marker) {
assert!(self.at(SyntaxKind::NUMERIC_NUMBER));
// we have parsed `<something>.`
// `<something>`.0.1
// here we need to insert an extra event
//
// `<something>`. 0. 1;
// here we need to change the follow up parse, the return value will cause us to emulate a dot
// the actual splitting happens later
let ends_in_dot = !self.inp.is_joint(self.pos);
if !ends_in_dot {
// Start a second node and turn the caller's pending start event into a
// `FIELD_EXPR` whose forward parent is that new node.
let new_marker = self.start();
let idx = marker.pos as usize;
match &mut self.events[idx] {
Event::Start {
forward_parent,
kind,
} => {
*kind = SyntaxKind::FIELD_EXPR;
*forward_parent = Some(new_marker.pos - marker.pos);
}
_ => unreachable!(),
}
// The old marker is never completed through the usual path, so its drop
// bomb is defused by hand before it is replaced.
marker.bomb.defuse();
marker = new_marker;
};
// Step over the numeric token directly; only the split event is pushed for it.
self.pos += 1;
self.push_event(Event::NumericSplitHack { ends_in_dot });
(ends_in_dot, marker)
}

/// Consume the next token if it is `kind` or emit an error
/// otherwise.
pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
Expand Down
14 changes: 0 additions & 14 deletions crates/squawk_parser/src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ pub(crate) enum Step<'a> {
kind: SyntaxKind,
n_input_tokens: u8,
},
NumericSplit {
ends_in_dot: bool,
},
Enter {
kind: SyntaxKind,
},
Expand All @@ -80,7 +77,6 @@ impl Output {
const TOKEN_EVENT: u8 = 0;
const ENTER_EVENT: u8 = 1;
const EXIT_EVENT: u8 = 2;
const SPLIT_EVENT: u8 = 3;

pub(crate) fn iter(&self) -> impl Iterator<Item = Step<'_>> {
self.event.iter().map(|&event| {
Expand All @@ -107,9 +103,6 @@ impl Output {
Step::Enter { kind }
}
Self::EXIT_EVENT => Step::Exit,
Self::SPLIT_EVENT => Step::NumericSplit {
ends_in_dot: event & Self::N_INPUT_TOKEN_MASK != 0,
},
_ => unreachable!(),
}
})
Expand All @@ -122,13 +115,6 @@ impl Output {
self.event.push(e)
}

/// Appends a numeric-split event to the output.
///
/// The event is a single `u32` built from the `SPLIT_EVENT` tag shifted by
/// `TAG_SHIFT`, the `ends_in_dot` flag shifted by `N_INPUT_TOKEN_SHIFT`, and
/// `EVENT_MASK`.
pub(crate) fn numeric_split_hack(&mut self, ends_in_dot: bool) {
let e = (Self::SPLIT_EVENT as u32) << Self::TAG_SHIFT
| ((ends_in_dot as u32) << Self::N_INPUT_TOKEN_SHIFT)
| Self::EVENT_MASK;
self.event.push(e);
}

pub(crate) fn enter_node(&mut self, kind: SyntaxKind) {
let e = ((kind as u16 as u32) << Self::KIND_SHIFT)
| ((Self::ENTER_EVENT as u32) << Self::TAG_SHIFT)
Expand Down
95 changes: 0 additions & 95 deletions crates/squawk_parser/src/shortcuts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,16 +64,6 @@ impl Builder<'_, '_> {
self.do_token(kind, n_tokens as usize);
}

/// Handles a numeric-split step: settles any pending builder state, skips
/// trivia, then emits the split pieces via `do_numeric_split`.
///
/// # Panics
///
/// Panics if the builder is in `State::PendingEnter` when the step arrives.
fn numeric_split(&mut self, has_pseudo_dot: bool) {
// Reset the state to `Normal`, flushing an exit that was still pending.
match mem::replace(&mut self.state, State::Normal) {
State::PendingEnter => unreachable!(),
State::PendingExit => (self.sink)(StrStep::Exit),
State::Normal => (),
}
self.eat_trivias();
self.do_numeric_split(has_pseudo_dot);
}

fn enter(&mut self, kind: SyntaxKind) {
match mem::replace(&mut self.state, State::Normal) {
State::PendingEnter => {
Expand Down Expand Up @@ -132,77 +122,6 @@ impl Builder<'_, '_> {
self.pos += n_tokens;
(self.sink)(StrStep::Token { kind, text });
}

/// Emits the sink steps for one numeric token that has to be split around
/// its first `.`, then advances `self.pos` past that token.
///
/// When the token text contains a `.`:
/// - the part before the dot is emitted as a `NAME_REF` wrapping an
///   `INT_NUMBER`, followed by an extra `Exit` and a `DOT` token;
/// - if `has_pseudo_dot` is `true` nothing more is emitted and the state is
///   set to `Normal`;
/// - otherwise the part after the dot is emitted as a second `NAME_REF` /
///   `INT_NUMBER` and the state is set to `PendingExit`.
///
/// When the text contains no `.`, an "illegal numeric literal" error step
/// is emitted and the whole token is wrapped in an `ERROR` node.
///
/// # Panics
///
/// Panics if the text before the dot is empty, if `has_pseudo_dot` is `true`
/// but text follows the dot, or if `has_pseudo_dot` is `false` and nothing
/// follows the dot.
fn do_numeric_split(&mut self, has_pseudo_dot: bool) {
// Text of the single token at the current position.
let text = &self.lexed.range_text(self.pos..self.pos + 1);

match text.split_once('.') {
Some((left, right)) => {
assert!(!left.is_empty());
(self.sink)(StrStep::Enter {
kind: SyntaxKind::NAME_REF,
});
(self.sink)(StrStep::Token {
kind: SyntaxKind::INT_NUMBER,
text: left,
});
(self.sink)(StrStep::Exit);

// here we move the exit up, the original exit has been deleted in process
(self.sink)(StrStep::Exit);

(self.sink)(StrStep::Token {
kind: SyntaxKind::DOT,
text: ".",
});

if has_pseudo_dot {
assert!(right.is_empty(), "{left}.{right}");
self.state = State::Normal;
} else {
assert!(!right.is_empty(), "{left}.{right}");
(self.sink)(StrStep::Enter {
kind: SyntaxKind::NAME_REF,
});
(self.sink)(StrStep::Token {
kind: SyntaxKind::INT_NUMBER,
text: right,
});
(self.sink)(StrStep::Exit);

// the parser creates an unbalanced start node, we are required to close it here
self.state = State::PendingExit;
}
}
None => {
// illegal numeric literal which doesn't have dot in form (like 1e0)
// we should emit an error node here
(self.sink)(StrStep::Error {
msg: "illegal numeric literal",
pos: self.pos,
});
(self.sink)(StrStep::Enter {
kind: SyntaxKind::ERROR,
});
(self.sink)(StrStep::Token {
kind: SyntaxKind::NUMERIC_NUMBER,
text,
});
(self.sink)(StrStep::Exit);

// move up
(self.sink)(StrStep::Exit);

self.state = if has_pseudo_dot {
State::Normal
} else {
State::PendingExit
};
}
}

// Exactly one input token is consumed, whichever branch ran.
self.pos += 1;
}
}

impl LexedStr<'_> {
Expand All @@ -227,18 +146,7 @@ impl LexedStr<'_> {
res.was_joint();
}
res.push(kind);
// Tag the token as joint if it is numeric with a fractional part
// we use this jointness to inform the parser about what token split
// event to emit when we encounter a numeric literal in a field access
// if kind == SyntaxKind::NUMERIC_NUMBER {
// if !self.text(i).ends_with('.') {
// res.was_joint();
// } else {
// was_joint = false;
// }
// } else {
was_joint = true;
// }
}
}
res
Expand All @@ -259,9 +167,6 @@ impl LexedStr<'_> {
kind,
n_input_tokens: n_raw_tokens,
} => builder.token(kind, n_raw_tokens),
Step::NumericSplit {
ends_in_dot: has_pseudo_dot,
} => builder.numeric_split(has_pseudo_dot),
Step::Enter { kind } => builder.enter(kind),
Step::Exit => builder.exit(),
Step::Error { msg } => {
Expand Down
8 changes: 8 additions & 0 deletions crates/squawk_parser/tests/data/err/select_literal.sql
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,11 @@ SELECT $;
SELECT $0111111111111111111111111111111111111111111111111111;
SELECT "";
SELECT U&"";
-- numeric field accesses used to panic in some cases
select 0 . 0e ;
select t . 0e ;
select 1 . 2e ;
select 0 . 1e5 ;
select 0 . .5 ;
select 1 . 2 . 3e ;
select (1) . 0e ;
Loading
Loading