Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion crates/squawk_ide/src/column_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,15 @@ fn name_from_expr(expr: ast::Expr, in_type: bool) -> Option<(ColumnName, SyntaxN
return name_from_expr(base, in_type);
}
}
ast::Expr::Literal(_) | ast::Expr::PrefixExpr(_) => {
ast::Expr::Literal(literal) => {
if literal.syntax().first_token().is_some_and(|token| {
token.kind() == SyntaxKind::STRING && token.text().starts_with(['n', 'N'])
}) {
return Some((ColumnName::UnknownColumn(Some("bpchar".to_string())), node));
}
return Some((ColumnName::UnknownColumn(None), node));
}
ast::Expr::PrefixExpr(_) => {
return Some((ColumnName::UnknownColumn(None), node));
}
ast::Expr::PostfixExpr(postfix_expr) => match postfix_expr.op() {
Expand Down Expand Up @@ -533,6 +541,8 @@ fn examples() {
assert_snapshot!(name("1 + 2"), @"?column?");
assert_snapshot!(name("42"), @"?column?");
assert_snapshot!(name("'string'"), @"?column?");
assert_snapshot!(name("n'string'"), @"bpchar");
assert_snapshot!(name("N'string'"), @"bpchar");
// prefix
assert_snapshot!(name("-42"), @"?column?");
assert_snapshot!(name("|/ 42"), @"?column?");
Expand Down
135 changes: 115 additions & 20 deletions crates/squawk_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,25 +56,42 @@ impl Cursor<'_> {
self.prefixed_string(
|terminated| LiteralKind::UnicodeEscStr { terminated },
true,
false,
)
} else {
self.ident_or_unknown_prefix()
}
}
// escaped strings
'e' | 'E' => {
self.prefixed_string(|terminated| LiteralKind::EscStr { terminated }, false)
self.prefixed_string(|terminated| LiteralKind::EscStr { terminated }, false, true)
}

// bit string
'b' | 'B' => {
self.prefixed_string(|terminated| LiteralKind::BitStr { terminated }, false)
}
'b' | 'B' => self.prefixed_string(
|terminated| LiteralKind::BitStr { terminated },
false,
false,
),

// hexadecimal byte string
'x' | 'X' => {
self.prefixed_string(|terminated| LiteralKind::ByteStr { terminated }, false)
}
'x' | 'X' => self.prefixed_string(
|terminated| LiteralKind::ByteStr { terminated },
false,
false,
),

// national character string
'n' | 'N' => match self.first() {
'\'' => {
self.bump();
let terminated = self.single_quoted_string(false);
TokenKind::Literal {
kind: LiteralKind::Str { terminated },
}
}
_ => self.ident(),
},

// Identifier (this should be checked after other variant that can
// start as identifier).
Expand Down Expand Up @@ -137,7 +154,7 @@ impl Cursor<'_> {

// String literal
'\'' => {
let terminated = self.single_quoted_string();
let terminated = self.single_quoted_string(false);
let kind = LiteralKind::Str { terminated };
TokenKind::Literal { kind }
}
Expand Down Expand Up @@ -179,7 +196,7 @@ impl Cursor<'_> {
/// Consumes the rest of a line comment and returns [`TokenKind::LineComment`].
///
/// One character is bumped unconditionally before scanning (presumably the
/// remainder of the comment opener; confirm against the caller). The scan
/// then stops in front of the first `\n` or `\r`, or at end of input, so the
/// line terminator itself is never part of the comment token.
pub(crate) fn line_comment(&mut self) -> TokenKind {
    self.bump();

    // A bare carriage return ends the comment just like a line feed does.
    // Scanning for `\n` alone would swallow the `\r` and everything after it
    // up to the next line feed.
    self.eat_while(|c| !matches!(c, '\n' | '\r'));
    TokenKind::LineComment
}

Expand Down Expand Up @@ -217,11 +234,12 @@ impl Cursor<'_> {
&mut self,
mk_kind: fn(bool) -> LiteralKind,
allows_double: bool,
backslash_escapes: bool,
) -> TokenKind {
match self.first() {
'\'' => {
self.bump();
let terminated = self.single_quoted_string();
let terminated = self.single_quoted_string(backslash_escapes);
let kind = mk_kind(terminated);
TokenKind::Literal { kind }
}
Expand Down Expand Up @@ -313,10 +331,16 @@ impl Cursor<'_> {
}
}

fn single_quoted_string(&mut self) -> bool {
fn single_quoted_string(&mut self, backslash_escapes: bool) -> bool {
// Parse until either quotes are terminated or error is detected.
loop {
match self.first() {
'\\' if backslash_escapes => {
// backslash
self.bump();
// escaped char
self.bump();
}
// Quotes might be terminated.
'\'' => {
self.bump();
Expand Down Expand Up @@ -397,31 +421,30 @@ impl Cursor<'_> {
}
} else {
loop {
self.eat_while(|c| c != start[0]);
self.eat_while(|c| c != '$');
if self.is_eof() {
return TokenKind::Literal {
kind: LiteralKind::DollarQuotedString { terminated: false },
};
}

// might be the start of our start/end sequence
let mut match_count = 0;
// Eat the leading '$' of a possible closing delimiter.
self.bump();

let mut matches_tag = true;
for start_char in &start {
if self.first() == *start_char {
self.bump();
match_count += 1;
} else {
self.bump();
matches_tag = false;
break;
}
}

// closing '$'
let terminated = match_count == start.len();
if self.first() == '$' && terminated {
if matches_tag && self.first() == '$' {
self.bump();
return TokenKind::Literal {
kind: LiteralKind::DollarQuotedString { terminated },
kind: LiteralKind::DollarQuotedString { terminated: true },
};
}
}
Expand Down Expand Up @@ -615,6 +638,25 @@ mod tests {
assert_debug_snapshot!(result);
}

// A lone carriage return (no following line feed) must end a `--` comment:
// the expected token stream has `-- comment` as a LineComment, the `\r` as
// its own Whitespace token, and the second `select` lexed as normal tokens.
#[test]
fn line_comment_cr_newline() {
assert_debug_snapshot!(lex("select 1; -- comment\rselect 2;"), @r#"
[
"select" @ Ident,
" " @ Whitespace,
"1" @ Literal { kind: Int { base: Decimal, empty_int: false, trailing_junk_start: 1 } },
";" @ Semi,
" " @ Whitespace,
"-- comment" @ LineComment,
"\r" @ Whitespace,
"select" @ Ident,
" " @ Whitespace,
"2" @ Literal { kind: Int { base: Decimal, empty_int: false, trailing_junk_start: 1 } },
";" @ Semi,
]
"#);
}

#[test]
fn line_comment_whitespace() {
assert_debug_snapshot!(lex(r#"
Expand Down Expand Up @@ -714,6 +756,20 @@ x'1FF'
"#))
}

// `N'...'` and `n'...'` are expected to lex as a single `Str` literal that
// includes the prefix letter. An identifier that merely starts with `n`
// (`numeric`) followed by a quote must still lex as an Ident and then a
// separate, unprefixed `Str` literal.
#[test]
fn national_character_string() {
assert_debug_snapshot!(lex("N'foo' n'bar' numeric'1'"), @r#"
[
"N'foo'" @ Literal { kind: Str { terminated: true } },
" " @ Whitespace,
"n'bar'" @ Literal { kind: Str { terminated: true } },
" " @ Whitespace,
"numeric" @ Ident,
"'1'" @ Literal { kind: Str { terminated: true } },
]
"#);
}

#[test]
fn string() {
assert_debug_snapshot!(lex(r#"
Expand Down Expand Up @@ -763,6 +819,33 @@ e'\uAAAA \UFFFFFFFF'
"#))
}

// Inside an `E'...'` string a backslash-escaped quote (`\'`) does not close
// the literal: the whole input is expected to be one terminated `EscStr`.
// The doubled backslash in the snapshot is only Debug formatting of the
// single backslash in the input.
#[test]
fn escape_string_with_backslash_escaped_quote() {
assert_debug_snapshot!(lex(r"E'foo\'bar'"), @r#"
[
"E'foo\\'bar'" @ Literal { kind: EscStr { terminated: true } },
]
"#);
}

// When the only candidate closing quote of an `E'...'` string is preceded by
// a backslash, it counts as escaped. The literal therefore runs to the end
// of input (swallowing the `;`) and is reported as unterminated.
#[test]
fn escape_string_with_escaped_terminal_quote_is_unterminated() {
assert_debug_snapshot!(lex(r"E'foo\';"), @r#"
[
"E'foo\\';" @ Literal { kind: EscStr { terminated: false } },
]
"#);
}

// Two backslashes form an escaped backslash, so the quote that follows them
// is a real closing quote and the `E'...'` literal is terminated. The
// snapshot shows four backslashes because of Debug escaping.
#[test]
fn escape_string_with_even_backslashes_before_quote_is_terminated() {
assert_debug_snapshot!(lex(r"E'foo\\'"), @r#"
[
"E'foo\\\\'" @ Literal { kind: EscStr { terminated: true } },
]
"#);
}

#[test]
fn string_unicode_escape() {
// https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS-UESCAPE
Expand Down Expand Up @@ -804,6 +887,18 @@ U&"d!0061t!+000061" UESCAPE '!'
"#);
}

// The body contains `foo$` without a `$` directly in front of it
// (`abcfoo$def`). That must not be taken as the closing `$foo$` delimiter:
// the expected result is one terminated dollar-quoted literal spanning up to
// the final `$foo$`, followed by the `;`.
#[test]
fn tagged_dollar_quote_requires_leading_dollar() {
assert_debug_snapshot!(lex("select $foo$abcfoo$def$foo$;"), @r#"
[
"select" @ Ident,
" " @ Whitespace,
"$foo$abcfoo$def$foo$" @ Literal { kind: DollarQuotedString { terminated: true } },
";" @ Semi,
]
"#);
}

#[test]
fn ident_non_ascii_above_latin1() {
assert_debug_snapshot!(lex("ẞ Ā 漢字 𐐷"), @r#"
Expand Down
10 changes: 6 additions & 4 deletions crates/squawk_syntax/src/quote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,13 @@ pub fn strip_unicode_esc_prefix(text: &str) -> Option<&str> {
strip_quotes(text.strip_prefix(['u', 'U'])?.strip_prefix('&')?)
}

/// Returns the tag of a dollar-quoted string: the text between the leading
/// `$` and the next `$`.
///
/// The tag may be empty (as in `$$body$$`). Returns `None` when `text` does
/// not start with `$` or contains no second `$`.
pub fn dollar_quote_tag(text: &str) -> Option<&str> {
    let after_opening = text.strip_prefix('$')?;
    let tag_len = after_opening.find('$')?;
    Some(&after_opening[..tag_len])
}

/// Strips the opening and closing `$tag$` delimiters from a dollar-quoted
/// string and returns the body between them.
///
/// Returns `None` when `text` has no opening `$tag$` (see
/// [`dollar_quote_tag`]) or when the remainder does not end with the same
/// `$tag$` delimiter.
pub fn strip_dollar_quotes(text: &str) -> Option<&str> {
    let tag = dollar_quote_tag(text)?;
    // Skip the opening delimiter: `$`, the tag, and the `$` that ends it.
    // `dollar_quote_tag` succeeding guarantees both `$` bytes are present, so
    // this index is always on a char boundary within `text`.
    let body = &text[tag.len() + 2..];
    // Peel the closing `$tag$` from the right, piece by piece, so no
    // temporary delimiter string has to be allocated.
    body.strip_suffix('$')?
        .strip_suffix(tag)?
        .strip_suffix('$')
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
source: crates/squawk_syntax/src/test.rs
input_file: crates/squawk_syntax/test_data/validation/dollar_quoted_string.sql
---
SOURCE_FILE@0..30
SELECT@0..29
SELECT_CLAUSE@0..28
SELECT_KW@0..6 "select"
WHITESPACE@6..7 " "
TARGET_LIST@7..28
TARGET@7..28
LITERAL@7..28
DOLLAR_QUOTED_STRING@7..28 "$foo-bar$abc$foo-bar$"
SEMICOLON@28..29 ";"
WHITESPACE@29..30 "\n"

error[syntax-error]: "-" is not allowed in dollar quote tags
╭▸
1 │ select $foo-bar$abc$foo-bar$;
╰╴ ━
error[syntax-error]: "-" is not allowed in dollar quote tags
╭▸
1 │ select $foo-bar$abc$foo-bar$;
╰╴ ━
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
source: crates/squawk_syntax/src/test.rs
input_file: crates/squawk_syntax/test_data/validation/validate_string_continuation.sql
---
SOURCE_FILE@0..1228
SOURCE_FILE@0..1287
COMMENT@0..28 "-- ok strings with ne ..."
WHITESPACE@28..29 "\n"
SELECT@29..49
Expand Down Expand Up @@ -482,7 +482,21 @@ SOURCE_FILE@0..1228
WHITESPACE@1221..1222 "\n"
STRING@1222..1226 "'G0'"
SEMICOLON@1226..1227 ";"
WHITESPACE@1227..1228 "\n"
WHITESPACE@1227..1229 "\n\n"
COMMENT@1229..1270 "-- ok CR-only newline ..."
WHITESPACE@1270..1271 "\n"
SELECT@1271..1286
SELECT_CLAUSE@1271..1285
SELECT_KW@1271..1277 "select"
WHITESPACE@1277..1278 " "
TARGET_LIST@1278..1285
TARGET@1278..1285
LITERAL@1278..1285
STRING@1278..1281 "'a'"
WHITESPACE@1281..1282 "\r"
STRING@1282..1285 "'b'"
SEMICOLON@1285..1286 ";"
WHITESPACE@1286..1287 "\n"

error[syntax-error]: Expected new line or comma between string literals
╭▸
Expand Down
Loading
Loading