Skip to content

Commit 9d5f00b

Browse files
committed
Add lifetime parameter to BorrowedToken for zero-copy tokenization
This change introduces a lifetime parameter 'a to the BorrowedToken enum to prepare for zero-copy tokenization support. This is a foundational step toward reducing memory allocations during SQL parsing.

Changes:
- Added a lifetime parameter to the BorrowedToken<'a> enum
- Added a _Phantom(Cow<'a, str>) variant to carry the lifetime
- Implemented the Visit and VisitMut traits for Cow<'a, str> to support the visitor pattern with the new lifetime parameter
- Fixed lifetime issues in visitor tests by using tokenized_owned() instead of tokenize() where owned tokens are required
- The type alias Token = BorrowedToken<'static> maintains backward compatibility
1 parent 31adc0e commit 9d5f00b

File tree

11 files changed

+1724
-1409
lines changed

11 files changed

+1724
-1409
lines changed

src/ast/helpers/attached_token.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ use sqlparser_derive::{Visit, VisitMut};
8080
#[derive(Clone)]
8181
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
8282
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
83-
pub struct AttachedToken(pub TokenWithSpan);
83+
pub struct AttachedToken(pub TokenWithSpan<'static>);
8484

8585
impl AttachedToken {
8686
/// Return a new Empty AttachedToken
@@ -123,13 +123,13 @@ impl Hash for AttachedToken {
123123
}
124124
}
125125

126-
impl From<TokenWithSpan> for AttachedToken {
127-
fn from(value: TokenWithSpan) -> Self {
126+
impl From<TokenWithSpan<'static>> for AttachedToken {
127+
fn from(value: TokenWithSpan<'static>) -> Self {
128128
AttachedToken(value)
129129
}
130130
}
131131

132-
impl From<AttachedToken> for TokenWithSpan {
132+
impl From<AttachedToken> for TokenWithSpan<'static> {
133133
fn from(value: AttachedToken) -> Self {
134134
value.0
135135
}

src/ast/visitor.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717

1818
//! Recursive visitors for ast Nodes. See [`Visitor`] for more details.
1919
20+
#[cfg(not(feature = "std"))]
21+
use alloc::borrow::Cow;
22+
#[cfg(feature = "std")]
23+
use std::borrow::Cow;
24+
2025
use crate::ast::{Expr, ObjectName, Query, Statement, TableFactor, Value};
2126
use core::ops::ControlFlow;
2227

@@ -118,6 +123,19 @@ visit_noop!(u8, u16, u32, u64, i8, i16, i32, i64, char, bool, String);
118123
#[cfg(feature = "bigdecimal")]
119124
visit_noop!(bigdecimal::BigDecimal);
120125

126+
// Implement Visit and VisitMut for Cow<str> to support the lifetime parameter in BorrowedToken
127+
impl<'a> Visit for Cow<'a, str> {
128+
fn visit<V: Visitor>(&self, _visitor: &mut V) -> ControlFlow<V::Break> {
129+
ControlFlow::Continue(())
130+
}
131+
}
132+
133+
impl<'a> VisitMut for Cow<'a, str> {
134+
fn visit<V: VisitorMut>(&mut self, _visitor: &mut V) -> ControlFlow<V::Break> {
135+
ControlFlow::Continue(())
136+
}
137+
}
138+
121139
/// A visitor that can be used to walk an AST tree.
122140
///
123141
/// `pre_visit_` methods are invoked before visiting all children of the
@@ -751,7 +769,7 @@ mod tests {
751769

752770
fn do_visit<V: Visitor<Break = ()>>(sql: &str, visitor: &mut V) -> Statement {
753771
let dialect = GenericDialect {};
754-
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
772+
let tokens = Tokenizer::new(&dialect, sql).tokenized_owned().unwrap();
755773
let s = Parser::new(&dialect)
756774
.with_tokens(tokens)
757775
.parse_statement()
@@ -942,7 +960,9 @@ mod tests {
942960
let sql = format!("SELECT x where {cond}");
943961

944962
let dialect = GenericDialect {};
945-
let tokens = Tokenizer::new(&dialect, sql.as_str()).tokenize().unwrap();
963+
let tokens = Tokenizer::new(&dialect, sql.as_str())
964+
.tokenized_owned()
965+
.unwrap();
946966
let s = Parser::new(&dialect)
947967
.with_tokens(tokens)
948968
.parse_statement()
@@ -983,7 +1003,7 @@ mod visit_mut_tests {
9831003

9841004
fn do_visit_mut<V: VisitorMut<Break = ()>>(sql: &str, visitor: &mut V) -> Statement {
9851005
let dialect = GenericDialect {};
986-
let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap();
1006+
let tokens = Tokenizer::new(&dialect, sql).tokenized_owned().unwrap();
9871007
let mut s = Parser::new(&dialect)
9881008
.with_tokens(tokens)
9891009
.parse_statement()

src/dialect/mod.rs

Lines changed: 109 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ use crate::ast::{ColumnOption, Expr, GranteesType, Ident, ObjectNamePart, Statem
5353
pub use crate::keywords;
5454
use crate::keywords::Keyword;
5555
use crate::parser::{Parser, ParserError};
56-
use crate::tokenizer::Token;
56+
use crate::tokenizer::BorrowedToken;
5757

5858
#[cfg(not(feature = "std"))]
5959
use alloc::boxed::Box;
@@ -655,16 +655,16 @@ pub trait Dialect: Debug + Any {
655655
let token = parser.peek_token();
656656
debug!("get_next_precedence_full() {token:?}");
657657
match token.token {
658-
Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)),
659-
Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)),
660-
Token::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)),
658+
BorrowedToken::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)),
659+
BorrowedToken::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)),
660+
BorrowedToken::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)),
661661

662-
Token::Word(w) if w.keyword == Keyword::AT => {
662+
BorrowedToken::Word(w) if w.keyword == Keyword::AT => {
663663
match (
664664
parser.peek_nth_token(1).token,
665665
parser.peek_nth_token(2).token,
666666
) {
667-
(Token::Word(w), Token::Word(w2))
667+
(BorrowedToken::Word(w), BorrowedToken::Word(w2))
668668
if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE =>
669669
{
670670
Ok(p!(AtTz))
@@ -673,102 +673,112 @@ pub trait Dialect: Debug + Any {
673673
}
674674
}
675675

676-
Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token {
677-
// The precedence of NOT varies depending on keyword that
678-
// follows it. If it is followed by IN, BETWEEN, or LIKE,
679-
// it takes on the precedence of those tokens. Otherwise, it
680-
// is not an infix operator, and therefore has zero
681-
// precedence.
682-
Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
683-
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
684-
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
685-
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
686-
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
687-
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
688-
Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)),
689-
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
690-
Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)),
691-
Token::Word(w)
692-
if w.keyword == Keyword::NULL && !parser.in_column_definition_state() =>
693-
{
694-
Ok(p!(Is))
676+
BorrowedToken::Word(w) if w.keyword == Keyword::NOT => {
677+
match parser.peek_nth_token(1).token {
678+
// The precedence of NOT varies depending on keyword that
679+
// follows it. If it is followed by IN, BETWEEN, or LIKE,
680+
// it takes on the precedence of those tokens. Otherwise, it
681+
// is not an infix operator, and therefore has zero
682+
// precedence.
683+
BorrowedToken::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
684+
BorrowedToken::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
685+
BorrowedToken::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
686+
BorrowedToken::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
687+
BorrowedToken::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
688+
BorrowedToken::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
689+
BorrowedToken::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)),
690+
BorrowedToken::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
691+
BorrowedToken::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)),
692+
BorrowedToken::Word(w)
693+
if w.keyword == Keyword::NULL && !parser.in_column_definition_state() =>
694+
{
695+
Ok(p!(Is))
696+
}
697+
_ => Ok(self.prec_unknown()),
695698
}
696-
_ => Ok(self.prec_unknown()),
697-
},
698-
Token::Word(w) if w.keyword == Keyword::NOTNULL && self.supports_notnull_operator() => {
699-
Ok(p!(Is))
700-
}
701-
Token::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)),
702-
Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
703-
Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
704-
Token::Word(w) if w.keyword == Keyword::OVERLAPS => Ok(p!(Between)),
705-
Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
706-
Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
707-
Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
708-
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
709-
Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)),
710-
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
711-
Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)),
712-
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)),
713-
Token::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)),
714-
Token::Period => Ok(p!(Period)),
715-
Token::Assignment
716-
| Token::Eq
717-
| Token::Lt
718-
| Token::LtEq
719-
| Token::Neq
720-
| Token::Gt
721-
| Token::GtEq
722-
| Token::DoubleEq
723-
| Token::Tilde
724-
| Token::TildeAsterisk
725-
| Token::ExclamationMarkTilde
726-
| Token::ExclamationMarkTildeAsterisk
727-
| Token::DoubleTilde
728-
| Token::DoubleTildeAsterisk
729-
| Token::ExclamationMarkDoubleTilde
730-
| Token::ExclamationMarkDoubleTildeAsterisk
731-
| Token::Spaceship => Ok(p!(Eq)),
732-
Token::Pipe
733-
| Token::QuestionMarkDash
734-
| Token::DoubleSharp
735-
| Token::Overlap
736-
| Token::AmpersandLeftAngleBracket
737-
| Token::AmpersandRightAngleBracket
738-
| Token::QuestionMarkDashVerticalBar
739-
| Token::AmpersandLeftAngleBracketVerticalBar
740-
| Token::VerticalBarAmpersandRightAngleBracket
741-
| Token::TwoWayArrow
742-
| Token::LeftAngleBracketCaret
743-
| Token::RightAngleBracketCaret
744-
| Token::QuestionMarkSharp
745-
| Token::QuestionMarkDoubleVerticalBar
746-
| Token::QuestionPipe
747-
| Token::TildeEqual
748-
| Token::AtSign
749-
| Token::ShiftLeftVerticalBar
750-
| Token::VerticalBarShiftRight => Ok(p!(Pipe)),
751-
Token::Caret | Token::Sharp | Token::ShiftRight | Token::ShiftLeft => Ok(p!(Caret)),
752-
Token::Ampersand => Ok(p!(Ampersand)),
753-
Token::Plus | Token::Minus => Ok(p!(PlusMinus)),
754-
Token::Mul | Token::Div | Token::DuckIntDiv | Token::Mod | Token::StringConcat => {
755-
Ok(p!(MulDivModOp))
756699
}
757-
Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
758-
Ok(p!(DoubleColon))
700+
BorrowedToken::Word(w)
701+
if w.keyword == Keyword::NOTNULL && self.supports_notnull_operator() =>
702+
{
703+
Ok(p!(Is))
759704
}
760-
Token::Arrow
761-
| Token::LongArrow
762-
| Token::HashArrow
763-
| Token::HashLongArrow
764-
| Token::AtArrow
765-
| Token::ArrowAt
766-
| Token::HashMinus
767-
| Token::AtQuestion
768-
| Token::AtAt
769-
| Token::Question
770-
| Token::QuestionAnd
771-
| Token::CustomBinaryOperator(_) => Ok(p!(PgOther)),
705+
BorrowedToken::Word(w) if w.keyword == Keyword::IS => Ok(p!(Is)),
706+
BorrowedToken::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)),
707+
BorrowedToken::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)),
708+
BorrowedToken::Word(w) if w.keyword == Keyword::OVERLAPS => Ok(p!(Between)),
709+
BorrowedToken::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)),
710+
BorrowedToken::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)),
711+
BorrowedToken::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)),
712+
BorrowedToken::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)),
713+
BorrowedToken::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)),
714+
BorrowedToken::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)),
715+
BorrowedToken::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)),
716+
BorrowedToken::Word(w) if w.keyword == Keyword::OPERATOR => Ok(p!(Between)),
717+
BorrowedToken::Word(w) if w.keyword == Keyword::DIV => Ok(p!(MulDivModOp)),
718+
BorrowedToken::Period => Ok(p!(Period)),
719+
BorrowedToken::Assignment
720+
| BorrowedToken::Eq
721+
| BorrowedToken::Lt
722+
| BorrowedToken::LtEq
723+
| BorrowedToken::Neq
724+
| BorrowedToken::Gt
725+
| BorrowedToken::GtEq
726+
| BorrowedToken::DoubleEq
727+
| BorrowedToken::Tilde
728+
| BorrowedToken::TildeAsterisk
729+
| BorrowedToken::ExclamationMarkTilde
730+
| BorrowedToken::ExclamationMarkTildeAsterisk
731+
| BorrowedToken::DoubleTilde
732+
| BorrowedToken::DoubleTildeAsterisk
733+
| BorrowedToken::ExclamationMarkDoubleTilde
734+
| BorrowedToken::ExclamationMarkDoubleTildeAsterisk
735+
| BorrowedToken::Spaceship => Ok(p!(Eq)),
736+
BorrowedToken::Pipe
737+
| BorrowedToken::QuestionMarkDash
738+
| BorrowedToken::DoubleSharp
739+
| BorrowedToken::Overlap
740+
| BorrowedToken::AmpersandLeftAngleBracket
741+
| BorrowedToken::AmpersandRightAngleBracket
742+
| BorrowedToken::QuestionMarkDashVerticalBar
743+
| BorrowedToken::AmpersandLeftAngleBracketVerticalBar
744+
| BorrowedToken::VerticalBarAmpersandRightAngleBracket
745+
| BorrowedToken::TwoWayArrow
746+
| BorrowedToken::LeftAngleBracketCaret
747+
| BorrowedToken::RightAngleBracketCaret
748+
| BorrowedToken::QuestionMarkSharp
749+
| BorrowedToken::QuestionMarkDoubleVerticalBar
750+
| BorrowedToken::QuestionPipe
751+
| BorrowedToken::TildeEqual
752+
| BorrowedToken::AtSign
753+
| BorrowedToken::ShiftLeftVerticalBar
754+
| BorrowedToken::VerticalBarShiftRight => Ok(p!(Pipe)),
755+
BorrowedToken::Caret
756+
| BorrowedToken::Sharp
757+
| BorrowedToken::ShiftRight
758+
| BorrowedToken::ShiftLeft => Ok(p!(Caret)),
759+
BorrowedToken::Ampersand => Ok(p!(Ampersand)),
760+
BorrowedToken::Plus | BorrowedToken::Minus => Ok(p!(PlusMinus)),
761+
BorrowedToken::Mul
762+
| BorrowedToken::Div
763+
| BorrowedToken::DuckIntDiv
764+
| BorrowedToken::Mod
765+
| BorrowedToken::StringConcat => Ok(p!(MulDivModOp)),
766+
BorrowedToken::DoubleColon
767+
| BorrowedToken::ExclamationMark
768+
| BorrowedToken::LBracket
769+
| BorrowedToken::CaretAt => Ok(p!(DoubleColon)),
770+
BorrowedToken::Arrow
771+
| BorrowedToken::LongArrow
772+
| BorrowedToken::HashArrow
773+
| BorrowedToken::HashLongArrow
774+
| BorrowedToken::AtArrow
775+
| BorrowedToken::ArrowAt
776+
| BorrowedToken::HashMinus
777+
| BorrowedToken::AtQuestion
778+
| BorrowedToken::AtAt
779+
| BorrowedToken::Question
780+
| BorrowedToken::QuestionAnd
781+
| BorrowedToken::CustomBinaryOperator(_) => Ok(p!(PgOther)),
772782
_ => Ok(self.prec_unknown()),
773783
}
774784
}

0 commit comments

Comments
 (0)