Skip to content

Commit 2973a3d

Browse files
Alex Sarapulov (molyee)
authored and committed
feat: support parsing sql with custom tokenizer
1 parent df5b5dc commit 2973a3d

File tree

2 files changed

+23
-5
lines changed

2 files changed

+23
-5
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ rand = "0.8"
137137
regex = "1.8"
138138
rstest = "0.22.0"
139139
serde_json = "1"
140-
sqlparser = { version = "0.50.0", features = ["visitor"] }
140+
sqlparser = { git = "https://github.com/tarantool/datafusion-sqlparser-rs.git", features = ["visitor"], branch = "release-0.50.0" }
141141
tempfile = "3"
142142
thiserror = "1.0.44"
143143
tokio = { version = "1.36", features = ["macros", "rt", "sync"] }

datafusion/sql/src/parser.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ use sqlparser::{
2727
},
2828
dialect::{keywords::Keyword, Dialect, GenericDialect},
2929
parser::{Parser, ParserError},
30-
tokenizer::{Token, TokenWithLocation, Tokenizer, Word},
30+
tokenizer::{Token, TokenWithLocation, Tokenize, Tokenizer, Word},
3131
};
3232

3333
// Use `Parser::expected` instead, if possible
@@ -279,9 +279,17 @@ impl<'a> DFParser<'a> {
279279
sql: &str,
280280
dialect: &'a dyn Dialect,
281281
) -> Result<Self, ParserError> {
282-
let mut tokenizer = Tokenizer::new(dialect, sql);
283-
let tokens = tokenizer.tokenize()?;
282+
let tokenizer = Tokenizer::new(dialect, sql);
283+
Self::new_with_dialect_and_tokenizer(dialect, tokenizer)
284+
}
284285

286+
/// Create a new parser for the specified tokens with
287+
/// specified dialect and tokenizer.
288+
pub fn new_with_dialect_and_tokenizer<T: Tokenize>(
289+
dialect: &'a dyn Dialect,
290+
mut tokenizer: T,
291+
) -> Result<Self, ParserError> {
292+
let tokens = tokenizer.tokenize()?;
285293
Ok(DFParser {
286294
parser: Parser::new(dialect).with_tokens(tokens),
287295
})
@@ -300,7 +308,17 @@ impl<'a> DFParser<'a> {
300308
sql: &str,
301309
dialect: &dyn Dialect,
302310
) -> Result<VecDeque<Statement>, ParserError> {
303-
let mut parser = DFParser::new_with_dialect(sql, dialect)?;
311+
let tokenizer = Tokenizer::new(dialect, sql);
312+
Self::parse_sql_with_dialect_and_tokenizer(dialect, tokenizer)
313+
}
314+
315+
/// Parse a SQL string and produce one or more [`Statement`]s
316+
/// with the specified dialect and tokenizer.
317+
pub fn parse_sql_with_dialect_and_tokenizer<T: Tokenize>(
318+
dialect: &dyn Dialect,
319+
tokenizer: T,
320+
) -> Result<VecDeque<Statement>, ParserError> {
321+
let mut parser = DFParser::new_with_dialect_and_tokenizer(dialect, tokenizer)?;
304322
let mut stmts = VecDeque::new();
305323
let mut expecting_statement_delimiter = false;
306324
loop {

0 commit comments

Comments
 (0)