Skip to content
This repository was archived by the owner on Mar 26, 2026. It is now read-only.

Commit 0347a8e

Browse files
committed
edge-cases in Oracle/pgsql parsing, notes on Rust
1 parent 439fc87 commit 0347a8e

2 files changed

Lines changed: 48 additions & 25 deletions

File tree

README.md

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@ Supports stored procedures (PL/pgSQL, T-SQL, MySQL, PL/SQL): where sqlglot falls
88

99
#### What it's for
1010

11-
Anything that parses/transpiles SQL on the hot path: proxies, sidecars, migration tools, linters, CI/CD, Git pre-commit hooks where Python would be too slow. Also a replacement for every regex that's pretending to be a parser.
11+
Anywhere SQL parsing meets the hot path: proxies, sidecars, migration tools, linters, Git pre-commit hooks, CI/CD pipelines. Also a replacement for every regex that's pretending to be a parser.
1212

1313
#### Why this exists
1414

15-
libsqlglot was born out of a gap in the C++ ecosystem: the lack of native tooling for efficient, high-volume and hassle-free parsing & transpilation between dozens of SQL dialects.
15+
Because life is too short to parse SQL in Python.
1616

1717
Inspired by the original [sqlglot](https://github.com/tobymao/sqlglot), which did the decade-long work of mapping 31+ SQL dialects into an elegant, universal AST. libsqlglot does the comparatively trivial work of compiling it: the algorithm was already O(n), the runtime wasn't.
1818

@@ -28,6 +28,7 @@ Inspired by the original [sqlglot](https://github.com/tobymao/sqlglot), which di
2828
- [Benchmarks](#benchmarks)
2929
- [Examples](#examples)
3030
- [Supported SQL dialects](#supported-sql-dialects)
31+
- [Contributing](#contributing)
3132
- [Licence](#licence)
3233

3334
## Functionality
@@ -239,7 +240,7 @@ cmake --build build
239240

240241
## Architecture
241242

242-
Header-only design: you only pay for what you use. 19 header files, no `.cpp`. See `include/libsqlglot/` for the full layout. Core files: `parser.h` (4171 lines), `generator.h` (2149), `expression.h` (1385, 115 expression types). Entry point is `transpiler.h` (86 lines).
243+
Header-only design: you only pay for what you use. 19 header files, no `.cpp`. See `include/libsqlglot/` for the full layout. Core files: `parser.h` (4191 lines), `generator.h` (2149), `expression.h` (1385, 115 expression types). Entry point is `transpiler.h` (86 lines).
243244

244245
### Memory management
245246

@@ -610,14 +611,16 @@ These dialects inherit features from a compatible base dialect and add specific
610611

611612
## Contributing
612613

613-
# Contributing
614-
615-
libsqlglot is a solo project. Bug reports, test cases, and dialect edge cases are welcome via GitHub issues. If you have a dialect you wish to see added, please open an issue or PR.
614+
libsqlglot is currently a solo project. Bug reports, test cases, and dialect edge cases are welcome via GitHub issues. If you have a dialect you wish to see added, please open an issue or PR.
616615

617616
If a query parses incorrectly, or a dialect transformation produces wrong output, please open an issue with the input SQL, source dialect, target dialect, expected output and any other pertinent details.
618617

619618
Pull requests are considered but there is no guarantee of merge. The codebase is intentionally small and opinionated.
620619

620+
### How about a Rust rewrite?
621+
622+
No.
623+
621624
## Licence
622625

623626
Apache 2.0

include/libsqlglot/parser.h

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,7 +1499,8 @@ class Parser {
14991499

15001500
// Database/Schema
15011501
if (priv_upper == "CONNECT") return GrantStmt::PrivilegeType::CONNECT;
1502-
if (priv_upper == "TEMPORARY" || priv_upper == "TEMP") return GrantStmt::PrivilegeType::TEMPORARY;
1502+
if (priv_upper == "TEMPORARY") return GrantStmt::PrivilegeType::TEMPORARY;
1503+
if (priv_upper == "TEMP") return GrantStmt::PrivilegeType::TEMP;
15031504
if (priv_upper == "USAGE") return GrantStmt::PrivilegeType::USAGE;
15041505

15051506
// Procedures
@@ -1604,7 +1605,7 @@ class Parser {
16041605
if (std::string(current().text) == "PUBLIC" || std::string(current().text) == "public") {
16051606
stmt->to_public = true;
16061607
} else {
1607-
stmt->grantees.push_back(std::string(current().text));
1608+
stmt->grantees.push_back(std::string(current().view(source_)));
16081609
}
16091610
advance();
16101611
}
@@ -1738,6 +1739,12 @@ class Parser {
17381739
} while (match(TokenType::COMMA));
17391740

17401741
expect(TokenType::RPAREN);
1742+
1743+
// Validate: column list cannot be empty
1744+
if (col_priv.columns.empty()) {
1745+
error("Column list cannot be empty for column-level privilege");
1746+
}
1747+
17411748
stmt->column_privileges.push_back(std::move(col_priv));
17421749
handled_column_priv = true;
17431750
}
@@ -1817,18 +1824,26 @@ class Parser {
18171824

18181825
// Check for third segment: catalog.schema.table
18191826
if (match(TokenType::DOT)) {
1820-
stmt->schema_name = second_segment; // catalog.schema becomes schema
1827+
// Three-segment name: store as full qualified name
18211828
if (current().type == TokenType::IDENTIFIER) {
18221829
obj_name = obj_name + "." + second_segment + "." + std::string(current().text);
18231830
advance();
18241831
} else if (current().type == TokenType::STAR) {
18251832
obj_name = obj_name + "." + second_segment + ".*";
18261833
advance();
18271834
}
1835+
// Don't set schema_name for three-segment names - keep full name
18281836
} else {
18291837
// Two-segment name: schema.table or *.*
1830-
stmt->schema_name = obj_name;
1831-
obj_name = second_segment;
1838+
// For the first object, split into schema and name
1839+
// For subsequent objects, store full qualified name
1840+
if (stmt->object_name.empty()) {
1841+
stmt->schema_name = obj_name;
1842+
obj_name = second_segment;
1843+
} else {
1844+
// Keep full qualified name for list items
1845+
obj_name = obj_name + "." + second_segment;
1846+
}
18321847
}
18331848
}
18341849

@@ -1983,7 +1998,7 @@ class Parser {
19831998
if (std::string(current().text) == "PUBLIC" || std::string(current().text) == "public") {
19841999
stmt->from_public = true;
19852000
} else {
1986-
stmt->grantees.push_back(std::string(current().text));
2001+
stmt->grantees.push_back(std::string(current().view(source_)));
19872002
}
19882003
advance();
19892004
}
@@ -2108,6 +2123,12 @@ class Parser {
21082123
} while (match(TokenType::COMMA));
21092124

21102125
expect(TokenType::RPAREN);
2126+
2127+
// Validate: column list cannot be empty
2128+
if (col_priv.columns.empty()) {
2129+
error("Column list cannot be empty for column-level privilege");
2130+
}
2131+
21112132
stmt->column_privileges.push_back(std::move(col_priv));
21122133
handled_column_priv = true;
21132134
}
@@ -2213,6 +2234,11 @@ class Parser {
22132234
advance();
22142235
}
22152236
} while (match(TokenType::COMMA));
2237+
2238+
// Validate: must have at least one grantee
2239+
if (!stmt->from_public && stmt->grantees.empty()) {
2240+
error("REVOKE statement must specify at least one grantee");
2241+
}
22162242
}
22172243

22182244
// CASCADE / RESTRICT
@@ -2629,26 +2655,20 @@ class Parser {
26292655

26302656
// Read up to 3 consecutive tokens that could form a delimiter
26312657
// Examples: $ + $ = $$, / + / = //, | = |, ; = ;
2632-
// Note: $ may tokenize as ERROR, so we accept ERROR tokens here
2658+
// Be permissive: accept any non-keyword, non-identifier token
26332659
for (int i = 0; i < 3 && !is_at_end(); ++i) {
26342660
TokenType t = current().type;
26352661

2636-
// Stop if we hit keywords, identifiers, numbers, or strings
2637-
// But allow ERROR tokens (for $), and operator tokens (/, ;, |, etc.)
2662+
// Stop if we hit SQL keywords or identifiers (likely start of next statement)
26382663
if (t == TokenType::IDENTIFIER ||
2639-
t == TokenType::NUMBER ||
2640-
t == TokenType::STRING ||
2641-
(t >= TokenType::SELECT && t < TokenType::EOF_TOKEN)) { // Keywords range
2664+
(t >= TokenType::SELECT && t < TokenType::SEMICOLON)) { // Keywords before punctuation
26422665
break;
26432666
}
26442667

2645-
// Accumulate the token text
2646-
const char* text = get_token_text(current());
2647-
if (text) {
2648-
delim += std::string(text);
2649-
} else {
2650-
// Fallback to source view for tokens without text (e.g., ERROR tokens like $)
2651-
delim += std::string(current().view(source_));
2668+
// Accumulate the token text using source view (most reliable)
2669+
std::string_view token_view = current().view(source_);
2670+
if (!token_view.empty()) {
2671+
delim += std::string(token_view);
26522672
}
26532673
advance();
26542674

0 commit comments

Comments
 (0)