Core SQL parsing and dialect translation library for Rust. Parses, generates, transpiles, and formats SQL across 32 database dialects.
Part of the Polyglot project.
- Parse SQL into a fully-typed AST with 200+ expression types
- Generate SQL from AST nodes for any target dialect
- Transpile between any pair of 32 dialects in one call
- Format / pretty-print SQL
- Fluent builder API for constructing queries programmatically
- AST traversal utilities (DFS/BFS iterators, transform, walk)
- Validation with syntax checking and error location reporting
- Schema module for column resolution and type annotation
use polyglot_sql::{transpile, DialectType};
let result = transpile(
"SELECT IFNULL(a, b) FROM t",
DialectType::MySQL,
DialectType::Postgres,
).unwrap();
assert_eq!(result[0], "SELECT COALESCE(a, b) FROM t");You can also transpile through a Dialect handle directly — useful when you
already hold one (e.g., for custom dialects) or need pretty-printed output:
use polyglot_sql::{Dialect, DialectType, TranspileOptions};
let mysql = Dialect::get(DialectType::MySQL);
// Built-in target via DialectType
let plain = mysql.transpile("SELECT IFNULL(a, b) FROM t", DialectType::Postgres).unwrap();
// Pretty-printed output via TranspileOptions
let pretty = mysql
.transpile_with(
"SELECT IFNULL(a, b) FROM t",
DialectType::Postgres,
TranspileOptions::pretty(),
)
.unwrap();
// Target a custom (or built-in) Dialect handle directly
let pg = Dialect::get(DialectType::Postgres);
let via_handle = mysql.transpile("SELECT IFNULL(a, b) FROM t", &pg).unwrap();use polyglot_sql::{parse, generate, DialectType};
let ast = parse("SELECT 1 + 2", DialectType::Generic).unwrap();
let sql = generate(&ast[0], DialectType::Postgres).unwrap();
assert_eq!(sql, "SELECT 1 + 2");Formatting is protected by guard limits by default:
max_input_bytes:16 * 1024 * 1024max_tokens:1_000_000max_ast_nodes:1_000_000max_set_op_chain:256
You can override these limits per call:
use polyglot_sql::{format_with_options, DialectType, FormatGuardOptions};
let options = FormatGuardOptions {
max_input_bytes: Some(2 * 1024 * 1024),
max_tokens: Some(250_000),
max_ast_nodes: Some(250_000),
max_set_op_chain: Some(128),
};
let formatted = format_with_options("SELECT a,b FROM t", DialectType::Postgres, &options).unwrap();
assert!(formatted[0].contains("SELECT"));Guard failures include stable codes in the error message:
E_GUARD_INPUT_TOO_LARGEE_GUARD_TOKEN_BUDGET_EXCEEDEDE_GUARD_AST_BUDGET_EXCEEDEDE_GUARD_SET_OP_CHAIN_EXCEEDED
use polyglot_sql::builder::*;
// SELECT id, name FROM users WHERE age > 18 ORDER BY name LIMIT 10
let expr = select(["id", "name"])
.from("users")
.where_(col("age").gt(lit(18)))
.order_by(["name"])
.limit(10)
.build();use polyglot_sql::builder::*;
// Column references (supports dotted names)
let c = col("users.id");
// Literals
let s = lit("hello"); // 'hello'
let n = lit(42); // 42
let f = lit(3.14); // 3.14
let b = lit(true); // TRUE
// Operators
let cond = col("age").gte(lit(18)).and(col("status").eq(lit("active")));
// Functions
let f = func("COALESCE", [col("a"), col("b"), null()]);use polyglot_sql::builder::*;
let expr = case()
.when(col("x").gt(lit(0)), lit("positive"))
.when(col("x").eq(lit(0)), lit("zero"))
.else_(lit("negative"))
.build();use polyglot_sql::builder::*;
let expr = union_all(
select(["id"]).from("a"),
select(["id"]).from("b"),
)
.order_by(["id"])
.limit(5)
.build();use polyglot_sql::builder::*;
// INSERT INTO users (id, name) VALUES (1, 'Alice')
let ins = insert_into("users")
.columns(["id", "name"])
.values([lit(1), lit("Alice")])
.build();
// UPDATE users SET name = 'Bob' WHERE id = 1
let upd = update("users")
.set("name", lit("Bob"))
.where_(col("id").eq(lit(1)))
.build();
// DELETE FROM users WHERE id = 1
let del = delete("users")
.where_(col("id").eq(lit(1)))
.build();use polyglot_sql::{parse, DialectType, traversal::*};
let ast = parse("SELECT a, b FROM t WHERE x > 1", DialectType::Generic).unwrap();
let columns = get_columns(&ast[0]);
let tables = get_tables(&ast[0]);use polyglot_sql::{validate, DialectType};
let result = validate("SELECT * FORM users", DialectType::Generic);
// result contains error with line/column locationuse polyglot_sql::{
validate_with_schema, DialectType, SchemaColumn, SchemaTable, SchemaValidationOptions,
ValidationSchema,
};
let schema = ValidationSchema {
strict: Some(true),
tables: vec![
SchemaTable {
name: "users".into(),
schema: None,
columns: vec![
SchemaColumn {
name: "id".into(),
data_type: "integer".into(),
nullable: Some(false),
primary_key: true,
unique: false,
references: None,
},
SchemaColumn {
name: "email".into(),
data_type: "varchar".into(),
nullable: Some(false),
primary_key: false,
unique: true,
references: None,
},
],
aliases: vec![],
primary_key: vec!["id".into()],
unique_keys: vec![vec!["email".into()]],
foreign_keys: vec![],
},
],
};
let opts = SchemaValidationOptions {
check_types: true,
check_references: true,
strict: None,
semantic: true,
};
let result = validate_with_schema(
"SELECT id FROM users WHERE email = 1",
DialectType::Generic,
&schema,
&opts,
);
assert!(!result.valid);Schema-aware validation emits stable codes such as:
E200/E201for unknown tables/columnsE210-E217andW210-W216for type checksE220,E221,W220,W221,W222for reference/FK checks
Access the raw token stream with full source position spans. Each token carries a Span with byte offsets and line/column numbers.
use polyglot_sql::{DialectType, Dialect};
let dialect = Dialect::new(DialectType::Generic);
let tokens = dialect.tokenize("SELECT a, b FROM t").unwrap();
for token in &tokens {
println!("{:?} {:?} {:?}", token.token_type, token.text, token.span);
// Select "SELECT" Span { start: 0, end: 6, line: 1, column: 1 }
// Var "a" Span { start: 7, end: 8, line: 1, column: 8 }
// ...
}The Span struct provides:
| Field | Type | Description |
|---|---|---|
start |
usize |
Start byte offset (0-based) |
end |
usize |
End byte offset (exclusive) |
line |
usize |
Line number (1-based) |
column |
usize |
Column number (1-based) |
Parse and tokenize errors include source position information with line/column numbers and byte offset ranges, making it straightforward to provide precise error feedback.
use polyglot_sql::{parse, DialectType};
let result = parse("SELECT 1 +", DialectType::Generic);
if let Err(e) = result {
println!("{}", e); // "Parse error at line 1, column 11: ..."
println!("{:?}", e.line()); // Some(1)
println!("{:?}", e.column()); // Some(11)
println!("{:?}", e.start()); // Some(10) — byte offset
println!("{:?}", e.end()); // Some(11) — byte offset (exclusive)
}The Error enum provides line(), column(), start(), and end() accessors that return Option<usize> for Parse, Tokenize, and Syntax error variants:
use polyglot_sql::error::Error;
let err = Error::parse("Unexpected token", 3, 15);
assert_eq!(err.line(), Some(3));
assert_eq!(err.column(), Some(15));
// Generation errors don't carry position info
let err = Error::generate("unsupported expression");
assert_eq!(err.line(), None);Athena, BigQuery, ClickHouse, CockroachDB, Databricks, Doris, Dremio, Drill, Druid, DuckDB, Dune, Exasol, Fabric, Hive, Materialize, MySQL, Oracle, PostgreSQL, Presto, Redshift, RisingWave, SingleStore, Snowflake, Solr, Spark, SQLite, StarRocks, Tableau, Teradata, TiDB, Trino, TSQL
| Flag | Description |
|---|---|
bindings |
Enable ts-rs TypeScript type generation |