polyglot-sql

Core SQL parsing and dialect translation library for Rust. Parses, generates, transpiles, and formats SQL across 32 database dialects.

Part of the Polyglot project.

Features

Parse SQL into a fully-typed AST with 200+ expression types
Generate SQL from AST nodes for any target dialect
Transpile between any pair of 32 dialects in one call
Format / pretty-print SQL
Fluent builder API for constructing queries programmatically
AST traversal utilities (DFS/BFS iterators, transform, walk)
Validation with syntax checking and error location reporting
Schema module for column resolution and type annotation

Usage

Transpile

use polyglot_sql::{transpile, DialectType};

let result = transpile(
    "SELECT IFNULL(a, b) FROM t",
    DialectType::MySQL,
    DialectType::Postgres,
).unwrap();
assert_eq!(result[0], "SELECT COALESCE(a, b) FROM t");

You can also transpile through a Dialect handle directly — useful when you already hold one (e.g., for custom dialects) or need pretty-printed output:

use polyglot_sql::{Dialect, DialectType, TranspileOptions};

let mysql = Dialect::get(DialectType::MySQL);

// Built-in target via DialectType
let plain = mysql.transpile("SELECT IFNULL(a, b) FROM t", DialectType::Postgres).unwrap();

// Pretty-printed output via TranspileOptions
let pretty = mysql
    .transpile_with(
        "SELECT IFNULL(a, b) FROM t",
        DialectType::Postgres,
        TranspileOptions::pretty(),
    )
    .unwrap();

// Target a custom (or built-in) Dialect handle directly
let pg = Dialect::get(DialectType::Postgres);
let via_handle = mysql.transpile("SELECT IFNULL(a, b) FROM t", &pg).unwrap();

Parse + Generate

use polyglot_sql::{parse, generate, DialectType};

let ast = parse("SELECT 1 + 2", DialectType::Generic).unwrap();
let sql = generate(&ast[0], DialectType::Postgres).unwrap();
assert_eq!(sql, "SELECT 1 + 2");

Format With Guard Options

Formatting is protected by guard limits by default:

max_input_bytes: 16 * 1024 * 1024
max_tokens: 1_000_000
max_ast_nodes: 1_000_000
max_set_op_chain: 256

You can override these limits per call:

use polyglot_sql::{format_with_options, DialectType, FormatGuardOptions};

let options = FormatGuardOptions {
    max_input_bytes: Some(2 * 1024 * 1024),
    max_tokens: Some(250_000),
    max_ast_nodes: Some(250_000),
    max_set_op_chain: Some(128),
};

let formatted = format_with_options("SELECT a,b FROM t", DialectType::Postgres, &options).unwrap();
assert!(formatted[0].contains("SELECT"));

Guard failures include stable codes in the error message:

E_GUARD_INPUT_TOO_LARGE
E_GUARD_TOKEN_BUDGET_EXCEEDED
E_GUARD_AST_BUDGET_EXCEEDED
E_GUARD_SET_OP_CHAIN_EXCEEDED

Fluent Builder

use polyglot_sql::builder::*;

// SELECT id, name FROM users WHERE age > 18 ORDER BY name LIMIT 10
let expr = select(["id", "name"])
    .from("users")
    .where_(col("age").gt(lit(18)))
    .order_by(["name"])
    .limit(10)
    .build();

Expression Helpers

use polyglot_sql::builder::*;

// Column references (supports dotted names)
let c = col("users.id");

// Literals
let s = lit("hello");   // 'hello'
let n = lit(42);         // 42
let f = lit(3.14);       // 3.14
let b = lit(true);       // TRUE

// Operators
let cond = col("age").gte(lit(18)).and(col("status").eq(lit("active")));

// Functions
let f = func("COALESCE", [col("a"), col("b"), null()]);

CASE Expressions

use polyglot_sql::builder::*;

let expr = case()
    .when(col("x").gt(lit(0)), lit("positive"))
    .when(col("x").eq(lit(0)), lit("zero"))
    .else_(lit("negative"))
    .build();

Set Operations

use polyglot_sql::builder::*;

let expr = union_all(
    select(["id"]).from("a"),
    select(["id"]).from("b"),
)
.order_by(["id"])
.limit(5)
.build();

INSERT, UPDATE, DELETE

use polyglot_sql::builder::*;

// INSERT INTO users (id, name) VALUES (1, 'Alice')
let ins = insert_into("users")
    .columns(["id", "name"])
    .values([lit(1), lit("Alice")])
    .build();

// UPDATE users SET name = 'Bob' WHERE id = 1
let upd = update("users")
    .set("name", lit("Bob"))
    .where_(col("id").eq(lit(1)))
    .build();

// DELETE FROM users WHERE id = 1
let del = delete("users")
    .where_(col("id").eq(lit(1)))
    .build();

AST Traversal

use polyglot_sql::{parse, DialectType, traversal::*};

let ast = parse("SELECT a, b FROM t WHERE x > 1", DialectType::Generic).unwrap();
let columns = get_columns(&ast[0]);
let tables = get_tables(&ast[0]);

Validation

use polyglot_sql::{validate, DialectType};

let result = validate("SELECT * FORM users", DialectType::Generic);
// result contains error with line/column location

use polyglot_sql::{
    validate_with_schema, DialectType, SchemaColumn, SchemaTable, SchemaValidationOptions,
    ValidationSchema,
};

let schema = ValidationSchema {
    strict: Some(true),
    tables: vec![
        SchemaTable {
            name: "users".into(),
            schema: None,
            columns: vec![
                SchemaColumn {
                    name: "id".into(),
                    data_type: "integer".into(),
                    nullable: Some(false),
                    primary_key: true,
                    unique: false,
                    references: None,
                },
                SchemaColumn {
                    name: "email".into(),
                    data_type: "varchar".into(),
                    nullable: Some(false),
                    primary_key: false,
                    unique: true,
                    references: None,
                },
            ],
            aliases: vec![],
            primary_key: vec!["id".into()],
            unique_keys: vec![vec!["email".into()]],
            foreign_keys: vec![],
        },
    ],
};

let opts = SchemaValidationOptions {
    check_types: true,
    check_references: true,
    strict: None,
    semantic: true,
};

let result = validate_with_schema(
    "SELECT id FROM users WHERE email = 1",
    DialectType::Generic,
    &schema,
    &opts,
);
assert!(!result.valid);

Schema-aware validation emits stable codes such as:

E200/E201 for unknown tables/columns
E210-E217 and W210-W216 for type checks
E220, E221, W220, W221, W222 for reference/FK checks

Tokenize

Access the raw token stream with full source position spans. Each token carries a Span with byte offsets and line/column numbers.

use polyglot_sql::{DialectType, Dialect};

let dialect = Dialect::new(DialectType::Generic);
let tokens = dialect.tokenize("SELECT a, b FROM t").unwrap();

for token in &tokens {
    println!("{:?} {:?} {:?}", token.token_type, token.text, token.span);
    // Select "SELECT" Span { start: 0, end: 6, line: 1, column: 1 }
    // Var    "a"      Span { start: 7, end: 8, line: 1, column: 8 }
    // ...
}

The Span struct provides:

Field	Type	Description
`start`	`usize`	Start byte offset (0-based)
`end`	`usize`	End byte offset (exclusive)
`line`	`usize`	Line number (1-based)
`column`	`usize`	Column number (1-based)

Error Reporting

Parse and tokenize errors include source position information with line/column numbers and byte offset ranges, making it straightforward to provide precise error feedback.

use polyglot_sql::{parse, DialectType};

let result = parse("SELECT 1 +", DialectType::Generic);
if let Err(e) = result {
    println!("{}", e);            // "Parse error at line 1, column 11: ..."
    println!("{:?}", e.line());   // Some(1)
    println!("{:?}", e.column()); // Some(11)
    println!("{:?}", e.start());  // Some(10) — byte offset
    println!("{:?}", e.end());    // Some(11) — byte offset (exclusive)
}

The Error enum provides line(), column(), start(), and end() accessors that return Option<usize> for Parse, Tokenize, and Syntax error variants:

use polyglot_sql::error::Error;

let err = Error::parse("Unexpected token", 3, 15);
assert_eq!(err.line(), Some(3));
assert_eq!(err.column(), Some(15));

// Generation errors don't carry position info
let err = Error::generate("unsupported expression");
assert_eq!(err.line(), None);

Supported Dialects

Athena, BigQuery, ClickHouse, CockroachDB, Databricks, Doris, Dremio, Drill, Druid, DuckDB, Dune, Exasol, Fabric, Hive, Materialize, MySQL, Oracle, PostgreSQL, Presto, Redshift, RisingWave, SingleStore, Snowflake, Solr, Spark, SQLite, StarRocks, Tableau, Teradata, TiDB, Trino, TSQL

Feature Flags

Flag	Description
`bindings`	Enable `ts-rs` TypeScript type generation

License

MIT

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

polyglot-sql

Features

Usage

Transpile

Parse + Generate

Format With Guard Options

Fluent Builder

Expression Helpers

CASE Expressions

Set Operations

INSERT, UPDATE, DELETE

AST Traversal

Validation

Tokenize

Error Reporting

Supported Dialects

Feature Flags

License

FilesExpand file tree

README.md

Latest commit

History

README.md

File metadata and controls

polyglot-sql

Features

Usage

Transpile

Parse + Generate

Format With Guard Options

Fluent Builder

Expression Helpers

CASE Expressions

Set Operations

INSERT, UPDATE, DELETE

AST Traversal

Validation

Tokenize

Error Reporting

Supported Dialects

Feature Flags

License