Skip to content

Commit 48703c4

Browse files
committed
Chore(tsql)!: temporarily move VARCHAR length inference logic to Fabric
1 parent 4cc321c commit 48703c4

4 files changed

Lines changed: 87 additions & 75 deletions

File tree

sqlglot/dialects/fabric.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33

4-
from sqlglot import exp
4+
from sqlglot import exp, transforms
55
from sqlglot.dialects.dialect import NormalizationStrategy
66
from sqlglot.dialects.tsql import TSQL
77
from sqlglot.tokens import TokenType
@@ -27,6 +27,27 @@ def _cap_data_type_precision(expression: exp.DataType, max_precision: int = 6) -
2727
)
2828

2929

30+
def _add_default_precision_to_varchar(expression: exp.Expression) -> exp.Expression:
    """Give length-less VARCHAR/CHAR columns of a CREATE TABLE an explicit MAX length.

    Used as a generator preprocessing step for cross-dialect conversion: a column
    declared as plain ``VARCHAR`` or ``CHAR`` is rewritten to ``VARCHAR(MAX)`` or
    ``CHAR(MAX)``. Columns that already carry a length, and every other kind of
    expression, are left untouched.

    Args:
        expression: The expression about to be generated.

    Returns:
        The same ``expression`` object; matching column types are modified in place.
    """
    # Only CREATE TABLE statements with an explicit column schema are rewritten.
    if not (isinstance(expression, exp.Create) and expression.kind == "TABLE"):
        return expression

    schema = expression.this
    if not isinstance(schema, exp.Schema):
        return expression

    text_types = (exp.DataType.Type.VARCHAR, exp.DataType.Type.CHAR)

    for column_def in schema.expressions:
        # Schema entries may also be constraints etc.; only column definitions matter.
        if not isinstance(column_def, exp.ColumnDef):
            continue

        data_type = column_def.kind
        if not isinstance(data_type, exp.DataType):
            continue

        # Skip non-text types and text types that already specify a length.
        if data_type.this not in text_types or data_type.expressions:
            continue

        # For transpilation, VARCHAR/CHAR without precision becomes VARCHAR(MAX)/CHAR(MAX).
        # NOTE(review): the T-SQL docs list `max` only for varchar, not char -- confirm
        # that CHAR(MAX) is accepted by the target engine.
        data_type.set("expressions", [exp.var("MAX")])

    return expression
49+
50+
3051
class Fabric(TSQL):
3152
"""
3253
Microsoft Fabric Data Warehouse dialect that inherits from T-SQL.
@@ -60,6 +81,29 @@ class Tokenizer(TSQL.Tokenizer):
6081
"UTINYINT": TokenType.UTINYINT,
6182
}
6283

84+
class Parser(TSQL.Parser):
    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement and apply the default VARCHAR/CHAR length of 1.

        Delegates to the T-SQL parser, then, for ``CREATE TABLE`` statements with a
        column schema, sets a length of 1 on every VARCHAR/CHAR column that was
        declared without one.

        Returns:
            The node produced by the parent parser; a parsed ``exp.Create`` may have
            its column types modified in place, anything else is returned as is.
        """
        parsed = super()._parse_create()

        # The parent may return something other than exp.Create; pass it through.
        if not isinstance(parsed, exp.Create):
            return parsed

        if parsed.kind != "TABLE" or not isinstance(parsed.this, exp.Schema):
            return parsed

        text_types = (exp.DataType.Type.VARCHAR, exp.DataType.Type.CHAR)

        # Transform VARCHAR/CHAR without precision to VARCHAR(1)/CHAR(1)
        for column_def in parsed.this.expressions:
            if not isinstance(column_def, exp.ColumnDef):
                continue

            data_type = column_def.kind
            if not isinstance(data_type, exp.DataType):
                continue

            # Leave non-text types and explicitly sized text types alone.
            if data_type.this not in text_types or data_type.expressions:
                continue

            # When n isn't specified in a data definition or variable declaration
            # statement, the default length is 1.
            # https://learn.microsoft.com/en-us/sql/t-sql/data-types/char-and-varchar-transact-sql?view=sql-server-ver17#remarks
            data_type.set("expressions", [exp.Literal.number("1")])

        return parsed
106+
63107
class Generator(TSQL.Generator):
64108
# Fabric-specific type mappings - override T-SQL types that aren't supported
65109
# Reference: https://learn.microsoft.com/en-us/fabric/data-warehouse/data-types
@@ -85,6 +129,11 @@ class Generator(TSQL.Generator):
85129
exp.DataType.Type.XML: "VARCHAR",
86130
}
87131

132+
TRANSFORMS = {
133+
**TSQL.Generator.TRANSFORMS,
134+
exp.Create: transforms.preprocess([_add_default_precision_to_varchar]),
135+
}
136+
88137
def datatype_sql(self, expression: exp.DataType) -> str:
89138
# Check if this is a temporal type that needs precision handling. Fabric limits temporal
90139
# types to max 6 digits precision. When no precision is specified, we default to 6 digits.

sqlglot/dialects/tsql.py

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -392,27 +392,6 @@ def _timestrtotime_sql(self: TSQL.Generator, expression: exp.TimeStrToTime):
392392
return sql
393393

394394

395-
def _add_default_precision_to_varchar(expression: exp.Expression) -> exp.Expression:
396-
"""Transform function to add VARCHAR(MAX) or CHAR(MAX) for cross-dialect conversion."""
397-
if (
398-
isinstance(expression, exp.Create)
399-
and expression.kind == "TABLE"
400-
and isinstance(expression.this, exp.Schema)
401-
):
402-
for column in expression.this.expressions:
403-
if isinstance(column, exp.ColumnDef):
404-
column_type = column.kind
405-
if (
406-
isinstance(column_type, exp.DataType)
407-
and column_type.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.CHAR)
408-
and not column_type.expressions
409-
):
410-
# For transpilation, VARCHAR/CHAR without precision becomes VARCHAR(MAX)/CHAR(MAX)
411-
column_type.set("expressions", [exp.var("MAX")])
412-
413-
return expression
414-
415-
416395
def _build_datetrunc(args: t.List) -> exp.TimestampTrunc:
417396
unit = seq_get(args, 0)
418397
this = seq_get(args, 1)
@@ -890,22 +869,6 @@ def _parse_create(self) -> exp.Create | exp.Command:
890869

891870
create.args["properties"].append("expressions", exp.TemporaryProperty())
892871

893-
# Transform VARCHAR/CHAR without precision to VARCHAR(1)/CHAR(1)
894-
if create.kind == "TABLE" and isinstance(create.this, exp.Schema):
895-
for column in create.this.expressions:
896-
if isinstance(column, exp.ColumnDef):
897-
column_type = column.kind
898-
if (
899-
isinstance(column_type, exp.DataType)
900-
and column_type.this
901-
in (exp.DataType.Type.VARCHAR, exp.DataType.Type.CHAR)
902-
and not column_type.expressions
903-
):
904-
# Add default precision of 1 to VARCHAR/CHAR without precision
905-
# When n isn't specified in a data definition or variable declaration statement, the default length is 1.
906-
# https://learn.microsoft.com/en-us/sql/t-sql/data-types/char-and-varchar-transact-sql?view=sql-server-ver17#remarks
907-
column_type.set("expressions", [exp.Literal.number("1")])
908-
909872
return create
910873

911874
def _parse_if(self) -> t.Optional[exp.Expression]:
@@ -1043,7 +1006,6 @@ class Generator(generator.Generator):
10431006
exp.DateAdd: date_delta_sql("DATEADD"),
10441007
exp.DateDiff: date_delta_sql("DATEDIFF"),
10451008
exp.CTE: transforms.preprocess([qualify_derived_table_outputs]),
1046-
exp.Create: transforms.preprocess([_add_default_precision_to_varchar]),
10471009
exp.CurrentDate: rename_func("GETDATE"),
10481010
exp.CurrentTimestamp: rename_func("GETDATE"),
10491011
exp.CurrentTimestampLTZ: rename_func("SYSDATETIMEOFFSET"),

tests/dialects/test_fabric.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,40 @@ def test_unix_to_time(self):
9595
"UNIX_TO_TIME(column)",
9696
"DATEADD(MICROSECONDS, CAST(ROUND(column * 1e6, 0) AS BIGINT), CAST('1970-01-01' AS DATETIME2(6)))",
9797
)
98+
99+
def test_varchar_precision_inference(self):
    """Check how VARCHAR/CHAR columns without a length are handled in CREATE TABLE.

    Covers two paths: round-tripping through this test suite's dialect, where a
    missing length becomes 1, and reading from other dialects, where a missing
    length becomes MAX.
    """
    # VARCHAR without a length is parsed as VARCHAR(1)
    self.validate_identity(
        "CREATE TABLE t (col VARCHAR)",
        "CREATE TABLE t (col VARCHAR(1))",
    )

    # VARCHAR with an explicit length must remain unchanged
    self.validate_identity("CREATE TABLE t (col VARCHAR(50))")

    # CHAR without a length is parsed as CHAR(1)
    self.validate_identity(
        "CREATE TABLE t (col CHAR)",
        "CREATE TABLE t (col CHAR(1))",
    )

    # CHAR with an explicit length must remain unchanged
    self.validate_identity("CREATE TABLE t (col CHAR(10))")

    # Cross-dialect conversion: a Postgres VARCHAR without a length is generated as
    # VARCHAR(MAX); a T-SQL VARCHAR(MAX) passes through unchanged
    self.validate_all(
        "CREATE TABLE t (col VARCHAR(MAX))",
        read={
            "postgres": "CREATE TABLE t (col VARCHAR)",
            "tsql": "CREATE TABLE t (col VARCHAR(MAX))",
        },
    )

    # Cross-dialect conversion: a Postgres CHAR without a length is generated as
    # CHAR(MAX); a T-SQL CHAR(MAX) passes through unchanged
    self.validate_all(
        "CREATE TABLE t (col CHAR(MAX))",
        read={
            "postgres": "CREATE TABLE t (col CHAR)",
            "tsql": "CREATE TABLE t (col CHAR(MAX))",
        },
    )

tests/dialects/test_tsql.py

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -288,42 +288,6 @@ def test_tsql(self):
288288
"CREATE TABLE [db].[tbl] ([a] INTEGER)",
289289
)
290290

291-
# Test VARCHAR without precision conversion to VARCHAR(1)
292-
self.validate_identity(
293-
"CREATE TABLE t (col VARCHAR)",
294-
"CREATE TABLE t (col VARCHAR(1))",
295-
)
296-
297-
# Test VARCHAR with existing precision should remain unchanged
298-
self.validate_identity("CREATE TABLE t (col VARCHAR(50))")
299-
300-
# Test CHAR without precision conversion to CHAR(1)
301-
self.validate_identity(
302-
"CREATE TABLE t (col CHAR)",
303-
"CREATE TABLE t (col CHAR(1))",
304-
)
305-
306-
# Test CHAR with existing precision should remain unchanged
307-
self.validate_identity("CREATE TABLE t (col CHAR(10))")
308-
309-
# Test cross-dialect conversion: non-TSQL VARCHAR -> TSQL VARCHAR(MAX)
310-
self.validate_all(
311-
"CREATE TABLE t (col VARCHAR(MAX))",
312-
read={
313-
"postgres": "CREATE TABLE t (col VARCHAR)",
314-
"tsql": "CREATE TABLE t (col VARCHAR(MAX))",
315-
},
316-
)
317-
318-
# Test cross-dialect conversion: non-TSQL CHAR -> TSQL CHAR(MAX)
319-
self.validate_all(
320-
"CREATE TABLE t (col CHAR(MAX))",
321-
read={
322-
"postgres": "CREATE TABLE t (col CHAR)",
323-
"tsql": "CREATE TABLE t (col CHAR(MAX))",
324-
},
325-
)
326-
327291
self.validate_identity("SELECT a = 1", "SELECT 1 AS a").selects[0].assert_is(
328292
exp.Alias
329293
).args["alias"].assert_is(exp.Identifier)

0 commit comments

Comments
 (0)