Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .circleci/continue_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,8 @@ workflows:
name: cloud_engine_<< matrix.engine >>
context:
- sqlmesh_cloud_database_integration
requires:
- engine_tests_docker
# requires:
# - engine_tests_docker
matrix:
parameters:
engine:
Expand All @@ -313,10 +313,10 @@ workflows:
- athena
- fabric
- gcp-postgres
filters:
branches:
only:
- main
# filters:
# branches:
# only:
# - main
- ui_style
- ui_test
- vscode_test
Expand Down
2 changes: 2 additions & 0 deletions sqlmesh/core/engine_adapter/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class DatabricksEngineAdapter(SparkEngineAdapter):
SUPPORTS_CLONING = True
SUPPORTS_MATERIALIZED_VIEWS = True
SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True
# Spark has this set to false for compatibility when mixing with Trino but that isn't a concern with Databricks
QUOTE_IDENTIFIERS_IN_VIEWS = True
SCHEMA_DIFFER_KWARGS = {
"support_positional_add": True,
"nested_support": NestedSupport.ALL,
Expand Down
7 changes: 5 additions & 2 deletions sqlmesh/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import types
import typing as t
import uuid
import unicodedata
from dataclasses import dataclass
from collections import defaultdict
from contextlib import contextmanager
Expand Down Expand Up @@ -289,11 +290,13 @@ def sqlglot_dialects() -> str:
return "'" + "', '".join(Dialects.__members__.values()) + "'"


# Matches any character that is not a "word" character (letters, digits,
# underscore) under Unicode semantics, so non-ASCII letters/digits such as
# CJK characters are preserved.  For `str` patterns in Python 3 Unicode
# matching is already the default; `flags=re.UNICODE` is kept explicit to
# document the intent (this deliberately replaced an ASCII-only
# `[^a-zA-Z0-9_]` pattern).
NON_WORD = re.compile(r"\W", flags=re.UNICODE)


def sanitize_name(name: str) -> str:
    """Sanitize ``name`` so it is safe to use as an identifier or file name.

    The input is first NFC-normalized so that visually identical strings
    built from different codepoint sequences (e.g. precomposed vs. combining
    accents) sanitize to the same result; every remaining non-word character
    is then replaced with an underscore.
    """
    normalized = unicodedata.normalize("NFC", name)
    return NON_WORD.sub("_", normalized)


def groupby(
Expand Down
29 changes: 29 additions & 0 deletions tests/core/engine_adapter/integration/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3990,3 +3990,32 @@ def _set_config(gateway: str, config: Config) -> None:
was_evaluated=True,
day_delta=4,
)


def test_unicode_characters(ctx: TestContext, tmp_path: Path):
    """Verify a model whose name contains non-ASCII (CJK) characters can be
    planned and materialized as a view on the target engine.

    Fix: pass ``encoding="utf-8"`` to ``write_text`` — without it the file is
    written in the platform's locale encoding, which raises
    ``UnicodeEncodeError`` for the CJK model definition on e.g. Windows/cp1252.
    """
    if ctx.dialect in ["spark", "trino"]:
        # It is possible that Trino could support this if we changed `QUOTE_IDENTIFIERS_IN_VIEWS` but that would
        # break the compatibility it has when being mixed with Spark for compute
        pytest.skip("Skipping as these engines have issues with unicode characters in model names")

    model_name = "客户数据"
    table = ctx.table(model_name).sql(dialect=ctx.dialect)
    (tmp_path / "models").mkdir(exist_ok=True)

    model_def = f"""
    MODEL (
        name {table},
        kind FULL,
        dialect '{ctx.dialect}'
    );
    SELECT 1 as id
    """

    (tmp_path / "models" / "客户数据.sql").write_text(model_def, encoding="utf-8")

    context = ctx.create_context(path=tmp_path)
    context.plan(auto_apply=True, no_prompts=True)

    # Exactly one view should exist and its (case-normalized) name should
    # round-trip to the original unicode model name.
    results = ctx.get_metadata_results()
    assert len(results.views) == 1
    assert results.views[0].lower() == model_name
22 changes: 22 additions & 0 deletions tests/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pytest

from sqlmesh.utils import sanitize_name


@pytest.mark.parametrize(
    "raw,expected",
    [
        # ASCII identifiers pass through untouched.
        ("simple", "simple"),
        ("snake_case", "snake_case"),
        # Unicode letters count as word characters and are preserved.
        ("客户数据", "客户数据"),
        # ASCII dash/space are non-word characters and become underscores.
        ("客户-数据 v2", "客户_数据_v2"),
        # Full-width punctuation is also replaced.
        ("中文,逗号", "中文_逗号"),
        ("a/b", "a_b"),
        # All whitespace kinds (tab, newline) map to underscores.
        ("spaces\tand\nnewlines", "spaces_and_newlines"),
        # Emoji are non-word characters.
        ("data📦2025", "data_2025"),
        # Mixed-case ASCII, digits, underscores and CJK all survive.
        ("MiXeD123_名字", "MiXeD123_名字"),
        # Empty input stays empty.
        ("", ""),
    ],
)
def test_sanitize_known_cases(raw, expected):
    """sanitize_name keeps Unicode word characters and maps everything else to "_"."""
    sanitized = sanitize_name(raw)
    assert sanitized == expected
1 change: 1 addition & 0 deletions tests/utils/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def test_file_cache(tmp_path: Path, mocker: MockerFixture):
loader.assert_called_once()

assert "___test_model_" in cache._cache_entry_path('"test_model"').name
assert "客户数据" in cache._cache_entry_path("客户数据").name


def test_optimized_query_cache(tmp_path: Path, mocker: MockerFixture):
Expand Down