Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion sqlmesh/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from functools import lru_cache, reduce, wraps
from pathlib import Path

import unicodedata
from sqlglot import exp
from sqlglot.dialects.dialect import Dialects

Expand Down Expand Up @@ -291,8 +292,14 @@ def sqlglot_dialects() -> str:

# Matches any single character other than an ASCII letter, ASCII digit, or underscore.
NON_ALNUM = re.compile(r"[^a-zA-Z0-9_]")

# Matches any single non-word character (`\W`). For str patterns, word characters
# include Unicode letters and digits plus underscore, so non-ASCII alphanumerics
# (e.g. CJK characters) are NOT matched by this pattern.
# NOTE(review): "ALUM" looks like a typo for "ALNUM" — confirm before renaming,
# since other modules may import this name.
NON_ALUM_INCLUDE_UNICODE = re.compile(r"\W", flags=re.UNICODE)

def sanitize_name(name: str) -> str:

def sanitize_name(name: str, *, include_unicode: bool = False) -> str:
    """Return *name* with every disallowed character replaced by an underscore.

    Args:
        name: The raw string to sanitize.
        include_unicode: When False (the default), every character matched by
            ``NON_ALNUM`` is replaced. When True, *name* is first normalized
            to Unicode NFC and then every character matched by
            ``NON_ALUM_INCLUDE_UNICODE`` is replaced.

    Returns:
        The sanitized string; each replaced character becomes one ``_``.
    """
    if not include_unicode:
        return NON_ALNUM.sub("_", name)

    # Normalize before substituting so the regex sees the NFC form of the input.
    normalized = unicodedata.normalize("NFC", name)
    return NON_ALUM_INCLUDE_UNICODE.sub("_", normalized)


Expand Down
2 changes: 1 addition & 1 deletion sqlmesh/utils/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def clear(self) -> None:

def _cache_entry_path(self, name: str, entry_id: str = "") -> Path:
entry_file_name = "__".join(p for p in (self._cache_version, name, entry_id) if p)
full_path = self._path / sanitize_name(entry_file_name)
full_path = self._path / sanitize_name(entry_file_name, include_unicode=True)
if IS_WINDOWS:
# handle paths longer than 260 chars
full_path = fix_windows_path(full_path)
Expand Down
23 changes: 23 additions & 0 deletions tests/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import pytest

from sqlmesh.utils import sanitize_name


@pytest.mark.parametrize(
    "raw,exclude_unicode,include_unicode",
    [
        # Already-safe ASCII names are returned unchanged in both modes.
        ("simple", "simple", "simple"),
        ("snake_case", "snake_case", "snake_case"),
        # Non-ASCII letters are replaced by default but kept with include_unicode.
        ("客户数据", "____", "客户数据"),
        ("客户-数据 v2", "______v2", "客户_数据_v2"),
        ("中文,逗号", "_____", "中文_逗号"),
        # Separators and whitespace are replaced in both modes.
        ("a/b", "a_b", "a_b"),
        ("spaces\tand\nnewlines", "spaces_and_newlines", "spaces_and_newlines"),
        # The emoji is replaced in both modes.
        ("data📦2025", "data_2025", "data_2025"),
        ("MiXeD123_名字", "MiXeD123___", "MiXeD123_名字"),
        # Empty input stays empty.
        ("", "", ""),
    ],
)
def test_sanitize_name_no_(raw, exclude_unicode, include_unicode):
    """Check sanitize_name output for the default and include_unicode modes."""
    ascii_only = sanitize_name(raw)
    assert ascii_only == exclude_unicode

    unicode_aware = sanitize_name(raw, include_unicode=True)
    assert unicode_aware == include_unicode
1 change: 1 addition & 0 deletions tests/utils/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def test_file_cache(tmp_path: Path, mocker: MockerFixture):
loader.assert_called_once()

assert "___test_model_" in cache._cache_entry_path('"test_model"').name
assert "客户数据" in cache._cache_entry_path("客户数据").name


def test_optimized_query_cache(tmp_path: Path, mocker: MockerFixture):
Expand Down