From 920a6f745d179d13429aa15fb044d407554c38c0 Mon Sep 17 00:00:00 2001 From: collerek Date: Thu, 14 May 2026 11:47:20 +0200 Subject: [PATCH] Align Parser return-type hints with actual UniqueList returns Properties like `columns`, `tables`, `with_names`, `subqueries_names`, and `columns_aliases_names` were annotated as `list[str]` but actually returned `UniqueList`, which silently deduplicates on append. The mismatch surprised callers who wrote tests against `list` semantics and got dedup behavior at runtime. Updates the annotations (and matching extractor signatures in `TableExtractor.extract`, `NestedResolver.extract_cte_names`, `NestedResolver.extract_subqueries`) to declare `UniqueList`, so the public contract reflects what the code has always done. Resolves #291. Co-Authored-By: Claude --- sql_metadata/nested_resolver.py | 6 +++--- sql_metadata/parser.py | 30 ++++++++++++++++-------------- sql_metadata/table_extractor.py | 2 +- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/sql_metadata/nested_resolver.py b/sql_metadata/nested_resolver.py index 3efa9ef2..91687c13 100644 --- a/sql_metadata/nested_resolver.py +++ b/sql_metadata/nested_resolver.py @@ -207,7 +207,7 @@ def __init__( def extract_cte_names( self, cte_name_map: dict[str, str], - ) -> list[str]: + ) -> UniqueList: """Extract CTE names from the AST. Called by :attr:`Parser.with_names`. @@ -247,7 +247,7 @@ def extract_cte_bodies( @staticmethod def extract_subqueries( ast: exp.Expression, - ) -> tuple[list[str], dict[str, str]]: + ) -> tuple[UniqueList, dict[str, str]]: """Extract subquery names and bodies in a single post-order walk. Aliased subqueries keep their alias as the name. Unaliased @@ -262,7 +262,7 @@ def extract_subqueries( :returns: ``(names, bodies)`` where *names* is ordered innermost-first, e.g. ``(["subquery_1", "sub"], {...})``. """ - names: list[str] = UniqueList() + names = UniqueList() bodies: dict[str, str] = {} NestedResolver._walk_subqueries(ast, names, bodies, 0) return names, bodies diff --git a/sql_metadata/parser.py b/sql_metadata/parser.py index 3adbd039..c92538fc 100644 --- a/sql_metadata/parser.py +++ b/sql_metadata/parser.py @@ -63,13 +63,13 @@ def __init__(self, sql: str = "", disable_logging: bool = False) -> None: self._columns_aliases_dict: dict[str, UniqueList] = {} self._output_columns: list[str] = [] - self._tables: list[str] | None = None + self._tables: UniqueList | None = None self._table_aliases: dict[str, str] | None = None - self._with_names: list[str] | None = None + self._with_names: UniqueList | None = None self._with_queries: dict[str, str] | None = None self._subqueries: dict[str, str] | None = None - self._subqueries_names: list[str] | None = None + self._subqueries_names: UniqueList | None = None self._limit_and_offset: tuple[int, int] | None = None @@ -168,7 +168,7 @@ def tokens(self) -> list[str]: return self._tokens @property - def columns(self) -> list[str]: + def columns(self) -> UniqueList: """Return the list of column names referenced in the query. Walks the sqlglot AST via :class:`ColumnExtractor` in a single DFS @@ -177,7 +177,7 @@ def columns(self) -> list[str]: SQL), falls back to a regex extraction of ``INTO … (col1, col2)`` column lists. - :rtype: list[str] + :rtype: UniqueList """ if self._columns_extracted: return self._columns @@ -276,10 +276,10 @@ def columns_aliases_dict(self) -> dict[str, UniqueList]: return self._columns_aliases_dict @property - def columns_aliases_names(self) -> list[str]: + def columns_aliases_names(self) -> UniqueList: """Return the names of all column aliases used in the query. - :rtype: list[str] + :rtype: UniqueList """ if not self._columns_extracted: _ = self.columns @@ -299,14 +299,14 @@ def output_columns(self) -> list[str]: return self._output_columns @property - def tables(self) -> list[str]: + def tables(self) -> UniqueList: """Return the list of table names referenced in the query. Tables are extracted from the AST by :class:`TableExtractor`, sorted by their position in the SQL text, and filtered to exclude CTE names (which appear in :attr:`with_names` instead). - :rtype: list[str] + :rtype: UniqueList """ if self._tables is not None: return self._tables @@ -339,10 +339,10 @@ def tables_aliases(self) -> dict[str, str]: return self._table_aliases @property - def with_names(self) -> list[str]: + def with_names(self) -> UniqueList: """Return the CTE (Common Table Expression) names from the query. - :rtype: list[str] + :rtype: UniqueList """ if self._with_names is not None: return self._with_names @@ -387,13 +387,13 @@ def subqueries(self) -> dict[str, str]: return self._subqueries @property - def subqueries_names(self) -> list[str]: + def subqueries_names(self) -> UniqueList: """Return the names of all subqueries (innermost first). Aliased subqueries use their alias; unaliased ones get auto-generated names (``subquery_1``, ``subquery_2``, …). - :rtype: list[str] + :rtype: UniqueList """ if self._subqueries_names is not None: return self._subqueries_names @@ -482,7 +482,9 @@ def values_dict(self) -> dict[str, Any] | None: is_multi = values and isinstance(values[0], list) first_row = values[0] if is_multi else values if not columns: - columns = [f"column_{ind + 1}" for ind in range(len(first_row))] + columns = UniqueList( + f"column_{ind + 1}" for ind in range(len(first_row)) + ) if is_multi: self._values_dict = { diff --git a/sql_metadata/table_extractor.py b/sql_metadata/table_extractor.py index 573634f1..1c37492d 100644 --- a/sql_metadata/table_extractor.py +++ b/sql_metadata/table_extractor.py @@ -141,7 +141,7 @@ def __init__( # Public API # ------------------------------------------------------------------- - def extract(self) -> list[str]: + def extract(self) -> UniqueList: """Extract table names, excluding CTE definitions. For ``CREATE TABLE`` statements, the target table is always placed