Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -846,17 +846,12 @@ def _build_grain_group_from_preagg_table(
if col.semantic_type in ("metric_component", "measure", "metric"):
col_ref = ast.Column(name=ast.Name(col.name))

# Find the component to get the merge function. The third
# clause matches legacy pre-agg / grain-group shapes where
# the LIMITED-DISTINCT column lived under the bare grain
# name (before the `<bare> AS <hashed>` projection landed) —
# keeps re-aggregation correct against stale materializations.
# Find the component to get the merge function
merge_func = None
for comp in original_gg.components:
if ( # pragma: no branch
comp.name == col.name
or original_gg.component_aliases.get(comp.name) == col.name
or comp.grain_alias == col.name
):
merge_func = comp.merge
break
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -486,19 +486,7 @@ def build_synthetic_grain_group(

for comp in decomposed.components:
if comp.name not in component_aliases: # pragma: no branch
# Legacy compat: cubes materialized before LIMITED-DISTINCT
# gained its `<bare> AS <hashed>` projection had the column
# stored under the bare grain name (e.g. ``order_id``)
# rather than the hashed identity (``order_id_distinct_<hash>``).
# Try the hashed name first (current shape), then fall back
# to the grain_alias (legacy shape) before defaulting to
# ``comp.name``. Keeps stale cube configs queryable until
# they're re-materialized.
cube_col_name = mat_col_lookup.get(comp.name)
if cube_col_name is None and comp.grain_alias:
cube_col_name = mat_col_lookup.get(comp.grain_alias)
if cube_col_name is None:
cube_col_name = comp.name
cube_col_name = mat_col_lookup.get(comp.name, comp.name)
component_aliases[comp.name] = cube_col_name
all_components.append(comp)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,43 +164,6 @@ def _parse_type_string(type_str: str | None) -> ct.ColumnType | None:
return _TYPE_STRING_MAP.get(normalized)


def register_limited_component(
component: MetricComponent,
metric_node: Node,
component_aliases: dict[str, str],
component_expressions: list[tuple[str, ast.Expression]],
component_metadata: list[tuple[str, MetricComponent, Node]],
) -> None:
"""Register a LIMITED component (e.g. ``COUNT(DISTINCT)``).

For plain-column DISTINCT, ``grain_alias`` is the bare column and
differs from ``component.name`` (the hashed identity). Emit an
extra ``<bare> AS <component.name>`` projection so the hashed name
is addressable alongside the bare grain column, and route
``component_aliases`` at the hashed name so combiner rewriters
stay consistent. Both call sites in ``build_grain_group_sql`` must
use this helper to keep the contract in one place.
"""
# The preagg-read path assumes LIMITED.merge is None to skip
# re-aggregation; a future LIMITED+merge variant would wrap the
# merge over the bare grain column and produce wrong SQL.
assert component.merge is None, (
f"LIMITED component {component.name!r} has merge={component.merge!r}; "
f"update the preagg-read path in combiners.py before adding this."
)
grain_alias = component.grain_alias or component.name
if grain_alias != component.name:
component_aliases[component.name] = component.name
component_expressions.append(
(component.name, make_column_ref(grain_alias)),
)
component_metadata.append(
(component.name, component, metric_node),
)
else:
component_aliases[component.name] = grain_alias


def infer_component_type(
component: MetricComponent,
metric_type: str,
Expand Down Expand Up @@ -1784,14 +1747,10 @@ def build_grain_group_from_preagg(
f"Component {component.name} not found in pre-agg {preagg.id}",
)

# Always alias the output as ``component.name`` (the hashed
# identity) so the CTE's output column name matches the
# metric's persisted ``derived_expression`` regardless of how
# the pre-agg physically stored it. Legacy pre-aggs
# materialized before the LIMITED-DISTINCT change carry the
# column under the bare grain name; we read that bare column
# and alias it back to the hashed identity here.
output_alias = component.name
# Always use the measure column name (component hash) as the output alias
# This ensures consistency with the non-preagg path
output_alias = measure_col

component_aliases[component.name] = output_alias

col_ref = ast.Column(name=ast.Name(measure_col))
Expand All @@ -1806,17 +1765,11 @@ def build_grain_group_from_preagg(
aliased = ast.Alias(child=agg_expr, alias=ast.Name(output_alias))
select_items.append(aliased)
else:
# No merge - output grain column directly, add to GROUP BY.
# Alias to ``component.name`` so the CTE output name is
# the hashed identity regardless of the pre-agg's stored
# column name.
if measure_col != output_alias:
select_items.append(
ast.Alias(child=col_ref, alias=ast.Name(output_alias)),
)
else:
select_items.append(col_ref)
# No merge - output grain column directly, add to GROUP BY
select_items.append(col_ref)
grain_col_names.append(measure_col)
output_alias = measure_col
component_aliases[component.name] = output_alias

# Get type from pre-agg columns
col_type = preagg.get_column_type(measure_col, default="double")
Expand Down Expand Up @@ -1957,12 +1910,11 @@ def build_grain_group_sql(
Aggregability.FULL,
)
if orig_agg == Aggregability.LIMITED:
register_limited_component(
component,
metric_node,
component_aliases,
component_expressions,
component_metadata,
# LIMITED: grain column is already in GROUP BY, no output needed.
# grain_alias was set by _make_component: plain column → column name,
# complex expression → component.name.
component_aliases[component.name] = (
component.grain_alias or component.name
)
continue
else:
Expand All @@ -1977,14 +1929,12 @@ def build_grain_group_sql(
component_aliases[component.name] = component_alias
continue

# Skip LIMITED aggregability components with no aggregation
# These are represented by grain columns instead.
# grain_alias was set by _make_component: plain column → column name,
# complex expression → component.name.
if component.rule.type == Aggregability.LIMITED and not component.aggregation:
register_limited_component(
component,
metric_node,
component_aliases,
component_expressions,
component_metadata,
)
component_aliases[component.name] = component.grain_alias or component.name
continue

# Always use component.name for consistency - no special case for single-component
Expand Down
9 changes: 0 additions & 9 deletions datajunction-server/tests/api/cubes_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4964,7 +4964,6 @@ async def test_materialize_cube_returns_metric_combiners(
order_id,
SUM(line_total_sum_e1f61696) line_total_sum_e1f61696,
hll_union_agg(customer_id_hll_23002251) customer_id_hll_23002251,
order_id_distinct_f93d50ab,
week_order
FROM analytics.preaggs.v3_revenue_orders_by_week
GROUP BY category, order_id, week_order
Expand Down Expand Up @@ -5004,14 +5003,6 @@ async def test_materialize_cube_returns_metric_combiners(
"semantic_type": "metric_component",
"type": "binary",
},
{
"column": None,
"name": "order_id_distinct_f93d50ab",
"node": None,
"semantic_entity": "v3.order_count:order_id_distinct_f93d50ab",
"semantic_type": "metric_component",
"type": "bigint",
},
{
"column": None,
"name": "week_order",
Expand Down
4 changes: 2 additions & 2 deletions datajunction-server/tests/construction/build_v3/djsql_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,12 @@ async def test_derived_metric(self, client_with_build_v3):
JOIN default.v3.order_items oi ON o.order_id = oi.order_id
),
order_details_0 AS (
SELECT t1.status, t1.order_id, SUM(t1.line_total) line_total_sum_e1f61696, t1.order_id order_id_distinct_f93d50ab
SELECT t1.status, t1.order_id, SUM(t1.line_total) line_total_sum_e1f61696
FROM v3_order_details t1
GROUP BY t1.status, t1.order_id
)
SELECT order_details_0.status AS status,
SUM(order_details_0.line_total_sum_e1f61696) / NULLIF(COUNT(DISTINCT order_details_0.order_id_distinct_f93d50ab), 0) AS avg_order_value
SUM(order_details_0.line_total_sum_e1f61696) / NULLIF(COUNT(DISTINCT order_details_0.order_id), 0) AS avg_order_value
FROM order_details_0
GROUP BY order_details_0.status
""",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,18 +99,17 @@ async def test_metric_filter_with_less_than(
order_details_0 AS (
SELECT
t2.category,
t1.order_id,
t1.order_id order_id_distinct_f93d50ab
t1.order_id
FROM v3_order_details t1
LEFT OUTER JOIN v3_product t2 ON t1.product_id = t2.product_id
GROUP BY t2.category, t1.order_id
)
SELECT
order_details_0.category AS category,
COUNT( DISTINCT order_details_0.order_id_distinct_f93d50ab) AS order_count
COUNT( DISTINCT order_details_0.order_id) AS order_count
FROM order_details_0
GROUP BY order_details_0.category
HAVING COUNT( DISTINCT order_details_0.order_id_distinct_f93d50ab) < 100
HAVING COUNT( DISTINCT order_details_0.order_id) < 100
""",
)

Expand Down Expand Up @@ -195,18 +194,17 @@ async def test_metric_filter_with_equality(
order_details_0 AS (
SELECT
t2.category,
t1.order_id,
t1.order_id order_id_distinct_f93d50ab
t1.order_id
FROM v3_order_details t1
LEFT OUTER JOIN v3_product t2 ON t1.product_id = t2.product_id
GROUP BY t2.category, t1.order_id
)
SELECT
order_details_0.category AS category,
COUNT( DISTINCT order_details_0.order_id_distinct_f93d50ab) AS order_count
COUNT( DISTINCT order_details_0.order_id) AS order_count
FROM order_details_0
GROUP BY order_details_0.category
HAVING COUNT(DISTINCT order_details_0.order_id_distinct_f93d50ab) = 50
HAVING COUNT(DISTINCT order_details_0.order_id) = 50
""",
)

Expand Down Expand Up @@ -259,21 +257,20 @@ async def test_multiple_metric_filters_and(
SELECT
t2.category,
t1.order_id,
SUM(t1.line_total) line_total_sum_e1f61696,
t1.order_id order_id_distinct_f93d50ab
SUM(t1.line_total) line_total_sum_e1f61696
FROM v3_order_details t1
LEFT OUTER JOIN v3_product t2 ON t1.product_id = t2.product_id
GROUP BY t2.category, t1.order_id
)
SELECT
order_details_0.category AS category,
SUM(order_details_0.line_total_sum_e1f61696) AS total_revenue,
COUNT( DISTINCT order_details_0.order_id_distinct_f93d50ab) AS order_count
COUNT( DISTINCT order_details_0.order_id) AS order_count
FROM order_details_0
GROUP BY order_details_0.category
HAVING
SUM(order_details_0.line_total_sum_e1f61696) > 5000
AND COUNT( DISTINCT order_details_0.order_id_distinct_f93d50ab) > 20
AND COUNT( DISTINCT order_details_0.order_id) > 20
""",
)

Expand Down Expand Up @@ -439,8 +436,7 @@ async def test_complex_mixed_filters(
t2.category,
t1.status,
t1.order_id,
SUM(t1.line_total) line_total_sum_e1f61696,
t1.order_id order_id_distinct_f93d50ab
SUM(t1.line_total) line_total_sum_e1f61696
FROM v3_order_details t1
LEFT OUTER JOIN v3_product t2 ON t1.product_id = t2.product_id
WHERE t2.category IN ('Electronics', 'Clothing')
Expand All @@ -450,11 +446,11 @@ async def test_complex_mixed_filters(
order_details_0.category AS category,
order_details_0.status AS status,
SUM(order_details_0.line_total_sum_e1f61696) AS total_revenue,
COUNT(DISTINCT order_details_0.order_id_distinct_f93d50ab) AS order_count
COUNT(DISTINCT order_details_0.order_id) AS order_count
FROM order_details_0
WHERE order_details_0.category IN ('Electronics', 'Clothing') AND order_details_0.status = 'completed'
GROUP BY order_details_0.category, order_details_0.status
HAVING SUM(order_details_0.line_total_sum_e1f61696) > 10000 AND COUNT( DISTINCT order_details_0.order_id_distinct_f93d50ab) >= 50
HAVING SUM(order_details_0.line_total_sum_e1f61696) > 10000 AND COUNT( DISTINCT order_details_0.order_id) >= 50
""",
)

Expand Down Expand Up @@ -505,17 +501,16 @@ async def test_derived_metric_filter(
SELECT
t2.category,
t1.order_id,
SUM(t1.line_total) line_total_sum_e1f61696,
t1.order_id order_id_distinct_f93d50ab
SUM(t1.line_total) line_total_sum_e1f61696
FROM v3_order_details t1 LEFT OUTER JOIN v3_product t2 ON t1.product_id = t2.product_id
GROUP BY t2.category, t1.order_id
)
SELECT
order_details_0.category AS category,
SUM(order_details_0.line_total_sum_e1f61696) / NULLIF(COUNT( DISTINCT order_details_0.order_id_distinct_f93d50ab), 0) AS avg_order_value
SUM(order_details_0.line_total_sum_e1f61696) / NULLIF(COUNT( DISTINCT order_details_0.order_id), 0) AS avg_order_value
FROM order_details_0
GROUP BY order_details_0.category
HAVING SUM(order_details_0.line_total_sum_e1f61696) / NULLIF(COUNT( DISTINCT order_details_0.order_id_distinct_f93d50ab), 0) > 100
HAVING SUM(order_details_0.line_total_sum_e1f61696) / NULLIF(COUNT( DISTINCT order_details_0.order_id), 0) > 100
""",
)

Expand Down
Loading
Loading