Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/examples/05_joined_measures/joined_measures.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,16 @@ We then evaluate both queries (the results may have different cardinality becaus

This way we guarantee that the values of that joined measure are exactly the same as in the original — as that is exactly how it's evaluated.

[Transforms](../../concepts/formulas.md) like `cumsum()` and `change()` work on cross-model measures too — the transform is applied after the sub-query join:
All [transforms](../../concepts/formulas.md) work on cross-model measures — window transforms (`cumsum`, `lag`, `lead`, `rank`, `last`) and self-join transforms (`change`, `change_pct`, `time_shift`) alike. Window transforms are applied as window functions over the sub-query result; self-join transforms generate their own CTE chain on top of the cross-model sub-query.

```json
{
"source_model": "orders",
"time_dimensions": [{"dimension": {"name": "ordered_at"}, "granularity": "month"}],
"fields": [
{"formula": "customers.count"},
{"formula": "cumsum(customers.count)", "name": "cumulative_customers"}
{"formula": "cumsum(customers.count)", "name": "cumulative_customers"},
{"formula": "change(customers.count)", "name": "count_change"}
]
}
```
Expand Down
307 changes: 231 additions & 76 deletions slayer/sql/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,20 +486,179 @@ def _generate_with_computed(self, enriched: EnrichedQuery, base_sql: str,

return sql

def _build_cm_cte_sql(self, cm, enriched: EnrichedQuery,
                      time_offset=None) -> str:
    """Render the SELECT statement that forms the body of a cross-model measure CTE.

    The statement selects every shared dimension and shared time dimension
    of ``cm`` plus the aggregated measure, reads from the source model
    LEFT JOINed to the target model, applies the parent query's WHERE
    filters, and groups by the shared (time) dimensions when there are any.

    Args:
        cm: CrossModelMeasure describing the source/target models, the join
            pairs, the shared dimensions and the measure to aggregate.
        enriched: The parent EnrichedQuery; only its WHERE filters are used.
        time_offset: Optional ``(offset, granularity)`` tuple used by
            calendar-based self-join transforms. When given, every shared
            time dimension expression is shifted before it is truncated.

    Returns:
        The CTE body as a SQL string (without the ``name AS (...)`` wrapper).
    """
    dialect = self.dialect
    projections = []
    grouping = []

    # Shared dimensions are both projected and grouped on.
    for dim in cm.shared_dimensions:
        dim_sql = self._resolve_sql(
            sql=dim.sql, name=dim.name, model_name=cm.source_model_name,
        ).sql(dialect=dialect)
        projections.append(f'{dim_sql} AS "{dim.alias}"')
        grouping.append(dim_sql)

    # Shared time dimensions: optionally shifted, then truncated to their granularity.
    for td in cm.shared_time_dimensions:
        raw_expr = self._resolve_sql(
            sql=td.sql, name=td.name, model_name=cm.source_model_name,
        )
        if time_offset is not None:
            shift_by, shift_gran = time_offset
            raw_expr = self._build_time_offset_expr(
                col_expr=raw_expr, offset=shift_by, granularity=shift_gran,
            )
        bucket_sql = self._build_date_trunc(
            col_expr=raw_expr, granularity=td.granularity,
        ).sql(dialect=dialect)
        projections.append(f'{bucket_sql} AS "{td.alias}"')
        grouping.append(bucket_sql)

    # The aggregated measure is projected last and is never grouped on.
    agg_expr, _ = self._build_agg(measure=cm.measure)
    projections.append(f'{agg_expr.sql(dialect=dialect)} AS "{cm.alias}"')

    # Either side of the join may be an inline SQL definition (wrapped as a
    # sub-select) or a plain table reference.
    source_rel = f"({cm.source_sql})" if cm.source_sql else cm.source_sql_table
    from_sql = f"{source_rel} AS {cm.source_model_name}"

    target_rel = (
        f"({cm.target_model_sql})" if cm.target_model_sql
        else cm.target_model_sql_table
    )
    target_from = f"{target_rel} AS {cm.target_model_name}"

    join_on = " AND ".join(
        f"{cm.source_model_name}.{src_dim} = {cm.target_model_name}.{tgt_dim}"
        for src_dim, tgt_dim in cm.join_pairs
    )

    clauses = [
        f"SELECT {', '.join(projections)}",
        f"FROM {from_sql}",
        f"LEFT JOIN {target_from} ON {join_on}",
    ]

    # Only the WHERE part of the parent query's filters is reused here.
    where_clause, _ = self._build_where_and_having(enriched=enriched)
    if where_clause is not None:
        clauses.append(f"WHERE {where_clause.sql(dialect=dialect)}")

    if grouping:
        clauses.append(f"GROUP BY {', '.join(grouping)}")

    return "\n".join(clauses)

def _build_cm_self_join_ctes(self, t, cm, cm_cte_name: str,
                             enriched: EnrichedQuery) -> list:
    """Build the CTE chain for a self-join transform on a cross-model measure.

    Two join strategies are used:

    * Calendar-based, when the transform carries a granularity, or when the
      query has a two-element date range and the matching time dimension
      supplies one. The cross-model CTE is regenerated with shifted time
      expressions and joined back on the time column.
    * Row-based otherwise. Both sides get a ``ROW_NUMBER()`` column ordered
      by the time column and are joined via ``_build_row_number_join``.

    Args:
        t: The self-join transform (reads ``name``, ``alias``, ``transform``,
            ``offset``, ``granularity`` and ``time_alias``).
        cm: The CrossModelMeasure the transform targets.
        cm_cte_name: Name of the already-generated CTE for ``cm``.
        enriched: The parent EnrichedQuery (time dimensions and WHERE filters).

    Returns:
        A list of ``(cte_name, cte_sql)`` tuples to append to the top-level
        CTEs. The last CTE in the chain contains the transform result.

    Raises:
        ValueError: If the transform has no time alias; both join strategies
            need a time column to order or join on.
    """
    # Both strategies interpolate the time column into SQL, so a missing
    # alias would otherwise produce a literal "None" in the generated query.
    if not t.time_alias:
        raise ValueError(
            f"Self-join transform '{t.name}' on a cross-model measure "
            "requires a time dimension"
        )
    time_col = f'"{t.time_alias}"'

    result_ctes = []

    # Column aliases exposed by the CM CTE: dimensions, time dimensions, measure.
    cm_col_aliases = [
        *(dim.alias for dim in cm.shared_dimensions),
        *(td.alias for td in cm.shared_time_dimensions),
        cm.alias,
    ]
    quoted_cols = ", ".join(f'"{a}"' for a in cm_col_aliases)

    # Determine the effective join granularity.
    has_date_ranges = any(
        td.date_range and len(td.date_range) == 2
        for td in enriched.time_dimensions
    )
    join_granularity = t.granularity
    if not join_granularity and has_date_ranges:
        for td in enriched.time_dimensions:
            if td.alias == t.time_alias:
                join_granularity = td.granularity.value
                break

    is_calendar = join_granularity is not None
    src_cte = cm_cte_name

    # Row-based join: number the rows of the original CM CTE. The CTE name
    # includes the transform name so that several row-based transforms on the
    # same cross-model measure do not emit the same CTE name twice.
    if not is_calendar:
        rn_cte_name = f"{cm_cte_name}_rn_{t.name}"
        rn_sql = (
            f"SELECT {quoted_cols}, ROW_NUMBER() OVER (ORDER BY {time_col}) "
            f"AS _rn FROM {cm_cte_name}"
        )
        result_ctes.append((rn_cte_name, rn_sql))
        src_cte = rn_cte_name

    # Shifted base CTE.
    shift_base_name = f"shifted_base_cm_{t.name}"
    if is_calendar:
        # Calendar-based: regenerate the CM CTE with shifted time expressions.
        shifted_sql = self._build_cm_cte_sql(
            cm=cm, enriched=enriched,
            time_offset=(-t.offset, join_granularity),
        )
    else:
        # Row-based: the shifted base is identical to the original CM CTE;
        # the shift itself happens in the row-number join condition.
        shifted_sql = self._build_cm_cte_sql(cm=cm, enriched=enriched)
    result_ctes.append((shift_base_name, shifted_sql))

    # Shifted CTE: row-numbered for the row-based join, a pass-through otherwise.
    shift_name = f"shifted_cm_{t.name}"
    if is_calendar:
        result_ctes.append((shift_name, f"SELECT * FROM {shift_base_name}"))
    else:
        shift_rn_sql = (
            f"SELECT {quoted_cols}, ROW_NUMBER() OVER (ORDER BY {time_col}) "
            f"AS _rn FROM {shift_base_name}"
        )
        result_ctes.append((shift_name, shift_rn_sql))

    # Self-join condition.
    if is_calendar:
        # NOTE(review): only the time column is compared here; confirm
        # whether shared non-time dimensions should also be matched.
        join_cond = f'{src_cte}.{time_col} = {shift_name}.{time_col}'
    else:
        join_cond = self._build_row_number_join(
            left_table=src_cte, right_table=shift_name, offset=t.offset,
        )

    col_sql = self._build_self_join_column(
        transform=t.transform, left_table=src_cte,
        right_table=shift_name, measure_alias=cm.alias,
    )
    # The self-join CTE carries every CM column through, plus the transform result.
    join_cols = ", ".join(f'{src_cte}."{a}"' for a in cm_col_aliases)
    sjoin_name = f"sjoin_cm_{t.name}"
    sjoin_sql = (
        f"SELECT {join_cols}, {col_sql} AS \"{t.alias}\"\n"
        f"FROM {src_cte}\n"
        f"LEFT JOIN {shift_name}\n"
        f"  ON {join_cond}"
    )
    result_ctes.append((sjoin_name, sjoin_sql))

    return result_ctes

def _generate_with_cross_model(self, enriched: EnrichedQuery,
base_sql: str, is_cte: bool) -> str:
base_sql: str, is_cte: bool = False) -> str:
"""Wrap the main query with cross-model measure sub-queries.

Each cross-model measure becomes a CTE that aggregates the target model's
measure scoped to shared dimensions, then LEFT JOINed to the main query.

Window transforms (cumsum, lag, lead, rank, last) are applied as window
functions in the outer SELECT. Self-join transforms (change, change_pct,
time_shift) generate additional CTE layers on top of the cross-model CTE.
"""
_ = is_cte # All paths wrap base_sql as a CTE
# Wrap the base/computed SQL as a CTE
if is_cte:
# base_sql is already a WITH ... SELECT — wrap it as a subquery CTE
main_cte = f"_main AS (\n{base_sql}\n)"
else:
main_cte = f"_main AS (\n{base_sql}\n)"

main_cte = f"_main AS (\n{base_sql}\n)"
ctes = [main_cte]

# Build join columns from the main query (for the final SELECT)
Expand All @@ -513,7 +672,7 @@ def _generate_with_cross_model(self, enriched: EnrichedQuery,
for expr in enriched.expressions:
main_columns.append(expr.alias)
# Transforms that depend on cross-model aliases are computed in the
# outer SELECT, not inside _main — exclude them from main_columns
# outer SELECT or via extra CTEs — exclude them from main_columns
cm_aliases_pre = {cm.alias for cm in enriched.cross_model_measures}
for t in enriched.transforms:
if t.measure_alias not in cm_aliases_pre:
Expand All @@ -531,105 +690,101 @@ def _generate_with_cross_model(self, enriched: EnrichedQuery,
if is_duplicate:
continue # CTE already generated, just reuse in final SELECT

# Build the sub-query: SELECT shared_dims, AGG(measure) FROM target GROUP BY shared_dims
select_parts = []
group_parts = []

# Shared dimensions
for dim in cm.shared_dimensions:
col_expr = self._resolve_sql(sql=dim.sql, name=dim.name, model_name=cm.source_model_name)
col_sql = col_expr.sql(dialect=self.dialect)
select_parts.append(f'{col_sql} AS "{dim.alias}"')
group_parts.append(col_sql)

# Shared time dimensions
for td in cm.shared_time_dimensions:
col_expr = self._resolve_sql(sql=td.sql, name=td.name, model_name=cm.source_model_name)
td_expr = self._build_date_trunc(col_expr=col_expr, granularity=td.granularity)
td_sql = td_expr.sql(dialect=self.dialect)
select_parts.append(f'{td_sql} AS "{td.alias}"')
group_parts.append(td_sql)

# The measure aggregation
agg_expr, _ = self._build_agg(measure=cm.measure)
select_parts.append(f'{agg_expr.sql(dialect=self.dialect)} AS "{cm.alias}"')

# FROM: source table with JOIN to target
if cm.source_sql:
from_sql = f"({cm.source_sql}) AS {cm.source_model_name}"
else:
from_sql = f"{cm.source_sql_table} AS {cm.source_model_name}"

# JOIN to target model
if cm.target_model_sql:
target_from = f"({cm.target_model_sql}) AS {cm.target_model_name}"
else:
target_from = f"{cm.target_model_sql_table} AS {cm.target_model_name}"

join_conditions = []
for src_dim, tgt_dim in cm.join_pairs:
join_conditions.append(
f"{cm.source_model_name}.{src_dim} = {cm.target_model_name}.{tgt_dim}"
)
join_on = " AND ".join(join_conditions)

cte_sql = (
f"SELECT {', '.join(select_parts)}\n"
f"FROM {from_sql}\n"
f"LEFT JOIN {target_from} ON {join_on}"
)

# Apply the main query's WHERE filters to the cross-model CTE
where_clause, _ = self._build_where_and_having(enriched=enriched)
if where_clause is not None:
cte_sql += f"\nWHERE {where_clause.sql(dialect=self.dialect)}"

if group_parts:
cte_sql += f"\nGROUP BY {', '.join(group_parts)}"

cte_sql = self._build_cm_cte_sql(cm=cm, enriched=enriched)
ctes.append(f"{cte_name} AS (\n{cte_sql}\n)")

# Identify transforms that depend on cross-model measure aliases
cm_aliases = {cm.alias for _, cm in cm_cte_names}
post_cm_transforms = [t for t in enriched.transforms if t.measure_alias in cm_aliases]
cm_window_transforms = [t for t in post_cm_transforms if t.transform not in _SELF_JOIN_TRANSFORMS]
cm_self_join_transforms = [t for t in post_cm_transforms if t.transform in _SELF_JOIN_TRANSFORMS]

# Build self-join CTE chains for self-join transforms on cross-model measures.
# Maps transform alias -> sjoin CTE name (for the final SELECT/JOIN).
sjoin_cte_map = {}
for t in cm_self_join_transforms:
# Find the CM and CTE name this transform targets
target_cm = None
target_cte_name = ""
for cte_name, cm in cm_cte_names:
if cm.alias == t.measure_alias:
target_cm = cm
target_cte_name = cte_name
break
if target_cm is None:
raise ValueError(f"No cross-model measure found for transform '{t.name}'")

extra_ctes = self._build_cm_self_join_ctes(
t=t, cm=target_cm, cm_cte_name=target_cte_name, enriched=enriched,
)
for name, sql in extra_ctes:
ctes.append(f"{name} AS (\n{sql}\n)")
# The last CTE in the chain has the transform result
sjoin_cte_map[t.alias] = (extra_ctes[-1][0], target_cm)

# Build final SELECT: main columns + cross-model measure columns + post-CM transforms
# Build final SELECT: main columns + cross-model measure columns + transforms
final_parts = [f'_main."{a}"' for a in main_columns]

# Add bare cross-model measure columns (from base CM CTEs or sjoin CTEs)
seen_cm_aliases = set()
for cte_name, cm in cm_cte_names:
if cm.alias not in seen_cm_aliases:
seen_cm_aliases.add(cm.alias)
final_parts.append(f'{cte_name}."{cm.alias}"')
for t in post_cm_transforms:
# If a self-join transform targets this CM, get the measure from the
# sjoin CTE (which carries it through); otherwise from the base CM CTE
source_cte = cte_name
for sjoin_cte_name, sjoin_cm in sjoin_cte_map.values():
if sjoin_cm.alias == cm.alias:
source_cte = sjoin_cte_name
break
final_parts.append(f'{source_cte}."{cm.alias}"')
Comment on lines +733 to +740
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check the sjoin CTE structure - does it carry through the base measure?
# Look at _build_cm_self_join_ctes to see what columns the sjoin CTE selects
ast-grep --pattern $'def _build_cm_self_join_ctes($$$) {
  $$$
}'

Repository: MotleyAI/slayer

Length of output: 41


🏁 Script executed:

#!/bin/bash
# Search for the SELECT in sjoin CTE construction
rg -n "sjoin_sql" slayer/sql/generator.py -A5 -B5

Repository: MotleyAI/slayer

Length of output: 797


🏁 Script executed:

# Find where sjoin_cte_map is initialized and populated
rg -n "sjoin_cte_map" slayer/sql/generator.py | head -20

Repository: MotleyAI/slayer

Length of output: 448


🏁 Script executed:

# Look at the broader context around line 638 to understand join_cols construction
sed -n '620,660p' slayer/sql/generator.py

Repository: MotleyAI/slayer

Length of output: 1809


🏁 Script executed:

# Search for where _build_cm_self_join_ctes or similar function is called
rg -n "build.*sjoin|self.*join.*cte" slayer/sql/generator.py -i

Repository: MotleyAI/slayer

Length of output: 833


🏁 Script executed:

# Look at the loop that builds sjoin_cte_map to understand structure
rg -n "for.*sjoin|sjoin_cte_map\[" slayer/sql/generator.py -B5 -A5

Repository: MotleyAI/slayer

Length of output: 3587


🏁 Script executed:

# Get lines 768-790 to see how JOINs are built
sed -n '768,790p' slayer/sql/generator.py

Repository: MotleyAI/slayer

Length of output: 1171


🏁 Script executed:

# Also check what happens after the loop - is the full FROM clause constructed?
sed -n '778,810p' slayer/sql/generator.py

Repository: MotleyAI/slayer

Length of output: 1537


🏁 Script executed:

# Search for how cm_cte_names is populated to understand if duplicates are possible
rg -n "cm_cte_names" slayer/sql/generator.py -B3 -A3

Repository: MotleyAI/slayer

Length of output: 2667


🏁 Script executed:

# Look for _build_cm_self_join_ctes function definition to see if it can chain sjoin CTEs
rg -n "def _build_cm_self_join_ctes" slayer/sql/generator.py -A20

Repository: MotleyAI/slayer

Length of output: 974


🏁 Script executed:

# Search for test cases with multiple transforms on same cross-model measure
rg -n "change.*time_shift|time_shift.*change" --type py

Repository: MotleyAI/slayer

Length of output: 1357


🏁 Script executed:

# Check if there are any safeguards or comments about multiple transforms on same CM
rg -n "multiple.*transform|duplicate.*transform" slayer/sql/generator.py -i

Repository: MotleyAI/slayer

Length of output: 41


🏁 Script executed:

# Verify: when second transform is processed, does it use base CM CTE or previous sjoin CTE?
# Check the loop context around line 705-723 more carefully
sed -n '705,725p' slayer/sql/generator.py

Repository: MotleyAI/slayer

Length of output: 1020


Multiple self-join transforms on the same cross-model measure will fail with a missing table error.

If a query applies multiple self-join transforms to the same cross-model measure (e.g., both change(customers.avg_score) and time_shift(customers.avg_score, -2)), each transform creates its own sjoin CTE. However, the JOIN construction at lines 771-777 breaks after adding the first matching sjoin_cte for a given CM, leaving subsequent sjoin_ctes in the CTE definition but not in the FROM clause. When the SELECT clause tries to reference all transform results (lines 765-766), it fails because only the first sjoin_cte is properly joined to _main.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@slayer/sql/generator.py` around lines 733 - 740, The current loop that sets
source_cte for a cross-model measure stops at the first matching sjoin CTE (it
uses a break) so subsequent self-join transforms for the same cm.alias are never
exposed in the FROM/JOINs; update the logic that iterates sjoin_cte_map.values()
so every sjoin_cte_name whose sjoin_cm.alias == cm.alias is handled (do not
break), and make final_parts reference the correct specific sjoin_cte_name for
each transform rather than a single shared source_cte; also ensure the JOIN
construction code that consumes these CTE names iterates over all matching
sjoin_cte_names so each sjoin CTE is actually joined into _main.


# Add window transforms in outer SELECT
for t in cm_window_transforms:
window_sql = self._build_transform_sql(t)
# Replace the quoted measure alias with the cross-model CTE reference
for cte_name, cm in cm_cte_names:
if cm.alias == t.measure_alias:
# If a sjoin CTE exists for this CM, reference it
source_cte = cte_name
for sjoin_cte_name, sjoin_cm in sjoin_cte_map.values():
if sjoin_cm.alias == cm.alias:
source_cte = sjoin_cte_name
break
window_sql = window_sql.replace(
f'"{t.measure_alias}"', f'{cte_name}."{cm.alias}"'
f'"{t.measure_alias}"', f'{source_cte}."{cm.alias}"'
)
break
# Qualify time alias with _main to avoid ambiguity in JOINed context
if t.time_alias:
window_sql = window_sql.replace(
f'"{t.time_alias}"', f'_main."{t.time_alias}"'
)
final_parts.append(f'{window_sql} AS "{t.alias}"')

# Build JOINs: join each cross-model CTE to _main on shared dimensions (deduplicate)
# Add self-join transform columns
for t in cm_self_join_transforms:
sjoin_cte_name, _ = sjoin_cte_map[t.alias]
final_parts.append(f'{sjoin_cte_name}."{t.alias}"')

# Build JOINs: join each cross-model CTE (or its sjoin CTE) to _main
from_clause = "FROM _main"
joined_ctes = set()
for cte_name, cm in cm_cte_names:
if cte_name in joined_ctes:
# Determine which CTE to join: sjoin CTE if self-join transforms exist, else base CM CTE
join_cte = cte_name
for sjoin_cte_name, sjoin_cm in sjoin_cte_map.values():
if sjoin_cm.alias == cm.alias:
join_cte = sjoin_cte_name
break
if join_cte in joined_ctes:
continue
joined_ctes.add(cte_name)
joined_ctes.add(join_cte)
join_on_parts = []
for dim in cm.shared_dimensions:
join_on_parts.append(f'_main."{dim.alias}" = {cte_name}."{dim.alias}"')
join_on_parts.append(f'_main."{dim.alias}" = {join_cte}."{dim.alias}"')
for td in cm.shared_time_dimensions:
join_on_parts.append(f'_main."{td.alias}" = {cte_name}."{td.alias}"')
join_on_parts.append(f'_main."{td.alias}" = {join_cte}."{td.alias}"')
if join_on_parts:
from_clause += f"\nLEFT JOIN {cte_name} ON {' AND '.join(join_on_parts)}"
from_clause += f"\nLEFT JOIN {join_cte} ON {' AND '.join(join_on_parts)}"
Comment on lines +768 to +787
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if there are tests for multiple self-join transforms on the same measure
rg -n "change.*time_shift|time_shift.*change" tests/ --type py

Repository: MotleyAI/slayer

Length of output: 324


🏁 Script executed:

#!/bin/bash
# Find where sjoin_cte_map is populated and understand its structure
rg -n "sjoin_cte_map\[" slayer/sql/generator.py -A 2 -B 2

Repository: MotleyAI/slayer

Length of output: 579


🏁 Script executed:

#!/bin/bash
# Find where cm_cte_names is built
rg -n "cm_cte_names" slayer/sql/generator.py | head -20

Repository: MotleyAI/slayer

Length of output: 394


🏁 Script executed:

#!/bin/bash
# Look for tests with multiple distinct self-join transforms on same cross-model measure
rg -n "change.*customers\.|time_shift.*customers\." tests/ --type py -A 3 -B 3

Repository: MotleyAI/slayer

Length of output: 2408


🏁 Script executed:

#!/bin/bash
# Find where cm_self_join_transforms is defined and populated
rg -n "cm_self_join_transforms" slayer/sql/generator.py -B 5 -A 2

Repository: MotleyAI/slayer

Length of output: 1263


🏁 Script executed:

#!/bin/bash
# Look at the context around lines 771-786 to understand the full JOIN logic
sed -n '680,790p' slayer/sql/generator.py

Repository: MotleyAI/slayer

Length of output: 5359


🏁 Script executed:

#!/bin/bash
# Check if there's a test with both change() and time_shift() on the same cross-model measure
rg -n "change\(" tests/ --type py -A 10 | rg -A 10 "time_shift"

Repository: MotleyAI/slayer

Length of output: 2829


🏁 Script executed:

#!/bin/bash
# Search for any test with multiple distinct self-join transforms on same measure
# (not nested, but parallel like Field(change(...)) and Field(time_shift(...)))
rg -n "Field\(" tests/test_sql_generator.py -A 1 | rg -B 1 "change\(.*\)" | rg -A 10 "time_shift"

Repository: MotleyAI/slayer

Length of output: 552


🏁 Script executed:

#!/bin/bash
# Check if any test has both change and time_shift in the same query (as separate fields)
rg "change\(" tests/test_sql_generator.py -A 5 | grep -c "time_shift"

Repository: MotleyAI/slayer

Length of output: 60


🏁 Script executed:

#!/bin/bash
# Check the integration tests more carefully for same-measure scenarios
sed -n '1090,1110p' tests/integration/test_integration.py

Repository: MotleyAI/slayer

Length of output: 852


🏁 Script executed:

#!/bin/bash
# Look for tests with multiple fields on same cross-model measure
rg -n "Field.*change.*customers\.|Field.*time_shift.*customers\." tests/ --type py -B 5 -A 10 | head -40

Repository: MotleyAI/slayer

Length of output: 3395


🏁 Script executed:

#!/bin/bash
# Look for test with BOTH change AND time_shift as separate fields on same CM
grep -n "Field.*formula.*change" tests/test_sql_generator.py -A 5 -B 5 | grep -A 5 -B 5 "time_shift"

Repository: MotleyAI/slayer

Length of output: 1013


🏁 Script executed:

#!/bin/bash
# Create a minimal test case to see if this scenario is tested anywhere
rg "fields=\[" tests/ --type py -A 20 | rg -B 15 "change\(" | rg -A 15 "time_shift\(" | head -50

Repository: MotleyAI/slayer

Length of output: 3899


🏁 Script executed:

#!/bin/bash
# Check the _build_cm_self_join_ctes method to confirm each transform creates separate CTE chains
rg -n "_build_cm_self_join_ctes" slayer/sql/generator.py -A 1

Repository: MotleyAI/slayer

Length of output: 348


🏁 Script executed:

#!/bin/bash
# Look at the method signature and how it's called
sed -n '595,730p' slayer/sql/generator.py | grep -A 50 "_build_cm_self_join_ctes"

Repository: MotleyAI/slayer

Length of output: 743


🏁 Script executed:

#!/bin/bash
# Confirm the scenario by looking at the actual flow: 
# When we have two distinct self-join transforms on the same CM,
# each creates separate sjoin_cte entries, but the JOIN loop only uses first one
sed -n '771,790p' slayer/sql/generator.py

Repository: MotleyAI/slayer

Length of output: 1031


🏁 Script executed:

#!/bin/bash
# Check if there's defensive logic we missed - maybe duplicate CM entries or multi-CTE joins
sed -n '745,790p' slayer/sql/generator.py

Repository: MotleyAI/slayer

Length of output: 2207


Multiple self-join transforms targeting the same cross-model measure will fail with an unjoined CTE reference.

When two distinct self-join transforms reference the same cross-model measure (e.g., Field(formula="change(customers.avg_score)") and Field(formula="time_shift(customers.avg_score, -1)")), each generates its own CTE chain. The JOIN building loop (lines 771-786) only joins the first matching CTE per CM due to the break statement, leaving the second transform's CTE unjoined. The subsequent SELECT references both CTEs (line 765-766), resulting in a SQL error.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@slayer/sql/generator.py` around lines 768 - 787, The JOIN loop currently
stops at the first sjoin CTE matching a cross-model (due to the break), leaving
additional CTEs unjoined; update the logic in the block that builds FROM/_main
so that for each (cte_name, cm) in cm_cte_names you collect all sjoin CTE names
whose sjoin_cm.alias == cm.alias (iterate over sjoin_cte_map.values() without
breaking) and treat each matching sjoin_cte_name as a separate join target (fall
back to the base cte_name if none match); for each join target (join_cte) that
is not yet in joined_ctes compute join_on_parts from cm.shared_dimensions and
cm.shared_time_dimensions and append a LEFT JOIN {join_cte} ON ... so every CTE
produced for that CM is joined rather than only the first.


sql = f"WITH {','.join(ctes)}\nSELECT {', '.join(final_parts)}\n{from_clause}"

Expand Down
Loading
Loading