From dd9f4159e536bcf41b558e84abf62cd580be3a83 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 31 May 2026 22:12:03 +0000 Subject: [PATCH 1/9] feat(cubesql): merge view joins on shared cube member into single CubeScan Generalize the push-down-cube-join rewrite so that a join between two CubeScans (typically views) on a dimension that resolves to the same underlying cube member is merged into a single CubeScan, just like the existing __cubeJoinField cube-to-cube join. A view dimension keeps its original cube.dimension path in alias_member, which is used to detect that both sides of the equi-join reference the same shared key. Co-authored-by: Pavel Tiunov --- .../src/compile/rewrite/rules/members.rs | 103 +++++++++++++++++- 1 file changed, 101 insertions(+), 2 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 1429f99e5a7f8..95d9e7020be54 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -2803,15 +2803,114 @@ impl MemberRules { let left_join_hints_var = var!(left_join_hints_var); let right_join_hints_var = var!(right_join_hints_var); let out_join_hints_var = var!(out_join_hints_var); + let meta_context = self.meta_context.clone(); move |egraph, subst| { - let Some((left_cube, right_cube)) = is_proper_cube_join_condition( + // Resolves a join column to the name of the dimension member it + // references, but only for plain or time dimensions (measures, + // segments, etc. are not valid shared join keys). + fn dimension_member_name( + egraph: &mut CubeEGraph, + members_id: Id, + column: &Column, + ) -> Option { + match egraph[members_id].data.find_member_by_column(column) { + Some(((_, Member::Dimension { name, .. }, _), _)) + | Some(((_, Member::TimeDimension { name, .. }, _), _)) => Some(name.clone()), + _ => None, + } + } + + // Two ways to recognize a joinable pair of CubeScans: + // 1. The classic `left.__cubeJoinField = right.__cubeJoinField` + // condition that comes from the data model join graph. + // 2. A join between two CubeScans (typically views) on a + // dimension that resolves to the *same underlying cube member* + // — e.g. `orders_view.city = customers_view.city` where both + // `city` dimensions are aliases of the same `cube.dimension`. + // Such a join is on the same shared key, so the two scans can + // be merged into a single CubeScan exactly like any other + // cube-to-cube join, letting the query planner treat the result + // as a (multi-fact) query over the combined members. + let cubes = is_proper_cube_join_condition( egraph, subst, left_members_var, left_on_var, right_members_var, right_on_var, - ) else { + ); + let cubes = match cubes { + Some(cubes) => Some(cubes), + None => { + // A view dimension keeps the original `cube.dimension` path + // in `alias_member`; for non-view members we fall back to + // the member name itself. + let resolve_underlying = |member_name: &str| -> String { + meta_context + .find_dimension_with_name(member_name) + .and_then(|dim| dim.alias_member.clone()) + .unwrap_or_else(|| member_name.to_string()) + }; + + let left_join_ons = var_iter!(egraph[subst[left_on_var]], JoinLeftOn) + .cloned() + .collect::>(); + let right_join_ons = var_iter!(egraph[subst[right_on_var]], JoinRightOn) + .cloned() + .collect::>(); + + let mut found = None; + 'pairs: for left_on in left_join_ons.iter() { + for right_on in right_join_ons.iter() { + // Equi-join on a matching set of columns; every + // column pair must resolve to the same underlying + // cube member. + if left_on.is_empty() || left_on.len() != right_on.len() { + continue; + } + let mut left_cube_name: Option = None; + let mut right_cube_name: Option = None; + let mut all_match = true; + for (left_column, right_column) in left_on.iter().zip(right_on.iter()) { + let Some(left_name) = dimension_member_name( + egraph, + subst[left_members_var], + left_column, + ) else { + all_match = false; + break; + }; + let Some(right_name) = dimension_member_name( + egraph, + subst[right_members_var], + right_column, + ) else { + all_match = false; + break; + }; + if resolve_underlying(&left_name) != resolve_underlying(&right_name) + { + all_match = false; + break; + } + left_cube_name = left_name.split('.').next().map(|s| s.to_string()); + right_cube_name = + right_name.split('.').next().map(|s| s.to_string()); + } + if all_match { + if let (Some(left_cube_name), Some(right_cube_name)) = + (left_cube_name, right_cube_name) + { + found = Some((left_cube_name, right_cube_name)); + break 'pairs; + } + } + } + } + found + } + }; + let Some((left_cube, right_cube)) = cubes else { return false; }; From f60313ed99d1664c61d3e0e4646ae4e3ae1ace62 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 31 May 2026 22:18:57 +0000 Subject: [PATCH 2/9] test(cubesql): cover view join merge on shared cube member Co-authored-by: Pavel Tiunov --- rust/cubesql/cubesql/src/compile/test/mod.rs | 1 + .../src/compile/test/test_cube_join_views.rs | 117 ++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs diff --git a/rust/cubesql/cubesql/src/compile/test/mod.rs b/rust/cubesql/cubesql/src/compile/test/mod.rs index 20e63e584b5f8..7dcbc2fa862ec 100644 --- a/rust/cubesql/cubesql/src/compile/test/mod.rs +++ b/rust/cubesql/cubesql/src/compile/test/mod.rs @@ -32,6 +32,7 @@ pub mod test_bi_workarounds; pub mod test_cube_join; #[cfg(test)] pub mod test_cube_join_grouped; +pub mod test_cube_join_views; #[cfg(test)] pub mod test_cube_scan; #[cfg(test)] diff --git a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs new file mode 100644 index 0000000000000..c10b6a9faa666 --- /dev/null +++ b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs @@ -0,0 +1,117 @@ +use cubeclient::models::V1LoadRequestQuery; +use pretty_assertions::assert_eq; + +use crate::{ + compile::{ + rewrite::rewriter::Rewriter, + test::{ + convert_select_to_query_plan_with_meta, init_testing_logger, + utils::LogicalPlanTestUtils, + }, + }, + transport::{CubeMeta, CubeMetaDimension, CubeMetaMeasure}, +}; +use cubeclient::models::V1CubeMetaType; + +/// Two views that both expose the same underlying `Customers.city` +/// dimension (via `aliasMember`). `OrdersView` carries an `Orders` +/// measure while `CustomersView` carries a `Customers` measure, so a +/// query touching both is a multi-fact query joined on the shared key. +fn views_meta() -> Vec { + let dimension = |name: &str, alias: &str| CubeMetaDimension { + name: name.to_string(), + r#type: "string".to_string(), + alias_member: Some(alias.to_string()), + ..CubeMetaDimension::default() + }; + let measure = |name: &str, alias: &str| CubeMetaMeasure { + name: name.to_string(), + title: None, + short_title: None, + description: None, + r#type: "number".to_string(), + agg_type: Some("sum".to_string()), + meta: None, + alias_member: Some(alias.to_string()), + format: None, + format_description: None, + currency: None, + }; + + vec![ + CubeMeta { + name: "OrdersView".to_string(), + description: None, + title: None, + r#type: V1CubeMetaType::View, + dimensions: vec![dimension("OrdersView.city", "Customers.city")], + measures: vec![measure("OrdersView.revenue", "Orders.revenue")], + segments: vec![], + joins: None, + folders: None, + nested_folders: None, + hierarchies: None, + meta: None, + }, + CubeMeta { + name: "CustomersView".to_string(), + description: None, + title: None, + r#type: V1CubeMetaType::View, + dimensions: vec![dimension("CustomersView.city", "Customers.city")], + measures: vec![measure("CustomersView.amount", "Customers.amount")], + segments: vec![], + joins: None, + folders: None, + nested_folders: None, + hierarchies: None, + meta: None, + }, + ] +} + +/// A join between two views on a dimension that resolves to the same +/// underlying cube member (`Customers.city`) should be merged into a +/// single CubeScan over the combined members, exactly like a regular +/// cube-to-cube join. +#[tokio::test] +async fn test_join_two_views_on_shared_member() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let logical_plan = convert_select_to_query_plan_with_meta( + r#" + SELECT * + FROM OrdersView + LEFT JOIN CustomersView ON (OrdersView.city = CustomersView.city) + "# + .to_string(), + views_meta(), + ) + .await + .as_logical_plan(); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec![ + "OrdersView.revenue".to_string(), + "CustomersView.amount".to_string(), + ]), + dimensions: Some(vec![ + "OrdersView.city".to_string(), + "CustomersView.city".to_string(), + ]), + segments: Some(vec![]), + order: Some(vec![]), + ungrouped: Some(true), + join_hints: Some(vec![vec![ + "OrdersView".to_string(), + "CustomersView".to_string(), + ]]), + ..Default::default() + } + ) +} From 76963f1835d2ab229cdf6ead6a949473a601d015 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 31 May 2026 22:40:15 +0000 Subject: [PATCH 3/9] test(cubesql): add group-by view join query for shared cube member Mirror the motivating query exactly: SELECT c.customer_city, measure(o.revenue), measure(c.avg_age) FROM customers_view c LEFT JOIN orders_view o ON o.customer_city = c.customer_city GROUP BY 1 and assert it merges into a single grouped multi-fact CubeScan. Co-authored-by: Pavel Tiunov --- .../src/compile/test/test_cube_join_views.rs | 96 +++++++++++++++---- 1 file changed, 75 insertions(+), 21 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs index c10b6a9faa666..3965d02095016 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs @@ -13,10 +13,10 @@ use crate::{ }; use cubeclient::models::V1CubeMetaType; -/// Two views that both expose the same underlying `Customers.city` -/// dimension (via `aliasMember`). `OrdersView` carries an `Orders` -/// measure while `CustomersView` carries a `Customers` measure, so a -/// query touching both is a multi-fact query joined on the shared key. +/// Two views that both expose the same underlying `customers.customer_city` +/// dimension (via `aliasMember`). `orders_view` carries an `orders` measure +/// while `customers_view` carries a `customers` measure, so a query that +/// touches both is a multi-fact query joined on the shared key. fn views_meta() -> Vec { let dimension = |name: &str, alias: &str| CubeMetaDimension { name: name.to_string(), @@ -24,13 +24,13 @@ fn views_meta() -> Vec { alias_member: Some(alias.to_string()), ..CubeMetaDimension::default() }; - let measure = |name: &str, alias: &str| CubeMetaMeasure { + let measure = |name: &str, alias: &str, agg: &str| CubeMetaMeasure { name: name.to_string(), title: None, short_title: None, description: None, r#type: "number".to_string(), - agg_type: Some("sum".to_string()), + agg_type: Some(agg.to_string()), meta: None, alias_member: Some(alias.to_string()), format: None, @@ -40,12 +40,19 @@ fn views_meta() -> Vec { vec![ CubeMeta { - name: "OrdersView".to_string(), + name: "customers_view".to_string(), description: None, title: None, r#type: V1CubeMetaType::View, - dimensions: vec![dimension("OrdersView.city", "Customers.city")], - measures: vec![measure("OrdersView.revenue", "Orders.revenue")], + dimensions: vec![dimension( + "customers_view.customer_city", + "customers.customer_city", + )], + measures: vec![measure( + "customers_view.avg_age", + "customers.avg_age", + "avg", + )], segments: vec![], joins: None, folders: None, @@ -54,12 +61,15 @@ fn views_meta() -> Vec { meta: None, }, CubeMeta { - name: "CustomersView".to_string(), + name: "orders_view".to_string(), description: None, title: None, r#type: V1CubeMetaType::View, - dimensions: vec![dimension("CustomersView.city", "Customers.city")], - measures: vec![measure("CustomersView.amount", "Customers.amount")], + dimensions: vec![dimension( + "orders_view.customer_city", + "customers.customer_city", + )], + measures: vec![measure("orders_view.revenue", "orders.revenue", "sum")], segments: vec![], joins: None, folders: None, @@ -71,7 +81,7 @@ fn views_meta() -> Vec { } /// A join between two views on a dimension that resolves to the same -/// underlying cube member (`Customers.city`) should be merged into a +/// underlying cube member (`customers.customer_city`) should be merged into a /// single CubeScan over the combined members, exactly like a regular /// cube-to-cube join. #[tokio::test] @@ -84,8 +94,9 @@ async fn test_join_two_views_on_shared_member() { let logical_plan = convert_select_to_query_plan_with_meta( r#" SELECT * - FROM OrdersView - LEFT JOIN CustomersView ON (OrdersView.city = CustomersView.city) + FROM customers_view + LEFT JOIN orders_view + ON (orders_view.customer_city = customers_view.customer_city) "# .to_string(), views_meta(), @@ -97,19 +108,62 @@ async fn test_join_two_views_on_shared_member() { logical_plan.find_cube_scan().request, V1LoadRequestQuery { measures: Some(vec![ - "OrdersView.revenue".to_string(), - "CustomersView.amount".to_string(), + "customers_view.avg_age".to_string(), + "orders_view.revenue".to_string(), ]), dimensions: Some(vec![ - "OrdersView.city".to_string(), - "CustomersView.city".to_string(), + "customers_view.customer_city".to_string(), + "orders_view.customer_city".to_string(), ]), segments: Some(vec![]), order: Some(vec![]), ungrouped: Some(true), join_hints: Some(vec![vec![ - "OrdersView".to_string(), - "CustomersView".to_string(), + "customers_view".to_string(), + "orders_view".to_string(), + ]]), + ..Default::default() + } + ) +} + +/// The motivating query: a grouped (multi-fact) query selecting a dimension +/// and measures from each view, joined on the shared `customer_city`. The two +/// view scans are merged into a single grouped CubeScan over the combined +/// members. +#[tokio::test] +async fn test_group_by_join_two_views_on_shared_member() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let logical_plan = convert_select_to_query_plan_with_meta( + r#" + SELECT c.customer_city, measure(o.revenue), measure(c.avg_age) + FROM customers_view c + LEFT JOIN orders_view o ON o.customer_city = c.customer_city + GROUP BY 1 + "# + .to_string(), + views_meta(), + ) + .await + .as_logical_plan(); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec![ + "orders_view.revenue".to_string(), + "customers_view.avg_age".to_string(), + ]), + dimensions: Some(vec!["customers_view.customer_city".to_string()]), + segments: Some(vec![]), + order: Some(vec![]), + join_hints: Some(vec![vec![ + "customers_view".to_string(), + "orders_view".to_string(), ]]), ..Default::default() } From e5f4bf14ff5d31cdd08d4f4f1e968aa78c350a84 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 31 May 2026 23:22:15 +0000 Subject: [PATCH 4/9] feat(cubesql): respect inner/left/right join semantics for view joins When merging a join between two views on a shared cube member, the downstream multi-fact query is rendered as a FULL OUTER JOIN over the shared key. To recover the requested join semantics, the rewrite now adds a measure 'set' filter on each side that must be present: - INNER: both sides required - LEFT: left side required - RIGHT: right side required - FULL: no extra filter Branch presence is detected via a measure of the side (the grouping key is COALESCEd across sides downstream, so it cannot tell sides apart). Covered with left/inner group-by tests. Co-authored-by: Pavel Tiunov --- .../src/compile/rewrite/rules/members.rs | 88 ++++++++++++++++++- .../src/compile/test/test_cube_join_views.rs | 74 ++++++++++++++-- 2 files changed, 152 insertions(+), 10 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 95d9e7020be54..53766e933a9a0 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -432,6 +432,9 @@ impl RewriteRules for MemberRules { "?left_join_hints", "?right_join_hints", "?out_join_hints", + "?join_type", + "?left_filters", + "?right_filters", ), ), ]; @@ -2792,6 +2795,9 @@ impl MemberRules { left_join_hints_var: &'static str, right_join_hints_var: &'static str, out_join_hints_var: &'static str, + join_type_var: &'static str, + left_filters_var: &'static str, + right_filters_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let left_alias_to_cube_var = var!(left_alias_to_cube_var); let right_alias_to_cube_var = var!(right_alias_to_cube_var); @@ -2803,6 +2809,9 @@ impl MemberRules { let left_join_hints_var = var!(left_join_hints_var); let right_join_hints_var = var!(right_join_hints_var); let out_join_hints_var = var!(out_join_hints_var); + let join_type_var = var!(join_type_var); + let left_filters_var = var!(left_filters_var); + let right_filters_var = var!(right_filters_var); let meta_context = self.meta_context.clone(); move |egraph, subst| { // Resolves a join column to the name of the dimension member it @@ -2839,8 +2848,8 @@ impl MemberRules { right_members_var, right_on_var, ); - let cubes = match cubes { - Some(cubes) => Some(cubes), + let (cubes, shared_member_join) = match cubes { + Some(cubes) => (Some(cubes), false), None => { // A view dimension keeps the original `cube.dimension` path // in `alias_member`; for non-view members we fall back to @@ -2907,13 +2916,86 @@ impl MemberRules { } } } - found + (found, true) } }; let Some((left_cube, right_cube)) = cubes else { return false; }; + // For a join between two views on a shared cube member, Tesseract + // renders the merged multi-fact scan as a FULL OUTER JOIN over the + // shared key. Re-introduce the requested INNER/LEFT/RIGHT semantics + // by requiring a measure of each "must be present" side to be set + // (FULL adds nothing). Branch presence is detected via a measure + // because the shared grouping key is COALESCEd across sides + // downstream and so cannot distinguish which side a row came from. + if shared_member_join { + fn side_measure(egraph: &CubeEGraph, members_id: Id) -> Option { + egraph[members_id] + .data + .member_name_to_expr + .as_ref() + .and_then(|m| { + m.list.iter().find_map(|(_, member, _)| match member { + Member::Measure { name, .. } => Some(name.clone()), + _ => None, + }) + }) + } + + let mut require_left = false; + let mut require_right = false; + if let Some(join_type) = var_list_iter!(egraph[subst[join_type_var]], JoinJoinType) + .cloned() + .next() + { + match join_type.0 { + datafusion::prelude::JoinType::Inner => { + require_left = true; + require_right = true; + } + datafusion::prelude::JoinType::Left => require_left = true, + datafusion::prelude::JoinType::Right => require_right = true, + _ => {} + } + } + + let mut presence_members = vec![]; + if require_left { + if let Some(name) = side_measure(egraph, subst[left_members_var]) { + presence_members.push(name); + } + } + if require_right { + if let Some(name) = side_measure(egraph, subst[right_members_var]) { + presence_members.push(name); + } + } + + if !presence_members.is_empty() { + let mut acc = subst[left_filters_var]; + for name in presence_members { + let member = egraph.add(LogicalPlanLanguage::FilterMemberMember( + crate::compile::rewrite::FilterMemberMember(name), + )); + let op = egraph.add(LogicalPlanLanguage::FilterMemberOp( + crate::compile::rewrite::FilterMemberOp("set".to_string()), + )); + let values = egraph.add(LogicalPlanLanguage::FilterMemberValues( + crate::compile::rewrite::FilterMemberValues(vec![]), + )); + let filter_member = + egraph.add(LogicalPlanLanguage::FilterMember([member, op, values])); + acc = egraph.add(LogicalPlanLanguage::CubeScanFilters(vec![ + filter_member, + acc, + ])); + } + subst.insert(left_filters_var, acc); + } + } + for left_alias_to_cube in var_iter!(egraph[subst[left_alias_to_cube_var]], CubeScanAliasToCube) { diff --git a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs index 3965d02095016..c04c9d0cda35e 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs @@ -1,4 +1,4 @@ -use cubeclient::models::V1LoadRequestQuery; +use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryFilterItem}; use pretty_assertions::assert_eq; use crate::{ @@ -80,10 +80,22 @@ fn views_meta() -> Vec { ] } +fn set_filter(member: &str) -> V1LoadRequestQueryFilterItem { + V1LoadRequestQueryFilterItem { + member: Some(member.to_string()), + operator: Some("set".to_string()), + values: None, + or: None, + and: None, + } +} + /// A join between two views on a dimension that resolves to the same /// underlying cube member (`customers.customer_city`) should be merged into a /// single CubeScan over the combined members, exactly like a regular -/// cube-to-cube join. +/// cube-to-cube join. As a `LEFT JOIN`, the left ("must be present") side is +/// guarded with a `set` filter so the downstream FULL OUTER multi-fact stitch +/// keeps left-join semantics. #[tokio::test] async fn test_join_two_views_on_shared_member() { if !Rewriter::sql_push_down_enabled() { @@ -117,6 +129,7 @@ async fn test_join_two_views_on_shared_member() { ]), segments: Some(vec![]), order: Some(vec![]), + filters: Some(vec![set_filter("customers_view.avg_age")]), ungrouped: Some(true), join_hints: Some(vec![vec![ "customers_view".to_string(), @@ -127,12 +140,13 @@ async fn test_join_two_views_on_shared_member() { ) } -/// The motivating query: a grouped (multi-fact) query selecting a dimension -/// and measures from each view, joined on the shared `customer_city`. The two -/// view scans are merged into a single grouped CubeScan over the combined -/// members. +/// The motivating query: a grouped (multi-fact) `LEFT JOIN` selecting a +/// dimension and measures from each view, joined on the shared `customer_city`. +/// The two view scans are merged into a single grouped CubeScan, and the left +/// side gets a `set` filter to recover LEFT-join semantics on top of the +/// FULL OUTER multi-fact stitch. #[tokio::test] -async fn test_group_by_join_two_views_on_shared_member() { +async fn test_group_by_left_join_two_views_on_shared_member() { if !Rewriter::sql_push_down_enabled() { return; } @@ -161,6 +175,52 @@ async fn test_group_by_join_two_views_on_shared_member() { dimensions: Some(vec!["customers_view.customer_city".to_string()]), segments: Some(vec![]), order: Some(vec![]), + filters: Some(vec![set_filter("customers_view.avg_age")]), + join_hints: Some(vec![vec![ + "customers_view".to_string(), + "orders_view".to_string(), + ]]), + ..Default::default() + } + ) +} + +/// Same shape but `INNER JOIN`: both sides must be present, so the merged scan +/// carries a `set` filter for a measure of each side. +#[tokio::test] +async fn test_group_by_inner_join_two_views_on_shared_member() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let logical_plan = convert_select_to_query_plan_with_meta( + r#" + SELECT c.customer_city, measure(o.revenue), measure(c.avg_age) + FROM customers_view c + INNER JOIN orders_view o ON o.customer_city = c.customer_city + GROUP BY 1 + "# + .to_string(), + views_meta(), + ) + .await + .as_logical_plan(); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec![ + "orders_view.revenue".to_string(), + "customers_view.avg_age".to_string(), + ]), + dimensions: Some(vec!["customers_view.customer_city".to_string()]), + segments: Some(vec![]), + order: Some(vec![]), + filters: Some(vec![ + set_filter("orders_view.revenue"), + set_filter("customers_view.avg_age"), + ]), join_hints: Some(vec![vec![ "customers_view".to_string(), "orders_view".to_string(), From a18644fb8366381cdbefc7af9e5bd217f4f4af48 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 31 May 2026 23:34:25 +0000 Subject: [PATCH 5/9] refactor(cubesql): use join key (not a measure) for view-join presence filter Detect side presence with the side's join-key dimension instead of an arbitrary measure. The join key is always available and is the actual shared-key marker, avoiding the nullable-measure caveat and the case where a side has no selected measure. - LEFT: left join key must be set - RIGHT: right join key must be set - INNER: both join keys must be set - FULL: no extra filter Co-authored-by: Pavel Tiunov --- .../src/compile/rewrite/rules/members.rs | 37 +++++++------------ .../src/compile/test/test_cube_join_views.rs | 8 ++-- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 53766e933a9a0..3c57344b1092d 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -2848,6 +2848,8 @@ impl MemberRules { right_members_var, right_on_var, ); + let mut shared_left_keys: Vec = vec![]; + let mut shared_right_keys: Vec = vec![]; let (cubes, shared_member_join) = match cubes { Some(cubes) => (Some(cubes), false), None => { @@ -2879,6 +2881,8 @@ impl MemberRules { } let mut left_cube_name: Option = None; let mut right_cube_name: Option = None; + let mut left_keys: Vec = vec![]; + let mut right_keys: Vec = vec![]; let mut all_match = true; for (left_column, right_column) in left_on.iter().zip(right_on.iter()) { let Some(left_name) = dimension_member_name( @@ -2905,12 +2909,16 @@ impl MemberRules { left_cube_name = left_name.split('.').next().map(|s| s.to_string()); right_cube_name = right_name.split('.').next().map(|s| s.to_string()); + left_keys.push(left_name); + right_keys.push(right_name); } if all_match { if let (Some(left_cube_name), Some(right_cube_name)) = (left_cube_name, right_cube_name) { found = Some((left_cube_name, right_cube_name)); + shared_left_keys = left_keys; + shared_right_keys = right_keys; break 'pairs; } } @@ -2926,24 +2934,9 @@ impl MemberRules { // For a join between two views on a shared cube member, Tesseract // renders the merged multi-fact scan as a FULL OUTER JOIN over the // shared key. Re-introduce the requested INNER/LEFT/RIGHT semantics - // by requiring a measure of each "must be present" side to be set - // (FULL adds nothing). Branch presence is detected via a measure - // because the shared grouping key is COALESCEd across sides - // downstream and so cannot distinguish which side a row came from. + // by requiring the join key of each "must be present" side to be + // set (FULL adds nothing). if shared_member_join { - fn side_measure(egraph: &CubeEGraph, members_id: Id) -> Option { - egraph[members_id] - .data - .member_name_to_expr - .as_ref() - .and_then(|m| { - m.list.iter().find_map(|(_, member, _)| match member { - Member::Measure { name, .. } => Some(name.clone()), - _ => None, - }) - }) - } - let mut require_left = false; let mut require_right = false; if let Some(join_type) = var_list_iter!(egraph[subst[join_type_var]], JoinJoinType) @@ -2961,16 +2954,12 @@ impl MemberRules { } } - let mut presence_members = vec![]; + let mut presence_members: Vec = vec![]; if require_left { - if let Some(name) = side_measure(egraph, subst[left_members_var]) { - presence_members.push(name); - } + presence_members.extend(shared_left_keys.iter().cloned()); } if require_right { - if let Some(name) = side_measure(egraph, subst[right_members_var]) { - presence_members.push(name); - } + presence_members.extend(shared_right_keys.iter().cloned()); } if !presence_members.is_empty() { diff --git a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs index c04c9d0cda35e..2ab13089d7f2b 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs @@ -129,7 +129,7 @@ async fn test_join_two_views_on_shared_member() { ]), segments: Some(vec![]), order: Some(vec![]), - filters: Some(vec![set_filter("customers_view.avg_age")]), + filters: Some(vec![set_filter("customers_view.customer_city")]), ungrouped: Some(true), join_hints: Some(vec![vec![ "customers_view".to_string(), @@ -175,7 +175,7 @@ async fn test_group_by_left_join_two_views_on_shared_member() { dimensions: Some(vec!["customers_view.customer_city".to_string()]), segments: Some(vec![]), order: Some(vec![]), - filters: Some(vec![set_filter("customers_view.avg_age")]), + filters: Some(vec![set_filter("customers_view.customer_city")]), join_hints: Some(vec![vec![ "customers_view".to_string(), "orders_view".to_string(), @@ -218,8 +218,8 @@ async fn test_group_by_inner_join_two_views_on_shared_member() { segments: Some(vec![]), order: Some(vec![]), filters: Some(vec![ - set_filter("orders_view.revenue"), - set_filter("customers_view.avg_age"), + set_filter("orders_view.customer_city"), + set_filter("customers_view.customer_city"), ]), join_hints: Some(vec![vec![ "customers_view".to_string(), From 072a43f9641fe2aa339eb9fba8736cb08432c542 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 31 May 2026 23:45:24 +0000 Subject: [PATCH 6/9] feat(cubesql): only merge view joins when the join key is fully within dimensions Make the merge gate explicit: the entire join key must resolve to dimensions (or time dimensions) on both sides and to the same underlying cube member. A join key that touches a measure/segment/etc. is rejected and the join falls back to normal (non-merged) handling. Add a negative test that joining two views on measures is not merged. Co-authored-by: Pavel Tiunov --- .../src/compile/rewrite/rules/members.rs | 10 +++-- .../src/compile/test/test_cube_join_views.rs | 44 +++++++++++++++---- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 3c57344b1092d..65798da62d9b0 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -2873,9 +2873,13 @@ impl MemberRules { let mut found = None; 'pairs: for left_on in left_join_ons.iter() { for right_on in right_join_ons.iter() { - // Equi-join on a matching set of columns; every - // column pair must resolve to the same underlying - // cube member. + // We can only merge when the *whole* join key is + // fully within dimensions: every column pair must + // resolve to a dimension (or time dimension) on both + // sides and to the same underlying cube member. A + // join key that touches a measure/segment/etc. (or + // mixes underlying members) is rejected, leaving the + // join to the normal (non-merged) handling. if left_on.is_empty() || left_on.len() != right_on.len() { continue; } diff --git a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs index 2ab13089d7f2b..a1e1c285e0d80 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs @@ -1,17 +1,17 @@ -use cubeclient::models::{V1LoadRequestQuery, V1LoadRequestQueryFilterItem}; +use cubeclient::models::{V1CubeMetaType, V1LoadRequestQuery, V1LoadRequestQueryFilterItem}; use pretty_assertions::assert_eq; use crate::{ compile::{ rewrite::rewriter::Rewriter, test::{ - convert_select_to_query_plan_with_meta, init_testing_logger, - utils::LogicalPlanTestUtils, + convert_select_to_query_plan_with_meta, convert_sql_to_cube_query, get_test_session, + get_test_tenant_ctx_with_meta, init_testing_logger, utils::LogicalPlanTestUtils, }, + CompilationError, DatabaseProtocol, }, transport::{CubeMeta, CubeMetaDimension, CubeMetaMeasure}, }; -use cubeclient::models::V1CubeMetaType; /// Two views that both expose the same underlying `customers.customer_city` /// dimension (via `aliasMember`). `orders_view` carries an `orders` measure @@ -94,8 +94,8 @@ fn set_filter(member: &str) -> V1LoadRequestQueryFilterItem { /// underlying cube member (`customers.customer_city`) should be merged into a /// single CubeScan over the combined members, exactly like a regular /// cube-to-cube join. As a `LEFT JOIN`, the left ("must be present") side is -/// guarded with a `set` filter so the downstream FULL OUTER multi-fact stitch -/// keeps left-join semantics. +/// guarded with a `set` filter on its join key so the downstream FULL OUTER +/// multi-fact stitch keeps left-join semantics. #[tokio::test] async fn test_join_two_views_on_shared_member() { if !Rewriter::sql_push_down_enabled() { @@ -143,7 +143,7 @@ async fn test_join_two_views_on_shared_member() { /// The motivating query: a grouped (multi-fact) `LEFT JOIN` selecting a /// dimension and measures from each view, joined on the shared `customer_city`. /// The two view scans are merged into a single grouped CubeScan, and the left -/// side gets a `set` filter to recover LEFT-join semantics on top of the +/// join key gets a `set` filter to recover LEFT-join semantics on top of the /// FULL OUTER multi-fact stitch. #[tokio::test] async fn test_group_by_left_join_two_views_on_shared_member() { @@ -186,7 +186,7 @@ async fn test_group_by_left_join_two_views_on_shared_member() { } /// Same shape but `INNER JOIN`: both sides must be present, so the merged scan -/// carries a `set` filter for a measure of each side. +/// carries a `set` filter on the join key of each side. #[tokio::test] async fn test_group_by_inner_join_two_views_on_shared_member() { if !Rewriter::sql_push_down_enabled() { @@ -229,3 +229,31 @@ async fn test_group_by_inner_join_two_views_on_shared_member() { } ) } + +/// The merge only fires when the join key is fully within dimensions. Joining +/// the two views on a measure (`o.revenue = c.avg_age`) is not a shared-member +/// dimension join, so the scans are not merged and the query is rejected the +/// same way any other unsupported cube join is. +#[tokio::test] +async fn test_join_two_views_on_measure_is_not_merged() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let meta = get_test_tenant_ctx_with_meta(views_meta()); + let query = convert_sql_to_cube_query( + &r#" + SELECT * + FROM customers_view c + LEFT JOIN orders_view o ON (o.revenue = c.avg_age) + "# + .to_string(), + meta.clone(), + get_test_session(DatabaseProtocol::PostgreSQL, meta).await, + ) + .await; + + let error = query.unwrap_err(); + assert!(matches!(error, CompilationError::Rewrite(..))); +} From b95e8d750acb7da785049724df32089a810957bf Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 1 Jun 2026 20:26:15 +0000 Subject: [PATCH 7/9] fix(cubesql): gate view-join test module with cfg(test); drop unused var - Add #[cfg(test)] to the test_cube_join_views module so it is not compiled into non-test builds (fixes unresolved pretty_assertions and unused-import errors under clippy -D warnings and the native builds). - Remove the unused right_filters_var from push_down_cube_join. Co-authored-by: Pavel Tiunov --- rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs | 3 --- rust/cubesql/cubesql/src/compile/test/mod.rs | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 65798da62d9b0..9b317a5bb38aa 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -434,7 +434,6 @@ impl RewriteRules for MemberRules { "?out_join_hints", "?join_type", "?left_filters", - "?right_filters", ), ), ]; @@ -2797,7 +2796,6 @@ impl MemberRules { out_join_hints_var: &'static str, join_type_var: &'static str, left_filters_var: &'static str, - right_filters_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let left_alias_to_cube_var = var!(left_alias_to_cube_var); let right_alias_to_cube_var = var!(right_alias_to_cube_var); @@ -2811,7 +2809,6 @@ impl MemberRules { let out_join_hints_var = var!(out_join_hints_var); let join_type_var = var!(join_type_var); let left_filters_var = var!(left_filters_var); - let right_filters_var = var!(right_filters_var); let meta_context = self.meta_context.clone(); move |egraph, subst| { // Resolves a join column to the name of the dimension member it diff --git a/rust/cubesql/cubesql/src/compile/test/mod.rs b/rust/cubesql/cubesql/src/compile/test/mod.rs index 7dcbc2fa862ec..cfefbc79da11d 100644 --- a/rust/cubesql/cubesql/src/compile/test/mod.rs +++ b/rust/cubesql/cubesql/src/compile/test/mod.rs @@ -32,6 +32,7 @@ pub mod test_bi_workarounds; pub mod test_cube_join; #[cfg(test)] pub mod test_cube_join_grouped; +#[cfg(test)] pub mod test_cube_join_views; #[cfg(test)] pub mod test_cube_scan; From 9a682af19f4f4aa4b115c298992cdd40b1047aa5 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Mon, 1 Jun 2026 21:40:34 +0000 Subject: [PATCH 8/9] chore: re-trigger CI (flaky Windows native + concurrency-canceled redshift) Co-authored-by: Pavel Tiunov From 6306a2fc09fd4217241333ca6629f45709fc2d99 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sat, 6 Jun 2026 06:28:02 +0000 Subject: [PATCH 9/9] feat(cubesql): only merge view joins under aggregate grouping by the join key Move the shared-member view-join merge out of push_down_cube_join (which runs on the always-ungrouped raw join) into a new rule that matches an Aggregate over the join. The merge now only fires when: - the query is grouped (an Aggregate sits over the join), and - the GROUP BY is exactly the shared join key. This rejects ungrouped queries (e.g. SELECT * over the join) and queries that group by a non-join-key dimension, both of which would otherwise produce an incorrect multi-fact pushdown. push_down_cube_join is restored to its original __cubeJoinField-only behavior. Tests: grouped left/inner merge (with join-key set filters); ungrouped, group-by-mismatch, and measure-key joins are not merged. Co-authored-by: Pavel Tiunov --- .../src/compile/rewrite/rules/members.rs | 469 ++++++++++++------ .../src/compile/test/test_cube_join_views.rs | 130 ++--- 2 files changed, 389 insertions(+), 210 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs index 9b317a5bb38aa..c0aaffec860cd 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/members.rs @@ -432,8 +432,87 @@ impl RewriteRules for MemberRules { "?left_join_hints", "?right_join_hints", "?out_join_hints", + ), + ), + // Merge a join between two (view) CubeScans on a dimension that + // resolves to the same underlying cube member into a single + // CubeScan, but ONLY under an aggregate whose GROUP BY is exactly + // that shared join key. The merged scan becomes a multi-fact query + // (FULL OUTER stitched over the group-by key by the planner). + // Gating on the aggregate means ungrouped queries (e.g. SELECT *) + // and queries grouping by a non-join-key dimension are not merged. + transforming_rewrite( + "push-down-aggregate-shared-member-join", + aggregate( + join( + cube_scan( + "?left_alias_to_cube", + "?left_members", + "?left_filters", + "?left_orders", + "CubeScanLimit:None", + "CubeScanOffset:None", + "?left_split", + "CubeScanCanPushdownJoin:true", + "CubeScanWrapped:false", + "CubeScanUngrouped:true", + "?left_join_hints", + ), + cube_scan( + "?right_alias_to_cube", + "?right_members", + "?right_filters", + "?right_orders", + "CubeScanLimit:None", + "CubeScanOffset:None", + "?right_split", + "CubeScanCanPushdownJoin:true", + "CubeScanWrapped:false", + "CubeScanUngrouped:true", + "?right_join_hints", + ), + "?left_on", + "?right_on", + "?join_type", + "?join_constraint", + "?null_equals_null", + ), + "?group_expr", + "?aggr_expr", + "?agg_split", + ), + aggregate( + cube_scan( + "?out_alias_to_cube", + cube_scan_members("?left_members", "?right_members"), + cube_scan_filters("?left_filters", "?right_filters"), + cube_scan_order_empty_tail(), + "CubeScanLimit:None", + "CubeScanOffset:None", + "CubeScanSplit:false", + "CubeScanCanPushdownJoin:true", + "CubeScanWrapped:false", + "CubeScanUngrouped:true", + "?out_join_hints", + ), + "?group_expr", + "?aggr_expr", + "?agg_split", + ), + self.push_down_aggregate_shared_member_join( + "?left_alias_to_cube", + "?right_alias_to_cube", + "?out_alias_to_cube", + "?left_members", + "?right_members", + "?left_on", + "?right_on", "?join_type", + "?left_join_hints", + "?right_join_hints", + "?out_join_hints", "?left_filters", + "?group_expr", ), ), ]; @@ -2794,8 +2873,96 @@ impl MemberRules { left_join_hints_var: &'static str, right_join_hints_var: &'static str, out_join_hints_var: &'static str, + ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { + let left_alias_to_cube_var = var!(left_alias_to_cube_var); + let right_alias_to_cube_var = var!(right_alias_to_cube_var); + let out_alias_to_cube_var = var!(out_alias_to_cube_var); + let left_members_var = var!(left_members_var); + let right_members_var = var!(right_members_var); + let left_on_var = var!(left_on_var); + let right_on_var = var!(right_on_var); + let left_join_hints_var = var!(left_join_hints_var); + let right_join_hints_var = var!(right_join_hints_var); + let out_join_hints_var = var!(out_join_hints_var); + move |egraph, subst| { + let Some((left_cube, right_cube)) = is_proper_cube_join_condition( + egraph, + subst, + left_members_var, + left_on_var, + right_members_var, + right_on_var, + ) else { + return false; + }; + + for left_alias_to_cube in + var_iter!(egraph[subst[left_alias_to_cube_var]], CubeScanAliasToCube) + { + for right_alias_to_cube in + var_iter!(egraph[subst[right_alias_to_cube_var]], CubeScanAliasToCube) + { + for left_join_hints in + var_iter!(egraph[subst[left_join_hints_var]], CubeScanJoinHints) + { + for right_join_hints in + var_iter!(egraph[subst[right_join_hints_var]], CubeScanJoinHints) + { + let out_alias_to_cube = CubeScanAliasToCube( + left_alias_to_cube + .iter() + .chain(right_alias_to_cube.iter()) + .cloned() + .collect(), + ); + + let out_join_hints = CubeScanJoinHints( + left_join_hints + .iter() + .chain(right_join_hints.iter()) + .cloned() + .chain(iter::once(vec![left_cube, right_cube])) + .collect(), + ); + + subst.insert( + out_alias_to_cube_var, + egraph.add(LogicalPlanLanguage::CubeScanAliasToCube( + out_alias_to_cube, + )), + ); + + subst.insert( + out_join_hints_var, + egraph.add(LogicalPlanLanguage::CubeScanJoinHints(out_join_hints)), + ); + + return true; + } + } + } + } + + false + } + } + + #[allow(clippy::too_many_arguments)] + fn push_down_aggregate_shared_member_join( + &self, + left_alias_to_cube_var: &'static str, + right_alias_to_cube_var: &'static str, + out_alias_to_cube_var: &'static str, + left_members_var: &'static str, + right_members_var: &'static str, + left_on_var: &'static str, + right_on_var: &'static str, join_type_var: &'static str, + left_join_hints_var: &'static str, + right_join_hints_var: &'static str, + out_join_hints_var: &'static str, left_filters_var: &'static str, + group_expr_var: &'static str, ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { let left_alias_to_cube_var = var!(left_alias_to_cube_var); let right_alias_to_cube_var = var!(right_alias_to_cube_var); @@ -2804,16 +2971,14 @@ impl MemberRules { let right_members_var = var!(right_members_var); let left_on_var = var!(left_on_var); let right_on_var = var!(right_on_var); + let join_type_var = var!(join_type_var); let left_join_hints_var = var!(left_join_hints_var); let right_join_hints_var = var!(right_join_hints_var); let out_join_hints_var = var!(out_join_hints_var); - let join_type_var = var!(join_type_var); let left_filters_var = var!(left_filters_var); + let group_expr_var = var!(group_expr_var); let meta_context = self.meta_context.clone(); move |egraph, subst| { - // Resolves a join column to the name of the dimension member it - // references, but only for plain or time dimensions (measures, - // segments, etc. are not valid shared join keys). fn dimension_member_name( egraph: &mut CubeEGraph, members_id: Id, @@ -2826,164 +2991,170 @@ impl MemberRules { } } - // Two ways to recognize a joinable pair of CubeScans: - // 1. The classic `left.__cubeJoinField = right.__cubeJoinField` - // condition that comes from the data model join graph. - // 2. A join between two CubeScans (typically views) on a - // dimension that resolves to the *same underlying cube member* - // — e.g. `orders_view.city = customers_view.city` where both - // `city` dimensions are aliases of the same `cube.dimension`. - // Such a join is on the same shared key, so the two scans can - // be merged into a single CubeScan exactly like any other - // cube-to-cube join, letting the query planner treat the result - // as a (multi-fact) query over the combined members. - let cubes = is_proper_cube_join_condition( - egraph, - subst, - left_members_var, - left_on_var, - right_members_var, - right_on_var, - ); - let mut shared_left_keys: Vec = vec![]; - let mut shared_right_keys: Vec = vec![]; - let (cubes, shared_member_join) = match cubes { - Some(cubes) => (Some(cubes), false), - None => { - // A view dimension keeps the original `cube.dimension` path - // in `alias_member`; for non-view members we fall back to - // the member name itself. - let resolve_underlying = |member_name: &str| -> String { - meta_context - .find_dimension_with_name(member_name) - .and_then(|dim| dim.alias_member.clone()) - .unwrap_or_else(|| member_name.to_string()) - }; + let resolve_underlying = |member_name: &str| -> String { + meta_context + .find_dimension_with_name(member_name) + .and_then(|dim| dim.alias_member.clone()) + .unwrap_or_else(|| member_name.to_string()) + }; - let left_join_ons = var_iter!(egraph[subst[left_on_var]], JoinLeftOn) - .cloned() - .collect::>(); - let right_join_ons = var_iter!(egraph[subst[right_on_var]], JoinRightOn) - .cloned() - .collect::>(); + // The join must be on dimensions that resolve to the same + // underlying cube member on both sides (a shared key). + let left_join_ons = var_iter!(egraph[subst[left_on_var]], JoinLeftOn) + .cloned() + .collect::>(); + let right_join_ons = var_iter!(egraph[subst[right_on_var]], JoinRightOn) + .cloned() + .collect::>(); - let mut found = None; - 'pairs: for left_on in left_join_ons.iter() { - for right_on in right_join_ons.iter() { - // We can only merge when the *whole* join key is - // fully within dimensions: every column pair must - // resolve to a dimension (or time dimension) on both - // sides and to the same underlying cube member. A - // join key that touches a measure/segment/etc. (or - // mixes underlying members) is rejected, leaving the - // join to the normal (non-merged) handling. - if left_on.is_empty() || left_on.len() != right_on.len() { - continue; - } - let mut left_cube_name: Option = None; - let mut right_cube_name: Option = None; - let mut left_keys: Vec = vec![]; - let mut right_keys: Vec = vec![]; - let mut all_match = true; - for (left_column, right_column) in left_on.iter().zip(right_on.iter()) { - let Some(left_name) = dimension_member_name( - egraph, - subst[left_members_var], - left_column, - ) else { - all_match = false; - break; - }; - let Some(right_name) = dimension_member_name( - egraph, - subst[right_members_var], - right_column, - ) else { - all_match = false; - break; - }; - if resolve_underlying(&left_name) != resolve_underlying(&right_name) - { - all_match = false; - break; - } - left_cube_name = left_name.split('.').next().map(|s| s.to_string()); - right_cube_name = - right_name.split('.').next().map(|s| s.to_string()); - left_keys.push(left_name); - right_keys.push(right_name); - } - if all_match { - if let (Some(left_cube_name), Some(right_cube_name)) = - (left_cube_name, right_cube_name) - { - found = Some((left_cube_name, right_cube_name)); - shared_left_keys = left_keys; - shared_right_keys = right_keys; - break 'pairs; - } - } + let mut matched: Option<( + String, + String, + Vec, + Vec, + Vec, + Vec, + )> = None; + 'pairs: for left_on in left_join_ons.iter() { + for right_on in right_join_ons.iter() { + if left_on.is_empty() || left_on.len() != right_on.len() { + continue; + } + let mut left_cube_name: Option = None; + let mut right_cube_name: Option = None; + let mut left_keys: Vec = vec![]; + let mut right_keys: Vec = vec![]; + let mut all_match = true; + for (left_column, right_column) in left_on.iter().zip(right_on.iter()) { + let Some(left_name) = + dimension_member_name(egraph, subst[left_members_var], left_column) + else { + all_match = false; + break; + }; + let Some(right_name) = + dimension_member_name(egraph, subst[right_members_var], right_column) + else { + all_match = false; + break; + }; + if resolve_underlying(&left_name) != resolve_underlying(&right_name) { + all_match = false; + break; + } + left_cube_name = left_name.split('.').next().map(|s| s.to_string()); + right_cube_name = right_name.split('.').next().map(|s| s.to_string()); + left_keys.push(left_name); + right_keys.push(right_name); + } + if all_match { + if let (Some(left_cube_name), Some(right_cube_name)) = + (left_cube_name, right_cube_name) + { + matched = Some(( + left_cube_name, + right_cube_name, + left_keys, + right_keys, + left_on.clone(), + right_on.clone(), + )); + break 'pairs; } } - (found, true) } - }; - let Some((left_cube, right_cube)) = cubes else { + } + + let Some(( + left_cube, + right_cube, + shared_left_keys, + shared_right_keys, + matched_left_cols, + matched_right_cols, + )) = matched + else { return false; }; - // For a join between two views on a shared cube member, Tesseract - // renders the merged multi-fact scan as a FULL OUTER JOIN over the - // shared key. Re-introduce the requested INNER/LEFT/RIGHT semantics - // by requiring the join key of each "must be present" side to be - // set (FULL adds nothing). - if shared_member_join { - let mut require_left = false; - let mut require_right = false; - if let Some(join_type) = var_list_iter!(egraph[subst[join_type_var]], JoinJoinType) - .cloned() - .next() + // The join key must be fully within the GROUP BY dimensions: every + // group-by column must be one of the join-key columns, and every + // join-key pair must be grouped. This is what makes the multi-fact + // stitch over the group-by key match the requested join. + let Some(group_referenced_expr) = + &egraph.index(subst[group_expr_var]).data.referenced_expr + else { + return false; + }; + let group_cols = referenced_columns(group_referenced_expr); + if group_cols.is_empty() { + return false; + } + let join_key_cols: HashSet = matched_left_cols + .iter() + .chain(matched_right_cols.iter()) + .map(|c| c.flat_name()) + .collect(); + if !group_cols.iter().all(|c| join_key_cols.contains(c)) { + return false; + } + let group_set: HashSet<&String> = group_cols.iter().collect(); + for (left_col, right_col) in matched_left_cols.iter().zip(matched_right_cols.iter()) { + if !group_set.contains(&left_col.flat_name()) + && !group_set.contains(&right_col.flat_name()) { - match join_type.0 { - datafusion::prelude::JoinType::Inner => { - require_left = true; - require_right = true; - } - datafusion::prelude::JoinType::Left => require_left = true, - datafusion::prelude::JoinType::Right => require_right = true, - _ => {} - } + return false; } + } - let mut presence_members: Vec = vec![]; - if require_left { - presence_members.extend(shared_left_keys.iter().cloned()); - } - if require_right { - presence_members.extend(shared_right_keys.iter().cloned()); + // Re-introduce INNER/LEFT/RIGHT semantics on top of the FULL OUTER + // multi-fact stitch by requiring the join key of each "must be + // present" side to be set (FULL adds nothing). + let mut require_left = false; + let mut require_right = false; + if let Some(join_type) = var_list_iter!(egraph[subst[join_type_var]], JoinJoinType) + .cloned() + .next() + { + match join_type.0 { + datafusion::prelude::JoinType::Inner => { + require_left = true; + require_right = true; + } + datafusion::prelude::JoinType::Left => require_left = true, + datafusion::prelude::JoinType::Right => require_right = true, + _ => {} } + } - if !presence_members.is_empty() { - let mut acc = subst[left_filters_var]; - for name in presence_members { - let member = egraph.add(LogicalPlanLanguage::FilterMemberMember( - crate::compile::rewrite::FilterMemberMember(name), - )); - let op = egraph.add(LogicalPlanLanguage::FilterMemberOp( - crate::compile::rewrite::FilterMemberOp("set".to_string()), - )); - let values = egraph.add(LogicalPlanLanguage::FilterMemberValues( - crate::compile::rewrite::FilterMemberValues(vec![]), - )); - let filter_member = - egraph.add(LogicalPlanLanguage::FilterMember([member, op, values])); - acc = egraph.add(LogicalPlanLanguage::CubeScanFilters(vec![ - filter_member, - acc, - ])); - } - subst.insert(left_filters_var, acc); + let mut presence_members: Vec = vec![]; + if require_left { + presence_members.extend(shared_left_keys.iter().cloned()); + } + if require_right { + presence_members.extend(shared_right_keys.iter().cloned()); + } + + if !presence_members.is_empty() { + let mut acc = subst[left_filters_var]; + for name in presence_members { + let member = egraph.add(LogicalPlanLanguage::FilterMemberMember( + crate::compile::rewrite::FilterMemberMember(name), + )); + let op = egraph.add(LogicalPlanLanguage::FilterMemberOp( + crate::compile::rewrite::FilterMemberOp("set".to_string()), + )); + let values = egraph.add(LogicalPlanLanguage::FilterMemberValues( + crate::compile::rewrite::FilterMemberValues(vec![]), + )); + let filter_member = + egraph.add(LogicalPlanLanguage::FilterMember([member, op, values])); + acc = egraph.add(LogicalPlanLanguage::CubeScanFilters(vec![ + filter_member, + acc, + ])); } + subst.insert(left_filters_var, acc); } for left_alias_to_cube in @@ -3011,7 +3182,7 @@ impl MemberRules { .iter() .chain(right_join_hints.iter()) .cloned() - .chain(iter::once(vec![left_cube, right_cube])) + .chain(iter::once(vec![left_cube.clone(), right_cube.clone()])) .collect(), ); diff --git a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs index a1e1c285e0d80..64373622fd2b8 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_cube_join_views.rs @@ -44,10 +44,12 @@ fn views_meta() -> Vec { description: None, title: None, r#type: V1CubeMetaType::View, - dimensions: vec![dimension( - "customers_view.customer_city", - "customers.customer_city", - )], + dimensions: vec![ + dimension("customers_view.customer_city", "customers.customer_city"), + // A second dimension that is NOT a join key, used to test that a + // query grouping by it (instead of the join key) is not merged. + dimension("customers_view.status", "customers.status"), + ], measures: vec![measure( "customers_view.avg_age", "customers.avg_age", @@ -90,61 +92,11 @@ fn set_filter(member: &str) -> V1LoadRequestQueryFilterItem { } } -/// A join between two views on a dimension that resolves to the same -/// underlying cube member (`customers.customer_city`) should be merged into a -/// single CubeScan over the combined members, exactly like a regular -/// cube-to-cube join. As a `LEFT JOIN`, the left ("must be present") side is -/// guarded with a `set` filter on its join key so the downstream FULL OUTER -/// multi-fact stitch keeps left-join semantics. -#[tokio::test] -async fn test_join_two_views_on_shared_member() { - if !Rewriter::sql_push_down_enabled() { - return; - } - init_testing_logger(); - - let logical_plan = convert_select_to_query_plan_with_meta( - r#" - SELECT * - FROM customers_view - LEFT JOIN orders_view - ON (orders_view.customer_city = customers_view.customer_city) - "# - .to_string(), - views_meta(), - ) - .await - .as_logical_plan(); - - assert_eq!( - logical_plan.find_cube_scan().request, - V1LoadRequestQuery { - measures: Some(vec![ - "customers_view.avg_age".to_string(), - "orders_view.revenue".to_string(), - ]), - dimensions: Some(vec![ - "customers_view.customer_city".to_string(), - "orders_view.customer_city".to_string(), - ]), - segments: Some(vec![]), - order: Some(vec![]), - filters: Some(vec![set_filter("customers_view.customer_city")]), - ungrouped: Some(true), - join_hints: Some(vec![vec![ - "customers_view".to_string(), - "orders_view".to_string(), - ]]), - ..Default::default() - } - ) -} - /// The motivating query: a grouped (multi-fact) `LEFT JOIN` selecting a -/// dimension and measures from each view, joined on the shared `customer_city`. -/// The two view scans are merged into a single grouped CubeScan, and the left -/// join key gets a `set` filter to recover LEFT-join semantics on top of the -/// FULL OUTER multi-fact stitch. +/// dimension and measures from each view, joined on the shared `customer_city` +/// which is also the GROUP BY key. The two view scans are merged into a single +/// grouped CubeScan, and the left join key gets a `set` filter to recover +/// LEFT-join semantics on top of the FULL OUTER multi-fact stitch. #[tokio::test] async fn test_group_by_left_join_two_views_on_shared_member() { if !Rewriter::sql_push_down_enabled() { @@ -230,10 +182,65 @@ async fn test_group_by_inner_join_two_views_on_shared_member() { ) } +/// Ungrouped query (`SELECT *`): the shared-member merge only applies to +/// grouped queries, so an ungrouped join is not merged and is rejected the +/// same way any other unsupported cube join is. +#[tokio::test] +async fn test_ungrouped_join_two_views_on_shared_member_is_not_merged() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let meta = get_test_tenant_ctx_with_meta(views_meta()); + let query = convert_sql_to_cube_query( + &r#" + SELECT * + FROM customers_view + LEFT JOIN orders_view + ON (orders_view.customer_city = customers_view.customer_city) + "# + .to_string(), + meta.clone(), + get_test_session(DatabaseProtocol::PostgreSQL, meta).await, + ) + .await; + + let error = query.unwrap_err(); + assert!(matches!(error, CompilationError::Rewrite(..))); +} + +/// The join is over a dimension (`customer_city`) that is not in the GROUP BY +/// (the query groups by `status` instead). The merge requires the join key to +/// be the group-by key, so this is not merged and is rejected. +#[tokio::test] +async fn test_group_by_join_dimension_not_in_group_by_is_not_merged() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let meta = get_test_tenant_ctx_with_meta(views_meta()); + let query = convert_sql_to_cube_query( + &r#" + SELECT c.status, measure(o.revenue), measure(c.avg_age) + FROM customers_view c + LEFT JOIN orders_view o ON o.customer_city = c.customer_city + GROUP BY 1 + "# + .to_string(), + meta.clone(), + get_test_session(DatabaseProtocol::PostgreSQL, meta).await, + ) + .await; + + let error = query.unwrap_err(); + assert!(matches!(error, CompilationError::Rewrite(..))); +} + /// The merge only fires when the join key is fully within dimensions. Joining /// the two views on a measure (`o.revenue = c.avg_age`) is not a shared-member -/// dimension join, so the scans are not merged and the query is rejected the -/// same way any other unsupported cube join is. +/// dimension join, so the scans are not merged and the query is rejected. #[tokio::test] async fn test_join_two_views_on_measure_is_not_merged() { if !Rewriter::sql_push_down_enabled() { @@ -244,9 +251,10 @@ async fn test_join_two_views_on_measure_is_not_merged() { let meta = get_test_tenant_ctx_with_meta(views_meta()); let query = convert_sql_to_cube_query( &r#" - SELECT * + SELECT c.customer_city, measure(o.revenue) FROM customers_view c LEFT JOIN orders_view o ON (o.revenue = c.avg_age) + GROUP BY 1 "# .to_string(), meta.clone(),