From 32ab6a8de3e3d61d518a70356c366fd12076fdc6 Mon Sep 17 00:00:00 2001 From: Hank Ditton Date: Wed, 1 Apr 2026 15:41:36 -0600 Subject: [PATCH] fix(tesseract): preserve FILTER_PARAMS pushdown with segments Tesseract was treating any segment inside a filter tree as a fatal subtree extraction failure, which made FILTER_PARAMS fall back to always_true(). Keep AND-group pruning for matching members, require full matches for OR groups, and add regressions for segment siblings and partial OR branches. Made-with: Cursor --- .../postgres/sql-generation.test.ts | 47 ++++ .../cubesqlplanner/src/plan/filter.rs | 130 ++++++----- .../src/planner/sql_evaluator/sql_call.rs | 15 +- .../cubesqlplanner/src/tests/utils/debug.rs | 207 ++++++++++++++++++ 4 files changed, 330 insertions(+), 69 deletions(-) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts index 3efee71060690..e3d3cf903eff8 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts @@ -5708,4 +5708,51 @@ cubes: }, ]); }); + + if (getEnv('nativeSqlPlanner')) { + describe('FILTER_PARAMS with segments under Tesseract', () => { + const fpCompilers = prepareJsCompiler(` + cube('orders_fp', { + sql: \` + SELECT * FROM orders + WHERE \${FILTER_PARAMS.orders_fp.created_at.filter('created_at')} + AND \${FILTER_PARAMS.orders_fp.status.filter('status')} + \`, + measures: { + count: { type: 'count' }, + }, + dimensions: { + id: { sql: 'id', type: 'number', primaryKey: true }, + status: { sql: 'status', type: 'string' }, + created_at: { sql: 'created_at', type: 'time' }, + }, + segments: { + completed: { sql: \`\${CUBE}.status = 'completed'\` }, + }, + }); + `); + + it('FILTER_PARAMS pushdown is preserved when a segment is present', async () => { + await fpCompilers.compiler.compile(); + const query = new PostgresQuery(fpCompilers, { + measures: ['orders_fp.count'], + segments: ['orders_fp.completed'], + filters: [ + { member: 'orders_fp.status', operator: 'equals', values: ['completed'] }, + ], + timeDimensions: [{ + dimension: 'orders_fp.created_at', + dateRange: ['2024-01-01', '2024-01-31'], + }], + timezone: 'UTC', + }); + + const [sql] = query.buildSqlAndParams(); + + expect(sql).not.toMatch(/WHERE\s+1\s*=\s*1\s+AND\s+1\s*=\s*1/); + expect(sql).toMatch(/created_at/); + expect(sql).toMatch(/status/); + }); + }); + } }); diff --git a/rust/cubesqlplanner/cubesqlplanner/src/plan/filter.rs b/rust/cubesqlplanner/cubesqlplanner/src/plan/filter.rs index b6cfff118436f..3fc6e680177c3 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/plan/filter.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/plan/filter.rs @@ -6,6 +6,10 @@ use cubenativeutils::CubeError; use std::fmt; use std::rc::Rc; +/// Whether a recursive `find_subtree_for_members` call matched every node +/// without pruning anything (segments, non-matching items, etc.). +type FullMatch = bool; + #[derive(Clone, PartialEq)] pub enum FilterGroupOperator { Or, @@ -128,40 +132,24 @@ impl FilterItem { } } - /// Extract all member symbols from this filter tree - /// Returns None if filter tree is invalid (e.g., empty group) - /// Returns Some(set) with all member symbols found in the tree - fn extract_filter_members(&self) -> Option>> { - match self { - FilterItem::Group(group) => { - // Empty groups are considered invalid - if group.items.is_empty() { - return None; - } - - let mut all_members = Vec::new(); - - // Recursively extract from all children - for child in &group.items { - match child.extract_filter_members() { - None => return None, // If any child is invalid, entire tree is invalid - Some(mut members) => all_members.append(&mut members), - } - } - - Some(all_members) - } - FilterItem::Item(item) => Some(vec![item.member_evaluator().clone()]), - FilterItem::Segment(_) => None, - } - } - - /// Find subtree of filters that only contains filters for the specified members - /// Returns None if no matching filters found - /// Returns Some(FilterItem) with the subtree containing only filters for target members + /// Find subtree of filters that only contains filters for the specified members. + /// Returns `None` if no matching filters found. + /// Returns `Some(FilterItem)` with the subtree containing only filters for target members. + /// + /// Partial matching is only supported for AND groups. OR groups only match + /// if all of their children match the target members. /// - /// This only processes AND groups - OR groups are not supported and will return None + /// `Segment` nodes are skipped during extraction -- they do not prevent + /// sibling member filters from being collected in AND groups. pub fn find_subtree_for_members(&self, target_members: &[&String]) -> Option { + self.find_subtree_for_members_inner(target_members) + .map(|(filter_item, _)| filter_item) + } + + fn find_subtree_for_members_inner( + &self, + target_members: &[&String], + ) -> Option<(FilterItem, FullMatch)> { match self { FilterItem::Group(group) => { // Empty groups return None @@ -169,47 +157,57 @@ impl FilterItem { return None; } - // Extract all members from this filter subtree - let filter_members = self.extract_filter_members()?; + match group.operator { + FilterGroupOperator::And => { + let mut matching_children = Vec::new(); + let mut all_children_fully_match = true; - // Check if all members in this filter are in the target set - let all_members_match = filter_members.iter().all(|member| { - target_members.iter().any(|target| { - &&member.clone().resolve_reference_chain().full_name() == target - }) - }); + for child in &group.items { + match child.find_subtree_for_members_inner(target_members) { + Some((matching_child, child_fully_matched)) => { + matching_children.push(matching_child); + all_children_fully_match &= child_fully_matched; + } + None => all_children_fully_match = false, + } + } - if all_members_match { - // All members match - return this entire filter subtree - return Some(self.clone()); - } + if matching_children.is_empty() { + return None; + } - // Only process AND groups for partial matching - if group.operator == FilterGroupOperator::And { - let matching_children: Vec = group - .items - .iter() - .filter_map(|child| child.find_subtree_for_members(target_members)) - .collect(); + if all_children_fully_match { + // Every child matches, so preserve the original group shape. + return Some((self.clone(), true)); + } - if matching_children.is_empty() { - return None; - } + if matching_children.len() == 1 { + // Single match - return it directly without wrapping. + return Some((matching_children.into_iter().next().unwrap(), false)); + } - if matching_children.len() == 1 { - // Single match - return it directly without wrapping - return Some(matching_children.into_iter().next().unwrap()); + // Multiple matches - wrap in a new AND group. + Some(( + FilterItem::Group(Rc::new(FilterGroup::new( + FilterGroupOperator::And, + matching_children, + ))), + false, + )) } + FilterGroupOperator::Or => { + // OR groups can only be preserved if every child matches. + for child in &group.items { + let (_, child_fully_matched) = + child.find_subtree_for_members_inner(target_members)?; + if !child_fully_matched { + return None; + } + } - // Multiple matches - wrap in new AND group - return Some(FilterItem::Group(Rc::new(FilterGroup::new( - FilterGroupOperator::And, - matching_children, - )))); + Some((self.clone(), true)) + } } - - // OR groups are not supported - None } FilterItem::Item(item) => { let member = item.member_evaluator(); @@ -219,7 +217,7 @@ impl FilterItem { .iter() .any(|target| &&member.clone().resolve_reference_chain().full_name() == target) { - Some(self.clone()) + Some((self.clone(), true)) } else { None } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/sql_call.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/sql_call.rs index 08dc7235cb380..08618a53d73e5 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/sql_call.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/sql_call.rs @@ -147,13 +147,17 @@ impl SqlCall { let (filter_params, filter_groups, deps, context_values) = self.prepare_template_params(visitor, node_processor, &query_tools, templates)?; - Self::substitute_template( + let result = Self::substitute_template( template, &deps, &filter_params, &filter_groups, &context_values, - ) + )?; + // Filter-params callbacks (e.g. FILTER_PARAMS inside a segment SQL) + // may produce results containing {arg:N} dep references that the + // first pass inserted verbatim. A second pass resolves them. + Self::substitute_template(&result, &deps, &[], &[], &[]) } else { Err(CubeError::internal( "SqlCall::eval called for function that returns string".to_string(), @@ -194,7 +198,12 @@ impl SqlCall { }) .collect::, _>>()?, }; - Ok(result) + // Filter-params callbacks may produce results containing {arg:N} + // dep references; resolve them with a second pass. + result + .into_iter() + .map(|s| Self::substitute_template(&s, &deps, &[], &[], &[])) + .collect() } pub fn is_owned_by_cube(&self) -> bool { diff --git a/rust/cubesqlplanner/cubesqlplanner/src/tests/utils/debug.rs b/rust/cubesqlplanner/cubesqlplanner/src/tests/utils/debug.rs index 18975f9008f88..be3cb9367c0f2 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/tests/utils/debug.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/tests/utils/debug.rs @@ -4,6 +4,7 @@ use crate::planner::filter::{BaseFilter, FilterOperator}; use crate::planner::sql_evaluator::DebugSql; use crate::test_fixtures::cube_bridge::MockSchema; use crate::test_fixtures::test_utils::TestContext; +use std::rc::Rc; #[test] fn test_dimension_basic() { @@ -201,3 +202,209 @@ fn test_filter_group_and_collapsed() { "AND: [\n {visitors.source} equals: ['google'],\n {visitors.id} gt: ['100']\n]"; assert_eq!(sql, expected); } + +#[test] +fn test_find_subtree_for_members_excludes_segments_from_and_group() { + let schema = MockSchema::from_yaml_file("common/integration_basic.yaml"); + let ctx = TestContext::new(schema).unwrap(); + let city_symbol = ctx.create_symbol("customers.city").unwrap(); + let amount_symbol = ctx.create_symbol("orders.amount").unwrap(); + + let city_filter = BaseFilter::try_new( + ctx.query_tools().clone(), + city_symbol.clone(), + crate::planner::filter::base_filter::FilterType::Dimension, + FilterOperator::Equal, + Some(vec![Some("New York".to_string())]), + ) + .unwrap(); + + let amount_filter = BaseFilter::try_new( + ctx.query_tools().clone(), + amount_symbol.clone(), + crate::planner::filter::base_filter::FilterType::Dimension, + FilterOperator::Gte, + Some(vec![Some("100".to_string())]), + ) + .unwrap(); + + let completed_segment = ctx.create_segment("orders.completed_orders").unwrap(); + + let filter_tree = FilterItem::Group(Rc::new(FilterGroup::new( + FilterGroupOperator::And, + vec![ + FilterItem::Item(city_filter), + FilterItem::Item(amount_filter), + FilterItem::Segment(completed_segment), + ], + ))); + + let city_member = city_symbol.resolve_reference_chain().full_name(); + let amount_member = amount_symbol.resolve_reference_chain().full_name(); + let targets = vec![&city_member, &amount_member]; + + let subtree = filter_tree + .find_subtree_for_members(&targets) + .expect("matching filters should still be extracted"); + + assert_eq!( + subtree.debug_sql(false), + "AND: [\n {customers.city} equals: ['New York'],\n {orders.amount} gte: ['100']\n]" + ); +} + +#[test] +fn test_find_subtree_for_members_keeps_or_groups_all_or_nothing() { + let schema = MockSchema::from_yaml_file("common/integration_basic.yaml"); + let ctx = TestContext::new(schema).unwrap(); + let city_symbol = ctx.create_symbol("customers.city").unwrap(); + + let city_filter = BaseFilter::try_new( + ctx.query_tools().clone(), + city_symbol.clone(), + crate::planner::filter::base_filter::FilterType::Dimension, + FilterOperator::Equal, + Some(vec![Some("New York".to_string())]), + ) + .unwrap(); + + let completed_segment = ctx.create_segment("orders.completed_orders").unwrap(); + + let filter_tree = FilterItem::Group(Rc::new(FilterGroup::new( + FilterGroupOperator::Or, + vec![ + FilterItem::Item(city_filter), + FilterItem::Segment(completed_segment), + ], + ))); + + let city_member = city_symbol.resolve_reference_chain().full_name(); + let targets = vec![&city_member]; + + assert!( + filter_tree.find_subtree_for_members(&targets).is_none(), + "partial matches should not be extracted from OR groups" + ); +} + +#[test] +fn test_find_subtree_for_members_rejects_or_groups_with_partially_matching_children() { + let schema = MockSchema::from_yaml_file("common/integration_basic.yaml"); + let ctx = TestContext::new(schema).unwrap(); + let city_symbol = ctx.create_symbol("customers.city").unwrap(); + let amount_symbol = ctx.create_symbol("orders.amount").unwrap(); + + let city_filter = BaseFilter::try_new( + ctx.query_tools().clone(), + city_symbol.clone(), + crate::planner::filter::base_filter::FilterType::Dimension, + FilterOperator::Equal, + Some(vec![Some("New York".to_string())]), + ) + .unwrap(); + + let amount_filter = BaseFilter::try_new( + ctx.query_tools().clone(), + amount_symbol.clone(), + crate::planner::filter::base_filter::FilterType::Dimension, + FilterOperator::Gte, + Some(vec![Some("100".to_string())]), + ) + .unwrap(); + + let completed_segment = ctx.create_segment("orders.completed_orders").unwrap(); + + let partially_matching_branch = FilterItem::Group(Rc::new(FilterGroup::new( + FilterGroupOperator::And, + vec![ + FilterItem::Item(city_filter), + FilterItem::Segment(completed_segment), + ], + ))); + + let filter_tree = FilterItem::Group(Rc::new(FilterGroup::new( + FilterGroupOperator::Or, + vec![partially_matching_branch, FilterItem::Item(amount_filter)], + ))); + + let city_member = city_symbol.resolve_reference_chain().full_name(); + let amount_member = amount_symbol.resolve_reference_chain().full_name(); + let targets = vec![&city_member, &amount_member]; + + assert!( + filter_tree.find_subtree_for_members(&targets).is_none(), + "OR groups should only match when every branch matches without pruning" + ); +} + +#[test] +fn test_find_subtree_for_members_segment_only_and_group_returns_none() { + let schema = MockSchema::from_yaml_file("common/integration_basic.yaml"); + let ctx = TestContext::new(schema).unwrap(); + + let completed_segment = ctx.create_segment("orders.completed_orders").unwrap(); + + let filter_tree = FilterItem::Group(Rc::new(FilterGroup::new( + FilterGroupOperator::And, + vec![FilterItem::Segment(completed_segment)], + ))); + + let dummy_member = "orders.amount".to_string(); + let targets = vec![&dummy_member]; + + assert!( + filter_tree.find_subtree_for_members(&targets).is_none(), + "AND group containing only segments should return None" + ); +} + +#[test] +fn test_find_subtree_for_members_all_matching_and_group_preserves_group() { + let schema = MockSchema::from_yaml_file("common/integration_basic.yaml"); + let ctx = TestContext::new(schema).unwrap(); + let city_symbol = ctx.create_symbol("customers.city").unwrap(); + let amount_symbol = ctx.create_symbol("orders.amount").unwrap(); + + let city_filter = BaseFilter::try_new( + ctx.query_tools().clone(), + city_symbol.clone(), + crate::planner::filter::base_filter::FilterType::Dimension, + FilterOperator::Equal, + Some(vec![Some("New York".to_string())]), + ) + .unwrap(); + + let amount_filter = BaseFilter::try_new( + ctx.query_tools().clone(), + amount_symbol.clone(), + crate::planner::filter::base_filter::FilterType::Dimension, + FilterOperator::Gte, + Some(vec![Some("100".to_string())]), + ) + .unwrap(); + + let filter_tree = FilterItem::Group(Rc::new(FilterGroup::new( + FilterGroupOperator::And, + vec![ + FilterItem::Item(city_filter), + FilterItem::Item(amount_filter), + ], + ))); + + let city_member = city_symbol.resolve_reference_chain().full_name(); + let amount_member = amount_symbol.resolve_reference_chain().full_name(); + let targets = vec![&city_member, &amount_member]; + + let subtree = filter_tree + .find_subtree_for_members(&targets) + .expect("fully matching AND group should be returned"); + + let expected_sql = + "AND: [\n {customers.city} equals: ['New York'],\n {orders.amount} gte: ['100']\n]"; + assert_eq!(subtree.debug_sql(false), expected_sql); + + assert!( + subtree == filter_tree, + "fully matching group should be structurally identical to the original" + ); +}