From ed233ddaf239c7cb68bac97eaca2cf0f10301aed Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Tue, 3 Feb 2026 17:07:29 +0530 Subject: [PATCH 1/3] feature: add variable re-use --- crates/lance-graph/src/logical_plan.rs | 209 ++++++++++++++++++++++++- 1 file changed, 203 insertions(+), 6 deletions(-) diff --git a/crates/lance-graph/src/logical_plan.rs b/crates/lance-graph/src/logical_plan.rs index 9fae28c..3f67a92 100644 --- a/crates/lance-graph/src/logical_plan.rs +++ b/crates/lance-graph/src/logical_plan.rs @@ -377,12 +377,10 @@ impl LogicalPlanner { // For each segment, add an expand for segment in &path.segments { - // Determine / register target variable - let target_variable = segment - .end_node - .variable - .clone() - .unwrap_or_else(|| format!("_node_{}", self.variables.len())); + // Determine target variable and check for reuse + // TODO: create error types for these cases + let mut is_variable_reuse = false; + let mut original_variable_name = String::new(); let target_label = segment .end_node @@ -390,9 +388,63 @@ impl LogicalPlanner { .first() .cloned() .unwrap_or_else(|| "Node".to_string()); + + let target_variable = if let Some(ref var) = segment.end_node.variable { + if let Some(existing_label) = self.variables.get(var) { + // Check for label mismatches + if existing_label == "Node" { + return Err(GraphError::PlanError { + message: format!( + "Variable '{}' is not assigned a node label but re-used", + var + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + + if target_label != "Node" && existing_label != &target_label { + return Err(GraphError::PlanError { + message: format!( + "Variable '{}' has conflicting labels: '{}' and '{}'", + var, existing_label, target_label + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + + is_variable_reuse = true; + original_variable_name = var.clone(); + // Create a unique shadow variable to avoid naming collision + format!("_shadow_{}_{}", var, self.variables.len()) + } else { + var.clone() + } + } else { + format!("_node_{}", self.variables.len()) + }; + self.variables .insert(target_variable.clone(), target_label.clone()); + // Validate that a relationship variable is not already defined + if let Some(ref rel_var) = segment.relationship.variable { + if let Some(_) = self.variables.get(rel_var) { + return Err(GraphError::PlanError { + message: format!("Variable cannot be re-used on a rel: '{}'", rel_var), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + + let rel_type = segment + .relationship + .types + .first() + .cloned() + .unwrap_or_else(|| "Relationship".to_string()); + self.variables + .insert(rel_var.clone(), rel_type.clone()); + } + // Optimize fixed-length var-length expansions (*1 or *1..1) let next_plan = match segment.relationship.length.as_ref() { Some(length_range) @@ -435,6 +487,21 @@ impl LogicalPlanner { }; plan = next_plan; + + // If we reused a variable, add a filter to ensure identity equality + if is_variable_reuse { + let predicate = BooleanExpression::Comparison { + left: ValueExpression::Variable(original_variable_name), + operator: ComparisonOperator::Equal, + right: ValueExpression::Variable(target_variable.clone()), + }; + + plan = LogicalOperator::Filter { + input: Box::new(plan), + predicate, + }; + } + current_src = target_variable; } @@ -1171,4 +1238,134 @@ mod tests { _ => panic!("Expected Project at top level"), } } + + #[test] + fn test_var_reuse() { + let q = "MATCH (a:Person)-[:FOLLOWS]->(a) RETURN a"; + let ast = parse_cypher_query(q).unwrap(); + let mut planner = LogicalPlanner::new(); + let logical = planner.plan(&ast).unwrap(); + + match logical { + LogicalOperator::Project { input, .. } => { + match *input { + LogicalOperator::Filter { + predicate, + input: expand_input, + } => { + match predicate { + BooleanExpression::Comparison { + left, + operator, + right, + } => { + assert_eq!(operator, ComparisonOperator::Equal); + match (left, right) { + ( + ValueExpression::Variable(l), + ValueExpression::Variable(r), + ) => { + if l == "a" { + assert!(r.starts_with("_shadow_a_")); + } else if r == "a" { + assert!(l.starts_with("_shadow_a_")); + } else { + panic!("Expected comparison involving 'a' and a shadow variable"); + } + } + _ => panic!("Expected equality check between variables"), + } + } + _ => panic!("Expected comparison predicate"), + } + + match *expand_input { + LogicalOperator::Expand { + source_variable, + target_variable, + relationship_types, + input: scan_input, + .. + } => { + assert_eq!(source_variable, "a"); + assert!(target_variable.starts_with("_shadow_a_")); + assert_eq!(relationship_types, vec!["FOLLOWS".to_string()]); + + match *scan_input { + LogicalOperator::ScanByLabel { variable, label, .. } => { + assert_eq!(variable, "a"); + assert_eq!(label, "Person"); + } + _ => panic!("Expected Scan"), + } + } + _ => panic!("Expected Expand"), + } + } + _ => panic!("Expected Filter"), + } + } + _ => panic!("Expected Project"), + } + } + + #[test] + fn test_var_reuse_unlabelled_existing_var_and_new_var() { + let q = "MATCH (a)-[:KNOWS]->(a) RETURN a.name"; + let ast = parse_cypher_query(q).unwrap(); + let mut planner = LogicalPlanner::new(); + let result = planner.plan(&ast); + assert!(result.is_err()); + match result { + Err(GraphError::PlanError { message, .. }) => { + assert!(message.contains("Variable 'a' is not assigned a node label but re-used")); + } + _ => panic!("Expected PlanError"), + } + } + + #[test] + fn test_var_reuse_unlabelled_existing_var_labelled_new_var() { + let q = "MATCH (a)-[:KNOWS]->(a:Company) RETURN a.name"; + let ast = parse_cypher_query(q).unwrap(); + let mut planner = LogicalPlanner::new(); + let result = planner.plan(&ast); + assert!(result.is_err()); + match result { + Err(GraphError::PlanError { message, .. }) => { + assert!(message.contains("Variable 'a' is not assigned a node label but re-used")); + } + _ => panic!("Expected PlanError"), + } + } + + #[test] + fn test_var_reuse_label_mismatch() { + let q = "MATCH (a:Person)-[:KNOWS]->(a:Company) RETURN a.name"; + let ast = parse_cypher_query(q).unwrap(); + let mut planner = LogicalPlanner::new(); + let result = planner.plan(&ast); + assert!(result.is_err()); + match result { + Err(GraphError::PlanError { message, .. }) => { + assert!(message.contains("Variable 'a' has conflicting labels: 'Person' and 'Company'")); + } + _ => panic!("Expected PlanError"), + } + } + + #[test] + fn test_var_reuse_rel() { + let q = "MATCH (a:Person)-[a:KNOWS]->(b:Company) RETURN a.name"; + let ast = parse_cypher_query(q).unwrap(); + let mut planner = LogicalPlanner::new(); + let result = planner.plan(&ast); + assert!(result.is_err()); + match result { + Err(GraphError::PlanError { message, .. }) => { + assert!(message.contains("Variable cannot be re-used on a rel: 'a'")); + } + _ => panic!("Expected PlanError"), + } + } } From 643127824573163e44794d5ac1e51b569d171748 Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Fri, 6 Feb 2026 17:47:05 +0530 Subject: [PATCH 2/3] fix filter on re-used var --- crates/lance-graph/src/logical_plan.rs | 187 +++++++++++++++---------- crates/lance-graph/src/query.rs | 2 +- 2 files changed, 116 insertions(+), 73 deletions(-) diff --git a/crates/lance-graph/src/logical_plan.rs b/crates/lance-graph/src/logical_plan.rs index 3f67a92..3549917 100644 --- a/crates/lance-graph/src/logical_plan.rs +++ b/crates/lance-graph/src/logical_plan.rs @@ -9,6 +9,7 @@ //! Logical plans describe WHAT operations to perform, not HOW to perform them. use crate::ast::*; +use crate::config::GraphConfig; use crate::error::{GraphError, Result}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; @@ -152,12 +153,15 @@ pub struct SortItem { pub struct LogicalPlanner { /// Track variables in scope variables: HashMap, // variable -> label + /// Graph configuration for mapping + config: GraphConfig, } impl LogicalPlanner { - pub fn new() -> Self { + pub fn new(config: GraphConfig) -> Self { Self { variables: HashMap::new(), + config, } } @@ -380,9 +384,9 @@ impl LogicalPlanner { // Determine target variable and check for reuse // TODO: create error types for these cases let mut is_variable_reuse = false; - let mut original_variable_name = String::new(); + let mut original_target_variable = String::new(); - let target_label = segment + let mut target_label = segment .end_node .labels .first() @@ -403,7 +407,7 @@ impl LogicalPlanner { } if target_label != "Node" && existing_label != &target_label { - return Err(GraphError::PlanError { + return Err(GraphError::PlanError { message: format!( "Variable '{}' has conflicting labels: '{}' and '{}'", var, existing_label, target_label @@ -413,7 +417,8 @@ impl LogicalPlanner { } is_variable_reuse = true; - original_variable_name = var.clone(); + original_target_variable = var.clone(); + target_label = existing_label.clone(); // Create a unique shadow variable to avoid naming collision format!("_shadow_{}_{}", var, self.variables.len()) } else { @@ -441,8 +446,7 @@ impl LogicalPlanner { .first() .cloned() .unwrap_or_else(|| "Relationship".to_string()); - self.variables - .insert(rel_var.clone(), rel_type.clone()); + self.variables.insert(rel_var.clone(), rel_type.clone()); } // Optimize fixed-length var-length expansions (*1 or *1..1) @@ -491,9 +495,25 @@ impl LogicalPlanner { // If we reused a variable, add a filter to ensure identity equality if is_variable_reuse { let predicate = BooleanExpression::Comparison { - left: ValueExpression::Variable(original_variable_name), + left: ValueExpression::Property(PropertyRef { + variable: original_target_variable.clone(), + property: self + .config + .get_node_mapping(&target_label) + .unwrap() + .id_field + .clone(), + }), operator: ComparisonOperator::Equal, - right: ValueExpression::Variable(target_variable.clone()), + right: ValueExpression::Property(PropertyRef { + variable: target_variable.clone(), + property: self + .config + .get_node_mapping(&target_label) + .unwrap() + .id_field + .clone(), + }), }; plan = LogicalOperator::Filter { @@ -614,14 +634,14 @@ impl LogicalPlanner { impl Default for LogicalPlanner { fn default() -> Self { - Self::new() + Self::new(Default::default()) } } #[cfg(test)] mod tests { use super::*; - use crate::parser::parse_cypher_query; + use crate::{parser::parse_cypher_query, NodeMapping, RelationshipMapping}; #[test] fn test_relationship_query_logical_plan_structure() { @@ -631,7 +651,7 @@ mod tests { let ast = parse_cypher_query(query_text).unwrap(); // Plan to logical operators - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical_plan = planner.plan(&ast).unwrap(); // Verify the overall structure is a projection @@ -732,7 +752,7 @@ mod tests { let query_text = "MATCH (n:Person) RETURN n.name"; let ast = parse_cypher_query(query_text).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical_plan = planner.plan(&ast).unwrap(); // Should be: Project { input: ScanByLabel } @@ -758,7 +778,7 @@ mod tests { let query_text = "MATCH (n:Person {age: 25}) RETURN n.name"; let ast = parse_cypher_query(query_text).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical_plan = planner.plan(&ast).unwrap(); // Should be: Project { input: ScanByLabel with properties } @@ -793,7 +813,7 @@ mod tests { let query_text = "MATCH (a:Person)-[:KNOWS*1..2]->(b:Person) RETURN b.name"; let ast = parse_cypher_query(query_text).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical_plan = planner.plan(&ast).unwrap(); // Should be: Project { input: VariableLengthExpand { input: ScanByLabel } } @@ -836,7 +856,7 @@ mod tests { let query_text = r#"MATCH (n:Person) WHERE n.age > 25 RETURN n.name"#; let ast = parse_cypher_query(query_text).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical_plan = planner.plan(&ast).unwrap(); // Should be: Project { input: Filter { input: ScanByLabel } } @@ -883,7 +903,7 @@ mod tests { let query_text = "MATCH (a:Person), (b:Company) RETURN a.name, b.name"; let ast = parse_cypher_query(query_text).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical_plan = planner.plan(&ast).unwrap(); // Expect: Project { input: Join { left: Scan(a:Person), right: Scan(b:Company) } } @@ -929,7 +949,7 @@ mod tests { "MATCH (a:Person)-[:KNOWS]->(b:Person), (b)-[:LIKES]->(c:Thing) RETURN c.name"; let ast = parse_cypher_query(query_text).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical_plan = planner.plan(&ast).unwrap(); // Expect: Project { input: Expand (b->c) { input: Expand (a->b) { input: Scan(a) } } } @@ -976,7 +996,7 @@ mod tests { let query_text = "MATCH (a:Person)-[:KNOWS*1..1]->(b:Person) RETURN b.name"; let ast = parse_cypher_query(query_text).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical_plan = planner.plan(&ast).unwrap(); match &logical_plan { @@ -1000,7 +1020,7 @@ mod tests { // DISTINCT should wrap Project with Distinct let q1 = "MATCH (n:Person) RETURN DISTINCT n.name"; let ast1 = parse_cypher_query(q1).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical1 = planner.plan(&ast1).unwrap(); match logical1 { LogicalOperator::Distinct { input } => match *input { @@ -1013,7 +1033,7 @@ mod tests { // ORDER BY + LIMIT should be Limit(Sort(Project(..))) let q2 = "MATCH (n:Person) RETURN n.name ORDER BY n.name LIMIT 10"; let ast2 = parse_cypher_query(q2).unwrap(); - let mut planner2 = LogicalPlanner::new(); + let mut planner2 = LogicalPlanner::new(Default::default()); let logical2 = planner2.plan(&ast2).unwrap(); match logical2 { LogicalOperator::Limit { input, count } => { @@ -1035,7 +1055,7 @@ mod tests { // ORDER BY + SKIP + LIMIT should be Limit(Offset(Sort(Project(..)))) let q = "MATCH (n:Person) RETURN n.name ORDER BY n.name SKIP 5 LIMIT 10"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical = planner.plan(&ast).unwrap(); match logical { LogicalOperator::Limit { input, count } => { @@ -1066,7 +1086,7 @@ mod tests { // SKIP only should be Offset(Project(..)) let q = "MATCH (n:Person) RETURN n.name SKIP 3"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical = planner.plan(&ast).unwrap(); match logical { LogicalOperator::Offset { input, offset } => { @@ -1084,7 +1104,7 @@ mod tests { fn test_relationship_properties_pushed_into_expand() { let q = "MATCH (a)-[:KNOWS {since: 2020}]->(b) RETURN b"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical = planner.plan(&ast).unwrap(); match logical { LogicalOperator::Project { input, .. } => match *input { @@ -1102,7 +1122,7 @@ mod tests { fn test_multiple_match_clauses_cross_join() { let q = "MATCH (a:Person) MATCH (b:Company) RETURN a.name, b.name"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical = planner.plan(&ast).unwrap(); match logical { LogicalOperator::Project { input, .. } => match *input { @@ -1143,7 +1163,7 @@ mod tests { fn test_variable_only_node_default_label() { let q = "MATCH (x) RETURN x"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical = planner.plan(&ast).unwrap(); match logical { LogicalOperator::Project { input, .. } => match *input { @@ -1163,7 +1183,7 @@ mod tests { fn test_multi_label_node_uses_first_label() { let q = "MATCH (n:Person:Employee) RETURN n"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let logical = planner.plan(&ast).unwrap(); match logical { LogicalOperator::Project { input, .. } => match *input { @@ -1181,7 +1201,7 @@ mod tests { // * (unbounded) let q1 = "MATCH (a)-[:R*]->(b) RETURN b"; let ast1 = parse_cypher_query(q1).unwrap(); - let mut planner1 = LogicalPlanner::new(); + let mut planner1 = LogicalPlanner::new(Default::default()); let plan1 = planner1.plan(&ast1).unwrap(); match plan1 { LogicalOperator::Project { input, .. } => match *input { @@ -1201,7 +1221,7 @@ mod tests { // *2.. (min only) let q2 = "MATCH (a)-[:R*2..]->(b) RETURN b"; let ast2 = parse_cypher_query(q2).unwrap(); - let mut planner2 = LogicalPlanner::new(); + let mut planner2 = LogicalPlanner::new(Default::default()); let plan2 = planner2.plan(&ast2).unwrap(); match plan2 { LogicalOperator::Project { input, .. } => match *input { @@ -1221,7 +1241,7 @@ mod tests { // *..3 (max only) let q3 = "MATCH (a)-[:R*..3]->(b) RETURN b"; let ast3 = parse_cypher_query(q3).unwrap(); - let mut planner3 = LogicalPlanner::new(); + let mut planner3 = LogicalPlanner::new(Default::default()); let plan3 = planner3.plan(&ast3).unwrap(); match plan3 { LogicalOperator::Project { input, .. } => match *input { @@ -1243,7 +1263,31 @@ mod tests { fn test_var_reuse() { let q = "MATCH (a:Person)-[:FOLLOWS]->(a) RETURN a"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let config = GraphConfig { + default_node_id_field: "id".to_string(), + default_relationship_type_field: "type".to_string(), + node_mappings: HashMap::from([( + "person".to_string(), + NodeMapping { + label: "Person".to_string(), + id_field: "id".to_string(), + property_fields: vec![], + filter_conditions: None, + }, + )]), + relationship_mappings: HashMap::from([( + "follows".to_string(), + RelationshipMapping { + relationship_type: "FOLLOWS".to_string(), + source_id_field: "src_id".to_string(), + target_id_field: "dst_id".to_string(), + type_field: None, + property_fields: vec![], + filter_conditions: None, + }, + )]), + }; + let mut planner = LogicalPlanner::new(config); let logical = planner.plan(&ast).unwrap(); match logical { @@ -1253,7 +1297,7 @@ mod tests { predicate, input: expand_input, } => { - match predicate { + match predicate { BooleanExpression::Comparison { left, operator, @@ -1261,25 +1305,20 @@ mod tests { } => { assert_eq!(operator, ComparisonOperator::Equal); match (left, right) { - ( - ValueExpression::Variable(l), - ValueExpression::Variable(r), - ) => { - if l == "a" { - assert!(r.starts_with("_shadow_a_")); - } else if r == "a" { - assert!(l.starts_with("_shadow_a_")); - } else { - panic!("Expected comparison involving 'a' and a shadow variable"); - } + (ValueExpression::Property(left_prop), ValueExpression::Property(right_prop)) => { + // The left property should be a.id and the right should be _shadow_a.id + assert_eq!(left_prop.variable, "a"); + assert_eq!(left_prop.property, "id"); + assert!(right_prop.variable.starts_with("_shadow_a_")); + assert_eq!(right_prop.property, "id"); } - _ => panic!("Expected equality check between variables"), + _ => panic!("Expected both sides of comparison to be property references"), } } _ => panic!("Expected comparison predicate"), } - match *expand_input { + match *expand_input { LogicalOperator::Expand { source_variable, target_variable, @@ -1292,20 +1331,22 @@ mod tests { assert_eq!(relationship_types, vec!["FOLLOWS".to_string()]); match *scan_input { - LogicalOperator::ScanByLabel { variable, label, .. } => { - assert_eq!(variable, "a"); - assert_eq!(label, "Person"); - } - _ => panic!("Expected Scan"), + LogicalOperator::ScanByLabel { + variable, label, .. + } => { + assert_eq!(variable, "a"); + assert_eq!(label, "Person"); + } + _ => panic!("Expected Scan"), } } _ => panic!("Expected Expand"), - } + } } _ => panic!("Expected Filter"), } } - _ => panic!("Expected Project"), + _ => panic!("Expected Project"), } } @@ -1313,14 +1354,14 @@ mod tests { fn test_var_reuse_unlabelled_existing_var_and_new_var() { let q = "MATCH (a)-[:KNOWS]->(a) RETURN a.name"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let result = planner.plan(&ast); assert!(result.is_err()); match result { - Err(GraphError::PlanError { message, .. }) => { - assert!(message.contains("Variable 'a' is not assigned a node label but re-used")); - } - _ => panic!("Expected PlanError"), + Err(GraphError::PlanError { message, .. }) => { + assert!(message.contains("Variable 'a' is not assigned a node label but re-used")); + } + _ => panic!("Expected PlanError"), } } @@ -1328,14 +1369,14 @@ mod tests { fn test_var_reuse_unlabelled_existing_var_labelled_new_var() { let q = "MATCH (a)-[:KNOWS]->(a:Company) RETURN a.name"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let result = planner.plan(&ast); assert!(result.is_err()); match result { - Err(GraphError::PlanError { message, .. }) => { - assert!(message.contains("Variable 'a' is not assigned a node label but re-used")); - } - _ => panic!("Expected PlanError"), + Err(GraphError::PlanError { message, .. }) => { + assert!(message.contains("Variable 'a' is not assigned a node label but re-used")); + } + _ => panic!("Expected PlanError"), } } @@ -1343,14 +1384,16 @@ mod tests { fn test_var_reuse_label_mismatch() { let q = "MATCH (a:Person)-[:KNOWS]->(a:Company) RETURN a.name"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let result = planner.plan(&ast); assert!(result.is_err()); match result { - Err(GraphError::PlanError { message, .. }) => { - assert!(message.contains("Variable 'a' has conflicting labels: 'Person' and 'Company'")); - } - _ => panic!("Expected PlanError"), + Err(GraphError::PlanError { message, .. }) => { + assert!( + message.contains("Variable 'a' has conflicting labels: 'Person' and 'Company'") + ); + } + _ => panic!("Expected PlanError"), } } @@ -1358,14 +1401,14 @@ mod tests { fn test_var_reuse_rel() { let q = "MATCH (a:Person)-[a:KNOWS]->(b:Company) RETURN a.name"; let ast = parse_cypher_query(q).unwrap(); - let mut planner = LogicalPlanner::new(); + let mut planner = LogicalPlanner::new(Default::default()); let result = planner.plan(&ast); assert!(result.is_err()); match result { - Err(GraphError::PlanError { message, .. }) => { - assert!(message.contains("Variable cannot be re-used on a rel: 'a'")); - } - _ => panic!("Expected PlanError"), + Err(GraphError::PlanError { message, .. }) => { + assert!(message.contains("Variable cannot be re-used on a rel: 'a'")); + } + _ => panic!("Expected PlanError"), } } } diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index 8f5f017..ae5a700 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -778,7 +778,7 @@ impl CypherQuery { } // Phase 2: Graph Logical Plan - let mut logical_planner = LogicalPlanner::new(); + let mut logical_planner = LogicalPlanner::new(config.clone()); let logical_plan = logical_planner.plan(&self.ast)?; // Phase 3: DataFusion Logical Plan From 2ae48fca8a023cff8e2bccf2c096b0e75b2447fb Mon Sep 17 00:00:00 2001 From: Ally Heev Date: Fri, 6 Feb 2026 19:45:20 +0530 Subject: [PATCH 3/3] add system test --- .../lance-graph/tests/test_var_reuse_match.rs | 212 ++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 crates/lance-graph/tests/test_var_reuse_match.rs diff --git a/crates/lance-graph/tests/test_var_reuse_match.rs b/crates/lance-graph/tests/test_var_reuse_match.rs new file mode 100644 index 0000000..c610b62 --- /dev/null +++ b/crates/lance-graph/tests/test_var_reuse_match.rs @@ -0,0 +1,212 @@ +use arrow_array::{Int64Array, RecordBatch, StringArray}; +use arrow_schema::{DataType, Field, Schema}; +use lance_graph::config::GraphConfig; +use lance_graph::{CypherQuery, ExecutionStrategy}; +use std::collections::HashMap; +use std::sync::Arc; + +// This test suite validates variable reuse scenarios in MATCH clauses +// +// Datasets used: +// +// Person Dataset: +// | id | name | +// |----|---------| +// | 1 | Alice | +// | 2 | Bob | +// | 3 | Charlie | +// +// Company Dataset: +// | id | name | +// |----|--------| +// | 10 | Acme | +// | 11 | Globex | +// +// WORKS_AT Relationship: +// | src | dst | +// |-----|-----| +// | 1 | 1 | Alice -> Alice (Self-employment/Loop) +// | 1 | 2 | Alice -> Bob +// | 2 | 10 | Bob -> Acme +// +// Scenarios Tested: +// 1. Valid Reuse: Matching a node that connects to itself (cyclic/self-loop) +// 2. Invalid Reuse (Unlabelled): Reusing a variable without defining its label in its first usage +// 3. Label Mismatch: Reusing a variable with a contradicting label (e.g., Person vs Company) +// 4. Type Mismatch: Reusing a node variable as a relationship variable + +/// Helper to create Person dataset +fn create_person_dataset() -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + ])); + + RecordBatch::try_new( + schema, + vec![ + Arc::new(Int64Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + ], + ) + .unwrap() +} + +/// Helper to create Company dataset +fn create_company_dataset() -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + ])); + + RecordBatch::try_new( + schema, + vec![ + Arc::new(Int64Array::from(vec![10, 11])), + Arc::new(StringArray::from(vec!["Acme", "Globex"])), + ], + ) + .unwrap() +} + +/// Helper to create WORKS_AT relationship +fn create_works_at_dataset() -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("src_id", DataType::Int64, false), + Field::new("dst_id", DataType::Int64, false), + ])); + + RecordBatch::try_new( + schema, + vec![ + Arc::new(Int64Array::from(vec![1, 1, 2])), + Arc::new(Int64Array::from(vec![1, 2, 10])), // Alice works at Alice, Alice works at Bob, Bob works at Acme + ], + ) + .unwrap() +} + +fn create_graph_config() -> GraphConfig { + GraphConfig::builder() + .with_node_label("Person", "id") + .with_node_label("Company", "id") + .with_relationship("WORKS_AT", "src_id", "dst_id") + .build() + .unwrap() +} + +#[tokio::test] +async fn test_var_reuse() { + let config = create_graph_config(); + let person_batch = create_person_dataset(); + let works_at_batch = create_works_at_dataset(); + + // MATCH (a:Person)-[:WORKS_AT]->(a) RETURN a.name + // Should find Alice (works at herself) + let query = CypherQuery::new( + "MATCH (a:Person)-[:WORKS_AT]->(a) RETURN a.name", + ) + .unwrap() + .with_config(config); + + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + datasets.insert("WORKS_AT".to_string(), works_at_batch); + + let out = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + assert_eq!(out.num_rows(), 1); + let names = out.column(0).as_any().downcast_ref::().unwrap(); + assert_eq!(names.value(0), "Alice"); +} + +#[tokio::test] +async fn test_var_reuse_unlabelled_existing_var_and_new_var() { + let config = create_graph_config(); + + let query = CypherQuery::new( + "MATCH (a)-[:WORKS_AT]->(a) RETURN a.name", + ) + .unwrap() + .with_config(config); + + let person_batch = create_person_dataset(); + let works_at_batch = create_works_at_dataset(); + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + datasets.insert("WORKS_AT".to_string(), works_at_batch); + + let result = query + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await; + + assert!(result.is_err()); + let err_msg = format!("{}", result.err().unwrap()); + assert!(err_msg.contains("Variable 'a' is not assigned a node label but re-used")); +} + +#[tokio::test] +async fn test_var_reuse_unlabelled_existing_var_labelled_new_var() { + let config = create_graph_config(); + let query = CypherQuery::new( + "MATCH (a)-[:WORKS_AT]->(a:Company) RETURN a.name", + ).unwrap().with_config(config); + + let person_batch = create_person_dataset(); + let works_at_batch = create_works_at_dataset(); + let company_batch = create_company_dataset(); + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + datasets.insert("WORKS_AT".to_string(), works_at_batch); + datasets.insert("Company".to_string(), company_batch); + + let result = query.execute(datasets, Some(ExecutionStrategy::DataFusion)).await; + assert!(result.is_err()); + let err_msg = format!("{}", result.err().unwrap()); + assert!(err_msg.contains("Variable 'a' is not assigned a node label but re-used")); +} + +#[tokio::test] +async fn test_var_reuse_label_mismatch() { + let config = create_graph_config(); + let query = CypherQuery::new( + "MATCH (a:Person)-[:WORKS_AT]->(a:Company) RETURN a.name", + ).unwrap().with_config(config); + + let person_batch = create_person_dataset(); + let works_at_batch = create_works_at_dataset(); + let company_batch = create_company_dataset(); + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + datasets.insert("WORKS_AT".to_string(), works_at_batch); + datasets.insert("Company".to_string(), company_batch); + + let result = query.execute(datasets, Some(ExecutionStrategy::DataFusion)).await; + assert!(result.is_err()); + let err_msg = format!("{}", result.err().unwrap()); + assert!(err_msg.contains("Variable 'a' has conflicting labels: 'Person' and 'Company'")); +} + +#[tokio::test] +async fn test_var_reuse_rel() { + let config = create_graph_config(); + let query = CypherQuery::new( + "MATCH (a:Person)-[a:WORKS_AT]->(b:Company) RETURN a.name", + ).unwrap().with_config(config); + + let person_batch = create_person_dataset(); + let works_at_batch = create_works_at_dataset(); + let company_batch = create_company_dataset(); + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + datasets.insert("WORKS_AT".to_string(), works_at_batch); + datasets.insert("Company".to_string(), company_batch); + + let result = query.execute(datasets, Some(ExecutionStrategy::DataFusion)).await; + assert!(result.is_err()); + let err_msg = format!("{}", result.err().unwrap()); + assert!(err_msg.contains("Variable cannot be re-used on a rel: 'a'") || err_msg.contains("Variable 'a' redefined with different type")); +}