From aace748bf15e97415e68dd0b2df089dedf8143e9 Mon Sep 17 00:00:00 2001 From: JoshuaTang <1240604020@qq.com> Date: Sat, 31 Jan 2026 09:58:50 -0800 Subject: [PATCH 1/5] feat: implement case-insensitive support --- crates/lance-graph/src/case_insensitive.rs | 377 ++++++++++++++++++ crates/lance-graph/src/config.rs | 184 ++++++++- .../datafusion_planner/builder/basic_ops.rs | 3 +- .../datafusion_planner/builder/expand_ops.rs | 9 +- .../builder/join_builder.rs | 20 +- .../src/datafusion_planner/config_helpers.rs | 15 +- .../src/datafusion_planner/expression.rs | 16 +- .../src/datafusion_planner/join_ops.rs | 30 +- .../src/datafusion_planner/scan_ops.rs | 65 +-- crates/lance-graph/src/lib.rs | 1 + crates/lance-graph/src/query.rs | 98 +++-- crates/lance-graph/src/semantic.rs | 39 +- .../src/simple_executor/aliases.rs | 13 +- .../src/simple_executor/path_executor.rs | 43 +- crates/lance-graph/src/source_catalog.rs | 18 +- .../tests/test_case_insensitivity.rs | 307 ++++++++++++++ .../tests/test_datafusion_with_context.rs | 4 +- 17 files changed, 1083 insertions(+), 159 deletions(-) create mode 100644 crates/lance-graph/src/case_insensitive.rs create mode 100644 crates/lance-graph/tests/test_case_insensitivity.rs diff --git a/crates/lance-graph/src/case_insensitive.rs b/crates/lance-graph/src/case_insensitive.rs new file mode 100644 index 0000000..048a3c7 --- /dev/null +++ b/crates/lance-graph/src/case_insensitive.rs @@ -0,0 +1,377 @@ +//! Case-insensitive string utilities +//! +//! We use case-insensitive identifiers throughout to provide a SQL-like, +//! forgiving user experience. +//! +//! All identifiers (labels, properties, relationships) are case-insensitive, +//! meaning `Person`, `PERSON`, and `person` all refer to the same entity. + +use std::collections::HashMap; +use std::hash::{Hash, Hasher}; + +/// A string wrapper that performs case-insensitive comparisons and hashing. +/// +/// Internally stores the lowercase version for efficient comparison. +/// This is the core building block for case-insensitive identifier handling. +/// +/// # Examples +/// +/// ``` +/// use lance_graph::case_insensitive::CaseInsensitiveStr; +/// +/// let a = CaseInsensitiveStr::new("Person"); +/// let b = CaseInsensitiveStr::new("person"); +/// let c = CaseInsensitiveStr::new("PERSON"); +/// +/// assert_eq!(a, b); +/// assert_eq!(b, c); +/// ``` +#[derive(Debug, Clone)] +pub struct CaseInsensitiveStr { + normalized: String, // Always lowercase +} + +impl CaseInsensitiveStr { + /// Create a new case-insensitive string from any string-like type. + /// + /// The input is immediately converted to lowercase for storage. + pub fn new(s: impl Into) -> Self { + Self { + normalized: s.into().to_lowercase(), + } + } + + /// Get the normalized (lowercase) string representation. + pub fn as_str(&self) -> &str { + &self.normalized + } +} + +impl PartialEq for CaseInsensitiveStr { + fn eq(&self, other: &Self) -> bool { + self.normalized == other.normalized + } +} + +impl Eq for CaseInsensitiveStr {} + +impl Hash for CaseInsensitiveStr { + fn hash(&self, state: &mut H) { + self.normalized.hash(state); + } +} + +impl From<&str> for CaseInsensitiveStr { + fn from(s: &str) -> Self { + Self::new(s) + } +} + +impl From for CaseInsensitiveStr { + fn from(s: String) -> Self { + Self::new(s) + } +} + +impl AsRef for CaseInsensitiveStr { + fn as_ref(&self) -> &str { + &self.normalized + } +} + +/// Case-insensitive HashMap type alias for convenience +/// +/// Use this type when you need a HashMap that performs case-insensitive +/// key lookups throughout the system. +pub type CaseInsensitiveMap = HashMap; + +/// Create a qualified column name for internal DataFusion operations. +/// +/// Returns format: `alias__property` (e.g., "p__name"). +/// Both alias and property are normalized to lowercase for case-insensitive behavior. +/// +/// This is the central utility for creating qualified column names throughout +/// the codebase. All join keys, scan projections, and expression translations +/// should use this function to ensure consistent case-insensitive behavior. +/// +/// # Examples +/// +/// ``` +/// use lance_graph::case_insensitive::qualify_column; +/// +/// assert_eq!(qualify_column("Person", "Name"), "person__name"); +/// assert_eq!(qualify_column("p", "fullName"), "p__fullname"); +/// ``` +#[inline] +pub fn qualify_column(alias: &str, property: &str) -> String { + format!("{}__{}", alias.to_lowercase(), property.to_lowercase()) +} + +/// Helper trait for case-insensitive lookups on standard HashMap +/// +/// This trait provides extension methods for performing case-insensitive +/// lookups on existing String-keyed HashMaps without requiring migration +/// to CaseInsensitiveMap. +/// +/// # Performance +/// +/// - **Best case**: O(1) when exact case matches (uses HashMap's fast path) +/// - **Worst case**: O(n) when case differs (iterates all keys to find match) +/// +/// **For hot paths** (executed per-row or frequently), store normalized (lowercase) +/// keys in the HashMap to guarantee O(1) lookups. This trait is most appropriate for: +/// - Small HashMaps (< 100 entries) +/// - Cold paths (planning phase, executed once per query) +/// - Cases where preserving original case in keys is important +/// +/// See `SemanticAnalyzer::variables` for an example of the optimized pattern where +/// keys are normalized to lowercase on insertion, ensuring all lookups hit the O(1) fast path. +/// +/// # Examples +/// +/// ``` +/// use std::collections::HashMap; +/// use lance_graph::case_insensitive::CaseInsensitiveLookup; +/// +/// let mut map = HashMap::new(); +/// map.insert("Person".to_string(), 1); +/// +/// assert_eq!(map.get_ci("person"), Some(&1)); +/// assert_eq!(map.get_ci("PERSON"), Some(&1)); +/// assert_eq!(map.get_ci("Person"), Some(&1)); +/// ``` +pub trait CaseInsensitiveLookup { + /// Get a value with case-insensitive key lookup. + /// + /// Returns `Some(&V)` if a key matches (case-insensitively), `None` otherwise. + fn get_ci(&self, key: &str) -> Option<&V>; + + /// Check if a key exists with case-insensitive lookup. + fn contains_key_ci(&self, key: &str) -> bool; + + /// Get a mutable reference with case-insensitive key lookup. + fn get_mut_ci(&mut self, key: &str) -> Option<&mut V>; +} + +impl CaseInsensitiveLookup for HashMap { + fn get_ci(&self, key: &str) -> Option<&V> { + // Try exact match first (fast path for common case) + if let Some(v) = self.get(key) { + return Some(v); + } + // Fall back to case-insensitive search + let key_lower = key.to_lowercase(); + self.iter() + .find(|(k, _)| k.to_lowercase() == key_lower) + .map(|(_, v)| v) + } + + fn contains_key_ci(&self, key: &str) -> bool { + self.get_ci(key).is_some() + } + + fn get_mut_ci(&mut self, key: &str) -> Option<&mut V> { + // Find the actual key first + let key_lower = key.to_lowercase(); + let actual_key = self.keys().find(|k| k.to_lowercase() == key_lower).cloned(); + + // Then get mutable reference using the actual key + actual_key.and_then(|k| self.get_mut(&k)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_case_insensitive_str_equality() { + let a = CaseInsensitiveStr::new("Person"); + let b = CaseInsensitiveStr::new("person"); + let c = CaseInsensitiveStr::new("PERSON"); + let d = CaseInsensitiveStr::new("PeRsOn"); + + assert_eq!(a, b); + assert_eq!(b, c); + assert_eq!(a, c); + assert_eq!(c, d); + } + + #[test] + fn test_case_insensitive_str_inequality() { + let a = CaseInsensitiveStr::new("Person"); + let b = CaseInsensitiveStr::new("Company"); + + assert_ne!(a, b); + } + + #[test] + fn test_case_insensitive_str_hash() { + use std::collections::HashSet; + + let mut set = HashSet::new(); + set.insert(CaseInsensitiveStr::new("Person")); + + // All variations should be found + assert!(set.contains(&CaseInsensitiveStr::new("person"))); + assert!(set.contains(&CaseInsensitiveStr::new("PERSON"))); + assert!(set.contains(&CaseInsensitiveStr::new("Person"))); + + // Different value should not be found + assert!(!set.contains(&CaseInsensitiveStr::new("Company"))); + } + + #[test] + fn test_case_insensitive_map() { + let mut map: CaseInsensitiveMap = HashMap::new(); + map.insert(CaseInsensitiveStr::new("Person"), 1); + map.insert(CaseInsensitiveStr::new("Company"), 2); + + // Test various cases + assert_eq!(map.get(&CaseInsensitiveStr::new("person")), Some(&1)); + assert_eq!(map.get(&CaseInsensitiveStr::new("PERSON")), Some(&1)); + assert_eq!(map.get(&CaseInsensitiveStr::new("Person")), Some(&1)); + assert_eq!(map.get(&CaseInsensitiveStr::new("PeRsOn")), Some(&1)); + + assert_eq!(map.get(&CaseInsensitiveStr::new("company")), Some(&2)); + assert_eq!(map.get(&CaseInsensitiveStr::new("COMPANY")), Some(&2)); + + assert_eq!(map.get(&CaseInsensitiveStr::new("Unknown")), None); + } + + #[test] + fn test_case_insensitive_lookup_trait() { + let mut map = HashMap::new(); + map.insert("Person".to_string(), 1); + map.insert("Company".to_string(), 2); + map.insert("fullName".to_string(), 3); + + // Test get_ci + assert_eq!(map.get_ci("person"), Some(&1)); + assert_eq!(map.get_ci("PERSON"), Some(&1)); + assert_eq!(map.get_ci("Person"), Some(&1)); + assert_eq!(map.get_ci("PeRsOn"), Some(&1)); + + assert_eq!(map.get_ci("company"), Some(&2)); + assert_eq!(map.get_ci("COMPANY"), Some(&2)); + + assert_eq!(map.get_ci("fullname"), Some(&3)); + assert_eq!(map.get_ci("FULLNAME"), Some(&3)); + assert_eq!(map.get_ci("FullName"), Some(&3)); + + assert_eq!(map.get_ci("Unknown"), None); + + // Test contains_key_ci + assert!(map.contains_key_ci("person")); + assert!(map.contains_key_ci("COMPANY")); + assert!(map.contains_key_ci("FullName")); + assert!(!map.contains_key_ci("Unknown")); + } + + #[test] + fn test_case_insensitive_lookup_exact_match_fast_path() { + let mut map = HashMap::new(); + map.insert("Person".to_string(), 1); + + // Exact match should use fast path + assert_eq!(map.get_ci("Person"), Some(&1)); + + // Case variations should still work + assert_eq!(map.get_ci("person"), Some(&1)); + assert_eq!(map.get_ci("PERSON"), Some(&1)); + } + + #[test] + fn test_case_insensitive_str_as_str() { + let s = CaseInsensitiveStr::new("Person"); + assert_eq!(s.as_str(), "person"); // Stored as lowercase + } + + #[test] + fn test_case_insensitive_str_from_string() { + let s = String::from("Person"); + let ci_str: CaseInsensitiveStr = s.into(); + assert_eq!(ci_str.as_str(), "person"); + } + + #[test] + fn test_case_insensitive_str_from_str() { + let ci_str: CaseInsensitiveStr = "Person".into(); + assert_eq!(ci_str.as_str(), "person"); + } + + #[test] + fn test_case_insensitive_map_insertion_deduplication() { + let mut map: CaseInsensitiveMap = HashMap::new(); + + // Insert with different cases - should overwrite + map.insert(CaseInsensitiveStr::new("Person"), 1); + map.insert(CaseInsensitiveStr::new("person"), 2); + map.insert(CaseInsensitiveStr::new("PERSON"), 3); + + // Should have only one entry with the latest value + assert_eq!(map.len(), 1); + assert_eq!(map.get(&CaseInsensitiveStr::new("person")), Some(&3)); + } + + #[test] + fn test_get_mut_ci() { + let mut map = HashMap::new(); + map.insert("Person".to_string(), 1); + map.insert("Company".to_string(), 2); + + // Test mutable access with different cases + if let Some(v) = map.get_mut_ci("person") { + *v = 10; + } + assert_eq!(map.get_ci("Person"), Some(&10)); + + if let Some(v) = map.get_mut_ci("COMPANY") { + *v = 20; + } + assert_eq!(map.get_ci("company"), Some(&20)); + + // Non-existent key + assert!(map.get_mut_ci("Unknown").is_none()); + } + + #[test] + fn test_property_name_normalization() { + // Test realistic property names from Issue #105 + let mut map = HashMap::new(); + map.insert("fullName".to_string(), 1); + map.insert("isActive".to_string(), 2); + map.insert("numFollowers".to_string(), 3); + + // All variations should work + assert_eq!(map.get_ci("fullname"), Some(&1)); + assert_eq!(map.get_ci("FULLNAME"), Some(&1)); + assert_eq!(map.get_ci("FullName"), Some(&1)); + + assert_eq!(map.get_ci("isactive"), Some(&2)); + assert_eq!(map.get_ci("ISACTIVE"), Some(&2)); + assert_eq!(map.get_ci("IsActive"), Some(&2)); + + assert_eq!(map.get_ci("numfollowers"), Some(&3)); + assert_eq!(map.get_ci("NUMFOLLOWERS"), Some(&3)); + assert_eq!(map.get_ci("NumFollowers"), Some(&3)); + } + + #[test] + fn test_qualify_column() { + use super::qualify_column; + + // Basic usage + assert_eq!(qualify_column("p", "name"), "p__name"); + assert_eq!(qualify_column("person", "age"), "person__age"); + + // Case normalization + assert_eq!(qualify_column("P", "Name"), "p__name"); + assert_eq!(qualify_column("PERSON", "AGE"), "person__age"); + assert_eq!(qualify_column("Person", "fullName"), "person__fullname"); + + // Mixed case + assert_eq!(qualify_column("MyVar", "IsActive"), "myvar__isactive"); + assert_eq!(qualify_column("a", "NumFollowers"), "a__numfollowers"); + } +} diff --git a/crates/lance-graph/src/config.rs b/crates/lance-graph/src/config.rs index b565475..3cde41b 100644 --- a/crates/lance-graph/src/config.rs +++ b/crates/lance-graph/src/config.rs @@ -8,14 +8,50 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; /// Configuration for mapping Lance datasets to property graphs +/// +/// # Important: Case-Insensitive Keys +/// +/// **WARNING**: `node_mappings` and `relationship_mappings` store keys as **lowercase** for +/// case-insensitive behavior. If you directly insert into these maps, you **must** normalize +/// keys to lowercase to maintain invariants. +/// +/// **Recommended**: Use `GraphConfigBuilder` instead of direct field access. The builder +/// automatically normalizes keys. +/// +/// # Future API Changes +/// +/// These fields may become private in a future major version to enforce invariants. +/// Code should migrate to using accessor methods (`get_node_mapping()`, `get_relationship_mapping()`) +/// rather than direct field access. +/// +/// # TODO: API Safety +/// +/// TODO: Make `node_mappings` and `relationship_mappings` private to prevent external code +/// from bypassing key normalization. This would require: +/// 1. Adding iterator methods for external access (e.g., `iter_node_mappings()`) +/// 2. Making this a breaking API change in next major version +/// 3. Ensuring all internal code uses accessor methods #[derive(Debug, Clone, Serialize, Deserialize)] pub struct GraphConfig { /// Mapping of node labels to their field configurations + /// + /// **Keys are stored as lowercase** for case-insensitive lookups. + /// Use `get_node_mapping()` for lookups instead of direct access. + /// + /// TODO: Make this private to enforce key normalization invariants pub node_mappings: HashMap, - /// Mapping of relationship types to their field configurations + + /// Mapping of relationship types to their field configurations + /// + /// **Keys are stored as lowercase** for case-insensitive lookups. + /// Use `get_relationship_mapping()` for lookups instead of direct access. + /// + /// TODO: Make this private to enforce key normalization invariants pub relationship_mappings: HashMap, + /// Default node ID field if not specified in mappings pub default_node_id_field: String, + /// Default relationship type field if not specified in mappings pub default_relationship_type_field: String, } @@ -67,20 +103,43 @@ impl GraphConfig { GraphConfigBuilder::new() } - /// Get node mapping for a given label + /// Get node mapping for a given label (case-insensitive) + /// + /// Looks up the node mapping using case-insensitive comparison. + /// For example, "Person", "PERSON", and "person" all refer to the same label. pub fn get_node_mapping(&self, label: &str) -> Option<&NodeMapping> { - self.node_mappings.get(label) + self.node_mappings.get(&label.to_lowercase()) } - /// Get relationship mapping for a given type + /// Get relationship mapping for a given type (case-insensitive) + /// + /// Looks up the relationship mapping using case-insensitive comparison. + /// For example, "FOLLOWS", "follows", and "Follows" all refer to the same type. pub fn get_relationship_mapping(&self, rel_type: &str) -> Option<&RelationshipMapping> { - self.relationship_mappings.get(rel_type) + self.relationship_mappings.get(&rel_type.to_lowercase()) } /// Validate the configuration + /// + /// Checks for: + /// - Empty ID fields + /// - Non-normalized keys (must be lowercase) + /// - Case-insensitive duplicates pub fn validate(&self) -> Result<()> { - // Check for conflicting field names + // Validate node mappings for (label, mapping) in &self.node_mappings { + // Check that keys are normalized (lowercase) + if label != &label.to_lowercase() { + return Err(GraphError::ConfigError { + message: format!( + "Node mapping key '{}' is not normalized. \ + Keys must be lowercase. Use GraphConfigBuilder to ensure proper normalization.", + label + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + if mapping.id_field.is_empty() { return Err(GraphError::ConfigError { message: format!("Node mapping for '{}' has empty id_field", label), @@ -89,7 +148,20 @@ impl GraphConfig { } } + // Validate relationship mappings for (rel_type, mapping) in &self.relationship_mappings { + // Check that keys are normalized (lowercase) + if rel_type != &rel_type.to_lowercase() { + return Err(GraphError::ConfigError { + message: format!( + "Relationship mapping key '{}' is not normalized. \ + Keys must be lowercase. Use GraphConfigBuilder to ensure proper normalization.", + rel_type + ), + location: snafu::Location::new(file!(), line!(), column!()), + }); + } + if mapping.source_id_field.is_empty() || mapping.target_id_field.is_empty() { return Err(GraphError::ConfigError { message: format!( @@ -121,12 +193,16 @@ impl GraphConfigBuilder { } /// Add a node label mapping + /// + /// Note: Labels are case-insensitive. If you add "Person" and "person", the second + /// will overwrite the first. Keys are stored as lowercase to prevent duplicates. pub fn with_node_label>(mut self, label: S, id_field: S) -> Self { let label_str = label.into(); + let normalized_key = label_str.to_lowercase(); self.node_mappings.insert( - label_str.clone(), + normalized_key, NodeMapping { - label: label_str, + label: label_str, // Keep original case for display id_field: id_field.into(), property_fields: Vec::new(), filter_conditions: None, @@ -137,7 +213,8 @@ impl GraphConfigBuilder { /// Add a node mapping with additional configuration pub fn with_node_mapping(mut self, mapping: NodeMapping) -> Self { - self.node_mappings.insert(mapping.label.clone(), mapping); + let normalized_key = mapping.label.to_lowercase(); + self.node_mappings.insert(normalized_key, mapping); self } @@ -149,10 +226,11 @@ impl GraphConfigBuilder { target_field: S, ) -> Self { let type_str = rel_type.into(); + let normalized_key = type_str.to_lowercase(); self.relationship_mappings.insert( - type_str.clone(), + normalized_key, RelationshipMapping { - relationship_type: type_str, + relationship_type: type_str, // Keep original case for display source_id_field: source_field.into(), target_id_field: target_field.into(), type_field: None, @@ -165,8 +243,8 @@ impl GraphConfigBuilder { /// Add a relationship mapping with additional configuration pub fn with_relationship_mapping(mut self, mapping: RelationshipMapping) -> Self { - self.relationship_mappings - .insert(mapping.relationship_type.clone(), mapping); + let normalized_key = mapping.relationship_type.to_lowercase(); + self.relationship_mappings.insert(normalized_key, mapping); self } @@ -305,4 +383,84 @@ mod tests { assert_eq!(mapping.property_fields.len(), 2); assert!(mapping.filter_conditions.is_some()); } + + #[test] + fn test_case_insensitive_node_label_lookup() { + // Test that node label lookups are case-insensitive + let config = GraphConfig::builder() + .with_node_label("Person", "person_id") + .with_node_label("Company", "company_id") + .build() + .unwrap(); + + // All case variations should work + assert!(config.get_node_mapping("Person").is_some()); + assert!(config.get_node_mapping("person").is_some()); + assert!(config.get_node_mapping("PERSON").is_some()); + assert!(config.get_node_mapping("PeRsOn").is_some()); + + assert!(config.get_node_mapping("Company").is_some()); + assert!(config.get_node_mapping("company").is_some()); + assert!(config.get_node_mapping("COMPANY").is_some()); + + // Non-existent labels should return None + assert!(config.get_node_mapping("Unknown").is_none()); + assert!(config.get_node_mapping("unknown").is_none()); + + // Verify we get the same mapping regardless of case + let mapping1 = config.get_node_mapping("Person").unwrap(); + let mapping2 = config.get_node_mapping("person").unwrap(); + let mapping3 = config.get_node_mapping("PERSON").unwrap(); + + assert_eq!(mapping1.id_field, mapping2.id_field); + assert_eq!(mapping2.id_field, mapping3.id_field); + assert_eq!(mapping1.id_field, "person_id"); + } + + #[test] + fn test_case_insensitive_relationship_type_lookup() { + // Test that relationship type lookups are case-insensitive + let config = GraphConfig::builder() + .with_relationship("FOLLOWS", "src_id", "dst_id") + .with_relationship("WORKS_FOR", "person_id", "company_id") + .build() + .unwrap(); + + // All case variations should work + assert!(config.get_relationship_mapping("FOLLOWS").is_some()); + assert!(config.get_relationship_mapping("follows").is_some()); + assert!(config.get_relationship_mapping("Follows").is_some()); + assert!(config.get_relationship_mapping("FoLlOwS").is_some()); + + assert!(config.get_relationship_mapping("WORKS_FOR").is_some()); + assert!(config.get_relationship_mapping("works_for").is_some()); + assert!(config.get_relationship_mapping("Works_For").is_some()); + + // Non-existent types should return None + assert!(config.get_relationship_mapping("UNKNOWN").is_none()); + assert!(config.get_relationship_mapping("unknown").is_none()); + + // Verify we get the same mapping regardless of case + let mapping1 = config.get_relationship_mapping("FOLLOWS").unwrap(); + let mapping2 = config.get_relationship_mapping("follows").unwrap(); + let mapping3 = config.get_relationship_mapping("Follows").unwrap(); + + assert_eq!(mapping1.source_id_field, mapping2.source_id_field); + assert_eq!(mapping2.source_id_field, mapping3.source_id_field); + assert_eq!(mapping1.source_id_field, "src_id"); + } + + #[test] + fn test_duplicate_label_different_case_should_overwrite() { + let builder = GraphConfig::builder() + .with_node_label("Person", "id") + .with_node_label("person", "id2"); // Should overwrite first entry + + // Should have only 1 entry (second overwrites first due to lowercase key normalization) + assert_eq!(builder.node_mappings.len(), 1); + + // The second one should have won (id2) + let mapping = builder.node_mappings.get("person").unwrap(); + assert_eq!(mapping.id_field, "id2"); + } } diff --git a/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs b/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs index 59f5807..8f1c893 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/basic_ops.rs @@ -55,8 +55,9 @@ impl DataFusionPlanner { .map(|p| { let expr = super::super::expression::to_df_value_expr(&p.expression); // Apply alias if provided, otherwise use Cypher dot notation + // Normalize alias to lowercase for case-insensitive behavior if let Some(alias) = &p.alias { - expr.alias(alias) + expr.alias(alias.to_lowercase()) } else { // Convert to Cypher dot notation (e.g., p__name -> p.name) let cypher_name = diff --git a/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs b/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs index 7e31bae..85231ce 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/expand_ops.rs @@ -40,7 +40,8 @@ impl DataFusionPlanner { }; let rel_instance = ctx.next_relationship_instance(rel_type)?; - let Some(rel_map) = self.config.relationship_mappings.get(rel_type) else { + // Use case-insensitive lookups + let Some(rel_map) = self.config.get_relationship_mapping(rel_type) else { return Ok(left_plan); }; @@ -48,7 +49,7 @@ impl DataFusionPlanner { return Ok(left_plan); }; - let Some(node_map) = self.config.node_mappings.get(src_label) else { + let Some(node_map) = self.config.get_node_mapping(src_label) else { return Ok(left_plan); }; @@ -70,8 +71,8 @@ impl DataFusionPlanner { }; let builder = self.join_source_to_relationship(left_plan, rel_scan, &source_params)?; - // Join relationship with target node using the explicit target_label - let target_node_map = self.config.node_mappings.get(target_label).ok_or_else(|| { + // Join relationship with target node using the explicit target_label (case-insensitive) + let target_node_map = self.config.get_node_mapping(target_label).ok_or_else(|| { crate::error::GraphError::ConfigError { message: format!("No mapping found for target label: {}", target_label), location: snafu::Location::new(file!(), line!(), column!()), diff --git a/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs b/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs index df676d7..019d771 100644 --- a/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs +++ b/crates/lance-graph/src/datafusion_planner/builder/join_builder.rs @@ -3,6 +3,7 @@ //! Join inference and building +use crate::case_insensitive::qualify_column; use crate::datafusion_planner::analysis::PlanningContext; use crate::datafusion_planner::DataFusionPlanner; use crate::error::Result; @@ -163,14 +164,13 @@ impl DataFusionPlanner { for var in &shared_vars { // Try to resolve as a node variable first if let Some(label) = ctx.analysis.var_to_label.get(var) { - // This is a node variable - get the node mapping for its label - if let Some(node_map) = self.config.node_mappings.get(label) { + // This is a node variable - get the node mapping for its label (case-insensitive) + if let Some(node_map) = self.config.get_node_mapping(label) { // Generate qualified column names for node ID // Example: var="b", id_field="id" -> "b__id" - let left_key = format!("{}__{}", var, node_map.id_field); - let right_key = format!("{}__{}", var, node_map.id_field); - left_keys.push(left_key); - right_keys.push(right_key); + let key = qualify_column(var, &node_map.id_field); + left_keys.push(key.clone()); + right_keys.push(key); } } else { // Not a node variable - check if it's a relationship variable @@ -192,10 +192,10 @@ impl DataFusionPlanner { // The columns are qualified as: {alias}__{original_field_name} // Example: var="r", source_id_field="src_person_id" // -> "r__src_person_id" - let left_src = format!("{}__{}", var, &rel_map.source_id_field); - let right_src = format!("{}__{}", var, &rel_map.source_id_field); - let left_dst = format!("{}__{}", var, &rel_map.target_id_field); - let right_dst = format!("{}__{}", var, &rel_map.target_id_field); + let left_src = qualify_column(var, &rel_map.source_id_field); + let right_src = qualify_column(var, &rel_map.source_id_field); + let left_dst = qualify_column(var, &rel_map.target_id_field); + let right_dst = qualify_column(var, &rel_map.target_id_field); left_keys.push(left_src); right_keys.push(right_src); diff --git a/crates/lance-graph/src/datafusion_planner/config_helpers.rs b/crates/lance-graph/src/datafusion_planner/config_helpers.rs index b2c4a86..c8172b7 100644 --- a/crates/lance-graph/src/datafusion_planner/config_helpers.rs +++ b/crates/lance-graph/src/datafusion_planner/config_helpers.rs @@ -14,11 +14,10 @@ use crate::source_catalog::GraphSourceCatalog; use std::sync::Arc; impl DataFusionPlanner { - /// Get relationship mapping from config + /// Get relationship mapping from config (case-insensitive) pub(crate) fn get_relationship_mapping(&self, rel_type: &str) -> Result<&RelationshipMapping> { self.config - .relationship_mappings - .get(rel_type) + .get_relationship_mapping(rel_type) .ok_or_else(|| crate::error::GraphError::ConfigError { message: format!("No mapping found for relationship type: {}", rel_type), location: snafu::Location::new(file!(), line!(), column!()), @@ -79,14 +78,12 @@ impl DataFusionPlanner { }); }; - let node_map = self - .config - .node_mappings - .get(&target_label) - .ok_or_else(|| crate::error::GraphError::ConfigError { + let node_map = self.config.get_node_mapping(&target_label).ok_or_else(|| { + crate::error::GraphError::ConfigError { message: format!("No mapping found for node label: {}", target_label), location: snafu::Location::new(file!(), line!(), column!()), - })?; + } + })?; Ok((target_label, node_map)) } diff --git a/crates/lance-graph/src/datafusion_planner/expression.rs b/crates/lance-graph/src/datafusion_planner/expression.rs index 2ba65e3..2f4dd7e 100644 --- a/crates/lance-graph/src/datafusion_planner/expression.rs +++ b/crates/lance-graph/src/datafusion_planner/expression.rs @@ -6,6 +6,7 @@ //! Converts AST expressions to DataFusion expressions use crate::ast::{BooleanExpression, PropertyValue, ValueExpression}; +use crate::case_insensitive::qualify_column; use crate::datafusion_planner::udf; use datafusion::functions::string::lower; use datafusion::functions::string::upper; @@ -110,11 +111,10 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { use crate::ast::{PropertyValue as PV, ValueExpression as VE}; match expr { VE::Property(prop) => { - // Create qualified column name: variable__property - let qualified_name = format!("{}__{}", prop.variable, prop.property); - col(&qualified_name) + // Create qualified column name: variable__property (lowercase for case-insensitivity) + col(qualify_column(&prop.variable, &prop.property)) } - VE::Variable(v) => col(v), + VE::Variable(v) => col(v.to_lowercase()), VE::Literal(PV::String(s)) => lit(s.clone()), VE::Literal(PV::Integer(i)) => lit(*i), VE::Literal(PV::Float(f)) => lit(*f), @@ -124,9 +124,8 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { } VE::Literal(PV::Parameter(_)) => lit(0), VE::Literal(PV::Property(prop)) => { - // Create qualified column name: variable__property - let qualified_name = format!("{}__{}", prop.variable, prop.property); - col(&qualified_name) + // Create qualified column name: variable__property (lowercase for case-insensitivity) + col(qualify_column(&prop.variable, &prop.property)) } VE::ScalarFunction { name, args } => { match name.to_lowercase().as_str() { @@ -178,7 +177,8 @@ pub(crate) fn to_df_value_expr(expr: &ValueExpression) -> Expr { // Those cases will produce a runtime "column not found" error. // TODO: Consider using COUNT(1) for non-node variables, or add // semantic validation to reject COUNT(variable) for non-node types. - col(format!("{}__id", v)) + // Use qualify_column for consistent case-insensitive normalization + col(qualify_column(v, "id")) } } else { // COUNT(p.property) - count non-null values of that property diff --git a/crates/lance-graph/src/datafusion_planner/join_ops.rs b/crates/lance-graph/src/datafusion_planner/join_ops.rs index ede6e29..a1e83ba 100644 --- a/crates/lance-graph/src/datafusion_planner/join_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/join_ops.rs @@ -11,6 +11,7 @@ use super::analysis::{PlanningContext, RelationshipInstance}; use super::DataFusionPlanner; use crate::ast::{PropertyValue, RelationshipDirection}; +use crate::case_insensitive::qualify_column; use crate::config::{NodeMapping, RelationshipMapping}; use crate::error::Result; use crate::source_catalog::GraphSourceCatalog; @@ -54,8 +55,8 @@ impl DataFusionPlanner { RelationshipDirection::Undirected => ¶ms.rel_map.source_id_field, }; - let qualified_left_key = format!("{}__{}", params.source_variable, params.node_id_field); - let qualified_right_key = format!("{}__{}", params.rel_qualifier, right_key); + let qualified_left_key = qualify_column(params.source_variable, params.node_id_field); + let qualified_right_key = qualify_column(params.rel_qualifier, right_key); LogicalPlanBuilder::from(left_plan) .join( @@ -95,10 +96,11 @@ impl DataFusionPlanner { // Create target node scan with qualified column aliases and property filters let target_schema = target_source.schema(); - let mut target_builder = LogicalPlanBuilder::scan(&target_label, target_source, None) - .map_err(|e| { - self.plan_error(&format!("Failed to scan target node '{}'", target_label), e) - })?; + let normalized_target_label = target_label.to_lowercase(); + let mut target_builder = + LogicalPlanBuilder::scan(&normalized_target_label, target_source, None).map_err( + |e| self.plan_error(&format!("Failed to scan target node '{}'", target_label), e), + )?; // Apply target property filters (e.g., (b {age: 30})) for (k, v) in params.target_properties.iter() { @@ -106,7 +108,7 @@ impl DataFusionPlanner { &crate::ast::ValueExpression::Literal(v.clone()), ); let filter_expr = Expr::BinaryExpr(BinaryExpr { - left: Box::new(col(k)), + left: Box::new(col(k.to_lowercase())), op: Operator::Eq, right: Box::new(lit_expr), }); @@ -119,7 +121,7 @@ impl DataFusionPlanner { .fields() .iter() .map(|field| { - let qualified_name = format!("{}__{}", params.target_variable, field.name()); + let qualified_name = qualify_column(params.target_variable, field.name()); col(field.name()).alias(&qualified_name) }) .collect(); @@ -137,9 +139,9 @@ impl DataFusionPlanner { RelationshipDirection::Undirected => ¶ms.rel_map.target_id_field, }; - let qualified_rel_target_key = format!("{}__{}", params.rel_qualifier, target_key); + let qualified_rel_target_key = qualify_column(params.rel_qualifier, target_key); let qualified_target_key = - format!("{}__{}", params.target_variable, ¶ms.node_map.id_field); + qualify_column(params.target_variable, ¶ms.node_map.id_field); builder = builder .join( @@ -192,8 +194,8 @@ impl DataFusionPlanner { direction: &RelationshipDirection, ) -> Result { let source_key = Self::get_source_join_key(direction, rel_map); - let qualified_source_key = format!("{}__{}", source_variable, &node_map.id_field); - let qualified_rel_source_key = format!("{}__{}", rel_instance.alias, source_key); + let qualified_source_key = qualify_column(source_variable, &node_map.id_field); + let qualified_rel_source_key = qualify_column(&rel_instance.alias, source_key); LogicalPlanBuilder::from(input_plan) .join( @@ -221,8 +223,8 @@ impl DataFusionPlanner { direction: &RelationshipDirection, ) -> Result { let target_key = Self::get_target_join_key(direction, rel_map); - let qualified_rel_target_key = format!("{}__{}", rel_instance.alias, target_key); - let qualified_target_key = format!("{}__{}", target_variable, &node_map.id_field); + let qualified_rel_target_key = qualify_column(&rel_instance.alias, target_key); + let qualified_target_key = qualify_column(target_variable, &node_map.id_field); builder .join( diff --git a/crates/lance-graph/src/datafusion_planner/scan_ops.rs b/crates/lance-graph/src/datafusion_planner/scan_ops.rs index 82f5234..912f07d 100644 --- a/crates/lance-graph/src/datafusion_planner/scan_ops.rs +++ b/crates/lance-graph/src/datafusion_planner/scan_ops.rs @@ -7,6 +7,7 @@ use super::analysis::{PlanningContext, RelationshipInstance}; use crate::ast::PropertyValue; +use crate::case_insensitive::qualify_column; use crate::error::Result; use crate::source_catalog::GraphSourceCatalog; use datafusion::logical_expr::{col, BinaryExpr, Expr, LogicalPlan, LogicalPlanBuilder, Operator}; @@ -31,9 +32,12 @@ impl DataFusionPlanner { if let Some(source) = cat.node_source(label) { // Get schema before moving source let schema = source.schema(); - let mut builder = LogicalPlanBuilder::scan(label, source, None).map_err(|e| { - self.plan_error(&format!("Failed to scan node source '{}'", label), e) - })?; + // Normalize label for table scan (case-insensitive) + let normalized_label = label.to_lowercase(); + let mut builder = LogicalPlanBuilder::scan(&normalized_label, source, None) + .map_err(|e| { + self.plan_error(&format!("Failed to scan node source '{}'", label), e) + })?; // Combine property filters into single predicate for efficiency if !properties.is_empty() { @@ -69,16 +73,12 @@ impl DataFusionPlanner { } // Create qualified column aliases: variable__property - // Optimization: Pre-allocate string capacity to reduce allocations + // Normalize both variable and field names for case-insensitive behavior let qualified_exprs: Vec = schema .fields() .iter() .map(|field| { - let mut qualified_name = - String::with_capacity(variable.len() + field.name().len() + 2); - qualified_name.push_str(variable); - qualified_name.push_str("__"); - qualified_name.push_str(field.name()); + let qualified_name = qualify_column(variable, field.name()); col(field.name()).alias(&qualified_name) }) .collect(); @@ -108,9 +108,11 @@ impl DataFusionPlanner { // This allows planners created with DataFusionPlanner::new() to work // without requiring a catalog, though they won't have actual data sources let empty_source = Arc::new(crate::source_catalog::SimpleTableSource::empty()); - let builder = LogicalPlanBuilder::scan(label, empty_source, None).map_err(|e| { - self.plan_error(&format!("Failed to create table scan for '{}'", label), e) - })?; + let normalized_label = label.to_lowercase(); + let builder = + LogicalPlanBuilder::scan(&normalized_label, empty_source, None).map_err(|e| { + self.plan_error(&format!("Failed to create table scan for '{}'", label), e) + })?; builder .build() @@ -125,7 +127,8 @@ impl DataFusionPlanner { relationship_properties: &HashMap, ) -> Result { let rel_schema = rel_source.schema(); - let mut rel_builder = LogicalPlanBuilder::scan(&rel_instance.rel_type, rel_source, None) + let normalized_rel_type = rel_instance.rel_type.to_lowercase(); + let mut rel_builder = LogicalPlanBuilder::scan(&normalized_rel_type, rel_source, None) .map_err(|e| { self.plan_error( &format!("Failed to scan relationship '{}'", rel_instance.rel_type), @@ -156,7 +159,7 @@ impl DataFusionPlanner { .fields() .iter() .map(|field| { - let qualified_name = format!("{}__{}", rel_instance.alias, field.name()); + let qualified_name = qualify_column(&rel_instance.alias, field.name()); col(field.name()).alias(&qualified_name) }) .collect(); @@ -188,10 +191,8 @@ impl DataFusionPlanner { // Get source node label and schema if let Some(source_label) = ctx.analysis.var_to_label.get(source_variable) { if let Some(source) = cat.node_source(source_label) { - let source_schema = source.schema(); - for field in source_schema.fields() { - let qualified_name = format!("{}__{}", source_variable, field.name()); - expected.insert(qualified_name); + for field in source.schema().fields() { + expected.insert(qualify_column(source_variable, field.name())); } } } @@ -199,10 +200,8 @@ impl DataFusionPlanner { // Get target node label and schema if let Some(target_label) = ctx.analysis.var_to_label.get(target_variable) { if let Some(target) = cat.node_source(target_label) { - let target_schema = target.schema(); - for field in target_schema.fields() { - let qualified_name = format!("{}__{}", target_variable, field.name()); - expected.insert(qualified_name); + for field in target.schema().fields() { + expected.insert(qualify_column(target_variable, field.name())); } } } @@ -227,17 +226,19 @@ impl DataFusionPlanner { })?; let rel_schema = rel_source.schema(); - let rel_builder = LogicalPlanBuilder::scan(&rel_instance.rel_type, rel_source, None) + let normalized_rel_type = rel_instance.rel_type.to_lowercase(); + let rel_builder = LogicalPlanBuilder::scan(&normalized_rel_type, rel_source, None) .map_err(|e| crate::error::GraphError::PlanError { message: format!("Failed to scan relationship: {}", e), location: snafu::Location::new(file!(), line!(), column!()), })?; + let rel_alias_lower = rel_instance.alias.to_lowercase(); let rel_qualified_exprs: Vec = rel_schema .fields() .iter() .map(|field| { - let qualified_name = format!("{}__{}", rel_instance.alias, field.name()); + let qualified_name = qualify_column(&rel_alias_lower, field.name()); col(field.name()).alias(&qualified_name) }) .collect(); @@ -271,11 +272,14 @@ impl DataFusionPlanner { })?; let target_schema = target_source.schema(); - let mut target_builder = LogicalPlanBuilder::scan(target_label, target_source, None) - .map_err(|e| crate::error::GraphError::PlanError { - message: format!("Failed to scan target node: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - })?; + let normalized_target_label = target_label.to_lowercase(); + let mut target_builder = + LogicalPlanBuilder::scan(&normalized_target_label, target_source, None).map_err( + |e| crate::error::GraphError::PlanError { + message: format!("Failed to scan target node: {}", e), + location: snafu::Location::new(file!(), line!(), column!()), + }, + )?; // Apply target property filters for (k, v) in target_properties.iter() { @@ -295,11 +299,12 @@ impl DataFusionPlanner { })?; } + let target_var_lower = target_variable.to_lowercase(); let target_qualified_exprs: Vec = target_schema .fields() .iter() .map(|field| { - let qualified_name = format!("{}__{}", target_variable, field.name()); + let qualified_name = qualify_column(&target_var_lower, field.name()); col(field.name()).alias(&qualified_name) }) .collect(); diff --git a/crates/lance-graph/src/lib.rs b/crates/lance-graph/src/lib.rs index c9a3524..cba6d6f 100644 --- a/crates/lance-graph/src/lib.rs +++ b/crates/lance-graph/src/lib.rs @@ -36,6 +36,7 @@ //! ``` pub mod ast; +pub mod case_insensitive; pub mod config; pub mod datafusion_planner; pub mod error; diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index a7e15ae..9eddb8d 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -13,8 +13,44 @@ use crate::parser::parse_cypher_query; use crate::simple_executor::{ to_df_boolean_expr_simple, to_df_order_by_expr_simple, to_df_value_expr_simple, PathExecutor, }; +use arrow_array::RecordBatch; +use arrow_schema::{Field, Schema, SchemaRef}; use lance_namespace::models::DescribeTableRequest; use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +/// Normalize an Arrow schema to have lowercase field names. +/// +/// This ensures that column names in the dataset match the normalized +/// qualified column names used internally (e.g., "fullName" becomes "fullname"). +fn normalize_schema(schema: SchemaRef) -> Result { + let fields: Vec<_> = schema + .fields() + .iter() + .map(|f| { + Arc::new(Field::new( + f.name().to_lowercase(), + f.data_type().clone(), + f.is_nullable(), + )) + }) + .collect(); + Ok(Arc::new(Schema::new(fields))) +} + +/// Normalize a RecordBatch to have lowercase column names. +/// +/// This creates a new RecordBatch with a normalized schema while +/// preserving all the data arrays. +fn normalize_record_batch(batch: &RecordBatch) -> Result { + let normalized_schema = normalize_schema(batch.schema())?; + RecordBatch::try_new(normalized_schema, batch.columns().to_vec()).map_err(|e| { + GraphError::PlanError { + message: format!("Failed to normalize record batch schema: {}", e), + location: snafu::Location::new(file!(), line!(), column!()), + } + }) +} /// Execution strategy for Cypher queries #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] @@ -262,15 +298,9 @@ impl CypherQuery { /// /// **WARNING**: This method is experimental and the generated SQL dialect may change. /// - /// **Case Sensitivity Limitation**: All table names in the generated SQL are lowercased - /// (e.g., `Person` becomes `person`, `Company` becomes `company`), due to the internal - /// handling of DataFusion's SQL unparser. Note that this only affects the SQL string - /// representation - actual query execution with `execute()` handles case-sensitive labels - /// correctly. - /// - /// If you need case-sensitive table names in the SQL output, consider: - /// - Using lowercase labels consistently in your Cypher queries and table names - /// - Post-processing the SQL string to replace table names with the correct case + /// **Note**: All table names and column names in the generated SQL are lowercased + /// (e.g., `Person` becomes `person`, `fullName` becomes `fullname`), consistent with + /// the system's case-insensitive identifier behavior. /// /// # Arguments /// * `datasets` - HashMap of table name to RecordBatch (nodes and relationships) @@ -548,17 +578,25 @@ impl CypherQuery { // Register all datasets as tables for (name, batch) in &datasets { + // Normalize the schema to lowercase field names + let normalized_batch = normalize_record_batch(batch)?; + let mem_table = Arc::new( - MemTable::try_new(batch.schema(), vec![vec![batch.clone()]]).map_err(|e| { - GraphError::PlanError { - message: format!("Failed to create MemTable for {}: {}", name, e), - location: snafu::Location::new(file!(), line!(), column!()), - } + MemTable::try_new( + normalized_batch.schema(), + vec![vec![normalized_batch.clone()]], + ) + .map_err(|e| GraphError::PlanError { + message: format!("Failed to create MemTable for {}: {}", name, e), + location: snafu::Location::new(file!(), line!(), column!()), })?, ); + // Normalize table name to lowercase + let normalized_name = name.to_lowercase(); + // Register in session context for execution - ctx.register_table(name, mem_table.clone()) + ctx.register_table(&normalized_name, mem_table.clone()) .map_err(|e| GraphError::PlanError { message: format!("Failed to register table {}: {}", name, e), location: snafu::Location::new(file!(), line!(), column!()), @@ -566,8 +604,8 @@ impl CypherQuery { let table_source = Arc::new(DefaultTableSource::new(mem_table)); - // Register as both node and relationship source - // The planner will use whichever is appropriate based on the query + // Register as both node and relationship source with original name + // The config lookup is already case-insensitive, so we can use original name catalog = catalog .with_node_source(name, table_source.clone()) .with_relationship_source(name, table_source); @@ -644,7 +682,9 @@ impl CypherQuery { let provider: Arc = Arc::new(LanceTableProvider::new(dataset.clone(), true, true)); - ctx.register_table(&table_name, provider.clone()) + // Register with lowercase table name for case-insensitive behavior + let normalized_table_name = table_name.to_lowercase(); + ctx.register_table(&normalized_table_name, provider.clone()) .map_err(|e| GraphError::PlanError { message: format!( "Failed to register table '{}' in SessionContext: {}", @@ -911,16 +951,22 @@ impl CypherQuery { } // Create DataFusion context and register all provided tables + // Normalize schemas and table names for case-insensitive behavior let ctx = SessionContext::new(); for (name, batch) in &datasets { - let table = - MemTable::try_new(batch.schema(), vec![vec![batch.clone()]]).map_err(|e| { - GraphError::PlanError { - message: format!("Failed to create DataFusion table: {}", e), - location: snafu::Location::new(file!(), line!(), column!()), - } - })?; - ctx.register_table(name, Arc::new(table)) + let normalized_batch = normalize_record_batch(batch)?; + let table = MemTable::try_new( + normalized_batch.schema(), + vec![vec![normalized_batch.clone()]], + ) + .map_err(|e| GraphError::PlanError { + message: format!("Failed to create DataFusion table: {}", e), + location: snafu::Location::new(file!(), line!(), column!()), + })?; + + // Register with lowercase table name + let normalized_name = name.to_lowercase(); + ctx.register_table(&normalized_name, Arc::new(table)) .map_err(|e| GraphError::PlanError { message: format!("Failed to register table '{}': {}", name, e), location: snafu::Location::new(file!(), line!(), column!()), diff --git a/crates/lance-graph/src/semantic.rs b/crates/lance-graph/src/semantic.rs index 70ac9dc..065e36a 100644 --- a/crates/lance-graph/src/semantic.rs +++ b/crates/lance-graph/src/semantic.rs @@ -9,6 +9,7 @@ //! Semantic analysis validates the query and enriches the AST with type information. use crate::ast::*; +use crate::case_insensitive::CaseInsensitiveLookup; use crate::config::GraphConfig; use crate::error::{GraphError, Result}; use std::collections::{HashMap, HashSet}; @@ -169,9 +170,10 @@ impl SemanticAnalyzer { fn analyze_unwind_clause(&mut self, unwind_clause: &UnwindClause) -> Result<()> { self.analyze_value_expression(&unwind_clause.expression)?; - // Register the aliased variable + // Register the aliased variable (normalize to lowercase for case-insensitive behavior) let var_name = &unwind_clause.alias; - if let Some(existing) = self.variables.get_mut(var_name) { + let var_name_lower = var_name.to_lowercase(); + if let Some(existing) = self.variables.get_mut(&var_name_lower) { // Shadowing or redefinition - in Cypher variables can be bound multiple times in some contexts // But here we enforce uniqueness of types mostly. // For now, treat UNWIND alias as a Property type variable. @@ -189,7 +191,7 @@ impl SemanticAnalyzer { properties: HashSet::new(), defined_in: self.current_scope.clone(), }; - self.variables.insert(var_name.clone(), var_info); + self.variables.insert(var_name_lower, var_info); } Ok(()) } @@ -224,7 +226,9 @@ impl SemanticAnalyzer { /// Register a node variable fn register_node_variable(&mut self, node: &NodePattern) -> Result<()> { if let Some(var_name) = &node.variable { - if let Some(existing) = self.variables.get_mut(var_name) { + // Normalize to lowercase for case-insensitive behavior + let var_name_lower = var_name.to_lowercase(); + if let Some(existing) = self.variables.get_mut(&var_name_lower) { if existing.variable_type != VariableType::Node { return Err(GraphError::PlanError { message: format!("Variable '{}' redefined with different type", var_name), @@ -247,7 +251,7 @@ impl SemanticAnalyzer { properties: node.properties.keys().cloned().collect(), defined_in: self.current_scope.clone(), }; - self.variables.insert(var_name.clone(), var_info); + self.variables.insert(var_name_lower, var_info); } } Ok(()) @@ -259,7 +263,9 @@ impl SemanticAnalyzer { var_name: &str, rel: &RelationshipPattern, ) -> Result<()> { - if let Some(existing) = self.variables.get_mut(var_name) { + // Normalize to lowercase for case-insensitive behavior + let var_name_lower = var_name.to_lowercase(); + if let Some(existing) = self.variables.get_mut(&var_name_lower) { if existing.variable_type != VariableType::Relationship { return Err(GraphError::PlanError { message: format!("Variable '{}' redefined with different type", var_name), @@ -282,7 +288,7 @@ impl SemanticAnalyzer { properties: rel.properties.keys().cloned().collect(), defined_in: self.current_scope.clone(), }; - self.variables.insert(var_name.to_string(), var_info); + self.variables.insert(var_name_lower, var_info); } Ok(()) } @@ -350,7 +356,8 @@ impl SemanticAnalyzer { // Literals are always valid } ValueExpression::Variable(var) => { - if !self.variables.contains_key(var) { + // Use case-insensitive lookup + if !self.variables.contains_key_ci(var) { return Err(GraphError::PlanError { message: format!("Undefined variable: '{}'", var), location: snafu::Location::new(file!(), line!(), column!()), @@ -548,7 +555,8 @@ impl SemanticAnalyzer { } fn register_projection_alias(&mut self, alias: &str) { - if self.variables.contains_key(alias) { + // Use case-insensitive lookup and store normalized key + if self.variables.contains_key_ci(alias) { return; } @@ -559,12 +567,13 @@ impl SemanticAnalyzer { properties: HashSet::new(), defined_in: self.current_scope.clone(), }; - self.variables.insert(alias.to_string(), var_info); + self.variables.insert(alias.to_lowercase(), var_info); } /// Validate property reference fn validate_property_reference(&self, prop_ref: &PropertyRef) -> Result<()> { - if !self.variables.contains_key(&prop_ref.variable) { + // Use case-insensitive lookup + if !self.variables.contains_key_ci(&prop_ref.variable) { return Err(GraphError::PlanError { message: format!("Undefined variable: '{}'", prop_ref.variable), location: snafu::Location::new(file!(), line!(), column!()), @@ -660,8 +669,10 @@ impl SemanticAnalyzer { if !label_property_sets.is_empty() { 'prop: for prop in &var_info.properties { // Property is valid if present in at least one label's property_fields + // Use case-insensitive comparison + let prop_lower = prop.to_lowercase(); for fields in &label_property_sets { - if fields.iter().any(|f| f == prop) { + if fields.iter().any(|f| f.to_lowercase() == prop_lower) { continue 'prop; } } @@ -685,8 +696,10 @@ impl SemanticAnalyzer { if !rel_property_sets.is_empty() { 'prop_rel: for prop in &var_info.properties { + // Use case-insensitive comparison for relationship properties + let prop_lower = prop.to_lowercase(); for fields in &rel_property_sets { - if fields.iter().any(|f| f == prop) { + if fields.iter().any(|f| f.to_lowercase() == prop_lower) { continue 'prop_rel; } } diff --git a/crates/lance-graph/src/simple_executor/aliases.rs b/crates/lance-graph/src/simple_executor/aliases.rs index 1eedae8..45b6b95 100644 --- a/crates/lance-graph/src/simple_executor/aliases.rs +++ b/crates/lance-graph/src/simple_executor/aliases.rs @@ -1,11 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors +use crate::case_insensitive::qualify_column; + /// Qualify a column name for internal DataFusion operations. /// Returns format: `alias__property` (e.g., "p__name"). +/// Both alias and property are normalized to lowercase for case-insensitive behavior. /// Note: This is for internal use only. Final output uses Cypher dot notation. +#[inline] pub(super) fn qualify_alias_property(alias: &str, property: &str) -> String { - format!("{}__{}", alias, property) + qualify_column(alias, property) } /// Convert to Cypher-style column name for query results. @@ -21,8 +25,15 @@ mod tests { #[test] fn test_qualify_alias_property() { + // Should normalize to lowercase assert_eq!(qualify_alias_property("p", "name"), "p__name"); assert_eq!(qualify_alias_property("person", "age"), "person__age"); + assert_eq!(qualify_alias_property("P", "Name"), "p__name"); + assert_eq!(qualify_alias_property("PERSON", "AGE"), "person__age"); + assert_eq!( + qualify_alias_property("Person", "fullName"), + "person__fullname" + ); } #[test] diff --git a/crates/lance-graph/src/simple_executor/path_executor.rs b/crates/lance-graph/src/simple_executor/path_executor.rs index 3353d29..eaea5e5 100644 --- a/crates/lance-graph/src/simple_executor/path_executor.rs +++ b/crates/lance-graph/src/simple_executor/path_executor.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors +use crate::case_insensitive::qualify_column; use crate::error::{GraphError, Result}; use datafusion::logical_expr::JoinType; @@ -110,7 +111,7 @@ impl<'a> PathExecutor<'a> { let mut node_maps: HashMap = HashMap::new(); let mut rel_maps: HashMap = HashMap::new(); node_maps.insert( - start_alias.clone(), + start_alias.to_lowercase(), cfg.get_node_mapping(start_label) .ok_or_else(|| GraphError::PlanError { message: format!("No node mapping for '{}'", start_label), @@ -119,7 +120,7 @@ impl<'a> PathExecutor<'a> { ); for seg in &segs { node_maps.insert( - seg.end_alias.clone(), + seg.end_alias.to_lowercase(), cfg.get_node_mapping(seg.end_label) .ok_or_else(|| GraphError::PlanError { message: format!("No node mapping for '{}'", seg.end_label), @@ -127,7 +128,7 @@ impl<'a> PathExecutor<'a> { })?, ); rel_maps.insert( - seg.rel_alias.clone(), + seg.rel_alias.to_lowercase(), cfg.get_relationship_mapping(seg.rel_type).ok_or_else(|| { GraphError::PlanError { message: format!("No relationship mapping for '{}'", seg.rel_type), @@ -165,9 +166,7 @@ impl<'a> PathExecutor<'a> { let proj: Vec = schema .fields() .iter() - .map(|f| { - datafusion::logical_expr::col(f.name()).alias(format!("{}__{}", alias, f.name())) - }) + .map(|f| datafusion::logical_expr::col(f.name()).alias(qualify_column(alias, f.name()))) .collect(); df.alias(alias)? .select(proj) @@ -206,17 +205,20 @@ impl<'a> PathExecutor<'a> { let mut current_node_alias = self.start_alias.as_str(); for s in &self.segs { let rel_df = self.open_aliased(s.rel_type, &s.rel_alias).await?; - let node_map = self.node_maps.get(current_node_alias).unwrap(); - let rel_map = self.rel_maps.get(&s.rel_alias).unwrap(); + let node_map = self + .node_maps + .get(¤t_node_alias.to_lowercase()) + .unwrap(); + let rel_map = self.rel_maps.get(&s.rel_alias.to_lowercase()).unwrap(); let (left_key, right_key) = match s.dir { crate::ast::RelationshipDirection::Outgoing | crate::ast::RelationshipDirection::Undirected => ( - format!("{}__{}", current_node_alias, node_map.id_field), - format!("{}__{}", s.rel_alias, rel_map.source_id_field), + qualify_column(current_node_alias, &node_map.id_field), + qualify_column(&s.rel_alias, &rel_map.source_id_field), ), crate::ast::RelationshipDirection::Incoming => ( - format!("{}__{}", current_node_alias, node_map.id_field), - format!("{}__{}", s.rel_alias, rel_map.target_id_field), + qualify_column(current_node_alias, &node_map.id_field), + qualify_column(&s.rel_alias, &rel_map.target_id_field), ), }; df = df @@ -233,23 +235,16 @@ impl<'a> PathExecutor<'a> { })?; let end_df = self.open_aliased(s.end_label, &s.end_alias).await?; + let end_node_map = self.node_maps.get(&s.end_alias.to_lowercase()).unwrap(); let (left_key2, right_key2) = match s.dir { crate::ast::RelationshipDirection::Outgoing | crate::ast::RelationshipDirection::Undirected => ( - format!("{}__{}", s.rel_alias, rel_map.target_id_field), - format!( - "{}__{}", - s.end_alias, - self.node_maps.get(&s.end_alias).unwrap().id_field - ), + qualify_column(&s.rel_alias, &rel_map.target_id_field), + qualify_column(&s.end_alias, &end_node_map.id_field), ), crate::ast::RelationshipDirection::Incoming => ( - format!("{}__{}", s.rel_alias, rel_map.source_id_field), - format!( - "{}__{}", - s.end_alias, - self.node_maps.get(&s.end_alias).unwrap().id_field - ), + qualify_column(&s.rel_alias, &rel_map.source_id_field), + qualify_column(&s.end_alias, &end_node_map.id_field), ), }; df = df diff --git a/crates/lance-graph/src/source_catalog.rs b/crates/lance-graph/src/source_catalog.rs index f85dea6..a122c87 100644 --- a/crates/lance-graph/src/source_catalog.rs +++ b/crates/lance-graph/src/source_catalog.rs @@ -35,7 +35,9 @@ impl InMemoryCatalog { label: impl Into, source: Arc, ) -> Self { - self.node_sources.insert(label.into(), source); + // Normalize key to lowercase for case-insensitive lookup + self.node_sources + .insert(label.into().to_lowercase(), source); self } @@ -44,7 +46,9 @@ impl InMemoryCatalog { rel_type: impl Into, source: Arc, ) -> Self { - self.rel_sources.insert(rel_type.into(), source); + // Normalize key to lowercase for case-insensitive lookup + self.rel_sources + .insert(rel_type.into().to_lowercase(), source); self } } @@ -56,12 +60,18 @@ impl Default for InMemoryCatalog { } impl GraphSourceCatalog for InMemoryCatalog { + /// Get node source with case-insensitive label lookup + /// + /// Note: Keys are stored as lowercase, so this is an O(1) operation. fn node_source(&self, label: &str) -> Option> { - self.node_sources.get(label).cloned() + self.node_sources.get(&label.to_lowercase()).cloned() } + /// Get relationship source with case-insensitive type lookup + /// + /// Note: Keys are stored as lowercase, so this is an O(1) operation. fn relationship_source(&self, rel_type: &str) -> Option> { - self.rel_sources.get(rel_type).cloned() + self.rel_sources.get(&rel_type.to_lowercase()).cloned() } } diff --git a/crates/lance-graph/tests/test_case_insensitivity.rs b/crates/lance-graph/tests/test_case_insensitivity.rs new file mode 100644 index 0000000..14d827d --- /dev/null +++ b/crates/lance-graph/tests/test_case_insensitivity.rs @@ -0,0 +1,307 @@ +//! Case-insensitivity tests +//! +//! Verifies that all identifier types (labels, properties, relationships, variables) +//! are case-insensitive throughout the system. + +use arrow_array::{BooleanArray, Int64Array, RecordBatch, StringArray}; +use arrow_schema::{DataType, Field, Schema}; +use lance_graph::{CypherQuery, ExecutionStrategy, GraphConfig}; +use std::collections::HashMap; +use std::sync::Arc; + +/// Helper to create a test person dataset with mixed-case properties +fn create_test_person_dataset() -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + Field::new("fullName", DataType::Utf8, false), + Field::new("isActive", DataType::Boolean, false), + Field::new("numFollowers", DataType::Int64, false), + ])); + + RecordBatch::try_new( + schema, + vec![ + Arc::new(Int64Array::from(vec![1, 2, 3])), + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie"])), + Arc::new(StringArray::from(vec![ + "Alice Smith", + "Bob Jones", + "Charlie Brown", + ])), + Arc::new(BooleanArray::from(vec![true, false, true])), + Arc::new(Int64Array::from(vec![100, 200, 150])), + ], + ) + .unwrap() +} + +#[tokio::test] +async fn test_identifiers_case_insensitive() { + // Test labels, properties, and table names with various case combinations + let config = GraphConfig::builder() + .with_node_label("Person", "id") + .build() + .unwrap(); + let person_batch = create_test_person_dataset(); + + let test_cases = vec![ + // Label case variations + ("MATCH (p:Person) RETURN p.name", 3), + ("MATCH (p:PERSON) RETURN p.name", 3), + ("MATCH (p:person) RETURN p.name", 3), + // Property case variations + ("MATCH (p:Person) RETURN p.fullName", 3), + ("MATCH (p:Person) RETURN p.FULLNAME", 3), + ("MATCH (p:Person) RETURN p.fullname", 3), + ("MATCH (p:Person) RETURN p.isActive", 3), + ("MATCH (p:Person) RETURN p.ISACTIVE", 3), + ("MATCH (p:Person) RETURN p.numFollowers", 3), + ("MATCH (p:Person) RETURN p.NUMFOLLOWERS", 3), + ]; + + for (query_str, expected_rows) in test_cases { + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch.clone()); + + let result = CypherQuery::new(query_str) + .unwrap() + .with_config(config.clone()) + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await; + + assert!( + result.is_ok(), + "Query failed: {} with error: {:?}", + query_str, + result.err() + ); + assert_eq!( + result.unwrap().num_rows(), + expected_rows, + "Query: {}", + query_str + ); + } + + // Test with lowercase table registration + let mut datasets = HashMap::new(); + datasets.insert("person".to_string(), person_batch.clone()); + let result = CypherQuery::new("MATCH (p:Person) RETURN p.name") + .unwrap() + .with_config(config.clone()) + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await; + assert!(result.is_ok()); + assert_eq!(result.unwrap().num_rows(), 3); + + // Test with uppercase table registration + let mut datasets = HashMap::new(); + datasets.insert("PERSON".to_string(), person_batch); + let result = CypherQuery::new("MATCH (p:person) RETURN p.name") + .unwrap() + .with_config(config) + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await; + assert!(result.is_ok()); + assert_eq!(result.unwrap().num_rows(), 3); +} + +#[tokio::test] +async fn test_clauses_case_insensitive() { + // Test WHERE, ORDER BY, and variable references with case variations + let config = GraphConfig::builder() + .with_node_label("Person", "id") + .build() + .unwrap(); + let person_batch = create_test_person_dataset(); + + let test_cases = vec![ + // WHERE clause with different property cases + ("MATCH (p:Person) WHERE p.isActive = true RETURN p.name", 2), + ("MATCH (p:Person) WHERE p.ISACTIVE = true RETURN p.name", 2), + ("MATCH (p:Person) WHERE p.numFollowers > 100 RETURN p.name", 2), + ("MATCH (p:Person) WHERE p.NUMFOLLOWERS > 100 RETURN p.name", 2), + // Variable case: lowercase p in MATCH, uppercase P in WHERE/RETURN + ("MATCH (p:Person) WHERE P.isActive = true RETURN p.name", 2), + ("MATCH (p:Person) RETURN P.name", 3), + ("MATCH (P:Person) WHERE p.numFollowers > 100 RETURN P.name", 2), + // ORDER BY with different property cases + ("MATCH (p:Person) RETURN p.name ORDER BY p.numFollowers DESC", 3), + ("MATCH (p:Person) RETURN p.name ORDER BY P.NUMFOLLOWERS DESC", 3), + // Alias case: different case in ORDER BY + ("MATCH (p:Person) RETURN p.numFollowers AS Score ORDER BY score DESC", 3), + ("MATCH (p:Person) RETURN p.numFollowers AS score ORDER BY SCORE DESC", 3), + // Aggregate functions with variable case + ("MATCH (p:Person) RETURN count(P)", 1), + ("MATCH (P:Person) RETURN count(p)", 1), + ("MATCH (p:Person) RETURN sum(P.numFollowers)", 1), + ]; + + for (query_str, expected_rows) in test_cases { + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch.clone()); + + let result = CypherQuery::new(query_str) + .unwrap() + .with_config(config.clone()) + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await; + + assert!( + result.is_ok(), + "Query failed: {} with error: {:?}", + query_str, + result.err() + ); + assert_eq!( + result.unwrap().num_rows(), + expected_rows, + "Wrong row count for query: {}", + query_str + ); + } + + // Verify ORDER BY actually orders correctly + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch); + let result = CypherQuery::new("MATCH (p:Person) RETURN p.name ORDER BY p.NUMFOLLOWERS DESC") + .unwrap() + .with_config(config) + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await + .unwrap(); + + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(names.value(0), "Bob"); // 200 followers + assert_eq!(names.value(1), "Charlie"); // 150 followers + assert_eq!(names.value(2), "Alice"); // 100 followers +} + +#[tokio::test] +async fn test_relationships_case_insensitive() { + // Test relationship types and relationship variables with case variations + let config = GraphConfig::builder() + .with_node_label("Person", "id") + .with_relationship("KNOWS", "src_id", "dst_id") + .build() + .unwrap(); + + let person_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, false), + ])); + + let knows_schema = Arc::new(Schema::new(vec![ + Field::new("src_id", DataType::Int64, false), + Field::new("dst_id", DataType::Int64, false), + Field::new("since", DataType::Int64, false), + ])); + + let person_batch = RecordBatch::try_new( + person_schema, + vec![ + Arc::new(Int64Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec!["Alice", "Bob"])), + ], + ) + .unwrap(); + + let knows_batch = RecordBatch::try_new( + knows_schema, + vec![ + Arc::new(Int64Array::from(vec![1])), + Arc::new(Int64Array::from(vec![2])), + Arc::new(Int64Array::from(vec![2020])), + ], + ) + .unwrap(); + + let test_cases = vec![ + // Relationship type case variations + "MATCH (a:Person)-[:KNOWS]->(b:Person) RETURN a.name", + "MATCH (a:Person)-[:knows]->(b:Person) RETURN a.name", + "MATCH (a:Person)-[:Knows]->(b:Person) RETURN a.name", + // Relationship variable case: lowercase r in pattern, uppercase R in RETURN/WHERE + "MATCH (a:Person)-[r:KNOWS]->(b:Person) RETURN R.since", + "MATCH (a:Person)-[R:KNOWS]->(b:Person) RETURN r.since", + "MATCH (a:Person)-[r:KNOWS]->(b:Person) WHERE R.since > 2019 RETURN r.since", + ]; + + for query_str in test_cases { + let mut datasets = HashMap::new(); + datasets.insert("Person".to_string(), person_batch.clone()); + datasets.insert("KNOWS".to_string(), knows_batch.clone()); + + let result = CypherQuery::new(query_str) + .unwrap() + .with_config(config.clone()) + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await; + + assert!( + result.is_ok(), + "Query failed: {} with error: {:?}", + query_str, + result.err() + ); + assert_eq!(result.unwrap().num_rows(), 1, "Query: {}", query_str); + } +} + +#[tokio::test] +async fn test_complex_mixed_case_query() { + // Comprehensive end-to-end test with mixed case everywhere + let config = GraphConfig::builder() + .with_node_label("Person", "id") + .build() + .unwrap(); + let person_batch = create_test_person_dataset(); + + let mut datasets = HashMap::new(); + datasets.insert("PERSON".to_string(), person_batch); + + // Query with mixed case in labels, properties, variables, and aliases + let query = " + MATCH (P:person) + WHERE P.IsActive = true AND P.NumFollowers > 50 + RETURN P.FullName AS PersonName, P.NumFollowers AS Followers + ORDER BY Followers DESC + "; + + let result = CypherQuery::new(query) + .unwrap() + .with_config(config) + .execute(datasets, Some(ExecutionStrategy::DataFusion)) + .await; + + assert!( + result.is_ok(), + "Complex mixed-case query failed: {:?}", + result.err() + ); + let result = result.unwrap(); + assert_eq!(result.num_rows(), 2); // Alice and Charlie + + // Verify correct ordering and values + let names = result + .column(0) + .as_any() + .downcast_ref::() + .unwrap(); + let followers = result + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + + // Should be ordered by Followers DESC: Charlie (150), Alice (100) + assert_eq!(names.value(0), "Charlie Brown"); + assert_eq!(followers.value(0), 150); + assert_eq!(names.value(1), "Alice Smith"); + assert_eq!(followers.value(1), 100); +} diff --git a/crates/lance-graph/tests/test_datafusion_with_context.rs b/crates/lance-graph/tests/test_datafusion_with_context.rs index 0e234c7..ef214fc 100644 --- a/crates/lance-graph/tests/test_datafusion_with_context.rs +++ b/crates/lance-graph/tests/test_datafusion_with_context.rs @@ -279,9 +279,9 @@ async fn test_execute_with_context_missing_table() { // Should error because Person table is not registered assert!(result.is_err()); - let err_msg = result.unwrap_err().to_string(); + let err_msg = result.unwrap_err().to_string().to_lowercase(); assert!( - err_msg.contains("Person") && err_msg.contains("not found"), + err_msg.contains("person") && err_msg.contains("not found"), "Error should mention missing Person table: {}", err_msg ); From 28330cf2f53714fcc7847f731eac1658fcf4b37e Mon Sep 17 00:00:00 2001 From: JoshuaTang <1240604020@qq.com> Date: Sat, 31 Jan 2026 10:02:52 -0800 Subject: [PATCH 2/5] format code --- .../tests/test_case_insensitivity.rs | 35 +++++++++++++++---- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/crates/lance-graph/tests/test_case_insensitivity.rs b/crates/lance-graph/tests/test_case_insensitivity.rs index 14d827d..6cd1c95 100644 --- a/crates/lance-graph/tests/test_case_insensitivity.rs +++ b/crates/lance-graph/tests/test_case_insensitivity.rs @@ -120,18 +120,39 @@ async fn test_clauses_case_insensitive() { // WHERE clause with different property cases ("MATCH (p:Person) WHERE p.isActive = true RETURN p.name", 2), ("MATCH (p:Person) WHERE p.ISACTIVE = true RETURN p.name", 2), - ("MATCH (p:Person) WHERE p.numFollowers > 100 RETURN p.name", 2), - ("MATCH (p:Person) WHERE p.NUMFOLLOWERS > 100 RETURN p.name", 2), + ( + "MATCH (p:Person) WHERE p.numFollowers > 100 RETURN p.name", + 2, + ), + ( + "MATCH (p:Person) WHERE p.NUMFOLLOWERS > 100 RETURN p.name", + 2, + ), // Variable case: lowercase p in MATCH, uppercase P in WHERE/RETURN ("MATCH (p:Person) WHERE P.isActive = true RETURN p.name", 2), ("MATCH (p:Person) RETURN P.name", 3), - ("MATCH (P:Person) WHERE p.numFollowers > 100 RETURN P.name", 2), + ( + "MATCH (P:Person) WHERE p.numFollowers > 100 RETURN P.name", + 2, + ), // ORDER BY with different property cases - ("MATCH (p:Person) RETURN p.name ORDER BY p.numFollowers DESC", 3), - ("MATCH (p:Person) RETURN p.name ORDER BY P.NUMFOLLOWERS DESC", 3), + ( + "MATCH (p:Person) RETURN p.name ORDER BY p.numFollowers DESC", + 3, + ), + ( + "MATCH (p:Person) RETURN p.name ORDER BY P.NUMFOLLOWERS DESC", + 3, + ), // Alias case: different case in ORDER BY - ("MATCH (p:Person) RETURN p.numFollowers AS Score ORDER BY score DESC", 3), - ("MATCH (p:Person) RETURN p.numFollowers AS score ORDER BY SCORE DESC", 3), + ( + "MATCH (p:Person) RETURN p.numFollowers AS Score ORDER BY score DESC", + 3, + ), + ( + "MATCH (p:Person) RETURN p.numFollowers AS score ORDER BY SCORE DESC", + 3, + ), // Aggregate functions with variable case ("MATCH (p:Person) RETURN count(P)", 1), ("MATCH (P:Person) RETURN count(p)", 1), From 415529b5aeb02032566258df5ae217ecab479eea Mon Sep 17 00:00:00 2001 From: JoshuaTang <1240604020@qq.com> Date: Sat, 31 Jan 2026 10:09:57 -0800 Subject: [PATCH 3/5] fix spell error --- crates/lance-graph/src/config.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/lance-graph/src/config.rs b/crates/lance-graph/src/config.rs index 3cde41b..6f986a7 100644 --- a/crates/lance-graph/src/config.rs +++ b/crates/lance-graph/src/config.rs @@ -430,7 +430,6 @@ mod tests { assert!(config.get_relationship_mapping("FOLLOWS").is_some()); assert!(config.get_relationship_mapping("follows").is_some()); assert!(config.get_relationship_mapping("Follows").is_some()); - assert!(config.get_relationship_mapping("FoLlOwS").is_some()); assert!(config.get_relationship_mapping("WORKS_FOR").is_some()); assert!(config.get_relationship_mapping("works_for").is_some()); From 59107933e622272bac541a097dcd8ab64e25351c Mon Sep 17 00:00:00 2001 From: JoshuaTang <1240604020@qq.com> Date: Sat, 31 Jan 2026 11:00:11 -0800 Subject: [PATCH 4/5] fix ci test error --- crates/lance-graph/src/query.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index 9eddb8d..4291058 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -693,7 +693,8 @@ impl CypherQuery { location: snafu::Location::new(file!(), line!(), column!()), })?; - providers.insert(table_name, provider); + // Store provider with normalized (lowercase) key for consistent lookup + providers.insert(normalized_table_name.clone(), provider); } for label in config.node_mappings.keys() { From d6bca6acd0c7570fe2d8d81de157931872dff8e9 Mon Sep 17 00:00:00 2001 From: JoshuaTang <1240604020@qq.com> Date: Sat, 31 Jan 2026 11:12:49 -0800 Subject: [PATCH 5/5] fix the CI test error --- crates/lance-graph/src/query.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/crates/lance-graph/src/query.rs b/crates/lance-graph/src/query.rs index 4291058..8f5f017 100644 --- a/crates/lance-graph/src/query.rs +++ b/crates/lance-graph/src/query.rs @@ -631,8 +631,15 @@ impl CypherQuery { let config = self.require_config()?; let mut required_tables: HashSet = HashSet::new(); - required_tables.extend(config.node_mappings.keys().cloned()); - required_tables.extend(config.relationship_mappings.keys().cloned()); + // Use original label/type names (not lowercase keys) for namespace resolution + // The namespace needs the original casing to find files on disk + required_tables.extend(config.node_mappings.values().map(|m| m.label.clone())); + required_tables.extend( + config + .relationship_mappings + .values() + .map(|m| m.relationship_type.clone()), + ); if required_tables.is_empty() { return Err(GraphError::ConfigError {