Skip to content

Commit b467129

Browse files
cpsievertclaude
andcommitted
Fix case-sensitive VISUALISE column reference validation
The VISUALISE parser preserves the exact casing from the user's query (e.g., `VISUALISE ROOM_TYPE AS x` stores "ROOM_TYPE"). However, DuckDB lowercases unquoted identifiers in query results, so the schema column is "room_type". Since ggsql quotes column names in generated SQL (making them case-sensitive in DuckDB), this mismatch caused validation errors like: "aesthetic 'x' references non-existent column 'ROOM_TYPE'" Add a normalize_column_names() step early in the prepare_data pipeline that resolves VISUALISE column references to match the actual schema column names using case-insensitive matching. This is reader-agnostic: it normalizes to whatever the reader returns, not specifically to lowercase. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ccf4b28 commit b467129

1 file changed

Lines changed: 139 additions & 0 deletions

File tree

src/execute/mod.rs

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,71 @@ use crate::reader::Reader;
3333
#[cfg(all(feature = "duckdb", test))]
3434
use crate::reader::DuckDBReader;
3535

36+
// =============================================================================
37+
// Column Name Normalization
38+
// =============================================================================
39+
40+
/// Normalize aesthetic column references to match the actual schema column names.
41+
///
42+
/// DuckDB lowercases unquoted identifiers, but users may write column names in
43+
/// any case in VISUALISE/MAPPING clauses. Since ggsql quotes column names in
44+
/// generated SQL (making them case-sensitive), we must normalize references to
45+
/// match the actual column names returned by the database.
46+
///
47+
/// This function walks all aesthetic mappings (global and per-layer) and replaces
48+
/// column names with the matching schema column name (matched case-insensitively).
49+
fn normalize_column_names(specs: &mut [Plot], layer_schemas: &[Schema]) {
50+
for spec in specs {
51+
// Normalize global mappings using the first layer's schema (global mappings
52+
// are merged into all layers, so any layer's schema suffices for normalization)
53+
if let Some(schema) = layer_schemas.first() {
54+
let schema_names: Vec<&str> = schema.iter().map(|c| c.name.as_str()).collect();
55+
for value in spec.global_mappings.aesthetics.values_mut() {
56+
normalize_aesthetic_value(value, &schema_names);
57+
}
58+
}
59+
60+
// Normalize per-layer mappings and partition_by using each layer's own schema
61+
for (layer, schema) in spec.layers.iter_mut().zip(layer_schemas.iter()) {
62+
let schema_names: Vec<&str> = schema.iter().map(|c| c.name.as_str()).collect();
63+
for value in layer.mappings.aesthetics.values_mut() {
64+
normalize_aesthetic_value(value, &schema_names);
65+
}
66+
for col in &mut layer.partition_by {
67+
normalize_column_ref(col, &schema_names);
68+
}
69+
}
70+
}
71+
}
72+
73+
/// Resolve a column name to match actual schema casing (case-insensitive).
74+
///
75+
/// Only normalizes when there is no exact match and exactly one case-insensitive
76+
/// match exists. This preserves correct behavior when the schema contains columns
77+
/// that differ only by case (e.g., via quoted identifiers like `"Foo"` and `"foo"`).
78+
fn normalize_column_ref(name: &mut String, schema_names: &[&str]) {
79+
if schema_names.contains(&name.as_str()) {
80+
return;
81+
}
82+
83+
let name_lower = name.to_lowercase();
84+
let matches: Vec<&&str> = schema_names
85+
.iter()
86+
.filter(|s| s.to_lowercase() == name_lower)
87+
.collect();
88+
89+
if matches.len() == 1 {
90+
*name = matches[0].to_string();
91+
}
92+
}
93+
94+
/// Normalize a single aesthetic value's column name to match schema casing.
95+
fn normalize_aesthetic_value(value: &mut AestheticValue, schema_names: &[&str]) {
96+
if let AestheticValue::Column { name, .. } = value {
97+
normalize_column_ref(name, schema_names);
98+
}
99+
}
100+
36101
// =============================================================================
37102
// Validation
38103
// =============================================================================
@@ -570,6 +635,12 @@ pub fn prepare_data_with_reader<R: Reader>(query: &str, reader: &R) -> Result<Pr
570635
.map(|ti| schema::type_info_to_schema(ti))
571636
.collect();
572637

638+
// Normalize column names in aesthetic references to match actual schema casing.
639+
// DuckDB lowercases unquoted identifiers, but users may write them in any case
640+
// in VISUALISE/MAPPING. Since generated SQL quotes column names (case-sensitive),
641+
// we must normalize before merging or building queries.
642+
normalize_column_names(&mut specs, &layer_schemas);
643+
573644
// Merge global mappings into layer aesthetics and expand wildcards
574645
// Smart wildcard expansion only creates mappings for columns that exist in schema
575646
merge_global_mappings_into_layers(&mut specs, &layer_schemas);
@@ -1243,4 +1314,72 @@ mod tests {
12431314
// Should have fewer rows than original (binned)
12441315
assert!(layer_df.height() < 100);
12451316
}
1317+
1318+
/// Test that VISUALISE column references are matched case-insensitively.
1319+
///
1320+
/// DuckDB lowercases unquoted identifiers, so `SELECT UPPER_COL` returns a
1321+
/// column named `upper_col`. VISUALISE references like `UPPER_COL AS x` must
1322+
/// be normalized to match the actual schema column name before validation
1323+
/// and query building.
1324+
#[cfg(feature = "duckdb")]
1325+
#[test]
1326+
fn test_case_insensitive_column_references() {
1327+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1328+
1329+
// Create a table with uppercase column names via aliasing
1330+
reader
1331+
.connection()
1332+
.execute(
1333+
"CREATE TABLE case_test AS SELECT 'A' AS category, 10 AS value UNION ALL SELECT 'B', 20",
1334+
duckdb::params![],
1335+
)
1336+
.unwrap();
1337+
1338+
// VISUALISE references use UPPERCASE but DuckDB stores them as lowercase
1339+
let query = r#"
1340+
SELECT category, value FROM case_test
1341+
VISUALISE CATEGORY AS x, VALUE AS y
1342+
DRAW bar
1343+
"#;
1344+
1345+
let result = prepare_data_with_reader(query, &reader);
1346+
assert!(
1347+
result.is_ok(),
1348+
"Case-insensitive column references should work: {:?}",
1349+
result.err()
1350+
);
1351+
1352+
let result = result.unwrap();
1353+
let layer_df = result.data.get(&naming::layer_key(0)).unwrap();
1354+
assert_eq!(layer_df.height(), 2);
1355+
}
1356+
1357+
/// Mixed-case VISUALISE references (e.g., `CaTeGoRy`) should normalize
1358+
/// to the actual schema column name.
1359+
#[cfg(feature = "duckdb")]
1360+
#[test]
1361+
fn test_mixed_case_column_references() {
1362+
let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap();
1363+
1364+
reader
1365+
.connection()
1366+
.execute(
1367+
"CREATE TABLE mixed_case AS SELECT 'A' AS category, 10 AS value UNION ALL SELECT 'B', 20",
1368+
duckdb::params![],
1369+
)
1370+
.unwrap();
1371+
1372+
let query = r#"
1373+
SELECT category, value FROM mixed_case
1374+
VISUALISE CaTeGoRy AS x, VaLuE AS y
1375+
DRAW bar
1376+
"#;
1377+
1378+
let result = prepare_data_with_reader(query, &reader);
1379+
assert!(
1380+
result.is_ok(),
1381+
"Mixed-case column references should be normalized: {:?}",
1382+
result.err()
1383+
);
1384+
}
12461385
}

0 commit comments

Comments
 (0)