diff --git a/CLAUDE.md b/CLAUDE.md index 0b6b02b1..1d4473e6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -367,16 +367,18 @@ pub enum ScaleType { } pub enum Facet { - /// FACET WRAP variables + /// FACET variables (wrap layout) Wrap { variables: Vec, scales: FacetScales, + properties: HashMap, // From SETTING clause }, - /// FACET rows BY cols + /// FACET rows BY cols (grid layout) Grid { rows: Vec, cols: Vec, scales: FacetScales, + properties: HashMap, // From SETTING clause }, } @@ -1177,7 +1179,7 @@ Where `` can be: | `VISUALISE` | ✅ Yes | Entry point | `VISUALISE date AS x, revenue AS y` | | `DRAW` | ✅ Yes | Define layers | `DRAW line MAPPING date AS x, value AS y` | | `SCALE` | ✅ Yes | Configure scales | `SCALE x VIA date` | -| `FACET` | ❌ No | Small multiples | `FACET WRAP region` | +| `FACET` | ❌ No | Small multiples | `FACET region` | | `COORD` | ❌ No | Coordinate system | `COORD cartesian SETTING xlim => [0,100]` | | `LABEL` | ❌ No | Text labels | `LABEL title => 'My Chart', x => 'Date'` | | `THEME` | ❌ No | Visual styling | `THEME minimal` | @@ -1372,25 +1374,52 @@ SCALE x VIA date FROM ['2024-01-01', '2024-12-31'] SETTING breaks => '1 month' **Syntax**: ```sql --- Grid layout -FACET BY [SETTING scales => ] +-- Wrap layout (single variable = automatic wrap) +FACET [SETTING => , ...] --- Wrapped layout -FACET WRAP [SETTING scales => ] +-- Grid layout (BY clause for row × column) +FACET BY [SETTING ...] ``` -**Scale Sharing**: +**SETTING Properties**: -- `'fixed'` (default) - Same scales across all facets -- `'free'` - Independent scales for each facet -- `'free_x'` - Independent x-axis, shared y-axis -- `'free_y'` - Independent y-axis, shared x-axis +- `free => ` - Which axes have independent scales (see below) +- `ncol => ` - Number of columns for wrap layout +- `spacing => ` - Space between facets -**Example**: +**Free Scales** (`free` property): + +- `null` or omitted (default) - Shared/fixed scales across all facets +- `'x'` - Independent x-axis, shared y-axis +- `'y'` - Shared x-axis, independent y-axis +- `['x', 'y']` - Independent scales for both axes + +**Customizing Strip Labels**: + +To customize facet strip labels, use `SCALE panel RENAMING ...` (for wrap) or `SCALE row/column RENAMING ...` (for grid). + +**Examples**: ```sql -FACET WRAP region SETTING scales => 'free_y' -FACET region BY category SETTING scales => 'fixed' +-- Simple wrap facet +FACET region + +-- Grid facet with BY +FACET region BY category + +-- With free y-axis scales +FACET region SETTING free => 'y' + +-- With column count for wrap +FACET region SETTING ncol => 3 + +-- With label renaming via scale +FACET region +SCALE panel RENAMING 'N' => 'North', 'S' => 'South' + +-- Combined grid with settings +FACET region BY category + SETTING free => ['x', 'y'], spacing => 10 ``` ### COORD Clause @@ -1536,7 +1565,7 @@ DRAW line DRAW point MAPPING sale_date AS x, total AS y, region AS color SCALE x VIA date -FACET WRAP region +FACET region LABEL title => 'Sales Trends by Region', x => 'Date', y => 'Total Quantity' THEME minimal ``` diff --git a/EXAMPLES.md b/EXAMPLES.md index 2e07eed4..19b96816 100644 --- a/EXAMPLES.md +++ b/EXAMPLES.md @@ -215,13 +215,13 @@ THEME dark SETTING background => '#1a1a1a' ## Faceting -### Facet Wrap +### Facet (Wrap Layout) ```sql SELECT date, value, region FROM sales VISUALISE date AS x, value AS y DRAW line -FACET WRAP region +FACET region ``` ### Facet Grid @@ -239,7 +239,7 @@ FACET region BY product SELECT date, value, category FROM metrics VISUALISE date AS x, value AS y DRAW line -FACET WRAP category SETTING scales => 'free_y' +FACET category SETTING scales => 'free_y' ``` --- @@ -373,7 +373,7 @@ WITH ranked_products AS ( SELECT * FROM ranked_products WHERE rank <= 5 VISUALISE product_name AS x, revenue AS y, category AS color DRAW bar -FACET WRAP category SETTING scales => 'free_x' +FACET category SETTING scales => 'free_x' COORD flip LABEL title => 'Top 5 Products per Category', x => 'Product', @@ -450,7 +450,7 @@ VISUALISE sale_date AS x, total_quantity AS y, region AS color DRAW line DRAW point SCALE x SETTING type => 'date' -FACET WRAP region +FACET region LABEL title => 'Sales Trends by Region', x => 'Date', y => 'Total Quantity' diff --git a/doc/examples.qmd b/doc/examples.qmd index acc0510b..832de4cc 100644 --- a/doc/examples.qmd +++ b/doc/examples.qmd @@ -387,7 +387,7 @@ LABEL ## Faceting -### Facet Wrap by Region +### Facet by Region ```{ggsql} SELECT sale_date, revenue, region FROM 'sales.csv' @@ -395,7 +395,7 @@ WHERE category = 'Electronics' VISUALISE sale_date AS x, revenue AS y DRAW line SCALE x VIA date -FACET WRAP region +FACET region LABEL title => 'Electronics Sales by Region', x => 'Date', @@ -787,8 +787,8 @@ VISUALISE sale_date AS x, total_quantity AS y, region AS color DRAW line DRAW point SCALE x VIA date -FACET WRAP region -LABEL +FACET region +LABEL title => 'Sales Trends by Region', x => 'Date', y => 'Total Quantity' diff --git a/doc/ggsql.xml b/doc/ggsql.xml index 3ee645b7..6667c50b 100644 --- a/doc/ggsql.xml +++ b/doc/ggsql.xml @@ -105,9 +105,9 @@ REMAPPING SETTING FILTER - WRAP ORDER PARTITION + RENAMING TO VIA @@ -603,11 +603,13 @@ - - - - + + + + + + diff --git a/doc/index.qmd b/doc/index.qmd index cdd0ed1d..d1c883df 100644 --- a/doc/index.qmd +++ b/doc/index.qmd @@ -52,7 +52,7 @@ DRAW point MAPPING date AS x, value AS y SCALE x SETTING type => 'date' -FACET WRAP region +FACET region ``` ::: @@ -144,7 +144,7 @@ SCALE x SCALE y SETTING type => 'linear', limits => [0, 100000] -FACET WRAP region SETTING scales => 'free_y' +FACET region SETTING scales => 'free_y' LABEL title => 'Revenue Trends by Region', diff --git a/doc/syntax/clause/facet.qmd b/doc/syntax/clause/facet.qmd index 0a974dba..c6b08b46 100644 --- a/doc/syntax/clause/facet.qmd +++ b/doc/syntax/clause/facet.qmd @@ -1,3 +1,42 @@ --- title: "Create small multiples with `FACET`" --- + +The `FACET` clause allows you to split your data, by one or two variables, and plot each group as a small version of the plot: a technique called 'small multiples'. The technique is great for preventing overplotting. Because each small plot shares the same positional scales by default, visual comparisons between groups become easy. + +## Clause syntax +The `FACET` syntax contains a number of subclauses that are all optional + +```ggsql +FACET BY + SETTING => , ... +``` + +The first `column` is mandatory. It names a column in the layer data that will be used for splitting the data. If the layer data does not contain the column the behavior for that layer depends on the `missing` parameter of the facet. + +### `BY` +The optional `BY` clause is used to define an additional column to split the data by. If it is missing the small multiples are laid out in a grid with the facet panels filling the cells in a row-wise fashion. If `BY` is present then the categories of the first `column` defines the rows of the grid and the categories of the second `column` the columns of the grid. Each multiple is then positioned according to that. + +### `SETTING` +This clause behaves much like the `SETTINGS` clause in `DRAW`, in that it allows you to fine-tune specific behavior of the faceting. The following parameters exist: + +* `free`: Controls whether the positional scales are independent across the small multiples. Permissible values are: + * `null` or omitted (default): Shared/fixed scales across all panels + * `'x'`: Independent x-axis scale, shared y-axis scale + * `'y'`: Shared x-axis scale, independent y-axis scale + * `['x', 'y']`: Independent scales for both axes +* `missing`: Determines how layers behave when the faceting column is missing. It can take two values: `'repeat'` (default), and `'null'`. If `'repeat'` is set, then the layer data is repeated in each panel. If `'null'`, then such layers are only displayed if a null panel is shown, as controlled by the facet scale. +* `ncol`: The number of panel columns to use when faceting by a single variable. Default is 3 when fewer than 6 categories are present, 4 when fewer than 12 categeries are present and otherwise 5. When the `BY`-clause is used to set a second faceting variable, the `ncol` setting is not allowed. There is no `nrow` setting as this is derived from the number of panels and the `ncol` setting. + +### Facet variables as aesthetics +When you apply faceting to a plot you are creating new aesthetics you can control. For 1-dimensional faceting (no `BY` clause) the aesthetic is called `panel` and for 2-dimensional faceting the aesthetics are called `row` and `column`. You can read more about these aesthetics in [their documentation](../scale/aesthetic/Z_facetting.qmd) + +### Customizing facet strip labels +To customize facet strip labels (e.g., renaming categories), use the `RENAMING` clause on the facet scale: + +```ggsql +FACET region +SCALE panel RENAMING 'N' => 'North', 'S' => 'South' +``` + +See the [facet scale documentation](../scale/aesthetic/Z_facetting.qmd) for more details on label customization. diff --git a/doc/syntax/scale/aesthetic/Z_facetting.qmd b/doc/syntax/scale/aesthetic/Z_facetting.qmd new file mode 100644 index 00000000..2d0fc200 --- /dev/null +++ b/doc/syntax/scale/aesthetic/Z_facetting.qmd @@ -0,0 +1,34 @@ +--- +title: Faceting +--- + +ggsql provides one or two aesthetics related to faceting. These are special in the sense that they do not alter the display of the single data values, but rather alter in which plot they appear. While it is possible to map to these aesthetics in a layer they are most often applied globally as part of the [`FACET` clause](../../clause/facet.qmd). + +The aesthetics provided are either `panel` (for 1-dimensional faceting) or `row` and `column` (for 2-dimensional faceting). These aesthetics have to compatible with the facet clause: it is not possible to map to `panel` in a 2-dimensional faceting plot nor is it possible to map `row` and `column` in a 1-dimensional plot. + +## Literal values +Scales for facet aesthetics never use an output range and always relate to the input range. This means that no concept of literal values applies. + +## Scale types +Since panels are discrete by nature it is not possible to have a continuous scale for a facet. If continuous data is mapped to a facet aesthetic, a binned scale will be applied by default. + +```{ggsql} +VISUALISE Date AS x, Temp AS y FROM ggsql:airquality +DRAW line +FACET Date + SETTING free => 'x' +SCALE panel + SETTING breaks => 'month' +SCALE x + SETTING breaks => 'weeks' +``` + +In order to show data where the facet variable is null, it is necessary to explicitly include `null` in the input range of a facet aesthetic scale. Just like discrete positional aesthetics. You can also use `RENAMING` on the scale to customize facet strip labels. + +```{ggsql} +VISUALISE sex AS x FROM ggsql:penguins +DRAW bar +FACET species +SCALE panel FROM ['Adelie', null] + RENAMING null => 'The rest' +``` diff --git a/ggsql-jupyter/tests/fixtures/sample_notebook.ipynb b/ggsql-jupyter/tests/fixtures/sample_notebook.ipynb index f71bc768..0fb07810 100644 --- a/ggsql-jupyter/tests/fixtures/sample_notebook.ipynb +++ b/ggsql-jupyter/tests/fixtures/sample_notebook.ipynb @@ -188,7 +188,7 @@ "ORDER BY date\n", "VISUALISE date AS x, revenue AS y\n", "DRAW line\n", - "FACET WRAP region\n", + "FACET region\n", "SCALE x SETTING type => 'date'\n", "LABEL title => 'Revenue by Region', x => 'Date', y => 'Revenue ($)'" ] diff --git a/ggsql-jupyter/tests/fixtures/test_queries.sql b/ggsql-jupyter/tests/fixtures/test_queries.sql index 1c33ac56..ed818e45 100644 --- a/ggsql-jupyter/tests/fixtures/test_queries.sql +++ b/ggsql-jupyter/tests/fixtures/test_queries.sql @@ -58,7 +58,7 @@ SELECT FROM generate_series(1, 10) as t(n) VISUALISE x, y DRAW point -FACET WRAP group +FACET group LABEL title => 'Faceted Plot'; -- Visualization with FILTER clause - global mapping with layer filter diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py index 6e9183fc..4e20bca5 100644 --- a/ggsql-python/tests/test_ggsql.py +++ b/ggsql-python/tests/test_ggsql.py @@ -275,7 +275,7 @@ def test_layered_chart_can_round_trip(self): assert isinstance(recreated, altair.LayerChart) def test_faceted_chart_returns_facet_chart(self): - """FACET WRAP specs produce FacetChart.""" + """FACET specs produce FacetChart.""" df = pl.DataFrame( { "x": [1, 2, 3, 4, 5, 6], @@ -285,7 +285,7 @@ def test_faceted_chart_returns_facet_chart(self): ) # Need validate=False because ggsql produces v6 specs chart = ggsql.render_altair( - df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + df, "VISUALISE x, y FACET group DRAW point", validate=False ) assert isinstance(chart, altair.FacetChart) @@ -299,7 +299,7 @@ def test_faceted_chart_can_round_trip(self): } ) chart = ggsql.render_altair( - df, "VISUALISE x, y FACET WRAP group DRAW point", validate=False + df, "VISUALISE x, y FACET group DRAW point", validate=False ) # Convert to dict (skip validation for ggsql specs) diff --git a/ggsql-vscode/examples/sample.gsql b/ggsql-vscode/examples/sample.gsql index f9978c1d..ab2dc8d6 100644 --- a/ggsql-vscode/examples/sample.gsql +++ b/ggsql-vscode/examples/sample.gsql @@ -29,7 +29,7 @@ DRAW line DRAW point SETTING size => 3 SCALE x SETTING type => 'date' SCALE color TO viridis -FACET WRAP region SETTING scales => 'free_y' +FACET region SETTING scales => 'free_y' LABEL title => 'Sales Trends by Region', x => 'Month', y => 'Total Quantity', @@ -73,7 +73,7 @@ WHERE age BETWEEN 18 AND 80 VISUALISE age AS x, gender AS fill DRAW histogram SCALE x SETTING type => 'linear', limits => [18, 80] -FACET WRAP gender SETTING scales => 'fixed' +FACET gender SETTING scales => 'fixed' LABEL title => 'Age Distribution by Gender', x => 'Age', y => 'Count' diff --git a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json index 54ddfe74..7055f3be 100644 --- a/ggsql-vscode/syntaxes/ggsql.tmLanguage.json +++ b/ggsql-vscode/syntaxes/ggsql.tmLanguage.json @@ -249,7 +249,7 @@ "patterns": [ { "name": "support.type.aesthetic.ggsql", - "match": "\\b(x|y|xmin|xmax|ymin|ymax|xend|yend|weight|color|colour|fill|stroke|opacity|size|shape|linetype|linewidth|width|height|label|family|fontface|hjust|vjust)\\b" + "match": "\\b(x|y|xmin|xmax|ymin|ymax|xend|yend|weight|color|colour|fill|stroke|opacity|size|shape|linetype|linewidth|width|height|label|family|fontface|hjust|vjust|panel|row|column)\\b" } ] }, @@ -267,6 +267,10 @@ "name": "keyword.other.ggsql", "match": "\\b(?i:MAPPING|REMAPPING|SETTING|FILTER|FROM|ORDER|BY|PARTITION|RENAMING)\\b" }, + { + "name": "constant.language.null.ggsql", + "match": "\\b(?i:NULL|TRUE|FALSE)\\b" + }, { "name": "punctuation.separator.comma.ggsql", "match": "," @@ -308,7 +312,7 @@ }, { "name": "keyword.other.ggsql", - "match": "\\b(?i:TO|VIA)\\b" + "match": "\\b(?i:TO|VIA|RENAMING)\\b" }, { "name": "constant.language.scale-type.ggsql", @@ -328,17 +332,21 @@ }, "end": "(?i)(?=\\b(DRAW|SCALE|COORD|FACET|LABEL|THEME|VISUALISE|VISUALIZE|SELECT|WHERE|WITH)\\b)", "patterns": [ - { - "name": "keyword.other.ggsql", - "match": "\\b(?i:WRAP)\\b" - }, { "name": "constant.language.facet-scales.ggsql", "match": "\\b(fixed|free|free_x|free_y)\\b" }, { "name": "support.type.property.ggsql", - "match": "\\b(scales)\\b" + "match": "\\b(scales|ncol|columns|missing)\\b" + }, + { + "name": "keyword.operator.wildcard.ggsql", + "match": "\\*" + }, + { + "name": "keyword.other.ggsql", + "match": "\\b(?i:RENAMING)\\b" }, { "include": "#common-clause-patterns" } ] diff --git a/src/execute/layer.rs b/src/execute/layer.rs index cf4e2647..d4d911fb 100644 --- a/src/execute/layer.rs +++ b/src/execute/layer.rs @@ -7,7 +7,7 @@ use crate::plot::{ AestheticValue, DefaultAestheticValue, Layer, ParameterValue, Scale, Schema, SqlTypeNames, StatResult, }; -use crate::{naming, DataFrame, Facet, GgsqlError, Result}; +use crate::{naming, DataFrame, GgsqlError, Result}; use polars::prelude::DataType; use std::collections::{HashMap, HashSet}; @@ -323,7 +323,7 @@ pub fn build_layer_base_query( /// 1. Builds the aesthetic-named schema for stat transforms /// 2. Updates layer mappings to use prefixed aesthetic names /// 3. Applies pre-stat transforms (e.g., binning, discrete censoring) -/// 4. Builds group_by columns from partition_by and facet +/// 4. Builds group_by columns from partition_by /// 5. Applies statistical transformation /// 6. Applies ORDER BY /// @@ -335,7 +335,6 @@ pub fn build_layer_base_query( /// * `layer` - The layer to transform (modified by stat transforms) /// * `base_query` - The base query from build_layer_base_query /// * `schema` - The layer's schema (with min/max from base_query) -/// * `facet` - Optional facet configuration (needed for group_by columns) /// * `scales` - All resolved scales /// * `type_names` - SQL type names for the database backend /// * `execute_query` - Function to execute queries (needed for some stat transforms) @@ -347,7 +346,6 @@ pub fn apply_layer_transforms( layer: &mut Layer, base_query: &str, schema: &Schema, - facet: Option<&Facet>, scales: &[Scale], type_names: &SqlTypeNames, execute_query: &F, @@ -392,18 +390,14 @@ where type_names, ); - // Build group_by columns from partition_by and facet variables + // Build group_by columns from partition_by + // Note: Facet aesthetics are already in partition_by via add_discrete_columns_to_partition_by, + // so we don't add facet.get_variables() here (which would add original column names + // instead of aesthetic column names, breaking pre-stat transforms like domain censoring). let mut group_by: Vec = Vec::new(); for col in &layer.partition_by { group_by.push(col.clone()); } - if let Some(f) = facet { - for var in f.get_variables() { - if !group_by.contains(&var) { - group_by.push(var); - } - } - } // Add literal aesthetic columns to group_by so they survive stat transforms. // Since literal columns contain constant values (same for every row), adding them diff --git a/src/execute/mod.rs b/src/execute/mod.rs index cdd394ce..3d29d087 100644 --- a/src/execute/mod.rs +++ b/src/execute/mod.rs @@ -24,6 +24,7 @@ pub use schema::TypeInfo; use crate::naming; use crate::parser; use crate::plot::aesthetic::{primary_aesthetic, ALL_POSITIONAL}; +use crate::plot::facet::{resolve_properties as resolve_facet_properties, FacetDataContext}; use crate::plot::{AestheticValue, Layer, Scale, ScaleTypeKind, Schema}; use crate::{DataFrame, GgsqlError, Plot, Result}; use std::collections::{HashMap, HashSet}; @@ -162,9 +163,12 @@ fn merge_global_mappings_into_layers(specs: &mut [Plot], layer_schemas: &[Schema // 1. First merge explicit global aesthetics (layer overrides global) // Note: "color"/"colour" are accepted even though not in supported, // because split_color_aesthetic will convert them to fill/stroke later + // Note: facet aesthetics (panel, row, column) are also accepted, + // as they apply to all layers regardless of geom support for (aesthetic, value) in &spec.global_mappings.aesthetics { let is_color_alias = matches!(aesthetic.as_str(), "color" | "colour"); - if supported.contains(&aesthetic.as_str()) || is_color_alias { + let is_facet_aesthetic = crate::plot::scale::is_facet_aesthetic(aesthetic.as_str()); + if supported.contains(&aesthetic.as_str()) || is_color_alias || is_facet_aesthetic { layer .mappings .aesthetics @@ -261,6 +265,382 @@ fn split_color_aesthetic(spec: &mut Plot) { } } +// ============================================================================= +// Facet Mapping Injection +// ============================================================================= + +/// Add facet variable mappings to each layer's mappings. +/// +/// This allows facet aesthetics to flow through the same code paths as +/// regular aesthetics (scale resolution, type casting, SELECT list building, +/// partition_by handling, etc.). +/// +/// Skips injection if: +/// - The layer already has the facet aesthetic mapped (from MAPPING or global) +/// - The variables list is empty (inferred from layer mappings, not FACET clause) +/// - The column doesn't exist in this layer's schema (different data source) +fn add_facet_mappings_to_layers( + layers: &mut [Layer], + facet: &crate::plot::Facet, + layer_type_info: &[Vec], +) { + for (layer_idx, layer) in layers.iter_mut().enumerate() { + if layer_idx >= layer_type_info.len() { + continue; + } + let type_info = &layer_type_info[layer_idx]; + + for (var, aesthetic) in facet.layout.get_aesthetic_mappings() { + // Skip if layer already has this facet aesthetic mapped (from MAPPING or global) + if layer.mappings.aesthetics.contains_key(aesthetic) { + continue; + } + + // Only inject if the column exists in this layer's schema + // (variables list is empty when inferred from layer mappings - no injection needed) + if type_info.iter().any(|(col, _, _)| col == var) { + // Add mapping: variable → facet aesthetic + layer.mappings.aesthetics.insert( + aesthetic.to_string(), + AestheticValue::Column { + name: var.to_string(), + original_name: Some(var.to_string()), + is_dummy: false, + }, + ); + } + } + } +} + +// ============================================================================= +// Facet Missing Column Detection and Handling +// ============================================================================= + +/// Identify which layers are missing the facet column. +/// +/// Returns a vector of booleans, one per layer. A layer is considered "missing" +/// the facet column if ANY of the facet variables are not present in the layer's +/// schema (type_info). +/// +/// This is used to determine which layers need data duplication when +/// `missing => 'repeat'` is set on the facet. +fn identify_layers_missing_facet_column( + layers: &[Layer], + facet: &crate::plot::Facet, + layer_type_info: &[Vec], +) -> Vec { + let facet_variables = facet.get_variables(); + + // If variables list is empty (inferred from layer mappings), no layers are "missing" + if facet_variables.is_empty() { + return vec![false; layers.len()]; + } + + layers + .iter() + .enumerate() + .map(|(layer_idx, _layer)| { + if layer_idx >= layer_type_info.len() { + return false; + } + let type_info = &layer_type_info[layer_idx]; + let schema_columns: std::collections::HashSet<&str> = + type_info.iter().map(|(name, _, _)| name.as_str()).collect(); + + // Layer is missing if ANY facet variable is absent from its schema + facet_variables + .iter() + .any(|var| !schema_columns.contains(var.as_str())) + }) + .collect() +} + +/// Get unique facet values from layers that have the facet column. +/// +/// Collects all unique values for a facet aesthetic from layers that have the column, +/// to be used for cross-joining with layers that are missing the column. +fn get_unique_facet_values( + data_map: &HashMap, + facet_aesthetic: &str, + layers: &[Layer], + layers_missing_facet: &[bool], +) -> Option { + use polars::prelude::*; + + let aes_col = naming::aesthetic_column(facet_aesthetic); + let mut all_values: Vec = Vec::new(); + + for (idx, layer) in layers.iter().enumerate() { + // Skip layers that are missing the facet column + if idx < layers_missing_facet.len() && layers_missing_facet[idx] { + continue; + } + + if let Some(ref data_key) = layer.data_key { + if let Some(df) = data_map.get(data_key) { + if let Ok(col) = df.column(&aes_col) { + all_values.push(col.as_materialized_series().clone()); + } + } + } + } + + if all_values.is_empty() { + return None; + } + + // Concatenate all series and get unique values + let mut combined = all_values.remove(0); + for s in all_values { + let _ = combined.extend(&s); + } + + combined.unique().ok() +} + +/// Cross-join a DataFrame with facet values (duplicate for each facet panel). +/// +/// Creates a new DataFrame where every row is duplicated for each unique facet value. +/// The facet column is added with the appropriate values. +fn cross_join_with_facet_values( + df: &DataFrame, + unique_values: &polars::prelude::Series, + facet_aesthetic: &str, +) -> Result { + use polars::prelude::*; + + let aes_col = naming::aesthetic_column(facet_aesthetic); + let n_values = unique_values.len(); + + if n_values == 0 { + return Ok(df.clone()); + } + + let n_rows = df.height(); + + // Create the repeated data manually (polars cross_join requires an import we may not have) + // For each row in df, repeat n_values times + // For facet column, for each row's repetitions, cycle through unique_values + + // 1. Repeat each original column n_values times + let mut new_columns: Vec = Vec::new(); + for col in df.get_columns() { + // Repeat each value n_values times: [a, b, c] with n_values=2 -> [a, a, b, b, c, c] + let indices: Vec = (0..n_rows) + .flat_map(|i| std::iter::repeat_n(i as u32, n_values)) + .collect(); + let idx = IdxCa::new(PlSmallStr::EMPTY, &indices); + let repeated = col.as_materialized_series().take(&idx).map_err(|e| { + crate::GgsqlError::InternalError(format!("Failed to repeat column: {}", e)) + })?; + new_columns.push(repeated.into()); + } + + // 2. Create the facet column: tile unique_values for each row + // [v1, v2, v1, v2, v1, v2] for n_rows=3, n_values=2 + let facet_indices: Vec = (0..n_rows) + .flat_map(|_| (0..n_values).map(|j| j as u32)) + .collect(); + let facet_idx = IdxCa::new(PlSmallStr::EMPTY, &facet_indices); + let facet_col = unique_values + .take(&facet_idx) + .map_err(|e| { + crate::GgsqlError::InternalError(format!("Failed to create facet column: {}", e)) + })? + .with_name(aes_col.into()); + new_columns.push(facet_col.into()); + + DataFrame::new(new_columns).map_err(|e| { + crate::GgsqlError::InternalError(format!("Failed to create expanded DataFrame: {}", e)) + }) +} + +/// Handle layers missing the facet column based on facet.missing setting. +/// +/// - `repeat` (default): Cross-join layer data with all unique facet values, +/// effectively duplicating the layer's data across all facet panels. +/// - `null`: Do nothing (current behavior - nulls added during unification, +/// layer appears only in null panel if null is in scale's input range). +fn handle_missing_facet_columns( + spec: &Plot, + data_map: &mut HashMap, + layers_missing_facet: &[bool], +) -> Result<()> { + use crate::plot::ParameterValue; + + let facet = match &spec.facet { + Some(f) => f, + None => return Ok(()), + }; + + // Get the missing setting (default to "repeat") + let missing_setting = facet + .properties + .get("missing") + .and_then(|v| { + if let ParameterValue::String(s) = v { + Some(s.as_str()) + } else { + None + } + }) + .unwrap_or("repeat"); + + // If null, do nothing (existing behavior handles this) + if missing_setting == "null" { + return Ok(()); + } + + // Get facet aesthetics from layout + let facet_aesthetics = facet.layout.get_aesthetics(); + + // Process each facet aesthetic + for facet_aesthetic in facet_aesthetics { + // Get unique values from layers that HAVE the column + let unique_values = match get_unique_facet_values( + data_map, + facet_aesthetic, + &spec.layers, + layers_missing_facet, + ) { + Some(v) => v, + None => continue, // No layers have this column, skip + }; + + // For each layer MISSING the column, cross-join with facet values + for (idx, layer) in spec.layers.iter().enumerate() { + if idx >= layers_missing_facet.len() || !layers_missing_facet[idx] { + continue; + } + + if let Some(ref data_key) = layer.data_key { + if let Some(df) = data_map.get(data_key) { + // Only process if this DataFrame doesn't already have the column + let aes_col = naming::aesthetic_column(facet_aesthetic); + if df.column(&aes_col).is_err() { + let expanded_df = + cross_join_with_facet_values(df, &unique_values, facet_aesthetic)?; + data_map.insert(data_key.clone(), expanded_df); + } + } + } + } + } + + Ok(()) +} + +// ============================================================================= +// Facet Resolution from Layer Mappings +// ============================================================================= + +/// Resolve facet configuration from layer mappings and FACET clause. +/// +/// Logic: +/// 1. Collect all facet aesthetic mappings from layers (after global merge) +/// 2. Validate no conflicting layout types (cannot mix 'panel' with 'row'/'column') +/// 3. Validate Grid layout has both 'row' and 'column' if either is used +/// 4. If FACET clause exists: +/// - Validate layer mappings are compatible with layout type +/// - Layer mappings take precedence (override FACET clause columns) +/// 5. If no FACET clause: infer layout from layer mappings +/// +/// Returns: +/// - `Ok(Some(Facet))` - Resolved facet configuration +/// - `Ok(None)` - No faceting needed +/// - `Err(...)` - Validation error +fn resolve_facet( + layers: &[crate::plot::Layer], + existing_facet: Option, +) -> Result> { + use crate::plot::facet::FacetLayout; + use crate::plot::scale::is_facet_aesthetic; + + // Collect facet aesthetic mappings from all layers + let mut has_facet = false; + let mut has_row = false; + let mut has_column = false; + + for layer in layers { + for aesthetic in layer.mappings.aesthetics.keys() { + if is_facet_aesthetic(aesthetic) { + match aesthetic.as_str() { + "panel" => has_facet = true, + "row" => has_row = true, + "column" => has_column = true, + _ => {} + } + } + } + } + + // Validate: cannot mix Wrap (panel) with Grid (row/column) + if has_facet && (has_row || has_column) { + return Err(GgsqlError::ValidationError( + "Cannot mix 'panel' aesthetic (Wrap layout) with 'row'/'column' aesthetics (Grid layout). \ + Use either 'panel' for Wrap or 'row'/'column' for Grid.".to_string() + )); + } + + // Validate: Grid requires both row and column + if (has_row || has_column) && !(has_row && has_column) { + let missing = if has_row { "column" } else { "row" }; + return Err(GgsqlError::ValidationError(format!( + "Grid facet layout requires both 'row' and 'column' aesthetics. Missing: '{}'", + missing + ))); + } + + // Determine inferred layout from layer mappings + let inferred_layout = if has_facet { + Some(FacetLayout::Wrap { + variables: vec![], // Empty - each layer has its own mapping + }) + } else if has_row && has_column { + Some(FacetLayout::Grid { + row: vec![], // Empty - each layer has its own mapping + column: vec![], // Empty - each layer has its own mapping + }) + } else { + None + }; + + // If no layer mappings and no FACET clause, no faceting + if inferred_layout.is_none() && existing_facet.is_none() { + return Ok(None); + } + + // If FACET clause exists, validate compatibility with layer mappings + if let Some(ref facet) = existing_facet { + let is_wrap = facet.is_wrap(); + + if is_wrap && (has_row || has_column) { + return Err(GgsqlError::ValidationError( + "FACET clause uses Wrap layout, but layer mappings use 'row'/'column' (Grid layout). \ + Remove FACET clause to infer Grid layout, or use 'panel' aesthetic instead.".to_string() + )); + } + + if !is_wrap && has_facet { + return Err(GgsqlError::ValidationError( + "FACET clause uses Grid layout, but layer mappings use 'panel' aesthetic (Wrap layout). \ + Remove FACET clause to infer Wrap layout, or use 'row'/'column' aesthetics instead.".to_string() + )); + } + + // FACET clause exists and is compatible - use it (layer mappings will override columns) + return Ok(Some(facet.clone())); + } + + // No FACET clause - infer from layer mappings + if let Some(layout) = inferred_layout { + return Ok(Some(crate::plot::Facet::new(layout))); + } + + Ok(None) +} + // ============================================================================= // Discrete Column Handling // ============================================================================= @@ -381,10 +761,13 @@ fn collect_layer_required_columns(layer: &Layer, spec: &Plot) -> HashSet let mut required = HashSet::new(); - // Facet variables (shared across all layers) + // Facet aesthetic columns (shared across all layers) + // Only the aesthetic-prefixed columns are needed for Vega-Lite output. + // The original variable names (e.g., "species") are not needed after + // the aesthetic columns (e.g., "__ggsql_aes_panel__") have been created. if let Some(ref facet) = spec.facet { - for var in facet.get_variables() { - required.insert(var); + for aesthetic in facet.layout.get_aesthetics() { + required.insert(naming::aesthetic_column(aesthetic)); } } @@ -590,6 +973,24 @@ pub fn prepare_data_with_reader(query: &str, reader: &R) -> Result(query: &str, reader: &R) -> Result(query: &str, reader: &R) -> Result(query: &str, reader: &R) -> Result = facet + .layout + .get_aesthetics() + .iter() + .map(|aes| naming::aesthetic_column(aes)) + .collect(); + let context = FacetDataContext::from_dataframe(facet_df, &aesthetic_cols); + resolve_facet_properties(facet, &context) + .map_err(|e| GgsqlError::ValidationError(format!("Facet: {}", e)))?; + } + } + // Apply post-stat binning for Binned scales on remapped aesthetics // This handles cases like SCALE BINNED fill when fill is remapped from count for spec in &specs { @@ -769,6 +1187,12 @@ pub fn prepare_data_with_reader(query: &str, reader: &R) -> Result Layer { + let mut layer = Layer::new(Geom::point()); + layer.mappings.aesthetics.insert( + aesthetic.to_string(), + AestheticValue::standard_column(column), + ); + layer + } + + #[test] + fn test_resolve_facet_infers_wrap_from_layer_mapping() { + let layers = vec![make_layer_with_mapping("panel", "region")]; + + let result = resolve_facet(&layers, None).unwrap(); + + assert!(result.is_some()); + let facet = result.unwrap(); + assert!(facet.is_wrap()); + // Variables should be empty (each layer has its own mapping) + assert!(facet.get_variables().is_empty()); + } + + #[test] + fn test_resolve_facet_infers_grid_from_layer_mappings() { + let mut layer = Layer::new(Geom::point()); + layer + .mappings + .aesthetics + .insert("row".to_string(), AestheticValue::standard_column("region")); + layer.mappings.aesthetics.insert( + "column".to_string(), + AestheticValue::standard_column("year"), + ); + let layers = vec![layer]; + + let result = resolve_facet(&layers, None).unwrap(); + + assert!(result.is_some()); + let facet = result.unwrap(); + assert!(facet.is_grid()); + // Variables should be empty + assert!(facet.get_variables().is_empty()); + } + + #[test] + fn test_resolve_facet_error_mixed_wrap_and_grid() { + let mut layer = Layer::new(Geom::point()); + layer.mappings.aesthetics.insert( + "panel".to_string(), + AestheticValue::standard_column("region"), + ); + layer + .mappings + .aesthetics + .insert("row".to_string(), AestheticValue::standard_column("year")); + let layers = vec![layer]; + + let result = resolve_facet(&layers, None); + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("Cannot mix")); + assert!(err.contains("panel")); + assert!(err.contains("row")); + } + + #[test] + fn test_resolve_facet_error_incomplete_grid() { + // Only row, missing column + let layers = vec![make_layer_with_mapping("row", "region")]; + + let result = resolve_facet(&layers, None); + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("requires both")); + assert!(err.contains("column")); + } + + #[test] + fn test_resolve_facet_uses_existing_facet_clause() { + let layers = vec![Layer::new(Geom::point())]; // No facet mappings + + let existing_facet = Facet::new(FacetLayout::Wrap { + variables: vec!["region".to_string()], + }); + + let result = resolve_facet(&layers, Some(existing_facet.clone())).unwrap(); + + assert!(result.is_some()); + let facet = result.unwrap(); + assert!(facet.is_wrap()); + assert_eq!(facet.get_variables(), vec!["region".to_string()]); + } + + #[test] + fn test_resolve_facet_error_wrap_clause_with_grid_mapping() { + let mut layer = Layer::new(Geom::point()); + layer.mappings.aesthetics.insert( + "row".to_string(), + AestheticValue::standard_column("category"), + ); + layer.mappings.aesthetics.insert( + "column".to_string(), + AestheticValue::standard_column("year"), + ); + let layers = vec![layer]; + + let existing_facet = Facet::new(FacetLayout::Wrap { + variables: vec!["region".to_string()], + }); + + let result = resolve_facet(&layers, Some(existing_facet)); + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("Wrap layout")); + assert!(err.contains("row")); + } + + #[test] + fn test_resolve_facet_error_grid_clause_with_wrap_mapping() { + let layers = vec![make_layer_with_mapping("panel", "region")]; + + let existing_facet = Facet::new(FacetLayout::Grid { + row: vec!["region".to_string()], + column: vec!["year".to_string()], + }); + + let result = resolve_facet(&layers, Some(existing_facet)); + + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!(err.contains("Grid layout")); + assert!(err.contains("panel")); + } + + #[test] + fn test_resolve_facet_no_mappings_no_clause() { + let layers = vec![Layer::new(Geom::point())]; + + let result = resolve_facet(&layers, None).unwrap(); + + assert!(result.is_none()); + } + + #[test] + fn test_resolve_facet_layer_override_compatible_with_clause() { + // Layer has panel mapping, FACET clause is Wrap - compatible + let layers = vec![make_layer_with_mapping("panel", "category")]; + + let existing_facet = Facet::new(FacetLayout::Wrap { + variables: vec!["region".to_string()], + }); + + // Should succeed - layer mapping takes precedence over FACET clause columns + let result = resolve_facet(&layers, Some(existing_facet)).unwrap(); + assert!(result.is_some()); + assert!(result.unwrap().is_wrap()); + } + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_facet_aesthetic_mapping_wrap() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + reader + .connection() + .execute( + "CREATE TABLE facet_test AS SELECT * FROM (VALUES + (1, 10, 'A'), (2, 20, 'A'), (3, 30, 'B'), (4, 40, 'B') + ) AS t(x, y, region)", + duckdb::params![], + ) + .unwrap(); + + // Use panel aesthetic in layer mapping (not FACET clause) + let query = r#" + SELECT * FROM facet_test + VISUALISE + DRAW point MAPPING x AS x, y AS y, region AS panel + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // Should have a facet configuration inferred from layer mapping + assert!(result.specs[0].facet.is_some()); + let facet = result.specs[0].facet.as_ref().unwrap(); + assert!(facet.is_wrap()); + + // Data should have panel aesthetic column + let layer_df = result.data.get(&naming::layer_key(0)).unwrap(); + let facet_col = naming::aesthetic_column("panel"); + assert!( + layer_df.column(&facet_col).is_ok(), + "Should have '{}' column: {:?}", + facet_col, + layer_df.get_column_names_str() + ); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_facet_aesthetic_mapping_grid() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + reader + .connection() + .execute( + "CREATE TABLE grid_facet_test AS SELECT * FROM (VALUES + (1, 10, 'A', 2020), (2, 20, 'B', 2020), + (3, 30, 'A', 2021), (4, 40, 'B', 2021) + ) AS t(x, y, region, year)", + duckdb::params![], + ) + .unwrap(); + + // Use row/column aesthetics in layer mapping + let query = r#" + SELECT * FROM grid_facet_test + VISUALISE + DRAW point MAPPING x AS x, y AS y, region AS row, year AS column + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // Should have a grid facet configuration + assert!(result.specs[0].facet.is_some()); + let facet = result.specs[0].facet.as_ref().unwrap(); + assert!(facet.is_grid()); + + // Data should have row and column aesthetic columns + let layer_df = result.data.get(&naming::layer_key(0)).unwrap(); + let row_col = naming::aesthetic_column("row"); + let col_col = naming::aesthetic_column("column"); + assert!( + layer_df.column(&row_col).is_ok(), + "Should have '{}' column", + row_col + ); + assert!( + layer_df.column(&col_col).is_ok(), + "Should have '{}' column", + col_col + ); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_facet_global_mapping() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + reader + .connection() + .execute( + "CREATE TABLE global_facet_test AS SELECT * FROM (VALUES + (1, 10, 'A'), (2, 20, 'B') + ) AS t(x, y, region)", + duckdb::params![], + ) + .unwrap(); + + // Use panel aesthetic in global VISUALISE mapping + let query = r#" + SELECT * FROM global_facet_test + VISUALISE region AS panel + DRAW point MAPPING x AS x, y AS y + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // Should have a facet configuration + assert!(result.specs[0].facet.is_some()); + assert!(result.specs[0].facet.as_ref().unwrap().is_wrap()); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_facet_layer_override_of_facet_clause() { + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + reader + .connection() + .execute( + "CREATE TABLE override_test AS SELECT * FROM (VALUES + (1, 10, 'A', 'X'), (2, 20, 'B', 'Y') + ) AS t(x, y, region, category)", + duckdb::params![], + ) + .unwrap(); + + // FACET clause specifies region, but layer mapping uses category + let query = r#" + SELECT * FROM override_test + VISUALISE + FACET region + DRAW point MAPPING x AS x, y AS y, category AS panel + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // Should succeed - layer mapping overrides FACET clause + let layer = &result.specs[0].layers[0]; + let facet_mapping = layer.mappings.aesthetics.get("panel").unwrap(); + // Use label_name() which returns original column name before internal renaming + assert_eq!( + facet_mapping.label_name(), + Some("category"), + "Layer should override FACET clause with category column" + ); + } + + // ========================================================================= + // Facet Missing Column Tests + // ========================================================================= + + #[cfg(feature = "duckdb")] + #[test] + fn test_facet_missing_repeat_broadcasts_layer() { + // Test that missing => 'repeat' (default) broadcasts a layer without the facet column + // across all facet panels + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create main data with facet column + reader + .connection() + .execute( + "CREATE TABLE main_data AS SELECT * FROM (VALUES + (1, 10, 'A'), (2, 20, 'A'), (3, 30, 'B'), (4, 40, 'B') + ) AS t(x, y, region)", + duckdb::params![], + ) + .unwrap(); + + // Create reference line data WITHOUT the facet column + reader + .connection() + .execute( + "CREATE TABLE ref_data AS SELECT * FROM (VALUES + (0, 25) + ) AS t(x, y)", + duckdb::params![], + ) + .unwrap(); + + // Query with two layers: main data has facet, ref line doesn't + // Default missing => 'repeat' should broadcast ref line to both panels + let query = r#" + SELECT * FROM main_data + VISUALISE + FACET region + DRAW point MAPPING x AS x, y AS y + DRAW point MAPPING x AS x, y AS y FROM ref_data + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // Layer 1 (ref_data point) should have its data expanded to include both facet values + let ref_key = result.specs[0].layers[1] + .data_key + .as_ref() + .expect("ref layer should have data_key"); + let ref_df = result.data.get(ref_key).unwrap(); + + // With repeat, the ref_data should have 2 rows (one per facet value: A and B) + assert_eq!( + ref_df.height(), + 2, + "ref layer should be repeated for each facet panel (A and B)" + ); + + // The panel column should exist in the ref_data + let facet_col = naming::aesthetic_column("panel"); + assert!( + ref_df.column(&facet_col).is_ok(), + "ref data should have panel column after broadcast" + ); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_facet_missing_null_no_broadcast() { + // Test that missing => 'null' does NOT broadcast layers + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create main data with facet column + reader + .connection() + .execute( + "CREATE TABLE main_data_null AS SELECT * FROM (VALUES + (1, 10, 'A'), (2, 20, 'A'), (3, 30, 'B'), (4, 40, 'B') + ) AS t(x, y, region)", + duckdb::params![], + ) + .unwrap(); + + // Create reference line data WITHOUT the facet column + reader + .connection() + .execute( + "CREATE TABLE ref_data_null AS SELECT * FROM (VALUES + (0, 25) + ) AS t(x, y)", + duckdb::params![], + ) + .unwrap(); + + // Query with missing => 'null' + let query = r#" + SELECT * FROM main_data_null + VISUALISE + FACET region SETTING missing => 'null' + DRAW point MAPPING x AS x, y AS y + DRAW point MAPPING x AS x, y AS y FROM ref_data_null + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // Layer 1 should NOT have its data expanded + let ref_key = result.specs[0].layers[1] + .data_key + .as_ref() + .expect("ref layer should have data_key"); + let ref_df = result.data.get(ref_key).unwrap(); + + // With null, the ref data should have 1 row (not repeated) + assert_eq!( + ref_df.height(), + 1, + "ref layer should NOT be repeated with missing => 'null'" + ); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_facet_missing_repeat_grid_layout() { + // Test repeat behavior with grid facets (row + column) + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create main data with row and column facet variables + reader + .connection() + .execute( + "CREATE TABLE grid_main AS SELECT * FROM (VALUES + (1, 10, 'A', 2020), (2, 20, 'A', 2021), + (3, 30, 'B', 2020), (4, 40, 'B', 2021) + ) AS t(x, y, region, year)", + duckdb::params![], + ) + .unwrap(); + + // Create reference data WITHOUT facet columns + reader + .connection() + .execute( + "CREATE TABLE grid_ref AS SELECT * FROM (VALUES + (0, 25) + ) AS t(x, y)", + duckdb::params![], + ) + .unwrap(); + + // Grid facet with default repeat + let query = r#" + SELECT * FROM grid_main + VISUALISE + FACET region BY year + DRAW point MAPPING x AS x, y AS y + DRAW point MAPPING x AS x, y AS y FROM grid_ref + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // Layer 1 should be expanded for both row and column + let ref_key = result.specs[0].layers[1] + .data_key + .as_ref() + .expect("ref layer should have data_key"); + let ref_df = result.data.get(ref_key).unwrap(); + + // With grid (2 regions x 2 years = 4 panels), the ref should have 4 rows + assert_eq!( + ref_df.height(), + 4, + "ref layer should be repeated for each grid panel (2 regions x 2 years)" + ); + } + + #[cfg(feature = "duckdb")] + #[test] + fn test_facet_missing_layer_with_facet_column_unchanged() { + // Ensure layers that DO have the facet column are not affected + let reader = DuckDBReader::from_connection_string("duckdb://memory").unwrap(); + + // Create data where both layers have the facet column + reader + .connection() + .execute( + "CREATE TABLE both_have_facet AS SELECT * FROM (VALUES + (1, 10, 'A'), (2, 20, 'B') + ) AS t(x, y, region)", + duckdb::params![], + ) + .unwrap(); + + let query = r#" + SELECT * FROM both_have_facet + VISUALISE + FACET region + DRAW point MAPPING x AS x, y AS y + DRAW line MAPPING x AS x, y AS y + "#; + + let result = prepare_data_with_reader(query, &reader).unwrap(); + + // Both layers should have 2 rows (original data, not expanded) + let point_key = result.specs[0].layers[0].data_key.as_ref().unwrap(); + let line_key = result.specs[0].layers[1].data_key.as_ref().unwrap(); + + let point_df = result.data.get(point_key).unwrap(); + let line_df = result.data.get(line_key).unwrap(); + + assert_eq!( + point_df.height(), + 2, + "point layer with facet column should not be expanded" + ); + assert_eq!( + line_df.height(), + 2, + "line layer with facet column should not be expanded" + ); + } } diff --git a/src/execute/scale.rs b/src/execute/scale.rs index 630f622b..3f034d85 100644 --- a/src/execute/scale.rs +++ b/src/execute/scale.rs @@ -9,7 +9,7 @@ use crate::plot::aesthetic::primary_aesthetic; use crate::plot::layer::geom::get_aesthetic_family; use crate::plot::scale::{ default_oob, gets_default_scale, infer_scale_target_type, infer_transform_from_input_range, - transform::Transform, OOB_CENSOR, OOB_KEEP, OOB_SQUISH, + is_facet_aesthetic, transform::Transform, OOB_CENSOR, OOB_KEEP, OOB_SQUISH, }; use crate::plot::{ AestheticValue, ArrayElement, ArrayElementType, ColumnInfo, Layer, ParameterValue, Plot, Scale, @@ -258,6 +258,17 @@ pub fn resolve_scale_types_and_transforms( for scale in &mut spec.scales { // Skip scales that already have explicit types (user specified) if let Some(scale_type) = &scale.scale_type { + // Validate facet aesthetics cannot use Continuous scales + if is_facet_aesthetic(&scale.aesthetic) + && scale_type.scale_type_kind() == ScaleTypeKind::Continuous + { + return Err(GgsqlError::ValidationError(format!( + "SCALE {}: facet variables require Discrete or Binned scales, got Continuous. \ + Use SCALE BINNED {} to bin continuous data.", + scale.aesthetic, scale.aesthetic + ))); + } + // Collect all dtypes for validation and transform inference let all_dtypes = collect_dtypes_for_aesthetic(&spec.layers, &scale.aesthetic, layer_type_info); @@ -343,14 +354,16 @@ pub fn resolve_scale_types_and_transforms( | TransformKind::Integer => ScaleType::continuous(), // Discrete transforms (String, Bool) use Discrete scale TransformKind::String | TransformKind::Bool => ScaleType::discrete(), - // Identity: fall back to dtype inference - TransformKind::Identity => ScaleType::infer(&common_dtype), + // Identity: fall back to dtype inference (considers aesthetic) + TransformKind::Identity => { + ScaleType::infer_for_aesthetic(&common_dtype, &scale.aesthetic) + } } } else { - ScaleType::infer(&common_dtype) + ScaleType::infer_for_aesthetic(&common_dtype, &scale.aesthetic) } } else { - ScaleType::infer(&common_dtype) + ScaleType::infer_for_aesthetic(&common_dtype, &scale.aesthetic) }; scale.scale_type = Some(inferred_scale_type.clone()); @@ -895,7 +908,10 @@ pub fn resolve_scales(spec: &mut Plot, data_map: &mut HashMap // Infer scale_type if not already set if spec.scales[idx].scale_type.is_none() { - spec.scales[idx].scale_type = Some(ScaleType::infer(column_refs[0].dtype())); + spec.scales[idx].scale_type = Some(ScaleType::infer_for_aesthetic( + column_refs[0].dtype(), + &aesthetic, + )); } // Clone scale_type (cheap Arc clone) to avoid borrow conflict with mutations @@ -1159,11 +1175,28 @@ pub fn apply_oob_to_column_numeric( .map(|opt| opt.map(|v| v.clamp(range_min, range_max))) .collect(); + // Restore temporal type if original column was temporal + // This ensures Date/DateTime/Time values serialize to ISO strings in JSON + let original_dtype = series.dtype().clone(); + let clamped_series = clamped.into_series(); + + let restored_series = match &original_dtype { + DataType::Date | DataType::Datetime(_, _) | DataType::Time => { + clamped_series.cast(&original_dtype).map_err(|e| { + GgsqlError::InternalError(format!( + "Failed to restore temporal type for '{}': {}", + col_name, e + )) + })? + } + _ => clamped_series, + }; + // Replace column with clamped values, maintaining original name - let clamped_series = clamped.into_series().with_name(col_name.into()); + let named_series = restored_series.with_name(col_name.into()); df.clone() - .with_column(clamped_series) + .with_column(named_series) .map(|df| df.clone()) .map_err(|e| GgsqlError::InternalError(format!("Failed to replace column: {}", e))) } diff --git a/src/lib.rs b/src/lib.rs index 73f24503..b81e6c74 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,7 +52,8 @@ pub mod validate; // Re-export key types for convenience pub use plot::{ - AestheticValue, DataSource, Facet, Geom, Layer, Mappings, Plot, Scale, SqlExpression, + AestheticValue, DataSource, Facet, FacetLayout, Geom, Layer, Mappings, Plot, Scale, + SqlExpression, }; // Re-export aesthetic classification utilities @@ -693,17 +694,21 @@ mod integration_tests { stroke_col, col_names ); - // Facet columns should be included + // Facet aesthetic columns should be included (row and column for grid facet) + let row_col = naming::aesthetic_column("row"); + let column_col = naming::aesthetic_column("column"); assert!( - col_names.iter().any(|c| c.as_str() == "region"), - "Layer {} should have 'region' facet column: {:?}", + col_names.iter().any(|c| c.as_str() == row_col), + "Layer {} should have '{}' facet column: {:?}", layer_idx, + row_col, col_names ); assert!( - col_names.iter().any(|c| c.as_str() == "category"), - "Layer {} should have 'category' facet column: {:?}", + col_names.iter().any(|c| c.as_str() == column_col), + "Layer {} should have '{}' facet column: {:?}", layer_idx, + column_col, col_names ); } diff --git a/src/parser/builder.rs b/src/parser/builder.rs index e6ff74e9..0cc4ec68 100644 --- a/src/parser/builder.rs +++ b/src/parser/builder.rs @@ -5,7 +5,7 @@ use crate::plot::aesthetic::is_aesthetic_name; use crate::plot::layer::geom::Geom; -use crate::plot::scale::{color_to_hex, is_color_aesthetic, Transform}; +use crate::plot::scale::{color_to_hex, is_color_aesthetic, is_facet_aesthetic, Transform}; use crate::plot::*; use crate::{GgsqlError, Result}; use std::collections::HashMap; @@ -673,6 +673,14 @@ fn build_scale(node: &Node, source: &SourceTree) -> Result { } } + // Validate facet aesthetics cannot have output ranges (TO clause) + if is_facet_aesthetic(&aesthetic) && output_range.is_some() { + return Err(GgsqlError::ValidationError(format!( + "SCALE {}: facet variables cannot have output ranges (TO clause)", + aesthetic + ))); + } + Ok(Scale { aesthetic, scale_type, @@ -778,6 +786,7 @@ fn parse_scale_renaming_clause( "*" => "*".to_string(), "string" => parse_string_node(&name_node, source), "number" => source.get_text(&name_node), + "null_literal" => "null".to_string(), // null key for renaming null values _ => { return Err(GgsqlError::ParseError(format!( "Invalid 'from' type in scale renaming: {}", @@ -817,54 +826,58 @@ fn parse_scale_renaming_clause( // ============================================================================ /// Build a Facet from a facet_clause node +/// +/// FACET vars [BY vars] [SETTING ...] +/// - Single variable = wrap layout (no WRAP keyword needed) +/// - BY clause = grid layout fn build_facet(node: &Node, source: &SourceTree) -> Result { - let mut is_wrap = false; let mut row_vars = Vec::new(); - let mut col_vars = Vec::new(); - let mut scales = FacetScales::Fixed; + let mut column_vars = Vec::new(); + let mut properties = HashMap::new(); let mut cursor = node.walk(); let mut next_vars_are_cols = false; for child in node.children(&mut cursor) { match child.kind() { - "FACET" | "SETTING" | "=>" => continue, - "facet_wrap" => { - is_wrap = true; - } + "FACET" => continue, "facet_by" => { next_vars_are_cols = true; } "facet_vars" => { // Parse list of variable names let vars = parse_facet_vars(&child, source)?; - if is_wrap { - row_vars = vars; - } else if next_vars_are_cols { - col_vars = vars; + if next_vars_are_cols { + column_vars = vars; } else { row_vars = vars; } } - "facet_scales" => { - scales = parse_facet_scales(&child, source)?; + "setting_clause" => { + // Reuse existing setting_clause parser + properties = parse_setting_clause(&child, source)?; } _ => {} } } - if is_wrap { - Ok(Facet::Wrap { + // Determine layout variant: if column_vars is empty, it's a wrap layout + let layout = if column_vars.is_empty() { + FacetLayout::Wrap { variables: row_vars, - scales, - }) + } } else { - Ok(Facet::Grid { - rows: row_vars, - cols: col_vars, - scales, - }) - } + FacetLayout::Grid { + row: row_vars, + column: column_vars, + } + }; + + Ok(Facet { + layout, + properties, + resolved: false, + }) } /// Parse facet variables from a facet_vars node @@ -873,21 +886,6 @@ fn parse_facet_vars(node: &Node, source: &SourceTree) -> Result> { Ok(source.find_texts(node, query)) } -/// Parse facet scales from a facet_scales node -fn parse_facet_scales(node: &Node, source: &SourceTree) -> Result { - let text = source.get_text(node); - match text.as_str() { - "fixed" => Ok(FacetScales::Fixed), - "free" => Ok(FacetScales::Free), - "free_x" => Ok(FacetScales::FreeX), - "free_y" => Ok(FacetScales::FreeY), - _ => Err(GgsqlError::ParseError(format!( - "Unknown facet scales: {}", - text - ))), - } -} - // ============================================================================ // Coord Building // ============================================================================ diff --git a/src/plot/aesthetic.rs b/src/plot/aesthetic.rs index e4f3471a..c990db7a 100644 --- a/src/plot/aesthetic.rs +++ b/src/plot/aesthetic.rs @@ -36,6 +36,14 @@ pub const AESTHETIC_FAMILIES: &[(&str, &str)] = &[ ("yend", "y"), ]; +/// Facet aesthetics (for creating small multiples) +/// +/// These aesthetics control faceting layout: +/// - `panel`: Single variable faceting (wrap layout) +/// - `row`: Row variable for grid faceting +/// - `column`: Column variable for grid faceting +pub const FACET_AESTHETICS: &[&str] = &["panel", "row", "column"]; + /// Non-positional aesthetics (visual properties shown in legends or applied to marks) /// /// These include: @@ -68,6 +76,15 @@ pub fn is_primary_positional(aesthetic: &str) -> bool { PRIMARY_POSITIONAL.contains(&aesthetic) } +/// Check if aesthetic is a facet aesthetic (panel, row, column) +/// +/// Facet aesthetics control the creation of small multiples (faceted plots). +/// They only support Discrete and Binned scale types, and cannot have output ranges (TO clause). +#[inline] +pub fn is_facet_aesthetic(aesthetic: &str) -> bool { + FACET_AESTHETICS.contains(&aesthetic) +} + /// Check if aesthetic is positional (maps to axis, not legend) /// /// Positional aesthetics include x, y, and their variants (xmin, xmax, ymin, ymax, xend, yend). @@ -140,6 +157,15 @@ mod tests { assert!(!is_primary_positional("color")); } + #[test] + fn test_facet_aesthetic() { + assert!(is_facet_aesthetic("panel")); + assert!(is_facet_aesthetic("row")); + assert!(is_facet_aesthetic("column")); + assert!(!is_facet_aesthetic("x")); + assert!(!is_facet_aesthetic("color")); + } + #[test] fn test_positional_aesthetic() { // Primary diff --git a/src/plot/facet/mod.rs b/src/plot/facet/mod.rs index 73573a15..4946f834 100644 --- a/src/plot/facet/mod.rs +++ b/src/plot/facet/mod.rs @@ -2,6 +2,8 @@ //! //! This module defines faceting configuration for small multiples. +mod resolve; mod types; -pub use types::{Facet, FacetScales}; +pub use resolve::{resolve_properties, FacetDataContext}; +pub use types::{Facet, FacetLayout}; diff --git a/src/plot/facet/resolve.rs b/src/plot/facet/resolve.rs new file mode 100644 index 00000000..0c93c7cd --- /dev/null +++ b/src/plot/facet/resolve.rs @@ -0,0 +1,688 @@ +//! Facet property resolution +//! +//! Validates facet properties and applies data-aware defaults. + +use crate::plot::ParameterValue; +use crate::DataFrame; +use std::collections::HashMap; + +use super::types::Facet; + +/// Context for facet resolution with data-derived information +pub struct FacetDataContext { + /// Number of unique values in the first facet variable + pub num_levels: usize, + /// Unique values for each facet variable (as strings for label formatting) + pub unique_values: HashMap>, +} + +impl FacetDataContext { + /// Create context from a DataFrame and facet variables + /// + /// Extracts unique values from each facet variable for label resolution. + pub fn from_dataframe(df: &DataFrame, variables: &[String]) -> Self { + let mut unique_values = HashMap::new(); + let mut num_levels = 1; + + for (i, var) in variables.iter().enumerate() { + if let Ok(col) = df.column(var) { + let unique = col.unique().ok(); + let values: Vec = unique + .as_ref() + .map(|u| { + (0..u.len()) + .filter_map(|j| u.get(j).ok().map(|v| format!("{}", v))) + .collect() + }) + .unwrap_or_default(); + + if i == 0 { + num_levels = values.len().max(1); + } + unique_values.insert(var.clone(), values); + } + } + + Self { + num_levels, + unique_values, + } + } +} + +/// Allowed properties for wrap facets +const WRAP_ALLOWED: &[&str] = &["free", "ncol", "missing"]; + +/// Allowed properties for grid facets +const GRID_ALLOWED: &[&str] = &["free", "missing"]; + +/// Valid values for the missing property +const MISSING_VALUES: &[&str] = &["repeat", "null"]; + +/// Valid string values for the free property +const FREE_STRING_VALUES: &[&str] = &["x", "y"]; + +/// Compute smart default ncol for wrap facets based on number of levels +/// +/// Returns an optimal column count that creates a balanced grid: +/// - n ≤ 3: ncol = n (single row) +/// - n ≤ 6: ncol = 3 +/// - n ≤ 12: ncol = 4 +/// - n > 12: ncol = 5 +fn compute_default_ncol(num_levels: usize) -> i64 { + if num_levels <= 3 { + num_levels as i64 + } else if num_levels <= 6 { + 3 + } else if num_levels <= 12 { + 4 + } else { + 5 + } +} + +/// Resolve and validate facet properties +/// +/// This function: +/// 1. Skips if already resolved +/// 2. Validates all properties are allowed for this layout +/// 3. Validates property values: +/// - `free`: must be null, 'x', 'y', or ['x', 'y'] +/// - `ncol`: positive integer +/// 4. Applies defaults for missing properties: +/// - `ncol` (wrap only): computed from `context.num_levels` +/// 5. Sets `resolved = true` +pub fn resolve_properties(facet: &mut Facet, context: &FacetDataContext) -> Result<(), String> { + // Skip if already resolved + if facet.resolved { + return Ok(()); + } + + let is_wrap = facet.is_wrap(); + + // Step 1: Validate all properties are allowed for this layout + let allowed = if is_wrap { WRAP_ALLOWED } else { GRID_ALLOWED }; + for key in facet.properties.keys() { + if !allowed.contains(&key.as_str()) { + if key == "ncol" && !is_wrap { + return Err( + "Setting `ncol` is only allowed for 1 dimensional facets, not 2 dimensional facets".to_string(), + ); + } + return Err(format!( + "Unknown setting: '{}'. Allowed settings: {}", + key, + allowed.join(", ") + )); + } + } + + // Step 2: Validate property values + validate_free_property(facet)?; + validate_ncol_property(facet)?; + validate_missing_property(facet)?; + + // Step 3: Apply defaults for missing properties + apply_defaults(facet, context); + + // Mark as resolved + facet.resolved = true; + + Ok(()) +} + +/// Validate free property value +/// +/// Accepts: +/// - `null` (ParameterValue::Null) - shared scales (default when absent) +/// - `'x'` or `'y'` (strings) - independent scale for that axis only +/// - `['x', 'y']` or `['y', 'x']` (arrays) - independent scales for both axes +fn validate_free_property(facet: &Facet) -> Result<(), String> { + if let Some(value) = facet.properties.get("free") { + match value { + ParameterValue::Null => { + // Explicit null means shared scales (same as default) + Ok(()) + } + ParameterValue::String(s) => { + if !FREE_STRING_VALUES.contains(&s.as_str()) { + return Err(format!( + "invalid 'free' value '{}'. Expected 'x', 'y', ['x', 'y'], or null", + s + )); + } + Ok(()) + } + ParameterValue::Array(arr) => { + // Must be exactly ['x', 'y'] or ['y', 'x'] + if arr.len() != 2 { + return Err(format!( + "invalid 'free' array: expected ['x', 'y'], got {} elements", + arr.len() + )); + } + let mut has_x = false; + let mut has_y = false; + for elem in arr { + match elem { + crate::plot::ArrayElement::String(s) if s == "x" => has_x = true, + crate::plot::ArrayElement::String(s) if s == "y" => has_y = true, + _ => { + return Err( + "invalid 'free' array: elements must be 'x' or 'y'".to_string() + ); + } + } + } + if !has_x || !has_y { + return Err( + "invalid 'free' array: expected ['x', 'y'] with both 'x' and 'y'" + .to_string(), + ); + } + Ok(()) + } + _ => Err( + "'free' must be null, a string ('x' or 'y'), or an array ['x', 'y']".to_string(), + ), + } + } else { + Ok(()) + } +} + +/// Validate ncol property value +fn validate_ncol_property(facet: &Facet) -> Result<(), String> { + if let Some(value) = facet.properties.get("ncol") { + match value { + ParameterValue::Number(n) => { + if *n <= 0.0 || n.fract() != 0.0 { + return Err(format!("`ncol` must be a positive integer, got {}", n)); + } + } + _ => { + return Err("'ncol' must be a number".to_string()); + } + } + } + Ok(()) +} + +/// Validate missing property value +fn validate_missing_property(facet: &Facet) -> Result<(), String> { + if let Some(value) = facet.properties.get("missing") { + match value { + ParameterValue::String(s) => { + if !MISSING_VALUES.contains(&s.as_str()) { + return Err(format!( + "invalid 'missing' value '{}'. Expected one of: {}", + s, + MISSING_VALUES.join(", ") + )); + } + } + _ => { + return Err("'missing' must be a string ('repeat' or 'null')".to_string()); + } + } + } + Ok(()) +} + +/// Apply default values for missing properties +fn apply_defaults(facet: &mut Facet, context: &FacetDataContext) { + // Note: absence of 'free' property means fixed/shared scales (default) + // No need to insert a default value + + // Default ncol for wrap facets (computed from data) + if facet.is_wrap() && !facet.properties.contains_key("ncol") { + let default_cols = compute_default_ncol(context.num_levels); + facet.properties.insert( + "ncol".to_string(), + ParameterValue::Number(default_cols as f64), + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::plot::facet::FacetLayout; + use polars::prelude::*; + + fn make_wrap_facet() -> Facet { + Facet::new(FacetLayout::Wrap { + variables: vec!["category".to_string()], + }) + } + + fn make_grid_facet() -> Facet { + Facet::new(FacetLayout::Grid { + row: vec!["row_var".to_string()], + column: vec!["col_var".to_string()], + }) + } + + fn make_context(num_levels: usize) -> FacetDataContext { + FacetDataContext { + num_levels, + unique_values: HashMap::new(), + } + } + + #[test] + fn test_compute_default_ncol() { + assert_eq!(compute_default_ncol(1), 1); + assert_eq!(compute_default_ncol(2), 2); + assert_eq!(compute_default_ncol(3), 3); + assert_eq!(compute_default_ncol(4), 3); + assert_eq!(compute_default_ncol(6), 3); + assert_eq!(compute_default_ncol(7), 4); + assert_eq!(compute_default_ncol(12), 4); + assert_eq!(compute_default_ncol(13), 5); + assert_eq!(compute_default_ncol(100), 5); + } + + #[test] + fn test_resolve_applies_defaults() { + let mut facet = make_wrap_facet(); + let context = make_context(5); + + resolve_properties(&mut facet, &context).unwrap(); + + assert!(facet.resolved); + // Note: absence of 'free' means fixed scales (no default inserted) + assert!(!facet.properties.contains_key("free")); + assert_eq!( + facet.properties.get("ncol"), + Some(&ParameterValue::Number(3.0)) + ); + } + + #[test] + fn test_resolve_preserves_user_values() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "free".to_string(), + ParameterValue::Array(vec![ + crate::plot::ArrayElement::String("x".to_string()), + crate::plot::ArrayElement::String("y".to_string()), + ]), + ); + facet + .properties + .insert("ncol".to_string(), ParameterValue::Number(2.0)); + + let context = make_context(10); + resolve_properties(&mut facet, &context).unwrap(); + + // free => ['x', 'y'] preserved + assert!(facet.properties.contains_key("free")); + assert_eq!( + facet.properties.get("ncol"), + Some(&ParameterValue::Number(2.0)) + ); + } + + #[test] + fn test_resolve_skips_if_already_resolved() { + let mut facet = make_wrap_facet(); + facet.resolved = true; + + let context = make_context(5); + resolve_properties(&mut facet, &context).unwrap(); + + // Should not have applied defaults since it was already resolved + assert!(!facet.properties.contains_key("ncol")); + } + + #[test] + fn test_error_columns_is_unknown_property() { + // "columns" is Vega-Lite's name, we use "ncol" + let mut facet = make_wrap_facet(); + facet + .properties + .insert("columns".to_string(), ParameterValue::Number(4.0)); + + let context = make_context(10); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("Unknown setting")); + assert!(err.contains("columns")); + } + + #[test] + fn test_error_ncol_on_grid() { + let mut facet = make_grid_facet(); + facet + .properties + .insert("ncol".to_string(), ParameterValue::Number(3.0)); + + let context = make_context(10); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("ncol")); + assert!(err.contains("1 dimensional")); + } + + #[test] + fn test_error_unknown_property() { + let mut facet = make_wrap_facet(); + facet + .properties + .insert("unknown".to_string(), ParameterValue::Number(1.0)); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("Unknown setting")); + } + + #[test] + fn test_error_invalid_free_value() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "free".to_string(), + ParameterValue::String("invalid".to_string()), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("invalid")); + assert!(err.contains("free")); + } + + #[test] + fn test_error_negative_ncol() { + let mut facet = make_wrap_facet(); + facet + .properties + .insert("ncol".to_string(), ParameterValue::Number(-1.0)); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("ncol")); + assert!(err.contains("positive")); + } + + #[test] + fn test_error_non_integer_ncol() { + let mut facet = make_wrap_facet(); + facet + .properties + .insert("ncol".to_string(), ParameterValue::Number(2.5)); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("ncol")); + assert!(err.contains("integer")); + } + + #[test] + fn test_grid_no_ncol_default() { + let mut facet = make_grid_facet(); + let context = make_context(10); + + resolve_properties(&mut facet, &context).unwrap(); + + // Grid facets should not get ncol default + assert!(!facet.properties.contains_key("ncol")); + // No free property by default (means fixed/shared scales) + assert!(!facet.properties.contains_key("free")); + assert!(facet.resolved); + } + + #[test] + fn test_context_from_dataframe() { + let df = df! { + "category" => &["A", "B", "C", "A", "B", "C"], + "value" => &[1, 2, 3, 4, 5, 6], + } + .unwrap(); + + let context = FacetDataContext::from_dataframe(&df, &["category".to_string()]); + assert_eq!(context.num_levels, 3); + } + + #[test] + fn test_context_from_dataframe_missing_column() { + let df = df! { + "other" => &[1, 2, 3], + } + .unwrap(); + + let context = FacetDataContext::from_dataframe(&df, &["missing".to_string()]); + assert_eq!(context.num_levels, 1); // Falls back to 1 + } + + #[test] + fn test_context_from_dataframe_empty_variables() { + let df = df! { + "x" => &[1, 2, 3], + } + .unwrap(); + + let context = FacetDataContext::from_dataframe(&df, &[]); + assert_eq!(context.num_levels, 1); + } + + // ======================================== + // Missing Property Tests + // ======================================== + + #[test] + fn test_missing_property_repeat_valid() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "missing".to_string(), + ParameterValue::String("repeat".to_string()), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + assert!(result.is_ok()); + } + + #[test] + fn test_missing_property_null_valid() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "missing".to_string(), + ParameterValue::String("null".to_string()), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + assert!(result.is_ok()); + } + + #[test] + fn test_error_invalid_missing_value() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "missing".to_string(), + ParameterValue::String("invalid".to_string()), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("invalid")); + assert!(err.contains("missing")); + } + + #[test] + fn test_error_missing_not_string() { + let mut facet = make_wrap_facet(); + facet + .properties + .insert("missing".to_string(), ParameterValue::Number(1.0)); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("missing")); + assert!(err.contains("string")); + } + + #[test] + fn test_missing_allowed_on_grid_facet() { + let mut facet = make_grid_facet(); + facet.properties.insert( + "missing".to_string(), + ParameterValue::String("repeat".to_string()), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + assert!(result.is_ok()); + } + + // ======================================== + // Free Property Tests + // ======================================== + + #[test] + fn test_free_property_x_valid() { + let mut facet = make_wrap_facet(); + facet + .properties + .insert("free".to_string(), ParameterValue::String("x".to_string())); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + assert!(result.is_ok()); + } + + #[test] + fn test_free_property_y_valid() { + let mut facet = make_wrap_facet(); + facet + .properties + .insert("free".to_string(), ParameterValue::String("y".to_string())); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + assert!(result.is_ok()); + } + + #[test] + fn test_free_property_array_valid() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "free".to_string(), + ParameterValue::Array(vec![ + crate::plot::ArrayElement::String("x".to_string()), + crate::plot::ArrayElement::String("y".to_string()), + ]), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + assert!(result.is_ok()); + } + + #[test] + fn test_free_property_array_reversed_valid() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "free".to_string(), + ParameterValue::Array(vec![ + crate::plot::ArrayElement::String("y".to_string()), + crate::plot::ArrayElement::String("x".to_string()), + ]), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + assert!(result.is_ok()); + } + + #[test] + fn test_free_property_null_valid() { + let mut facet = make_wrap_facet(); + facet + .properties + .insert("free".to_string(), ParameterValue::Null); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + assert!(result.is_ok()); + } + + #[test] + fn test_error_free_array_single_element() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "free".to_string(), + ParameterValue::Array(vec![crate::plot::ArrayElement::String("x".to_string())]), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("free")); + // Single element fails both the length check (1 != 2) and the "both x and y" check + assert!( + err.contains("1 elements") || err.contains("both 'x' and 'y'"), + "Expected error about array length or missing elements, got: {}", + err + ); + } + + #[test] + fn test_error_free_array_invalid_element() { + let mut facet = make_wrap_facet(); + facet.properties.insert( + "free".to_string(), + ParameterValue::Array(vec![ + crate::plot::ArrayElement::String("x".to_string()), + crate::plot::ArrayElement::String("z".to_string()), + ]), + ); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("free")); + assert!(err.contains("'x' or 'y'")); + } + + #[test] + fn test_error_free_wrong_type() { + let mut facet = make_wrap_facet(); + facet + .properties + .insert("free".to_string(), ParameterValue::Number(1.0)); + + let context = make_context(5); + let result = resolve_properties(&mut facet, &context); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.contains("free")); + } +} diff --git a/src/plot/facet/types.rs b/src/plot/facet/types.rs index d146d7c7..6c29cfae 100644 --- a/src/plot/facet/types.rs +++ b/src/plot/facet/types.rs @@ -2,47 +2,123 @@ //! //! This module defines faceting configuration for small multiples. +use crate::plot::ParameterValue; use serde::{Deserialize, Serialize}; +use std::collections::HashMap; /// Faceting specification (from FACET clause) #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum Facet { - /// FACET WRAP variables - Wrap { - variables: Vec, - scales: FacetScales, - }, - /// FACET rows BY cols - Grid { - rows: Vec, - cols: Vec, - scales: FacetScales, - }, +pub struct Facet { + /// Layout type: wrap or grid + pub layout: FacetLayout, + /// Properties from SETTING clause (e.g., scales, ncol, missing) + /// After resolution, includes validated and defaulted values + #[serde(default)] + pub properties: HashMap, + /// Whether properties have been resolved (validated and defaults applied) + #[serde(skip, default)] + pub resolved: bool, } -/// Scale sharing options for facets +/// Facet variable layout specification #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub enum FacetScales { - Fixed, - Free, - FreeX, - FreeY, +pub enum FacetLayout { + /// FACET variables (wrap layout) + Wrap { variables: Vec }, + /// FACET row BY column (grid layout) + Grid { + row: Vec, + column: Vec, + }, } impl Facet { + /// Create a new Facet with the given layout + /// + /// Properties start empty and unresolved. Call `resolve_properties` after + /// data is available to validate and apply defaults. + pub fn new(layout: FacetLayout) -> Self { + Self { + layout, + properties: HashMap::new(), + resolved: false, + } + } + /// Get all variables used for faceting /// /// Returns all column names that will be used to split the data into facets. /// For Wrap facets, returns the variables list. - /// For Grid facets, returns combined rows and cols variables. + /// For Grid facets, returns combined rows and columns variables. + pub fn get_variables(&self) -> Vec { + self.layout.get_variables() + } + + /// Check if this is a wrap layout facet + pub fn is_wrap(&self) -> bool { + self.layout.is_wrap() + } + + /// Check if this is a grid layout facet + pub fn is_grid(&self) -> bool { + self.layout.is_grid() + } +} + +impl FacetLayout { + /// Get all variables used for faceting + /// + /// Returns all column names that will be used to split the data into facets. + /// For Wrap facets, returns the variables list. + /// For Grid facets, returns combined row and column variables. pub fn get_variables(&self) -> Vec { match self { - Facet::Wrap { variables, .. } => variables.clone(), - Facet::Grid { rows, cols, .. } => { - let mut vars = rows.clone(); - vars.extend(cols.iter().cloned()); + FacetLayout::Wrap { variables } => variables.clone(), + FacetLayout::Grid { row, column } => { + let mut vars = row.clone(); + vars.extend(column.iter().cloned()); vars } } } + + /// Check if this is a wrap layout + pub fn is_wrap(&self) -> bool { + matches!(self, FacetLayout::Wrap { .. }) + } + + /// Check if this is a grid layout + pub fn is_grid(&self) -> bool { + matches!(self, FacetLayout::Grid { .. }) + } + + /// Get variable names mapped to their aesthetic names. + /// + /// Returns tuples of (column_name, aesthetic_name): + /// - Wrap: [("region", "panel")] + /// - Grid: [("region", "row"), ("year", "column")] + pub fn get_aesthetic_mappings(&self) -> Vec<(&str, &'static str)> { + match self { + FacetLayout::Wrap { variables } => { + variables.iter().map(|v| (v.as_str(), "panel")).collect() + } + FacetLayout::Grid { row, column } => { + let mut result: Vec<(&str, &'static str)> = + row.iter().map(|v| (v.as_str(), "row")).collect(); + result.extend(column.iter().map(|v| (v.as_str(), "column"))); + result + } + } + } + + /// Get the aesthetic names used by this layout. + /// + /// - Wrap: ["panel"] + /// - Grid: ["row", "column"] + pub fn get_aesthetics(&self) -> Vec<&'static str> { + match self { + FacetLayout::Wrap { .. } => vec!["panel"], + FacetLayout::Grid { .. } => vec!["row", "column"], + } + } } diff --git a/src/plot/main.rs b/src/plot/main.rs index c51ff708..d56e66bb 100644 --- a/src/plot/main.rs +++ b/src/plot/main.rs @@ -45,7 +45,7 @@ pub use super::scale::{Scale, ScaleType}; pub use super::coord::{Coord, CoordType}; // Re-export Facet types from the facet module -pub use super::facet::{Facet, FacetScales}; +pub use super::facet::{Facet, FacetLayout}; /// Complete ggsql visualization specification #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] diff --git a/src/plot/scale/breaks.rs b/src/plot/scale/breaks.rs index 331b6e80..afc9f497 100644 --- a/src/plot/scale/breaks.rs +++ b/src/plot/scale/breaks.rs @@ -1231,6 +1231,11 @@ impl TemporalInterval { /// Calculate temporal breaks at interval boundaries for Date scale. /// min/max are days since epoch for Date. +/// +/// For binning, breaks must SPAN the data range. This means we need a terminal +/// break AFTER max_date to close the last bin. For example, data from Jan 15 to +/// Mar 15 with monthly breaks needs: [Jan-01, Feb-01, Mar-01, Apr-01] to create +/// bins that cover all data. pub fn temporal_breaks_date( min_days: i32, max_days: i32, @@ -1248,10 +1253,18 @@ pub fn temporal_breaks_date( let mut breaks = vec![]; let mut current = align_date_to_interval(min_date, &interval); + // Generate breaks up to and including max_date while current <= max_date { breaks.push(current.format("%Y-%m-%d").to_string()); current = advance_date_by_interval(current, &interval); } + + // Add terminal break after max_date to close the last bin + // This ensures data at max_date falls within a bin, not outside + if !breaks.is_empty() { + breaks.push(current.format("%Y-%m-%d").to_string()); + } + breaks } @@ -1302,6 +1315,9 @@ fn advance_date_by_interval( /// Calculate temporal breaks at interval boundaries for DateTime scale. /// min/max are microseconds since epoch. +/// +/// For binning, breaks must SPAN the data range. This means we need a terminal +/// break AFTER max_dt to close the last bin. pub fn temporal_breaks_datetime( min_us: i64, max_us: i64, @@ -1327,10 +1343,17 @@ pub fn temporal_breaks_datetime( let mut breaks = vec![]; let mut current = align_datetime_to_interval(min_dt, &interval); + // Generate breaks up to and including max_dt while current <= max_dt { breaks.push(current.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string()); current = advance_datetime_by_interval(current, &interval); } + + // Add terminal break after max_dt to close the last bin + if !breaks.is_empty() { + breaks.push(current.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string()); + } + breaks } @@ -1426,6 +1449,9 @@ fn advance_datetime_by_interval( /// Calculate temporal breaks at interval boundaries for Time scale. /// min/max are nanoseconds since midnight. +/// +/// For binning, breaks must SPAN the data range. This means we need a terminal +/// break AFTER max_time to close the last bin (unless it would overflow past 24 hours). pub fn temporal_breaks_time(min_ns: i64, max_ns: i64, interval: TemporalInterval) -> Vec { use chrono::NaiveTime; @@ -1450,6 +1476,7 @@ pub fn temporal_breaks_time(min_ns: i64, max_ns: i64, interval: TemporalInterval let mut breaks = vec![]; let mut current = align_time_to_interval(min_time, &interval); + // Generate breaks up to and including max_time while current <= max_time { breaks.push(current.format("%H:%M:%S%.3f").to_string()); current = match advance_time_by_interval(current, &interval) { @@ -1457,6 +1484,12 @@ pub fn temporal_breaks_time(min_ns: i64, max_ns: i64, interval: TemporalInterval _ => break, // Overflow past 24 hours }; } + + // Add terminal break after max_time to close the last bin (if no overflow) + if !breaks.is_empty() && current > max_time { + breaks.push(current.format("%H:%M:%S%.3f").to_string()); + } + breaks } @@ -2254,4 +2287,61 @@ mod tests { assert!(breaks[0] <= 0.0); assert!(*breaks.last().unwrap() >= 100.0); } + + // ========================================================================= + // Terminal Break Tests (for binning coverage) + // ========================================================================= + + #[test] + fn test_temporal_breaks_date_includes_terminal_break() { + // Test that temporal_breaks_date includes a terminal break AFTER max_date + // to ensure all data falls within a bin. + // + // Data spanning Jan 15 to Mar 15 with monthly breaks should produce: + // [Jan-01, Feb-01, Mar-01, Apr-01] - the Apr-01 is the terminal break + // that closes the [Mar-01, Apr-01] bin for data in March. + + // 2024-01-15 to 2024-03-15 (days since epoch) + // Jan 15, 2024 = 19738 days since epoch + // Mar 15, 2024 = 19798 days since epoch + let monthly = TemporalInterval::create_from_str("month").unwrap(); + let breaks = temporal_breaks_date(19738, 19798, monthly); + + // Should include terminal break (Apr-01) after max_date (Mar-15) + assert!( + breaks.contains(&"2024-04-01".to_string()), + "Should include terminal break Apr-01 to close the last bin. Got: {:?}", + breaks + ); + + // Verify all expected breaks are present + assert!(breaks.contains(&"2024-01-01".to_string())); + assert!(breaks.contains(&"2024-02-01".to_string())); + assert!(breaks.contains(&"2024-03-01".to_string())); + + // The terminal break ensures data from Mar 2-15 falls within [Mar-01, Apr-01] + assert_eq!(breaks.len(), 4, "Should have 4 breaks for 3 bins"); + } + + #[test] + fn test_temporal_breaks_datetime_includes_terminal_break() { + // Test that temporal_breaks_datetime includes a terminal break + // 2024-01-01T00:00:00 to 2024-01-01T02:30:00 with hourly breaks + + let hourly = TemporalInterval::create_from_str("hour").unwrap(); + // Jan 1, 2024 00:00:00 = 1704067200 seconds = 1704067200000000 microseconds + let min_us = 1704067200_i64 * 1_000_000; + // Jan 1, 2024 02:30:00 = 1704076200 seconds + let max_us = 1704076200_i64 * 1_000_000; + + let breaks = temporal_breaks_datetime(min_us, max_us, hourly); + + // Should include 00:00, 01:00, 02:00, and terminal 03:00 + assert_eq!( + breaks.len(), + 4, + "Should have 4 breaks (including terminal). Got: {:?}", + breaks + ); + } } diff --git a/src/plot/scale/mod.rs b/src/plot/scale/mod.rs index 7c22f369..b381ac00 100644 --- a/src/plot/scale/mod.rs +++ b/src/plot/scale/mod.rs @@ -12,6 +12,7 @@ pub mod transform; mod types; pub use crate::format::apply_label_template; +pub use crate::plot::aesthetic::is_facet_aesthetic; pub use crate::plot::types::{CastTargetType, SqlTypeNames}; pub use colour::{color_to_hex, gradient, interpolate_colors, is_color_aesthetic, ColorSpace}; pub use linetype::linetype_to_stroke_dash; @@ -51,6 +52,8 @@ pub fn gets_default_scale(aesthetic: &str) -> bool { | "size" | "linewidth" // Other visual aesthetics | "opacity" | "shape" | "linetype" + // Facet aesthetics (need Discrete/Binned, not Identity) + | "panel" | "row" | "column" ) } diff --git a/src/plot/scale/scale_type/mod.rs b/src/plot/scale/scale_type/mod.rs index 9a6de218..f46ee31c 100644 --- a/src/plot/scale/scale_type/mod.rs +++ b/src/plot/scale/scale_type/mod.rs @@ -26,7 +26,7 @@ use std::collections::HashMap; use std::sync::Arc; use super::transform::{Transform, TransformKind}; -use crate::plot::aesthetic::is_positional_aesthetic; +use crate::plot::aesthetic::{is_facet_aesthetic, is_positional_aesthetic}; use crate::plot::{ArrayElement, ColumnInfo, ParameterValue}; // Scale type implementations @@ -1107,6 +1107,25 @@ impl ScaleType { } } + /// Infer scale type from data type, considering the aesthetic. + /// + /// For most aesthetics, uses standard inference: + /// - Numeric/temporal → Continuous + /// - String/boolean → Discrete + /// + /// For facet aesthetics (panel, row, column): + /// - Numeric/temporal → Binned (not Continuous, since facets need discrete categories) + /// - String/boolean → Discrete + pub fn infer_for_aesthetic(dtype: &DataType, aesthetic: &str) -> Self { + let stype = Self::infer(dtype); + if is_facet_aesthetic(aesthetic) && stype.scale_type_kind() == ScaleTypeKind::Continuous { + // Facet aesthetics: numeric/temporal defaults to Binned + Self::binned() + } else { + stype + } + } + /// Create a ScaleType from a ScaleTypeKind pub fn from_kind(kind: ScaleTypeKind) -> Self { match kind { diff --git a/src/reader/duckdb.rs b/src/reader/duckdb.rs index 26d74c56..ca53d018 100644 --- a/src/reader/duckdb.rs +++ b/src/reader/duckdb.rs @@ -866,7 +866,7 @@ mod tests { .unwrap(); assert_eq!( result.column("id").unwrap().i32().unwrap().get(0).unwrap(), - (n - 1) as i32 + (n - 1) ); assert_eq!( result diff --git a/src/writer/vegalite/coord.rs b/src/writer/vegalite/coord.rs index 30b5a374..57439585 100644 --- a/src/writer/vegalite/coord.rs +++ b/src/writer/vegalite/coord.rs @@ -10,15 +10,20 @@ use serde_json::{json, Value}; /// Apply coordinate transformations to the spec and data /// Returns (possibly transformed DataFrame, possibly modified spec) +/// +/// The `free_x` and `free_y` flags indicate whether facet free scales are enabled. +/// When true, axis limits (xlim/ylim) should not be applied for that axis. pub(super) fn apply_coord_transforms( spec: &Plot, data: &DataFrame, vl_spec: &mut Value, + free_x: bool, + free_y: bool, ) -> Result> { if let Some(ref coord) = spec.coord { match coord.coord_type { CoordType::Cartesian => { - apply_cartesian_coord(coord, vl_spec)?; + apply_cartesian_coord(coord, vl_spec, free_x, free_y)?; Ok(None) // No DataFrame transformation needed } CoordType::Flip => { @@ -41,18 +46,32 @@ pub(super) fn apply_coord_transforms( } /// Apply Cartesian coordinate properties (xlim, ylim, aesthetic domains) -fn apply_cartesian_coord(coord: &Coord, vl_spec: &mut Value) -> Result<()> { +/// +/// The `free_x` and `free_y` flags indicate whether facet free scales are enabled. +/// When true, axis limits (xlim/ylim) should not be applied for that axis. +fn apply_cartesian_coord( + coord: &Coord, + vl_spec: &mut Value, + free_x: bool, + free_y: bool, +) -> Result<()> { // Apply xlim/ylim to scale domains for (prop_name, prop_value) in &coord.properties { match prop_name.as_str() { "xlim" => { - if let Some(limits) = extract_limits(prop_value)? { - apply_axis_limits(vl_spec, "x", limits)?; + // Skip if facet has free x scale - let Vega-Lite compute independent domains + if !free_x { + if let Some(limits) = extract_limits(prop_value)? { + apply_axis_limits(vl_spec, "x", limits)?; + } } } "ylim" => { - if let Some(limits) = extract_limits(prop_value)? { - apply_axis_limits(vl_spec, "y", limits)?; + // Skip if facet has free y scale - let Vega-Lite compute independent domains + if !free_y { + if let Some(limits) = extract_limits(prop_value)? { + apply_axis_limits(vl_spec, "y", limits)?; + } } } _ if is_aesthetic_name(prop_name) => { diff --git a/src/writer/vegalite/encoding.rs b/src/writer/vegalite/encoding.rs index f489d2d7..d9c074db 100644 --- a/src/writer/vegalite/encoding.rs +++ b/src/writer/vegalite/encoding.rs @@ -400,6 +400,10 @@ struct ScaleContext<'a> { is_binned_legend: bool, #[allow(dead_code)] spec: &'a Plot, // Reserved for future use (e.g., multi-scale legend decisions) + /// Whether to skip domain for x axis (facet free scales) + free_x: bool, + /// Whether to skip domain for y axis (facet free scales) + free_y: bool, } /// Build scale properties from SCALE clause @@ -414,9 +418,15 @@ fn build_scale_properties( let mut scale_obj = serde_json::Map::new(); let mut needs_gradient_legend = false; + // Check if we should skip domain due to facet free scales + // When using free scales, Vega-Lite computes independent domains per facet panel. + // Setting an explicit domain would override this behavior. + let skip_domain = (ctx.aesthetic == "x" && ctx.free_x) || (ctx.aesthetic == "y" && ctx.free_y); + // Apply domain from input_range (FROM clause) // Skip for threshold scales - they use internal breaks as domain instead - if !ctx.is_binned_legend { + // Skip for free facet scales - Vega-Lite should compute independent domains + if !ctx.is_binned_legend && !skip_domain { if let Some(ref domain_values) = scale.input_range { let domain_json: Vec = domain_values.iter().map(|elem| elem.to_json()).collect(); scale_obj.insert("domain".to_string(), json!(domain_json)); @@ -730,6 +740,10 @@ pub(super) struct EncodingContext<'a> { pub spec: &'a Plot, pub titled_families: &'a mut HashSet, pub primary_aesthetics: &'a HashSet, + /// Whether facet has free x scale (independent domains per panel) + pub free_x: bool, + /// Whether facet has free y scale (independent domains per panel) + pub free_y: bool, } /// Build encoding channel from aesthetic mapping @@ -807,6 +821,8 @@ fn build_column_encoding( aesthetic, spec: ctx.spec, is_binned_legend, + free_x: ctx.free_x, + free_y: ctx.free_y, }; let (scale_obj, needs_gradient) = build_scale_properties(scale, &scale_ctx); diff --git a/src/writer/vegalite/mod.rs b/src/writer/vegalite/mod.rs index e7d72da3..08f87951 100644 --- a/src/writer/vegalite/mod.rs +++ b/src/writer/vegalite/mod.rs @@ -25,10 +25,8 @@ mod data; mod encoding; mod layer; -// ArrayElement is used in tests and for pattern matching; suppress unused import warning -#[allow(unused_imports)] use crate::plot::ArrayElement; -use crate::plot::ParameterValue; +use crate::plot::{ParameterValue, Scale, ScaleTypeKind}; use crate::writer::Writer; use crate::{ is_primary_positional, naming, primary_aesthetic, AestheticValue, DataFrame, GgsqlError, Plot, @@ -141,12 +139,17 @@ fn prepare_layer_data( /// - Adds detail encoding for partition_by columns /// - Applies geom-specific modifications via renderer /// - Finalizes layers (may expand composite geoms into multiple layers) +/// +/// The `free_x` and `free_y` flags indicate whether facet free scales are enabled. +/// When true, explicit domains should not be set for that axis. fn build_layers( spec: &Plot, data: &HashMap, layer_data_keys: &[String], layer_renderers: &[Box], prepared_data: &[PreparedData], + free_x: bool, + free_y: bool, ) -> Result> { let mut layers = Vec::new(); @@ -179,8 +182,8 @@ fn build_layers( // Set transform array on layer spec layer_spec["transform"] = json!(transforms); - // Build encoding for this layer - let encoding = build_layer_encoding(layer, df, spec)?; + // Build encoding for this layer (pass free scale flags) + let encoding = build_layer_encoding(layer, df, spec, free_x, free_y)?; layer_spec["encoding"] = Value::Object(encoding); // Apply geom-specific spec modifications via renderer @@ -203,10 +206,15 @@ fn build_layers( /// - Aesthetic parameters from SETTING as literal encodings /// - Detail encoding for partition_by columns /// - Geom-specific encoding modifications via renderer +/// +/// The `free_x` and `free_y` flags indicate whether facet free scales are enabled. +/// When true, explicit domains should not be set for that axis. fn build_layer_encoding( layer: &crate::plot::Layer, df: &DataFrame, spec: &Plot, + free_x: bool, + free_y: bool, ) -> Result> { let mut encoding = serde_json::Map::new(); @@ -229,10 +237,19 @@ fn build_layer_encoding( spec, titled_families: &mut titled_families, primary_aesthetics: &primary_aesthetics, + free_x, + free_y, }; // Build encoding channels for each aesthetic mapping for (aesthetic, value) in &layer.mappings.aesthetics { + // Skip facet aesthetics - they are handled via top-level facet structure, + // not as encoding channels. Adding them to encoding would create row-based + // faceting instead of the intended wrap/grid layout. + if matches!(aesthetic.as_str(), "panel" | "row" | "column") { + continue; + } + let channel_name = map_aesthetic_name(aesthetic); let channel_encoding = build_encoding_channel(aesthetic, value, &mut enc_ctx)?; encoding.insert(channel_name, channel_encoding); @@ -287,48 +304,88 @@ fn build_layer_encoding( /// Apply faceting to Vega-Lite spec /// /// Handles: -/// - FACET WRAP (single variable faceting) -/// - FACET GRID (row × column faceting) +/// - FACET vars (wrap layout) +/// - FACET rows BY columns (grid layout) /// - Moves layers into nested `spec` object -/// - Infers field types for facet variables -fn apply_faceting(vl_spec: &mut Value, facet: &crate::plot::Facet, facet_df: &DataFrame) { - use crate::plot::Facet; - - match facet { - Facet::Wrap { variables, .. } => { - if !variables.is_empty() { - let field_type = infer_field_type(facet_df, &variables[0]); - vl_spec["facet"] = json!({ - "field": variables[0], - "type": field_type, - }); +/// - Uses aesthetic column names (e.g., __ggsql_aes_panel__) +/// - Respects scale types (Binned facets use bin: "binned") +/// - Scale resolution (scales property) +/// - Label renaming (RENAMING clause) +/// - Additional properties (ncol, etc.) +fn apply_faceting( + vl_spec: &mut Value, + facet: &crate::plot::Facet, + facet_df: &DataFrame, + scales: &[Scale], +) { + use crate::plot::FacetLayout; - // Move layer into spec (data reference stays at top level) - let mut spec_inner = json!({}); - if let Some(layer) = vl_spec.get("layer") { - spec_inner["layer"] = layer.clone(); - } + match &facet.layout { + FacetLayout::Wrap { variables: _ } => { + // Use the aesthetic column name for panel + let aes_col = naming::aesthetic_column("panel"); + + // Look up scale for "panel" aesthetic + let scale = scales.iter().find(|s| s.aesthetic == "panel"); - vl_spec["spec"] = spec_inner; - vl_spec.as_object_mut().unwrap().remove("layer"); + // Build facet field definition with proper binned support + let mut facet_def = build_facet_field_def(facet_df, &aes_col, scale); + + // Use scale label_mapping for custom labels + let label_mapping = scale.and_then(|s| s.label_mapping.as_ref()); + + // Apply label renaming via header.labelExpr + apply_facet_label_renaming(&mut facet_def, label_mapping, scale); + + // Apply facet ordering from breaks/reverse + apply_facet_ordering(&mut facet_def, scale); + + vl_spec["facet"] = facet_def; + + // Move layer into spec (data reference stays at top level) + let mut spec_inner = json!({}); + if let Some(layer) = vl_spec.get("layer") { + spec_inner["layer"] = layer.clone(); } + + vl_spec["spec"] = spec_inner; + vl_spec.as_object_mut().unwrap().remove("layer"); + + // Apply scale resolution + apply_facet_scale_resolution(vl_spec, &facet.properties); + + // Apply additional properties (columns for wrap) + apply_facet_properties(vl_spec, &facet.properties, true); } - Facet::Grid { rows, cols, .. } => { + FacetLayout::Grid { row: _, column: _ } => { let mut facet_spec = serde_json::Map::new(); - if !rows.is_empty() { - let field_type = infer_field_type(facet_df, &rows[0]); - facet_spec.insert( - "row".to_string(), - json!({"field": rows[0], "type": field_type}), - ); + + // Row facet: use aesthetic column "row" + let row_aes_col = naming::aesthetic_column("row"); + if facet_df.column(&row_aes_col).is_ok() { + let row_scale = scales.iter().find(|s| s.aesthetic == "row"); + let mut row_def = build_facet_field_def(facet_df, &row_aes_col, row_scale); + + let row_label_mapping = row_scale.and_then(|s| s.label_mapping.as_ref()); + apply_facet_label_renaming(&mut row_def, row_label_mapping, row_scale); + apply_facet_ordering(&mut row_def, row_scale); + + facet_spec.insert("row".to_string(), row_def); } - if !cols.is_empty() { - let field_type = infer_field_type(facet_df, &cols[0]); - facet_spec.insert( - "column".to_string(), - json!({"field": cols[0], "type": field_type}), - ); + + // Column facet: use aesthetic column "column" + let col_aes_col = naming::aesthetic_column("column"); + if facet_df.column(&col_aes_col).is_ok() { + let col_scale = scales.iter().find(|s| s.aesthetic == "column"); + let mut col_def = build_facet_field_def(facet_df, &col_aes_col, col_scale); + + let col_label_mapping = col_scale.and_then(|s| s.label_mapping.as_ref()); + apply_facet_label_renaming(&mut col_def, col_label_mapping, col_scale); + apply_facet_ordering(&mut col_def, col_scale); + + facet_spec.insert("column".to_string(), col_def); } + vl_spec["facet"] = Value::Object(facet_spec); // Move layer into spec (data reference stays at top level) @@ -339,6 +396,443 @@ fn apply_faceting(vl_spec: &mut Value, facet: &crate::plot::Facet, facet_df: &Da vl_spec["spec"] = spec_inner; vl_spec.as_object_mut().unwrap().remove("layer"); + + // Apply scale resolution + apply_facet_scale_resolution(vl_spec, &facet.properties); + + // Apply additional properties (not columns for grid) + apply_facet_properties(vl_spec, &facet.properties, false); + } + } +} + +/// Build a facet field definition with proper type. +/// +/// Facets always use "type": "nominal" since facet values are categorical +/// (even for binned data, the bin labels are discrete categories). +fn build_facet_field_def(df: &DataFrame, col: &str, scale: Option<&Scale>) -> Value { + let mut field_def = json!({ + "field": col, + }); + + if let Some(scale) = scale { + if let Some(ref scale_type) = scale.scale_type { + match scale_type.scale_type_kind() { + // All scale types use nominal for facets - the data column contains + // categorical values (bin midpoints for binned, categories for discrete) + ScaleTypeKind::Binned + | ScaleTypeKind::Discrete + | ScaleTypeKind::Ordinal + | ScaleTypeKind::Continuous + | ScaleTypeKind::Identity => { + field_def["type"] = json!("nominal"); + return field_def; + } + } + } + } + + // Fall back to column type inference + field_def["type"] = json!(infer_field_type(df, col)); + field_def +} + +/// Apply facet ordering via Vega-Lite's sort property. +/// +/// For discrete facets: uses input_range (FROM clause) or breaks array order +/// For binned facets: uses "descending" sort if reversed +fn apply_facet_ordering(facet_def: &mut Value, scale: Option<&Scale>) { + let Some(scale) = scale else { + return; + }; + + let is_reversed = matches!( + scale.properties.get("reverse"), + Some(ParameterValue::Boolean(true)) + ); + + let is_binned = scale + .scale_type + .as_ref() + .map(|st| st.scale_type_kind() == ScaleTypeKind::Binned) + .unwrap_or(false); + + if is_binned { + // For binned facets: use "descending" sort if reversed + if is_reversed { + facet_def["sort"] = json!("descending"); + } + // Default is ascending, no need to specify + } else { + // For discrete facets: use input_range (FROM clause) if present, + // otherwise fall back to breaks property + let order_values: Vec = if let Some(ref input_range) = scale.input_range { + // Use explicit input_range from FROM clause + input_range.clone() + } else if let Some(ParameterValue::Array(arr)) = scale.properties.get("breaks") { + // Fall back to breaks if present + arr.clone() + } else { + return; + }; + + // Convert to JSON values, preserving null + let mut sort_values: Vec = order_values.iter().map(|e| e.to_json()).collect(); + + // Apply reverse if specified + if is_reversed { + sort_values.reverse(); + } + + facet_def["sort"] = json!(sort_values); + } +} + +/// Apply scale resolution to Vega-Lite spec based on facet free property +/// +/// Maps ggsql free property to Vega-Lite resolve.scale configuration: +/// - absent or null: shared scales (Vega-Lite default, no resolve needed) +/// - 'x': independent x scale, shared y scale +/// - 'y': shared x scale, independent y scale +/// - ['x', 'y']: independent scales for both x and y +fn apply_facet_scale_resolution(vl_spec: &mut Value, properties: &HashMap) { + let Some(free_value) = properties.get("free") else { + // No free property means fixed/shared scales (Vega-Lite default) + return; + }; + + match free_value { + ParameterValue::Null => { + // Explicit null means shared scales (same as default) + } + ParameterValue::String(s) => match s.as_str() { + "x" => { + vl_spec["resolve"] = json!({ + "scale": {"x": "independent"} + }); + } + "y" => { + vl_spec["resolve"] = json!({ + "scale": {"y": "independent"} + }); + } + _ => { + // Unknown value - resolution should have validated this + } + }, + ParameterValue::Array(arr) => { + // Array means both x and y are free (already validated to be ['x', 'y']) + let has_x = arr + .iter() + .any(|e| matches!(e, crate::plot::ArrayElement::String(s) if s == "x")); + let has_y = arr + .iter() + .any(|e| matches!(e, crate::plot::ArrayElement::String(s) if s == "y")); + + if has_x && has_y { + vl_spec["resolve"] = json!({ + "scale": {"x": "independent", "y": "independent"} + }); + } else if has_x { + vl_spec["resolve"] = json!({ + "scale": {"x": "independent"} + }); + } else if has_y { + vl_spec["resolve"] = json!({ + "scale": {"y": "independent"} + }); + } + } + _ => { + // Invalid type - resolution should have validated this + } + } +} + +/// Apply label renaming to a facet definition via header.labelExpr +/// +/// Uses Vega expression to transform facet labels: +/// - For discrete facets: 'A' => 'Alpha' becomes datum.value == 'A' ? 'Alpha' : ... +/// - For binned facets: uses build_symbol_legend_label_mapping for range-style labels +/// - NULL values suppress labels (maps to empty string) +/// +/// Note: Wildcard templates are resolved during facet property resolution, +/// so by this point label_mapping contains all expanded mappings. +fn apply_facet_label_renaming( + facet_def: &mut Value, + label_mapping: Option<&HashMap>>, + scale: Option<&Scale>, +) { + // Only apply if there's a label mapping + let has_mapping = label_mapping.is_some_and(|m| !m.is_empty()); + + if !has_mapping { + return; + } + + // Check if this is a binned facet + let is_binned = scale + .and_then(|s| s.scale_type.as_ref()) + .map(|st| st.scale_type_kind() == ScaleTypeKind::Binned) + .unwrap_or(false); + + let label_expr = if is_binned { + // For binned facets: reuse build_symbol_legend_label_mapping and build_label_expr + build_binned_facet_label_expr(label_mapping, scale) + } else { + // For discrete facets: compare datum.value against string values + build_discrete_facet_label_expr(label_mapping) + }; + + // Add to facet definition + facet_def["header"] = json!({ + "labelExpr": label_expr + }); +} + +/// Build labelExpr for binned facet values. +/// +/// For binned facets, `datum.value` contains the bin midpoint (e.g., 25 for bin [20-30)). +/// This function maps midpoint values to range-style labels like "Lower – Upper", +/// using custom labels from label_mapping when available. +/// +/// Unlike `build_symbol_legend_label_mapping` which maps Vega-Lite's auto-generated +/// range labels, this function maps numeric midpoints to our range labels. +fn build_binned_facet_label_expr( + label_mapping: Option<&HashMap>>, + scale: Option<&Scale>, +) -> String { + let Some(scale) = scale else { + return "datum.value".to_string(); + }; + + let breaks = match scale.properties.get("breaks") { + Some(ParameterValue::Array(arr)) => arr, + _ => return "datum.value".to_string(), + }; + + if breaks.len() < 2 { + return "datum.value".to_string(); + } + + // Get closed property for determining open-format labels + let closed = scale + .properties + .get("closed") + .and_then(|v| match v { + ParameterValue::String(s) => Some(s.as_str()), + _ => None, + }) + .unwrap_or("left"); + + let num_bins = breaks.len() - 1; + + // Build mapping from midpoint to range label + let mut midpoint_to_range: Vec<(String, Option)> = Vec::new(); + + for i in 0..num_bins { + let lower = &breaks[i]; + let upper = &breaks[i + 1]; + + // Calculate midpoint for comparison + let midpoint_str = calculate_midpoint_string(lower, upper, scale.transform.as_ref()); + let Some(midpoint_str) = midpoint_str else { + continue; + }; + + // Get break values as strings (for default labels) + let lower_str = lower.to_key_string(); + let upper_str = upper.to_key_string(); + + // Build the range label + let range_label = if let Some(label_mapping) = label_mapping { + // Check if terminals are suppressed + let lower_suppressed = label_mapping.get(&lower_str) == Some(&None); + let upper_suppressed = label_mapping.get(&upper_str) == Some(&None); + + // Get custom labels (fall back to break values) + let lower_label = label_mapping + .get(&lower_str) + .cloned() + .flatten() + .unwrap_or_else(|| lower_str.clone()); + let upper_label = label_mapping + .get(&upper_str) + .cloned() + .flatten() + .unwrap_or_else(|| upper_str.clone()); + + // Determine label format based on terminal suppression + if i == 0 && lower_suppressed { + // First bin with suppressed lower terminal → open format + let symbol = if closed == "right" { "≤" } else { "<" }; + Some(format!("{} {}", symbol, upper_label)) + } else if i == num_bins - 1 && upper_suppressed { + // Last bin with suppressed upper terminal → open format + let symbol = if closed == "right" { ">" } else { "≥" }; + Some(format!("{} {}", symbol, lower_label)) + } else { + // Standard range format: "lower – upper" + Some(format!("{} – {}", lower_label, upper_label)) + } + } else { + // No label mapping - use default range format with break values + Some(format!("{} – {}", lower_str, upper_str)) + }; + + midpoint_to_range.push((midpoint_str, range_label)); + } + + if midpoint_to_range.is_empty() { + return "datum.value".to_string(); + } + + // Build labelExpr comparing datum.value against midpoints + build_binned_facet_value_expr(&midpoint_to_range) +} + +/// Build labelExpr comparing datum.value against midpoint values +fn build_binned_facet_value_expr(mappings: &[(String, Option)]) -> String { + let mut expr_parts: Vec = Vec::new(); + + for (midpoint, label) in mappings { + // Compare as number for numeric midpoints, string for temporal + let condition = format!("datum.value == {}", midpoint); + let result = match label { + Some(l) => format!("'{}'", escape_vega_string(l)), + None => "''".to_string(), + }; + expr_parts.push(format!("{} ? {}", condition, result)); + } + + if expr_parts.is_empty() { + return "datum.value".to_string(); + } + + // Chain: cond1 ? val1 : cond2 ? val2 : datum.value + let mut expr = "datum.value".to_string(); + for part in expr_parts.into_iter().rev() { + expr = format!("{} : {}", part, expr); + } + expr +} + +/// Calculate the midpoint string for a bin +fn calculate_midpoint_string( + lower: &ArrayElement, + upper: &ArrayElement, + transform: Option<&crate::plot::scale::Transform>, +) -> Option { + match (lower, upper) { + (ArrayElement::Number(l), ArrayElement::Number(u)) => { + let midpoint = (*l + *u) / 2.0; + + // Check if temporal transform - format as ISO string (quoted for comparison) + if let Some(t) = transform { + if let Some(iso) = t.format_as_iso(midpoint) { + return Some(format!("'{}'", iso)); + } + } + + // Numeric: format without trailing decimals if whole number + Some(if midpoint.fract() == 0.0 { + format!("{}", midpoint as i64) + } else { + format!("{}", midpoint) + }) + } + // Temporal ArrayElements - calculate midpoint and format as ISO (quoted) + (ArrayElement::Date(l), ArrayElement::Date(u)) => { + let midpoint = ((*l as f64) + (*u as f64)) / 2.0; + Some(format!("'{}'", ArrayElement::date_to_iso(midpoint as i32))) + } + (ArrayElement::DateTime(l), ArrayElement::DateTime(u)) => { + let midpoint = ((*l as f64) + (*u as f64)) / 2.0; + Some(format!( + "'{}'", + ArrayElement::datetime_to_iso(midpoint as i64) + )) + } + (ArrayElement::Time(l), ArrayElement::Time(u)) => { + let midpoint = ((*l as f64) + (*u as f64)) / 2.0; + Some(format!("'{}'", ArrayElement::time_to_iso(midpoint as i64))) + } + _ => None, + } +} + +/// Build labelExpr for discrete facet values. +fn build_discrete_facet_label_expr( + label_mapping: Option<&HashMap>>, +) -> String { + let Some(mappings) = label_mapping else { + return "datum.value".to_string(); + }; + + // Build labelExpr for Vega-Lite + let mut expr_parts: Vec = Vec::new(); + + // Add explicit mappings + for (from, to) in mappings { + // Handle null values: 'null' key maps to JSON null comparison + let condition = if from == "null" { + "datum.value == null".to_string() + } else { + format!("datum.value == '{}'", escape_vega_string(from)) + }; + let result = match to { + Some(label) => format!("'{}'", escape_vega_string(label)), + None => "''".to_string(), // NULL suppresses label + }; + expr_parts.push(format!("{} ? {}", condition, result)); + } + + // Default case: show original value + let default_expr = "datum.value".to_string(); + + // Build the full expression as nested ternary + if expr_parts.is_empty() { + default_expr + } else { + // Chain conditions: cond1 ? val1 : cond2 ? val2 : default + let mut expr = default_expr; + for part in expr_parts.into_iter().rev() { + expr = format!("{} : {}", part, expr); + } + expr + } +} + +/// Escape a string for use in Vega expressions +fn escape_vega_string(s: &str) -> String { + s.replace('\\', "\\\\").replace('\'', "\\'") +} + +/// Apply additional facet properties to Vega-Lite spec +/// +/// Handles: +/// - ncol: Number of columns for wrap facets (maps to Vega-Lite's "columns") +/// +/// Note: free is handled separately by apply_facet_scale_resolution +fn apply_facet_properties( + vl_spec: &mut Value, + properties: &HashMap, + is_wrap: bool, +) { + for (name, value) in properties { + match name.as_str() { + "ncol" if is_wrap => { + // ncol maps to Vega-Lite's "columns" property + if let ParameterValue::Number(n) = value { + vl_spec["columns"] = json!(*n as i64); + } + } + "free" => { + // Handled by apply_facet_scale_resolution + } + _ => { + // Unknown properties ignored (resolution should have validated) + } } } } @@ -432,7 +926,33 @@ impl Writer for VegaLiteWriter { // 1. Validate spec self.validate(spec)?; - // 2. Determine layer data keys + // 2. Determine if facet free scales should omit x/y domains + // When using free scales, Vega-Lite computes independent domains per facet panel. + // We must not set explicit domains (from SCALE or COORD) as they would override this. + let (free_x, free_y) = if let Some(ref facet) = spec.facet { + match facet.properties.get("free") { + Some(ParameterValue::String(s)) => match s.as_str() { + "x" => (true, false), + "y" => (false, true), + _ => (false, false), + }, + Some(ParameterValue::Array(arr)) => { + let has_x = arr + .iter() + .any(|e| matches!(e, crate::plot::ArrayElement::String(s) if s == "x")); + let has_y = arr + .iter() + .any(|e| matches!(e, crate::plot::ArrayElement::String(s) if s == "y")); + (has_x, has_y) + } + // null or absent means fixed/shared scales + _ => (false, false), + } + } else { + (false, false) + }; + + // 3. Determine layer data keys let layer_data_keys: Vec = spec .layers .iter() @@ -445,7 +965,7 @@ impl Writer for VegaLiteWriter { }) .collect(); - // 3. Validate columns for each layer + // 4. Validate columns for each layer for (layer_idx, (layer, key)) in spec.layers.iter().zip(layer_data_keys.iter()).enumerate() { let df = data.get(key).ok_or_else(|| { @@ -458,7 +978,7 @@ impl Writer for VegaLiteWriter { validate_layer_columns(layer, df, layer_idx)?; } - // 4. Build base Vega-Lite spec + // 5. Build base Vega-Lite spec let mut vl_spec = json!({ "$schema": self.schema }); @@ -471,40 +991,42 @@ impl Writer for VegaLiteWriter { } } - // 5. Collect binned columns + // 6. Collect binned columns let binned_columns = collect_binned_columns(spec); - // 6. Prepare layer data + // 7. Prepare layer data let prep = prepare_layer_data(spec, data, &layer_data_keys, &binned_columns)?; - // 7. Unify datasets + // 8. Unify datasets let unified_data = unify_datasets(&prep.datasets)?; vl_spec["data"] = json!({"values": unified_data}); - // 8. Build layers + // 9. Build layers (pass free scale flags for domain handling) let layers = build_layers( spec, data, &layer_data_keys, &prep.renderers, &prep.prepared, + free_x, + free_y, )?; vl_spec["layer"] = json!(layers); - // 9. Apply coordinate transforms + // 10. Apply coordinate transforms (pass free scale flags for domain handling) let first_df = data.get(&layer_data_keys[0]).unwrap(); - apply_coord_transforms(spec, first_df, &mut vl_spec)?; + apply_coord_transforms(spec, first_df, &mut vl_spec, free_x, free_y)?; - // 10. Apply faceting + // 11. Apply faceting if let Some(facet) = &spec.facet { let facet_df = data.get(&layer_data_keys[0]).unwrap(); - apply_faceting(&mut vl_spec, facet, facet_df); + apply_faceting(&mut vl_spec, facet, facet_df, &spec.scales); } - // 11. Add default theme config (ggplot2-like gray theme) + // 12. Add default theme config (ggplot2-like gray theme) vl_spec["config"] = self.default_theme_config(); - // 12. Serialize + // 13. Serialize serde_json::to_string_pretty(&vl_spec).map_err(|e| { GgsqlError::WriterError(format!("Failed to serialize Vega-Lite JSON: {}", e)) }) @@ -1038,4 +1560,476 @@ mod tests { "Last bin with closed='right' should use '> lower' format" ); } + + #[test] + fn test_facet_ordering_uses_input_range() { + // Test that apply_facet_ordering uses input_range (FROM clause) for discrete scales + use crate::plot::scale::Scale; + + let mut facet_def = json!({"field": "__ggsql_aes_panel__", "type": "nominal"}); + + // Create a scale with input_range (simulating SCALE panel FROM ['A', 'B', 'C']) + let mut scale = Scale::new("panel"); + scale.input_range = Some(vec![ + ArrayElement::String("A".to_string()), + ArrayElement::String("B".to_string()), + ArrayElement::String("C".to_string()), + ]); + + apply_facet_ordering(&mut facet_def, Some(&scale)); + + // Verify sort order matches input_range + assert_eq!( + facet_def["sort"], + json!(["A", "B", "C"]), + "Facet sort should use input_range order" + ); + } + + #[test] + fn test_facet_ordering_with_null_in_input_range() { + // Test that apply_facet_ordering preserves null values in input_range + // This is the fix for the bug where null panels appear first + use crate::plot::scale::Scale; + + let mut facet_def = json!({"field": "__ggsql_aes_panel__", "type": "nominal"}); + + // Create a scale with input_range including null at the end + // (simulating SCALE panel FROM ['Adelie', 'Gentoo', null]) + let mut scale = Scale::new("panel"); + scale.input_range = Some(vec![ + ArrayElement::String("Adelie".to_string()), + ArrayElement::String("Gentoo".to_string()), + ArrayElement::Null, + ]); + + apply_facet_ordering(&mut facet_def, Some(&scale)); + + // Verify sort order preserves null at the end + assert_eq!( + facet_def["sort"], + json!(["Adelie", "Gentoo", null]), + "Facet sort should preserve null position from input_range" + ); + } + + #[test] + fn test_facet_ordering_with_null_first_in_input_range() { + // Test that null at the beginning of input_range produces null first in sort + use crate::plot::scale::Scale; + + let mut facet_def = json!({"field": "__ggsql_aes_panel__", "type": "nominal"}); + + // Create a scale with null at the beginning + let mut scale = Scale::new("panel"); + scale.input_range = Some(vec![ + ArrayElement::Null, + ArrayElement::String("Adelie".to_string()), + ArrayElement::String("Gentoo".to_string()), + ]); + + apply_facet_ordering(&mut facet_def, Some(&scale)); + + // Verify null is first in sort order + assert_eq!( + facet_def["sort"], + json!([null, "Adelie", "Gentoo"]), + "Facet sort should preserve null at beginning" + ); + } + + #[test] + fn test_discrete_facet_label_expr_renames_null() { + // Test that 'null' key in label_mapping generates correct Vega expression + // for comparing against JSON null (not string 'null') + let mut mappings = HashMap::new(); + mappings.insert("Adelie".to_string(), Some("Adelie Penguin".to_string())); + mappings.insert("null".to_string(), Some("Missing".to_string())); + + let expr = build_discrete_facet_label_expr(Some(&mappings)); + + // Should contain null comparison without quotes + assert!( + expr.contains("datum.value == null"), + "Label expr should use 'datum.value == null' (not string), got: {}", + expr + ); + // Should map null to 'Missing' + assert!( + expr.contains("'Missing'"), + "Label expr should contain 'Missing', got: {}", + expr + ); + // Should still use string comparison for non-null values + assert!( + expr.contains("datum.value == 'Adelie'"), + "Label expr should use string comparison for Adelie, got: {}", + expr + ); + } + + #[test] + fn test_binned_facet_label_expr_uses_range_labels() { + // Test that binned facet labelExpr uses range-style labels "Lower – Upper" + use crate::plot::scale::Scale; + use crate::plot::{ParameterValue, ScaleType}; + + // Create a binned scale with breaks [0, 20, 40, 60] + let mut scale = Scale::new("panel"); + scale.scale_type = Some(ScaleType::binned()); + scale.properties.insert( + "breaks".to_string(), + ParameterValue::Array(vec![ + ArrayElement::Number(0.0), + ArrayElement::Number(20.0), + ArrayElement::Number(40.0), + ArrayElement::Number(60.0), + ]), + ); + + // Create label mapping keyed by lower bound + let mut label_mapping = HashMap::new(); + label_mapping.insert("0".to_string(), Some("Low".to_string())); + label_mapping.insert("20".to_string(), Some("Medium".to_string())); + label_mapping.insert("40".to_string(), Some("High".to_string())); + label_mapping.insert("60".to_string(), Some("Very High".to_string())); + + let expr = build_binned_facet_label_expr(Some(&label_mapping), Some(&scale)); + + // Should contain midpoint comparisons: + // Bin [0, 20) -> midpoint 10 + // Bin [20, 40) -> midpoint 30 + // Bin [40, 60] -> midpoint 50 + assert!( + expr.contains("datum.value == 10"), + "labelExpr should compare against midpoint 10, got: {}", + expr + ); + assert!( + expr.contains("datum.value == 30"), + "labelExpr should compare against midpoint 30, got: {}", + expr + ); + assert!( + expr.contains("datum.value == 50"), + "labelExpr should compare against midpoint 50, got: {}", + expr + ); + + // Should map to range-style labels using custom label names + assert!( + expr.contains("'Low – Medium'"), + "labelExpr should contain range label 'Low – Medium', got: {}", + expr + ); + assert!( + expr.contains("'Medium – High'"), + "labelExpr should contain range label 'Medium – High', got: {}", + expr + ); + assert!( + expr.contains("'High – Very High'"), + "labelExpr should contain range label 'High – Very High', got: {}", + expr + ); + } + + #[test] + fn test_binned_facet_label_expr_with_suppressed_lower_terminal() { + // Test that suppressed lower terminal creates open-format label "< Upper" + use crate::plot::scale::Scale; + use crate::plot::{ParameterValue, ScaleType}; + + let mut scale = Scale::new("panel"); + scale.scale_type = Some(ScaleType::binned()); + scale.properties.insert( + "breaks".to_string(), + ParameterValue::Array(vec![ + ArrayElement::Number(0.0), + ArrayElement::Number(50.0), + ArrayElement::Number(100.0), + ]), + ); + + // Create label mapping with suppressed first terminal (oob='squish' behavior) + let mut label_mapping = HashMap::new(); + label_mapping.insert("0".to_string(), None); // Suppress lower terminal + label_mapping.insert("50".to_string(), Some("High".to_string())); + label_mapping.insert("100".to_string(), Some("Max".to_string())); + + let expr = build_binned_facet_label_expr(Some(&label_mapping), Some(&scale)); + + // First bin with suppressed lower terminal → open format "< 50" or "< High" + // (uses upper bound label since lower is suppressed) + assert!( + expr.contains("'< High'"), + "First bin with suppressed lower should use '< Upper' format, got: {}", + expr + ); + // Second bin should use range format + assert!( + expr.contains("'High – Max'"), + "Second bin should use range format 'High – Max', got: {}", + expr + ); + } + + #[test] + fn test_binned_facet_label_expr_default_range_format() { + // Test that binned facet without label_mapping uses default range format + use crate::plot::scale::Scale; + use crate::plot::{ParameterValue, ScaleType}; + + let mut scale = Scale::new("panel"); + scale.scale_type = Some(ScaleType::binned()); + scale.properties.insert( + "breaks".to_string(), + ParameterValue::Array(vec![ + ArrayElement::Number(0.0), + ArrayElement::Number(25.0), + ArrayElement::Number(50.0), + ]), + ); + + // No label_mapping - should use break values in range format + let expr = build_binned_facet_label_expr(None, Some(&scale)); + + // Should use default range format with break values + assert!( + expr.contains("'0 – 25'"), + "Should use default range format '0 – 25', got: {}", + expr + ); + assert!( + expr.contains("'25 – 50'"), + "Should use default range format '25 – 50', got: {}", + expr + ); + } + + #[test] + fn test_facet_free_scales_omits_domain() { + // Test that FACET with free => ['x', 'y'] does not set explicit domains + // This allows Vega-Lite to compute independent domains per facet panel + use crate::plot::scale::Scale; + use crate::plot::{ArrayElement, Facet, FacetLayout, ParameterValue}; + + let writer = VegaLiteWriter::new(); + + let mut spec = Plot::new(); + let layer = Layer::new(Geom::point()) + .with_aesthetic( + "x".to_string(), + AestheticValue::standard_column("x".to_string()), + ) + .with_aesthetic( + "y".to_string(), + AestheticValue::standard_column("y".to_string()), + ); + spec.layers.push(layer); + + // Add facet with free => ['x', 'y'] + let mut facet_properties = HashMap::new(); + facet_properties.insert( + "free".to_string(), + ParameterValue::Array(vec![ + ArrayElement::String("x".to_string()), + ArrayElement::String("y".to_string()), + ]), + ); + spec.facet = Some(Facet { + layout: FacetLayout::Wrap { + variables: vec!["category".to_string()], + }, + properties: facet_properties, + resolved: true, + }); + + // Add scale with explicit domain that should be skipped + let mut x_scale = Scale::new("x"); + x_scale.input_range = Some(vec![ArrayElement::Number(0.0), ArrayElement::Number(100.0)]); + spec.scales.push(x_scale); + + let df = df! { + "x" => &[1, 2, 3], + "y" => &[4, 5, 6], + "category" => &["A", "A", "B"], + "__ggsql_aes_panel__" => &["A", "A", "B"], + } + .unwrap(); + + let json_str = writer.write(&spec, &wrap_data(df)).unwrap(); + let vl_spec: Value = serde_json::from_str(&json_str).unwrap(); + + // Verify resolve.scale is set to independent for both axes + assert_eq!( + vl_spec["resolve"]["scale"]["x"], "independent", + "x scale should be independent" + ); + assert_eq!( + vl_spec["resolve"]["scale"]["y"], "independent", + "y scale should be independent" + ); + + // Verify NO explicit domain is set on x encoding (would override free scales) + // The encoding should exist but scale.domain should not be present + let x_encoding = &vl_spec["spec"]["layer"][0]["encoding"]["x"]; + let has_domain = x_encoding + .get("scale") + .and_then(|s| s.get("domain")) + .is_some(); + assert!( + !has_domain, + "x encoding should NOT have explicit domain when using free scales, got: {}", + serde_json::to_string_pretty(&x_encoding).unwrap() + ); + } + + #[test] + fn test_facet_free_y_only_omits_y_domain() { + // Test that FACET with free => 'y' omits y domain but keeps x domain + use crate::plot::scale::Scale; + use crate::plot::{ArrayElement, Facet, FacetLayout, ParameterValue}; + + let writer = VegaLiteWriter::new(); + + let mut spec = Plot::new(); + let layer = Layer::new(Geom::point()) + .with_aesthetic( + "x".to_string(), + AestheticValue::standard_column("x".to_string()), + ) + .with_aesthetic( + "y".to_string(), + AestheticValue::standard_column("y".to_string()), + ); + spec.layers.push(layer); + + // Add facet with free => 'y' + let mut facet_properties = HashMap::new(); + facet_properties.insert("free".to_string(), ParameterValue::String("y".to_string())); + spec.facet = Some(Facet { + layout: FacetLayout::Wrap { + variables: vec!["category".to_string()], + }, + properties: facet_properties, + resolved: true, + }); + + // Add scales with explicit domains + let mut x_scale = Scale::new("x"); + x_scale.input_range = Some(vec![ArrayElement::Number(0.0), ArrayElement::Number(100.0)]); + spec.scales.push(x_scale); + + let mut y_scale = Scale::new("y"); + y_scale.input_range = Some(vec![ArrayElement::Number(0.0), ArrayElement::Number(50.0)]); + spec.scales.push(y_scale); + + let df = df! { + "x" => &[1, 2, 3], + "y" => &[4, 5, 6], + "category" => &["A", "A", "B"], + "__ggsql_aes_panel__" => &["A", "A", "B"], + } + .unwrap(); + + let json_str = writer.write(&spec, &wrap_data(df)).unwrap(); + let vl_spec: Value = serde_json::from_str(&json_str).unwrap(); + + // Verify only y scale is independent + assert!( + vl_spec["resolve"]["scale"].get("x").is_none(), + "x scale should not be in resolve (shared)" + ); + assert_eq!( + vl_spec["resolve"]["scale"]["y"], "independent", + "y scale should be independent" + ); + + // x encoding SHOULD have domain (not free) + let x_encoding = &vl_spec["spec"]["layer"][0]["encoding"]["x"]; + let x_has_domain = x_encoding + .get("scale") + .and_then(|s| s.get("domain")) + .is_some(); + assert!( + x_has_domain, + "x encoding SHOULD have domain when using free => 'y'" + ); + + // y encoding should NOT have domain (free) + let y_encoding = &vl_spec["spec"]["layer"][0]["encoding"]["y"]; + let y_has_domain = y_encoding + .get("scale") + .and_then(|s| s.get("domain")) + .is_some(); + assert!( + !y_has_domain, + "y encoding should NOT have domain when using free => 'y', got: {}", + serde_json::to_string_pretty(&y_encoding).unwrap() + ); + } + + #[test] + fn test_facet_fixed_scales_keeps_domain() { + // Test that FACET without free property (default) keeps explicit domains + use crate::plot::scale::Scale; + use crate::plot::{ArrayElement, Facet, FacetLayout}; + + let writer = VegaLiteWriter::new(); + + let mut spec = Plot::new(); + let layer = Layer::new(Geom::point()) + .with_aesthetic( + "x".to_string(), + AestheticValue::standard_column("x".to_string()), + ) + .with_aesthetic( + "y".to_string(), + AestheticValue::standard_column("y".to_string()), + ); + spec.layers.push(layer); + + // Add facet without free property (default = fixed/shared scales) + spec.facet = Some(Facet { + layout: FacetLayout::Wrap { + variables: vec!["category".to_string()], + }, + properties: HashMap::new(), // No free property + resolved: true, + }); + + // Add scale with explicit domain + let mut x_scale = Scale::new("x"); + x_scale.input_range = Some(vec![ArrayElement::Number(0.0), ArrayElement::Number(100.0)]); + spec.scales.push(x_scale); + + let df = df! { + "x" => &[1, 2, 3], + "y" => &[4, 5, 6], + "category" => &["A", "A", "B"], + "__ggsql_aes_panel__" => &["A", "A", "B"], + } + .unwrap(); + + let json_str = writer.write(&spec, &wrap_data(df)).unwrap(); + let vl_spec: Value = serde_json::from_str(&json_str).unwrap(); + + // Verify NO resolve.scale (fixed scales don't need it) + assert!( + vl_spec.get("resolve").is_none(), + "Fixed scales should not have resolve property" + ); + + // x encoding SHOULD have domain (fixed scales keep explicit domains) + let x_encoding = &vl_spec["spec"]["layer"][0]["encoding"]["x"]; + let has_domain = x_encoding + .get("scale") + .and_then(|s| s.get("domain")) + .is_some(); + assert!( + has_domain, + "x encoding SHOULD have domain when using fixed scales" + ); + } } diff --git a/tree-sitter-ggsql/grammar.js b/tree-sitter-ggsql/grammar.js index dfb18c53..1977eb24 100644 --- a/tree-sitter-ggsql/grammar.js +++ b/tree-sitter-ggsql/grammar.js @@ -648,6 +648,8 @@ module.exports = grammar({ 'size', 'shape', 'linetype', 'linewidth', 'width', 'height', // Text aesthetics 'label', 'family', 'fontface', 'hjust', 'vjust', + // Facet aesthetics + 'panel', 'row', 'column', // Computed variables 'offset' ), @@ -691,7 +693,8 @@ module.exports = grammar({ field('name', choice( '*', // Wildcard for template $.string, - $.number + $.number, + $.null_literal // NULL for renaming null values )), '=>', field('value', choice($.string, $.null_literal)) // String label or NULL to suppress @@ -727,26 +730,18 @@ module.exports = grammar({ $.identifier ), - // FACET clause - FACET ... SETTING scales => ... - facet_clause: $ => choice( - // FACET row_vars BY col_vars - seq( - caseInsensitive('FACET'), - $.facet_vars, + // FACET clause - FACET vars [BY vars] [SETTING ...] + // Single variable = wrap layout, BY clause = grid layout + facet_clause: $ => seq( + caseInsensitive('FACET'), + $.facet_vars, + optional(seq( alias(caseInsensitive('BY'), $.facet_by), - $.facet_vars, - optional(seq(caseInsensitive('SETTING'), caseInsensitive('scales'), '=>', $.facet_scales)) - ), - // FACET WRAP vars - seq( - caseInsensitive('FACET'), - alias(caseInsensitive('WRAP'), $.facet_wrap), - $.facet_vars, - optional(seq(caseInsensitive('SETTING'), caseInsensitive('scales'), '=>', $.facet_scales)) - ) + $.facet_vars + )), + optional($.setting_clause) // Reuse from DRAW/SCALE ), - facet_wrap: $ => 'WRAP', facet_by: $ => 'BY', facet_vars: $ => seq( @@ -754,10 +749,6 @@ module.exports = grammar({ repeat(seq(',', $.identifier)) ), - facet_scales: $ => choice( - 'fixed', 'free', 'free_x', 'free_y' - ), - // COORD clause - COORD [type] [SETTING prop => value, ...] coord_clause: $ => seq( caseInsensitive('COORD'), diff --git a/tree-sitter-ggsql/queries/highlights.scm b/tree-sitter-ggsql/queries/highlights.scm index 168ad6b5..30e40dda 100644 --- a/tree-sitter-ggsql/queries/highlights.scm +++ b/tree-sitter-ggsql/queries/highlights.scm @@ -57,6 +57,9 @@ "fontface" "hjust" "vjust" + "panel" + "row" + "column" ] @attribute ; String literals diff --git a/tree-sitter-ggsql/test/corpus/basic.txt b/tree-sitter-ggsql/test/corpus/basic.txt index a2d14b9c..8da25bc6 100644 --- a/tree-sitter-ggsql/test/corpus/basic.txt +++ b/tree-sitter-ggsql/test/corpus/basic.txt @@ -2336,3 +2336,134 @@ SELECT SUM(TRY_CAST(price AS INTEGER)) as total FROM data VISUALISE DRAW bar MAP (identifier (bare_identifier)))) name: (aesthetic_name))))))))) + +================================================================================ +FACET single variable (wrap layout) +================================================================================ + +VISUALISE x AS x, y AS y +DRAW point +FACET region + +-------------------------------------------------------------------------------- + +(query + (visualise_statement + (visualise_keyword) + (global_mapping + (mapping_list + (mapping_element + (explicit_mapping + value: (mapping_value + (column_reference + (identifier + (bare_identifier)))) + name: (aesthetic_name))) + (mapping_element + (explicit_mapping + value: (mapping_value + (column_reference + (identifier + (bare_identifier)))) + name: (aesthetic_name))))) + (viz_clause + (draw_clause + (geom_type))) + (viz_clause + (facet_clause + (facet_vars + (identifier + (bare_identifier))))))) + +================================================================================ +FACET grid layout with BY +================================================================================ + +VISUALISE x AS x, y AS y +DRAW point +FACET region BY category + +-------------------------------------------------------------------------------- + +(query + (visualise_statement + (visualise_keyword) + (global_mapping + (mapping_list + (mapping_element + (explicit_mapping + value: (mapping_value + (column_reference + (identifier + (bare_identifier)))) + name: (aesthetic_name))) + (mapping_element + (explicit_mapping + value: (mapping_value + (column_reference + (identifier + (bare_identifier)))) + name: (aesthetic_name))))) + (viz_clause + (draw_clause + (geom_type))) + (viz_clause + (facet_clause + (facet_vars + (identifier + (bare_identifier))) + (facet_by) + (facet_vars + (identifier + (bare_identifier))))))) + +================================================================================ +FACET with SETTING clause +================================================================================ + +VISUALISE x AS x, y AS y +DRAW point +FACET region SETTING scales => 'free_y', ncol => 3 + +-------------------------------------------------------------------------------- + +(query + (visualise_statement + (visualise_keyword) + (global_mapping + (mapping_list + (mapping_element + (explicit_mapping + value: (mapping_value + (column_reference + (identifier + (bare_identifier)))) + name: (aesthetic_name))) + (mapping_element + (explicit_mapping + value: (mapping_value + (column_reference + (identifier + (bare_identifier)))) + name: (aesthetic_name))))) + (viz_clause + (draw_clause + (geom_type))) + (viz_clause + (facet_clause + (facet_vars + (identifier + (bare_identifier))) + (setting_clause + (parameter_assignment + name: (parameter_name + (identifier + (bare_identifier))) + value: (parameter_value + (string))) + (parameter_assignment + name: (parameter_name + (identifier + (bare_identifier))) + value: (parameter_value + (number))))))))