fix(py): custom reader tests for new execution pipeline (#131)

cpsievert · claude · web-flow · commit 0da25467dda1 · 2026-02-17T20:04:59.000-06:00
* fix(python): update custom reader tests for new execution pipeline

  The "New Scale syntax and implementation" PR (#82, 7a5ed62) introduced a column renaming pipeline where the execution engine generates SQL like `SELECT "x" AS "__ggsql_aes_x__"` and passes it to readers. This requires custom readers to actually execute the SQL they receive, since the renamed columns are expected downstream during pruning.

  The custom reader tests were written with static readers that returned hardcoded DataFrames, ignoring the SQL parameter entirely. This worked with the old pipeline but fails with the new one because the returned DataFrames lack the `__ggsql_aes_*` prefixed columns.

  These failures were never caught on main because the Python CI workflow only triggers on changes to `ggsql-python/**`, and #82 only changed `src/` files.

  Changes:
  - Update 4 custom reader tests to use in-memory DuckDB connections that properly execute the SQL they receive
  - Add duckdb and pyarrow as test dependencies
  - Fix CI workflow to install the locally-built wheel instead of downloading a stale version from PyPI (pip install with glob instead of --find-links)

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(ci): fix wheel glob expansion in Python CI install step

  The glob pattern in `pip install target/wheels/ggsql-*.whl'[test]'` was not expanding because the shell treated `[test]` as a character class, preventing any file from matching. Use `ls` to expand the glob into a variable first.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
@@ -47,7 +47,9 @@ jobs:
 
       - name: Install wheel and test dependencies
         shell: bash
-        run: pip install --find-links target/wheels/ ggsql[test]
+        run: |
+          WHEEL=$(ls target/wheels/ggsql-*.whl)
+          pip install "${WHEEL}[test]"
 
       - name: Run tests
         shell: bash
diff --git a/ggsql-python/pyproject.toml b/ggsql-python/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-test = ["pytest>=7.0"]
+test = ["pytest>=7.0", "duckdb>=1.0", "pyarrow>=14.0"]
 dev = ["maturin>=1.4"]
 
 [tool.maturin]
@@ -31,6 +31,8 @@ module-name = "ggsql._ggsql"
 
 [dependency-groups]
 dev = [
+    "duckdb>=1.0",
     "maturin>=1.11.5",
+    "pyarrow>=14.0",
     "pytest>=9.0.2",
 ]
diff --git a/ggsql-python/tests/test_ggsql.py b/ggsql-python/tests/test_ggsql.py
@@ -10,6 +10,7 @@
 
 import json
 
+import duckdb
 import pytest
 import polars as pl
 import altair
@@ -399,8 +400,16 @@ def test_simple_custom_reader(self):
         """Custom reader with execute_sql() method works."""
 
         class SimpleReader:
+            def __init__(self):
+                self.conn = duckdb.connect()
+                self.conn.execute(
+                    "CREATE TABLE data AS SELECT * FROM ("
+                    "VALUES (1, 10), (2, 20), (3, 30)"
+                    ") AS t(x, y)"
+                )
+
             def execute_sql(self, sql: str) -> pl.DataFrame:
-                return pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
+                return self.conn.execute(sql).pl()
 
         reader = SimpleReader()
         spec = ggsql.execute("SELECT * FROM data VISUALISE x, y DRAW point", reader)
@@ -411,19 +420,16 @@ def test_custom_reader_with_register(self):
 
         class RegisterReader:
             def __init__(self):
-                self.tables = {}
+                self.conn = duckdb.connect()
 
             def execute_sql(self, sql: str) -> pl.DataFrame:
-                # Simple: just return the first registered table
-                if self.tables:
-                    return next(iter(self.tables.values()))
-                return pl.DataFrame({"x": [1], "y": [2]})
+                return self.conn.execute(sql).pl()
 
             def supports_register(self) -> bool:
                 return True
 
             def register(self, name: str, df: pl.DataFrame) -> None:
-                self.tables[name] = df
+                self.conn.register(name, df)
 
         reader = RegisterReader()
         spec = ggsql.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point", reader)
@@ -460,17 +466,20 @@ def test_native_reader_fast_path(self):
     def test_custom_reader_can_render(self):
         """Custom reader result can be rendered to Vega-Lite."""
 
-        class StaticReader:
-            def execute_sql(self, sql: str) -> pl.DataFrame:
-                return pl.DataFrame(
-                    {
-                        "x": [1, 2, 3, 4, 5],
-                        "y": [10, 40, 20, 50, 30],
-                        "category": ["A", "B", "A", "B", "A"],
-                    }
+        class DuckDBBackedReader:
+            def __init__(self):
+                self.conn = duckdb.connect()
+                self.conn.execute(
+                    "CREATE TABLE data AS SELECT * FROM ("
+                    "VALUES (1, 10, 'A'), (2, 40, 'B'), (3, 20, 'A'), "
+                    "(4, 50, 'B'), (5, 30, 'A')"
+                    ") AS t(x, y, category)"
                 )
 
-        reader = StaticReader()
+            def execute_sql(self, sql: str) -> pl.DataFrame:
+                return self.conn.execute(sql).pl()
+
+        reader = DuckDBBackedReader()
         spec = ggsql.execute(
             "SELECT * FROM data VISUALISE x, y, category AS color DRAW point",
             reader,
@@ -488,11 +497,16 @@ def test_custom_reader_execute_sql_called(self):
 
         class RecordingReader:
             def __init__(self):
+                self.conn = duckdb.connect()
+                self.conn.execute(
+                    "CREATE TABLE data AS SELECT * FROM ("
+                    "VALUES (1, 2)) AS t(x, y)"
+                )
                 self.execute_calls = []
 
             def execute_sql(self, sql: str) -> pl.DataFrame:
                 self.execute_calls.append(sql)
-                return pl.DataFrame({"x": [1], "y": [2]})
+                return self.conn.execute(sql).pl()
 
         reader = RecordingReader()
         ggsql.execute(

Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ dependencies = [`
`21`	`21`	`]`
`22`	`22`
`23`	`23`	`[project.optional-dependencies]`
`24`		`-test = ["pytest>=7.0"]`
	`24`	`+test = ["pytest>=7.0", "duckdb>=1.0", "pyarrow>=14.0"]`
`25`	`25`	`dev = ["maturin>=1.4"]`
`26`	`26`
`27`	`27`	`[tool.maturin]`
`@@ -31,6 +31,8 @@ module-name = "ggsql._ggsql"`
`31`	`31`
`32`	`32`	`[dependency-groups]`
`33`	`33`	`dev = [`
	`34`	`+ "duckdb>=1.0",`
`34`	`35`	`"maturin>=1.11.5",`
	`36`	`+ "pyarrow>=14.0",`
`35`	`37`	`"pytest>=9.0.2",`
`36`	`38`	`]`