Skip to content

Commit 595481d

Browse files
rustyconover and claude committed
test(fixtures): add bad-enum + missing-enum-variant reference workers
Make the example catalog a conformance floor for other-language VGI implementations by ensuring every wire-enum variant is emitted by at least one fixture:

- bad_enum.py (vgi-fixture-bad-enum-worker): advertises an unrecognized null_handling value ("WEIRD") on the `double` scalar, to exercise the C++ extension's strict wire-enum validation. A real Enum member whose .name is bogus serializes through ArrowSerializableDataclass without a typed path.
- QuerySeedFunction: emits stability=CONSISTENT_WITHIN_QUERY (no fixture emitted this variant before).
- OverlappingRangePartitionedFunction: emits partition_kind=OVERLAPPING_PARTITIONS (no fixture emitted this before).

All three are registered in the example catalog; bad-enum is wired as a vgi-fixtures entry point alongside bad-protocol.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 59e332b commit 595481d

7 files changed

Lines changed: 226 additions & 0 deletions

File tree

packages/vgi-fixtures/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ vgi-fixture-versioned-worker = "vgi._test_fixtures.versioned:main"
4343
vgi-fixture-versioned-tables-worker = "vgi._test_fixtures.versioned_tables:main"
4444
vgi-fixture-attach-options-worker = "vgi._test_fixtures.attach_options:main"
4545
vgi-fixture-bad-protocol-worker = "vgi._test_fixtures.bad_protocol:main"
46+
vgi-fixture-bad-enum-worker = "vgi._test_fixtures.bad_enum:main"
4647
vgi-fixture-writable-worker = "vgi._test_fixtures.writable.worker:main"
4748
vgi-fixture-simple-writable-worker = "vgi._test_fixtures.simple_writable:main"
4849

vgi/_test_fixtures/bad_enum.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
2+
3+
"""Fixture worker that deliberately advertises an unrecognized enum value.
4+
5+
This fixture exercises the C++ extension's *wire-enum validation* end-to-end:
6+
the catalog-metadata parser (``ParseFunctionInfo`` in
7+
``vgi/src/vgi_catalog_api.cpp``) must reject an enum string it does not
8+
recognize with a loud ``IOException`` rather than silently falling back to a
9+
default. A silent fallback would run with behavior inconsistent with what the
10+
worker declared (e.g. treating a ``SPECIAL`` null-handling function as
11+
``DEFAULT``).
12+
13+
The trick is entirely Python-side and needs no extension rebuild. The normal
14+
metadata path can only ever emit valid enum names because the values come from
15+
typed Python ``Enum`` members. To get a bogus string onto the wire we override
16+
:meth:`ExampleCatalog._function_to_info` for one scalar function (``double``)
17+
and swap its ``null_handling`` for :class:`_BogusNullHandling.WEIRD` — a real
18+
``Enum`` member whose ``.name`` is ``"WEIRD"``. The vgi-rpc serializer converts
19+
any ``Enum`` field to ``value.name`` (see ``ArrowSerializableDataclass``), so
20+
``"WEIRD"`` lands in the ``null_handling`` Arrow column and the C++ parser
21+
trips on it the moment the ``double`` function's metadata is loaded.
22+
23+
Otherwise this is a drop-in replacement for ``vgi-fixture-worker``: every other
24+
function and the catalog are inherited unchanged from :class:`ExampleWorker`,
25+
so any function except ``double`` still resolves normally.
26+
27+
Registered as the ``vgi-fixture-bad-enum-worker`` entry point.
28+
"""
29+
30+
from __future__ import annotations
31+
32+
from dataclasses import replace
33+
from enum import Enum
34+
35+
from vgi._test_fixtures.worker import ExampleCatalog, ExampleWorker
36+
from vgi.catalog.catalog_interface import FunctionInfo
37+
38+
# The scalar function whose null_handling we corrupt. Tests reference this name
39+
# to force the broken metadata onto the parse path.
40+
BAD_ENUM_FUNCTION = "double"
41+
42+
43+
class _BogusNullHandling(Enum):
44+
"""An enum member whose ``.name`` is a value the C++ parser cannot map."""
45+
46+
WEIRD = "WEIRD"
47+
48+
49+
class BadEnumCatalog(ExampleCatalog):
    """ExampleCatalog that advertises a bogus null_handling for one function."""

    def _function_to_info(self, func_cls: type, schema_name: str) -> FunctionInfo:
        """Build a function's metadata, corrupting ``null_handling`` for one target.

        Delegates to the parent implementation. Only when the resulting info is
        named ``BAD_ENUM_FUNCTION`` *and* already declares a ``null_handling``
        is a copy returned with ``null_handling`` swapped for
        ``_BogusNullHandling.WEIRD``; every other function passes through
        untouched.

        Args:
            func_cls: The function class to describe (forwarded to the parent).
            schema_name: The schema the function is listed under (forwarded to
                the parent).

        Returns:
            The parent's ``FunctionInfo``, or a corrupted copy for the target.
        """
        info = super()._function_to_info(func_cls, schema_name)
        if info.name != BAD_ENUM_FUNCTION or info.null_handling is None:
            return info
        # FunctionInfo is frozen; replace() returns a corrupted copy.
        return replace(info, null_handling=_BogusNullHandling.WEIRD)  # type: ignore[arg-type]
58+
59+
60+
class BadEnumWorker(ExampleWorker):
    """ExampleWorker that serves the example catalog with one bad enum value."""

    # The only override: point the worker at the catalog that corrupts one
    # function's null_handling. Nothing else is redefined in this class.
    catalog_interface = BadEnumCatalog
64+
65+
66+
def main() -> None:
    """Run the bad-enum fixture worker process."""
    BadEnumWorker.main()


if __name__ == "__main__":
    main()

vgi/_test_fixtures/scalar/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
from vgi._test_fixtures.scalar.random_demo import (
5353
BernoulliFunction,
5454
HashSeedFunction,
55+
QuerySeedFunction,
5556
RandomBytesFunction,
5657
RandomIntFunction,
5758
)
@@ -102,6 +103,7 @@
102103
"PairTypeIntIntFunction",
103104
"PairTypeIntStrFunction",
104105
"PairTypeStrStrFunction",
106+
"QuerySeedFunction",
105107
"RandomBytesFunction",
106108
"RandomIntFunction",
107109
"ReturnSecretValueFunction",

vgi/_test_fixtures/scalar/random_demo.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,50 @@ def compute(
136136
return pa.array([seed + i for i in range(_length)], type=pa.int64())
137137

138138

139+
class QuerySeedFunction(ScalarFunction):
    """Adds a per-query-stable seed to each input value.

    Demonstrates ``FunctionStability.CONSISTENT_WITHIN_QUERY`` — the only
    fixture that emits this stability variant. Semantically the value is fixed
    for the duration of a single query but may differ across queries (like
    ``now()``). DuckDB has no behavioral consumer that this fixture asserts; it
    exists so the wire path for the third stability value stays exercised and
    so other-language workers must specify it.

    Example:
        SQL: SELECT query_seed(value) FROM data
    """

    class Meta:
        """Function metadata."""

        name = "query_seed"
        description = "Add a per-query-stable seed to each value (demonstrates CONSISTENT_WITHIN_QUERY stability)"
        stability = FunctionStability.CONSISTENT_WITHIN_QUERY
        examples = [
            FunctionExample(
                sql="SELECT query_seed(value) FROM data",
                description="Offset each value by a seed that is constant within a query",
            ),
        ]

    @classmethod
    def compute(
        cls,
        value: Annotated[pa.Int64Array, Param(doc="Value to offset")],
    ) -> Annotated[pa.Int64Array, Returns()]:
        """Add a fixed per-query offset to each value.

        The offset is deterministic here (a constant) so SQL tests have a
        stable expected output; the stability flag is what is under test, not
        the numeric result.
        """
        import pyarrow.compute as pc

        # The "seed" is the literal 1000 on every call, not a per-query value.
        return pc.add(value, 1000)
181+
182+
139183
class RandomBytesFunction(ScalarFunction):
140184
"""Generates deterministic pseudo-random binary blobs from a seed."""
141185

vgi/_test_fixtures/table/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
from vgi._test_fixtures.table.partition_columns import (
8383
CountryPartitionedSalesFunction,
8484
DisjointRangePartitionedFunction,
85+
OverlappingRangePartitionedFunction,
8586
PartitionedWithExplicitOverrideFunction,
8687
RegionYearPartitionedFunction,
8788
)
@@ -182,6 +183,7 @@
182183
"NestedSequenceFunction",
183184
"NonMonotoneBatchIndexFunction",
184185
"OrderEchoFunction",
186+
"OverlappingRangePartitionedFunction",
185187
"PartitionedBatchIndexFunction",
186188
"PartitionedBatchIndexMarkedFunction",
187189
"PartitionedFixedOrderFunction",

vgi/_test_fixtures/table/partition_columns.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@
3333
disjoint integer range. Verifies the wire path; DuckDB falls back to
3434
``HASH_GROUP_BY`` for GROUP BY queries against it.
3535
36+
* :class:`OverlappingRangePartitionedFunction` — declares
37+
``OVERLAPPING_PARTITIONS`` (the only fixture that does). Consecutive
38+
chunks share ``key`` values. Wire-level only; DuckDB falls back to
39+
``HASH_GROUP_BY``.
40+
3641
All fixtures use the in-memory state pattern (no work-queue / no
3742
stream_state) — they're simpler than the v1 partitioned_batch_index
3843
since the v2 plan is about correctness of the partition contract,
@@ -470,3 +475,99 @@ def process(
470475
)
471476
out.emit(batch)
472477
state.current_idx = rpp
478+
479+
480+
# =============================================================================
481+
# OVERLAPPING_PARTITIONS — wire-level only
482+
# =============================================================================
483+
484+
485+
@dataclass(slots=True, frozen=True)
class _OverlappingArgs:
    """Arguments for ``overlapping_range_partitioned``."""

    # Positional argument 0: number of partitions to generate (declared ge=1).
    partitions: Annotated[int, Arg(0, doc="Number of overlapping partitions", ge=1)]
    # Named argument: rows emitted per partition; defaults to 10 (declared ge=1).
    rows_per_partition: Annotated[int, Arg("rows_per_partition", default=10, doc="Rows per partition", ge=1)]
491+
492+
493+
@dataclass(kw_only=True)
class _OverlappingState(ArrowSerializableDataclass):
    """Per-stream progress for ``overlapping_range_partitioned``."""

    # Partition index most recently popped from the queue; -1 before any pop.
    current_partition_idx: int = -1
    # Rows already emitted for the current partition; reset to 0 on each pop.
    current_idx: int = 0
    # False until the first queue item has been popped.
    started: bool = False
498+
499+
500+
@bind_fixed_schema
@_cardinality_from_count
class OverlappingRangePartitionedFunction(TableFunctionGenerator[_OverlappingArgs, _OverlappingState]):
    """Per-chunk *overlapping* integer ranges on ``key``.

    Each chunk N emits ``key`` values in ``[N*500, N*500 + rows)``. With the
    default ``rows_per_partition`` of 10 the ranges are disjoint, but callers
    pass ``rows_per_partition > 500`` to make consecutive chunks overlap on
    ``key`` — distinguishing this from
    :class:`DisjointRangePartitionedFunction`.

    Declares ``OVERLAPPING_PARTITIONS``. Like DISJOINT, DuckDB has no consumer
    for OVERLAPPING today, so GROUP BY queries fall back to ``HASH_GROUP_BY``;
    the value's purpose here is to keep the wire path (declaration, per-batch
    min/max metadata, C++ extraction → ``get_partition_info``) exercised so
    other-language workers must specify it. This is the only fixture that emits
    ``OVERLAPPING_PARTITIONS``.
    """

    FIXED_SCHEMA: ClassVar[pa.Schema] = pa.schema(
        [
            partition_field("key", pa.int64()),
            pa.field("value", pa.int64()),
        ]
    )

    class Meta:
        # Function metadata; ``partition_kind`` is the value this fixture
        # exists to put on the wire.
        name = "overlapping_range_partitioned"
        description = (
            "Overlapping per-chunk integer ranges on ``key``. Declares "
            "OVERLAPPING_PARTITIONS (wire-level only; DuckDB falls back to "
            "HASH_GROUP_BY for now)."
        )
        categories = ["generator", "partitioning", "testing"]
        partition_kind = PartitionKind.OVERLAPPING_PARTITIONS

    @classmethod
    def on_init(cls, params: InitParams[_OverlappingArgs]) -> GlobalInitResponse:
        """Enqueue one packed work item per partition index.

        Indices ``0 .. partitions - 1`` are each packed with
        ``_QUEUE_ITEM_FMT`` and pushed onto ``params.storage``'s queue in a
        single call; ``process`` later pops them one at a time.
        """
        items = [struct.pack(_QUEUE_ITEM_FMT, i) for i in range(params.args.partitions)]
        params.storage.queue_push(items)
        return GlobalInitResponse()

    @classmethod
    def initial_state(cls, params: ProcessParams[_OverlappingArgs]) -> _OverlappingState:
        """Return a fresh state with no partition claimed yet."""
        return _OverlappingState()

    @classmethod
    def process(
        cls,
        params: ProcessParams[_OverlappingArgs],
        state: _OverlappingState,
        out: OutputCollector,
    ) -> None:
        """Emit the next partition as a single batch, or finish when none remain.

        A new queue item is popped on the first call and whenever the current
        partition has been fully emitted. Because each partition goes out as
        one batch of ``rows_per_partition`` rows, every call after the first
        also pops.
        """
        rpp = params.args.rows_per_partition

        if not state.started or state.current_idx >= rpp:
            item = params.storage.queue_pop()
            if item is None:
                # Queue drained: no partitions left to emit.
                out.finish()
                return
            (state.current_partition_idx,) = struct.unpack(_QUEUE_ITEM_FMT, item)
            state.current_idx = 0
            state.started = True

        # Stride of 500 (< rpp when callers want overlap) makes consecutive
        # chunks share key values.
        base = state.current_partition_idx * 500
        keys = [base + i for i in range(rpp)]
        values = [state.current_partition_idx * 10 + i for i in range(rpp)]
        batch = pa.RecordBatch.from_pydict(
            {"key": keys, "value": values},
            schema=cls.FIXED_SCHEMA,
        )
        out.emit(batch)
        # Mark the partition as fully emitted so the next call pops a new item.
        state.current_idx = rpp

vgi/_test_fixtures/worker.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
PairTypeIntIntFunction,
8787
PairTypeIntStrFunction,
8888
PairTypeStrStrFunction,
89+
QuerySeedFunction,
8990
RandomBytesFunction,
9091
RandomIntFunction,
9192
ReturnSecretValueFunction,
@@ -146,6 +147,7 @@
146147
NestedSequenceFunction,
147148
NonMonotoneBatchIndexFunction,
148149
OrderEchoFunction,
150+
OverlappingRangePartitionedFunction,
149151
PartitionedBatchIndexFunction,
150152
PartitionedBatchIndexMarkedFunction,
151153
PartitionedFixedOrderFunction,
@@ -385,6 +387,7 @@ def _build_enum_stats() -> dict[str, ColumnStatisticsInput]:
385387
# — see vgi/_test_fixtures/table/partition_columns.py.
386388
CountryPartitionedSalesFunction,
387389
DisjointRangePartitionedFunction,
390+
OverlappingRangePartitionedFunction,
388391
PartitionedWithExplicitOverrideFunction,
389392
RegionYearPartitionedFunction,
390393
# Deliberately-broken batch_index fixtures (see
@@ -457,6 +460,7 @@ def _build_enum_stats() -> dict[str, ColumnStatisticsInput]:
457460
PairTypeIntIntFunction,
458461
PairTypeIntStrFunction,
459462
PairTypeStrStrFunction,
463+
QuerySeedFunction,
460464
RandomBytesFunction,
461465
RandomIntFunction,
462466
ReturnSecretValueFunction,

0 commit comments

Comments
 (0)