Skip to content

Commit 2fcf21a

Browse files
rustyconover and claude committed
feat: per-parameter documentation for macros
Macros gain an optional arguments_schema (one nullable field per parameter, carrying the per-parameter description via the same vgi_doc field-metadata key functions use) on MacroCreateRequest and the macro listing/get responses, plus a declarative Macro.parameter_docs API.

Mirrors how functions carry per-argument docs; additive + optional so older workers/extensions are unaffected. Closes the gap where macro parameters were name-only and undocumentable.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent f6ee114 commit 2fcf21a

10 files changed

Lines changed: 292 additions & 3 deletions

File tree

tests/catalog/test_serialization.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,140 @@ def test_macro_type_enum_survival(self) -> None:
615615
restored = MacroInfo.deserialize_from_batch(batch)
616616
assert restored.macro_type == macro_type
617617

618+
def test_arguments_schema_round_trip(self) -> None:
    """Round-trip a MacroInfo whose arguments_schema documents only some parameters.

    Verifies field order, field types (the default value's type when a default
    exists, otherwise null), and that ``vgi_doc`` metadata appears only on the
    documented parameters.
    """
    from vgi.argument_spec import VGI_DOC_KEY, macro_arguments_schema, macro_parameter_docs_from_schema

    # Only "lo" has a typed default; "val" and "hi" have none.
    lo_default = pa.RecordBatch.from_pydict({"lo": pa.array([0], type=pa.int64())})
    documented_schema = macro_arguments_schema(
        parameters=["val", "lo", "hi"],
        parameter_default_values=lo_default,
        parameter_docs={"val": "value to clamp", "hi": "upper bound"},
    )
    macro = MacroInfo(
        name="clamp",
        schema_name="main",
        macro_type=MacroType.SCALAR,
        parameters=["val", "lo", "hi"],
        parameter_default_values=lo_default,
        definition="GREATEST(lo, LEAST(hi, val))",
        comment=None,
        tags={},
        arguments_schema=documented_schema,
    )

    decoded_batch, _ = deserialize_record_batch(macro.serialize_to_bytes())
    round_tripped = MacroInfo.deserialize_from_batch(decoded_batch)

    assert round_tripped.arguments_schema is not None
    schema = round_tripped.arguments_schema

    # Exactly one field per parameter, preserving declaration order.
    assert schema.names == ["val", "lo", "hi"]

    # A parameter with a default takes that default's type; the rest are null-typed.
    for param, expected_type in (("lo", pa.int64()), ("val", pa.null()), ("hi", pa.null())):
        assert schema.field(param).type == expected_type

    # vgi_doc is present for documented parameters and absent for "lo".
    val_metadata = schema.field("val").metadata or {}
    hi_metadata = schema.field("hi").metadata or {}
    lo_metadata = schema.field("lo").metadata or {}
    assert val_metadata.get(VGI_DOC_KEY) == b"value to clamp"
    assert hi_metadata.get(VGI_DOC_KEY) == b"upper bound"
    assert VGI_DOC_KEY not in lo_metadata

    # The extractor reports documented parameters only.
    assert macro_parameter_docs_from_schema(schema) == {"val": "value to clamp", "hi": "upper bound"}
657+
658+
def test_none_arguments_schema(self) -> None:
    """A MacroInfo built without arguments_schema round-trips with it still None."""
    # arguments_schema is deliberately not passed, so the field default applies.
    macro = MacroInfo(
        name="simple",
        schema_name="main",
        macro_type=MacroType.SCALAR,
        parameters=["x"],
        definition="x",
        comment=None,
        tags={},
    )

    decoded_batch, _ = deserialize_record_batch(macro.serialize_to_bytes())
    round_tripped = MacroInfo.deserialize_from_batch(decoded_batch)

    assert round_tripped.arguments_schema is None
673+
674+
675+
class TestMacroArgumentsSchemaWire:
    """Per-parameter macro documentation on the create/list wire types."""

    def test_declarative_macro_to_info_carries_docs(self) -> None:
        """Docs declared on a Macro surface in the MacroInfo's arguments_schema."""
        from vgi.argument_spec import macro_parameter_docs_from_schema
        from vgi.catalog.descriptors import Macro

        bound_defaults = pa.RecordBatch.from_pydict(
            {"lo": pa.array([0], type=pa.int64()), "hi": pa.array([100], type=pa.int64())}
        )
        declared = Macro(
            name="clamp",
            macro_type=MacroType.SCALAR,
            parameters=["x", "lo", "hi"],
            parameter_default_values=bound_defaults,
            parameter_docs={"x": "value to clamp"},
            definition="GREATEST(lo, LEAST(hi, x))",
        )

        macro_info = declared.to_macro_info("main")

        assert macro_info.arguments_schema is not None
        # Every parameter gets a field, but only "x" was documented.
        assert macro_info.arguments_schema.names == ["x", "lo", "hi"]
        assert macro_parameter_docs_from_schema(macro_info.arguments_schema) == {"x": "value to clamp"}

    def test_declarative_macro_rejects_unknown_doc_param(self) -> None:
        """Documenting a name that is not in ``parameters`` raises ValueError."""
        from vgi.catalog.descriptors import Macro

        with pytest.raises(ValueError, match="documented parameter 'bogus' not found"):
            Macro(
                name="bad",
                macro_type=MacroType.SCALAR,
                parameters=["x"],
                parameter_docs={"bogus": "nope"},
                definition="x",
            )

    def test_macro_create_request_round_trip(self) -> None:
        """An arguments_schema attached to MacroCreateRequest survives serialization."""
        from vgi.argument_spec import macro_arguments_schema, macro_parameter_docs_from_schema
        from vgi.catalog import OnConflict
        from vgi.protocol import MacroCreateRequest

        request = MacroCreateRequest(
            attach_opaque_data=b"attach",
            schema_name="main",
            name="add",
            macro_type=MacroType.SCALAR,
            parameters=["x", "y"],
            definition="x + y",
            on_conflict=OnConflict.ERROR,
            arguments_schema=macro_arguments_schema(
                parameters=["x", "y"],
                parameter_docs={"x": "first", "y": "second"},
            ),
        )

        wire_bytes = request.serialize_to_bytes()
        decoded = MacroCreateRequest.deserialize_from_bytes(wire_bytes)

        assert decoded.arguments_schema is not None
        assert macro_parameter_docs_from_schema(decoded.arguments_schema) == {"x": "first", "y": "second"}

    def test_macro_create_request_none_arguments_schema(self) -> None:
        """A MacroCreateRequest built without arguments_schema round-trips as None."""
        from vgi.catalog import OnConflict
        from vgi.protocol import MacroCreateRequest

        # arguments_schema is deliberately not passed, so the field default applies.
        request = MacroCreateRequest(
            attach_opaque_data=b"attach",
            schema_name="main",
            name="add",
            macro_type=MacroType.SCALAR,
            parameters=["x", "y"],
            definition="x + y",
            on_conflict=OnConflict.ERROR,
        )

        wire_bytes = request.serialize_to_bytes()
        decoded = MacroCreateRequest.deserialize_from_bytes(wire_bytes)

        assert decoded.arguments_schema is None
751+
618752

619753
class TestFunctionInfoSerialization:
620754
"""Test FunctionInfo serialization round-trip."""

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vgi/_test_fixtures/catalog.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,7 @@ def macro_create(
752752
definition: str,
753753
on_conflict: OnConflict,
754754
parameter_default_values: pa.RecordBatch | None = None,
755+
arguments_schema: pa.Schema | None = None,
755756
) -> None:
756757
"""Create a new macro."""
757758
schema_data = self._get_schema(attach_opaque_data, schema_name)
@@ -773,6 +774,7 @@ def macro_create(
773774
definition=definition,
774775
comment=None,
775776
tags={},
777+
arguments_schema=arguments_schema,
776778
)
777779
)
778780
self._increment_version(attach_opaque_data)

vgi/_test_fixtures/worker.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,10 @@ def _build_enum_stats() -> dict[str, ColumnStatisticsInput]:
519519
parameters=["x", "y"],
520520
definition="x * y",
521521
comment="Multiply two values",
522+
parameter_docs={
523+
"x": "First factor",
524+
"y": "Second factor",
525+
},
522526
),
523527
Macro(
524528
name="vgi_clamp",
@@ -530,13 +534,19 @@ def _build_enum_stats() -> dict[str, ColumnStatisticsInput]:
530534
),
531535
definition="GREATEST(lo, LEAST(hi, val))",
532536
comment="Clamp a value between lo and hi (defaults: 0..100)",
537+
parameter_docs={
538+
"val": "Value to clamp",
539+
"lo": "Lower bound (inclusive)",
540+
"hi": "Upper bound (inclusive)",
541+
},
533542
),
534543
Macro(
535544
name="vgi_range_table",
536545
macro_type=MacroType.TABLE,
537546
parameters=["n"],
538547
definition="SELECT * FROM range(n)",
539548
comment="Table macro returning range of values",
549+
parameter_docs={"n": "Number of rows to generate"},
540550
),
541551
],
542552
),

vgi/argument_spec.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
"ArgumentSpec",
2828
"argument_specs_to_schema",
2929
"extract_argument_specs",
30+
"macro_arguments_schema",
31+
"macro_parameter_docs_from_schema",
3032
"schema_to_argument_specs",
3133
# Metadata constants for parsing schemas
3234
"VGI_ARG_KEY",
@@ -280,6 +282,86 @@ def schema_to_argument_specs(schema: pa.Schema) -> list[ArgumentSpec]:
280282
return specs
281283

282284

285+
# =============================================================================
286+
# Macro Argument Schemas
287+
# =============================================================================
288+
289+
290+
def macro_arguments_schema(
    parameters: Sequence[str],
    parameter_default_values: pa.RecordBatch | None = None,
    parameter_docs: dict[str, str] | None = None,
) -> pa.Schema:
    """Describe a macro's parameters as an Arrow schema.

    Produces one nullable field per entry of ``parameters``, preserving order.
    A field takes the Arrow type of the same-named column in
    ``parameter_default_values`` when there is one, and ``pa.null()``
    otherwise. A non-empty description is stored UTF-8 encoded under the
    ``vgi_doc`` field-metadata key; a parameter with no (or an empty)
    description gets no metadata at all.

    Args:
        parameters: Macro parameter names, in declaration order.
        parameter_default_values: Optional ``RecordBatch`` whose columns are
            named after parameters; only the column types are read here.
        parameter_docs: Optional mapping of parameter name to description.
            Names that do not appear in ``parameters`` are ignored.

    Returns:
        Arrow schema with one nullable field per parameter, in order.

    """
    descriptions = parameter_docs if parameter_docs else {}

    # Arrow type of every parameter that has a typed default value.
    types_by_name: dict[str, pa.DataType] = (
        {}
        if parameter_default_values is None
        else {column.name: column.type for column in parameter_default_values.schema}
    )

    def _doc_metadata(param: str) -> dict[bytes, bytes] | None:
        # Presence-only: a missing or empty description yields no metadata.
        text = descriptions.get(param, "")
        return {VGI_DOC_KEY: text.encode("utf-8")} if text else None

    fields: list[pa.Field[Any]] = [
        pa.field(
            param,
            types_by_name.get(param, pa.null()),
            nullable=True,
            metadata=_doc_metadata(param),
        )
        for param in parameters
    ]
    return pa.schema(fields)
340+
341+
342+
def macro_parameter_docs_from_schema(schema: pa.Schema) -> dict[str, str]:
    """Collect the per-parameter descriptions stored in a macro ``arguments_schema``.

    Reads the ``vgi_doc`` field-metadata value of every field and decodes it
    as UTF-8. Fields with no metadata, no ``vgi_doc`` key, or an empty value
    are left out of the result.

    Args:
        schema: A macro ``arguments_schema`` (one field per parameter).

    Returns:
        Mapping of parameter name to description, for documented parameters only.

    """
    return {
        param_field.name: raw_doc.decode("utf-8")
        for param_field in schema
        # ``metadata`` may be None, so fall back to an empty mapping first.
        if (raw_doc := (param_field.metadata or {}).get(VGI_DOC_KEY))
    }
363+
364+
283365
# =============================================================================
284366
# Extraction from Function Classes
285367
# =============================================================================

vgi/catalog/catalog_interface.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,13 +435,23 @@ class MacroInfo(CatalogSchemaObject, ArrowSerializableDataclass):
435435
names and values are typed defaults. None if no defaults.
436436
Serialized as IPC bytes over the wire.
437437
definition: The SQL expression (scalar) or query (table).
438+
arguments_schema: Optional Arrow schema (serialized as IPC bytes) with one
439+
nullable field per parameter, in ``parameters`` order. Each field's type
440+
is the parameter's default value type when known (else null), and the
441+
``vgi_doc`` field metadata key carries the parameter's description (UTF-8,
442+
presence-only — omitted when undocumented). Mirrors the per-argument doc
443+
channel functions expose via ``FunctionInfo.arguments``. None means the
444+
worker did not supply per-parameter docs (older workers); the extension
445+
falls back to ``parameters`` for names. Built with
446+
``vgi.argument_spec.macro_arguments_schema``.
438447
439448
"""
440449

441450
macro_type: "MacroType"
442451
parameters: list[str]
443452
parameter_default_values: Annotated[pa.RecordBatch | None, ArrowType(pa.binary())] = None
444453
definition: str = ""
454+
arguments_schema: Annotated[pa.Schema | None, ArrowType(pa.binary())] = None
445455

446456

447457
class FunctionType(Enum):
@@ -1979,8 +1989,25 @@ def macro_create(
19791989
definition: str,
19801990
on_conflict: OnConflict,
19811991
parameter_default_values: pa.RecordBatch | None = None,
1992+
arguments_schema: pa.Schema | None = None,
19821993
) -> None:
1983-
"""Create a new macro with the given definition."""
1994+
"""Create a new macro with the given definition.
1995+
1996+
Args:
1997+
attach_opaque_data: Per-attach catalog session token.
1998+
transaction_opaque_data: Optional transaction handle.
1999+
schema_name: Schema to create the macro in.
2000+
name: Name for the new macro.
2001+
macro_type: Whether this is a scalar or table macro.
2002+
parameters: Ordered list of parameter names.
2003+
definition: SQL expression (scalar) or query (table).
2004+
on_conflict: Behavior if the macro already exists.
2005+
parameter_default_values: One-row ``RecordBatch`` with typed defaults.
2006+
arguments_schema: Optional Arrow schema (one nullable field per
2007+
parameter, in ``parameters`` order) carrying per-parameter
2008+
descriptions via the ``vgi_doc`` field metadata key. ``None`` when
2009+
no per-parameter docs are supplied.
2010+
"""
19842011
raise NotImplementedError("Macro create not implemented.")
19852012

19862013
def macro_drop(

0 commit comments

Comments
 (0)