Skip to content

Commit 57940c9

Browse files
rustyconover and claude committed
refactor: consolidate setting/attach_option and transactor DML states
- Hoist the duplicated declarative-descriptor machinery shared by Setting/AttachOption (SettingSpec/AttachOptionSpec serialize+deserialize, the descriptor __set_name__/__get__, the Python→Arrow mapping, and the extract_*_specs walk) into the new vgi/catalog/_descriptor_spec.py. setting.py and attach_option.py are now thin subclasses; public names, wire format (ARROW_SCHEMA), and behaviour are unchanged. test_setting.py imports _resolve_arrow_type from its new home.
- transactor/server.py: give _Insert/_Delete/_UpdateState a shared _DmlState base holding the common __init__ and the count-vs-RETURNING _emit_result (previously inlined in two of the three); exchange() bodies stay per-DML. No behavior change.

mypy + pydoclint clean; full pytest (1952 passed) and make test_launcher (205/206; the bool_in_union failure pre-exists on main) green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 6b6a145 commit 57940c9

5 files changed

Lines changed: 299 additions & 445 deletions

File tree

tests/catalog/test_setting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
import pytest
99
from vgi_rpc.utils import deserialize_record_batch
1010

11+
from vgi.catalog._descriptor_spec import _resolve_arrow_type
1112
from vgi.catalog.setting import (
1213
Setting,
1314
SettingSpec,
14-
_resolve_arrow_type,
1515
extract_setting_specs,
1616
)
1717

vgi/catalog/_descriptor_spec.py

Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
# Copyright 2025, 2026 Query Farm LLC - https://query.farm
2+
3+
"""Shared base classes for declarative descriptor/spec pairs.
4+
5+
[`Setting`][vgi.catalog.setting.Setting] (session-level, resent every call) and
6+
[`AttachOption`][vgi.catalog.attach_option.AttachOption] (delivered once at
7+
``catalog_attach``) are declared the same way — an `Annotated`-hint descriptor
8+
plus a serializable spec with an identical Arrow IPC wire format. This module
9+
holds the machinery both share; the two public modules subclass it so their
10+
names, wire format, and behaviour are unchanged.
11+
"""
12+
13+
from __future__ import annotations
14+
15+
from dataclasses import dataclass, field
16+
from typing import (
17+
TYPE_CHECKING,
18+
Annotated,
19+
Any,
20+
ClassVar,
21+
cast,
22+
get_args,
23+
get_origin,
24+
get_type_hints,
25+
)
26+
27+
import pyarrow as pa
28+
from vgi_rpc.utils import deserialize_record_batch, serialize_record_batch_bytes
29+
30+
from vgi.schema_utils import schema
31+
32+
if TYPE_CHECKING:
33+
from collections.abc import Callable
34+
from typing import Self
35+
36+
37+
# Lookup table from built-in Python scalar types to their Arrow equivalents.
# Shared by every declarative descriptor; `_resolve_arrow_type` consults it
# whenever a hint is not already an Arrow DataType.
_PYTHON_TO_ARROW: dict[type, pa.DataType] = {
    bool: pa.bool_(),
    int: pa.int64(),  # Python ints map to 64-bit signed integers
    float: pa.float64(),  # Python floats map to 64-bit floats
    str: pa.string(),
    bytes: pa.binary(),
}
45+
46+
47+
def _resolve_arrow_type(type_hint: type | pa.DataType) -> pa.DataType:
    """Map a type hint onto the Arrow data type it stands for.

    Arrow data types are passed through untouched; supported Python built-ins
    are translated through ``_PYTHON_TO_ARROW``.

    Args:
        type_hint: A Python type (bool, int, float, str, bytes) or Arrow DataType.

    Returns:
        The resolved Arrow DataType.

    Raises:
        TypeError: If the hint is neither an Arrow DataType nor a supported Python type.

    """
    if isinstance(type_hint, pa.DataType):
        return type_hint

    # No table value is ever None, so None unambiguously means "not mapped".
    mapped = _PYTHON_TO_ARROW.get(type_hint)
    if mapped is not None:
        return mapped

    message = (
        f"Cannot resolve Arrow type from: {type_hint}. "
        "Use a Python type (bool, int, float, str, bytes) or Arrow DataType."
    )
    raise TypeError(message)
68+
69+
70+
@dataclass(frozen=True)
class _SpecBase:
    """Wire-ready description of a single declarative descriptor entry.

    A spec is what a descriptor becomes once its Arrow type has been settled:
    an immutable record that can be turned into Arrow IPC bytes with
    ``serialize`` and rebuilt from a record batch with ``deserialize``.

    Attributes:
        name: The attribute name (from the class attribute name).
        desc: Human-readable description.
        type: The Arrow data type for this entry.
        default: The default value (Python object) or ``None`` if unset.
        ARROW_SCHEMA: Arrow IPC schema used to (de)serialize this spec over the wire.

    """

    name: str
    desc: str
    type: pa.DataType
    default: Any

    ARROW_SCHEMA: ClassVar[pa.Schema] = pa.schema(
        [
            pa.field("name", pa.string(), nullable=False),
            pa.field("description", pa.string(), nullable=False),
            pa.field("type", pa.binary(), nullable=False),
            pa.field("default_value", pa.binary(), nullable=True),
        ]  # type: ignore[arg-type]
    )

    def serialize(self) -> bytes:
        """Encode this spec as a one-row Arrow IPC record batch.

        The Arrow type travels as a serialized schema with a single ``value``
        field. A non-``None`` default travels as a serialized one-row batch
        using that same schema; a ``None`` default is sent as a null.

        Returns:
            The serialized bytes of a batch conforming to ``ARROW_SCHEMA``.

        """
        # The data type is carried inside a single-field schema named "value".
        value_schema = schema(value=self.type)
        encoded_type = value_schema.serialize().to_pybytes()

        if self.default is None:
            encoded_default: bytes | None = None
        else:
            one_row = pa.RecordBatch.from_pydict({"value": [self.default]}, schema=value_schema)
            encoded_default = serialize_record_batch_bytes(one_row)

        record = {
            "name": self.name,
            "description": self.desc,
            "type": encoded_type,
            "default_value": encoded_default,
        }
        envelope = pa.RecordBatch.from_pylist([record], schema=self.ARROW_SCHEMA)
        return serialize_record_batch_bytes(envelope)

    @classmethod
    def deserialize(cls, batch: pa.RecordBatch) -> Self:
        """Rebuild a spec from a record batch produced by ``serialize``.

        Args:
            batch: Record batch carrying the ``name``, ``description``, ``type``
                and ``default_value`` columns.

        Returns:
            A new instance of ``cls`` populated from the batch.

        """
        from vgi_rpc.utils import _validate_single_row_batch

        fields = _validate_single_row_batch(
            batch,
            cls.__name__,
            required_fields=["name", "description", "type"],
        )

        # The type column holds a serialized schema; the real type is its "value" field.
        value_schema = pa.ipc.read_schema(pa.py_buffer(cast(bytes, fields["type"])))
        arrow_type = value_schema.field("value").type

        # A null default_value means no default was declared.
        raw_default = fields["default_value"]
        if raw_default is None:
            decoded_default: Any = None
        else:
            payload, _ = deserialize_record_batch(cast(bytes, raw_default))
            decoded_default = payload.column("value")[0].as_py()

        return cls(
            name=cast(str, fields["name"]),
            desc=cast(str, fields["description"]),
            type=arrow_type,
            default=decoded_default,
        )
151+
152+
153+
@dataclass
class _DescriptorBase:
    """Common base for descriptors declared inside `Annotated` hints.

    The Arrow type of an entry comes from the base type of its `Annotated`
    hint unless ``arrow_type`` is given, in which case that value is used.

    Attributes:
        desc: Human-readable description.
        arrow_type: Optional explicit Arrow type (overrides inference).

    """

    desc: str = ""
    arrow_type: pa.DataType | None = None

    # Filled in by ``__set_name__``; excluded from ``__init__`` and ``repr``.
    _name: str = field(default="", init=False, repr=False)

    def __set_name__(self, owner: type, name: str) -> None:
        """Record the attribute name this descriptor was assigned to on ``owner``."""
        self._name = name

    def __get__(self, obj: object | None, objtype: type | None = None) -> Any:
        """Return the descriptor for class access, or the class-level value for instance access."""
        if obj is not None:
            # Instance access: look the recorded name up on the instance's class.
            return getattr(type(obj), self._name, None)
        return self
181+
182+
183+
def _extract_specs[D: _DescriptorBase, S: _SpecBase](
184+
declaring_cls: type,
185+
*,
186+
descriptor_type: type[D],
187+
spec_factory: Callable[..., S],
188+
) -> list[S]:
189+
"""Extract specs from a class whose attributes are ``descriptor_type`` instances.
190+
191+
Parses ``Annotated[type, descriptor_type(...)]`` attributes and resolves each
192+
into a spec built by ``spec_factory``.
193+
194+
Args:
195+
declaring_cls: The class declaring the descriptors (Settings / AttachOptions).
196+
descriptor_type: The descriptor class to match in the annotation metadata.
197+
spec_factory: Callable building a spec from ``name``/``desc``/``type``/``default``.
198+
199+
Returns:
200+
List of specs extracted from the class.
201+
202+
Raises:
203+
TypeError: If an entry's Arrow type cannot be resolved.
204+
205+
"""
206+
specs: list[S] = []
207+
208+
# Get type hints with extras (preserves Annotated)
209+
try:
210+
hints = get_type_hints(declaring_cls, include_extras=True)
211+
except Exception:
212+
# If type hints can't be resolved, return empty list
213+
return specs
214+
215+
for name, hint in hints.items():
216+
if get_origin(hint) is not Annotated:
217+
continue
218+
219+
args = get_args(hint)
220+
if len(args) < 2:
221+
continue
222+
223+
base_type = args[0]
224+
225+
descriptor: D | None = None
226+
for arg in args[1:]:
227+
if isinstance(arg, descriptor_type):
228+
descriptor = arg
229+
break
230+
231+
if descriptor is None:
232+
continue
233+
234+
default = getattr(declaring_cls, name, None)
235+
arrow_type = descriptor.arrow_type if descriptor.arrow_type is not None else _resolve_arrow_type(base_type)
236+
237+
specs.append(
238+
spec_factory(
239+
name=name,
240+
desc=descriptor.desc,
241+
type=arrow_type,
242+
default=default,
243+
)
244+
)
245+
246+
return specs

0 commit comments

Comments
 (0)