Skip to content

Commit 4351f33

Browse files
rustyconoverclaude
andcommitted
feat(secrets): scope- and type-aware ResolvedSecrets + name-keying
Resolved secrets are now keyed by name (the connector change). Make SecretsAccessor.to_dict() return a ResolvedSecrets dict subclass with for_scope / for_scope_of_type / of_type / secret_type / field_for, and make the scalar @secret(type) resolver match on each secret's serialized "type" field (column-name fallback for the old format). Update the secret fixtures to select by type via of_type, and add a multi_secret_demo fixture that resolves two same-type scoped secrets in one bind. Unit-tested. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent b61fd9d commit 4351f33

6 files changed

Lines changed: 209 additions & 14 deletions

File tree

tests/test_resolved_secrets.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""Unit tests for ResolvedSecrets type- and scope-aware selection."""
2+
3+
from vgi.table_function import ResolvedSecrets
4+
5+
6+
def _secrets() -> ResolvedSecrets:
7+
# Values are plain strings here; ResolvedSecrets also accepts pyarrow Scalars
8+
# (it calls .as_py() when present).
9+
return ResolvedSecrets(
10+
{
11+
"my_s3": {"type": "s3", "key_id": "AAA", "scope": "s3://bucket-a"},
12+
"my_s3_b": {
13+
"type": "s3",
14+
"key_id": "BBB",
15+
"scope": "s3://bucket-b\ns3://bucket-b2",
16+
},
17+
"my_gcs": {"type": "gcs", "key_id": "G"},
18+
}
19+
)
20+
21+
22+
def test_type_aware() -> None:
23+
"""Type-aware accessors find/identify secrets by type."""
24+
s = _secrets()
25+
assert s.secret_type("my_s3") == "s3"
26+
assert s.secret_type("my_gcs") == "gcs"
27+
assert len(s.of_type("s3")) == 2
28+
assert len(s.of_type("gcs")) == 1
29+
assert s.of_type("azure") == []
30+
31+
32+
def test_for_scope_of_type_per_bucket() -> None:
33+
"""Per-bucket scope selection picks the right s3 secret."""
34+
s = _secrets()
35+
assert s.for_scope_of_type("s3://bucket-a/x.dat", "s3")["key_id"] == "AAA"
36+
assert s.for_scope_of_type("s3://bucket-b2/y.dat", "s3")["key_id"] == "BBB"
37+
assert s.field_for("s3://bucket-a/x.dat", "key_id") == "AAA"
38+
39+
40+
def test_longest_prefix_and_fallback() -> None:
41+
"""Longest scope prefix wins; unscoped is the fallback."""
42+
s = ResolvedSecrets(
43+
{
44+
"broad": {"type": "s3", "key_id": "broad", "scope": "s3://bucket"},
45+
"narrow": {"type": "s3", "key_id": "narrow", "scope": "s3://bucket/data"},
46+
}
47+
)
48+
assert s.for_scope("s3://bucket/data/x.dat")["key_id"] == "narrow"
49+
assert s.for_scope("s3://bucket/other/x.dat")["key_id"] == "broad"
50+
51+
unscoped = ResolvedSecrets({"only": {"type": "s3", "key_id": "only"}})
52+
assert unscoped.for_scope("s3://any/x")["key_id"] == "only"
53+
54+
assert s.for_scope("s3://nope/x") is None
55+
56+
57+
def test_dict_access_still_works() -> None:
58+
"""ResolvedSecrets keeps plain dict access."""
59+
s = _secrets()
60+
assert s["my_s3"]["key_id"] == "AAA"
61+
assert s.get("missing") is None

vgi/_test_fixtures/table/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
TenThousandFunction,
120120
)
121121
from vgi._test_fixtures.table.settings import (
122+
MultiSecretDemoFunction,
122123
ScopedSecretDemoFunction,
123124
SecretDemoFunction,
124125
SettingsAwareFunction,
@@ -212,6 +213,7 @@
212213
"RffStructScanFunction",
213214
"RowIdSequenceFunction",
214215
"SampleEchoFunction",
216+
"MultiSecretDemoFunction",
215217
"ScopedSecretDemoFunction",
216218
"SecretDemoFunction",
217219
"SequenceFunction",

vgi/_test_fixtures/table/settings.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ def on_bind(
321321
@classmethod
322322
def initial_state(cls, params: ProcessParams[None]) -> SecretDemoState:
323323
"""Build initial state from secret key-value pairs."""
324-
secret = params.secrets.get("vgi_example", {})
324+
secret = next(iter(params.secrets.of_type("vgi_example")), {})
325325
keys = list(secret.keys())
326326
values = [str(v.as_py()) for v in secret.values()]
327327
types = [str(v.type) for v in secret.values()]
@@ -406,12 +406,52 @@ def on_bind(
406406
@classmethod
407407
def initial_state(cls, params: ProcessParams[ScopedSecretDemoArgs]) -> ScopedSecretDemoState:
408408
"""Build state from resolved secrets."""
409-
secret = params.secrets.get("vgi_example", {})
409+
secret = next(iter(params.secrets.of_type("vgi_example")), {})
410410
return ScopedSecretDemoState(
411411
found=bool(secret),
412412
secret_keys=",".join(secret.keys()) if secret else "",
413413
)
414414

415+
416+
@dataclass(kw_only=True)
417+
class MultiSecretDemoState(ArrowSerializableDataclass):
418+
"""State for MultiSecretDemoFunction."""
419+
420+
api_key: str = ""
421+
422+
423+
@init_single_worker
424+
class MultiSecretDemoFunction(TableFunctionGenerator[ScopedSecretDemoArgs, MultiSecretDemoState]):
425+
"""Resolve TWO same-type scoped secrets in one bind, then select per path.
426+
427+
Requests the ``vgi_example`` secret for both ``s3://bucket-a/`` and
428+
``s3://bucket-b/`` scopes in a single bind. Because resolved secrets are keyed
429+
by name, both survive; ``for_scope_of_type`` then picks the one whose scope
430+
matches the ``path`` argument and returns its ``api_key``.
431+
"""
432+
433+
class Meta:
434+
"""Metadata for MultiSecretDemoFunction."""
435+
436+
name = "multi_secret_demo"
437+
description = "Demo: two same-type scoped secrets resolved in one bind"
438+
439+
@classmethod
440+
def on_bind(cls, params: BindParams[ScopedSecretDemoArgs]) -> BindResponse:
441+
"""Request the secret for two distinct scopes of the same type."""
442+
params.secrets.get("vgi_example", scope="s3://bucket-a/")
443+
params.secrets.get("vgi_example", scope="s3://bucket-b/")
444+
return BindResponse(output_schema=schema({"api_key": pa.string()}))
445+
446+
@classmethod
447+
def initial_state(cls, params: ProcessParams[ScopedSecretDemoArgs]) -> MultiSecretDemoState:
448+
"""Select the resolved secret matching the path and return its api_key."""
449+
secret = params.secrets.for_scope_of_type(params.args.path, "vgi_example") or {}
450+
api_key = secret.get("api_key")
451+
if api_key is not None and hasattr(api_key, "as_py"):
452+
api_key = api_key.as_py()
453+
return MultiSecretDemoState(api_key="" if api_key is None else str(api_key))
454+
415455
@classmethod
416456
def process(
417457
cls,

vgi/_test_fixtures/worker.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@
171171
RffStructScanFunction,
172172
RowIdSequenceFunction,
173173
SampleEchoFunction,
174+
MultiSecretDemoFunction,
174175
ScopedSecretDemoFunction,
175176
SecretDemoFunction,
176177
SequenceFunction,
@@ -412,6 +413,7 @@ def _build_enum_stats() -> dict[str, ColumnStatisticsInput]:
412413
SampleEchoFunction,
413414
RowIdSequenceFunction,
414415
SecretDemoFunction,
416+
MultiSecretDemoFunction,
415417
ScopedSecretDemoFunction,
416418
ExpressionFilterTestFunction,
417419
SequenceFunction,

vgi/scalar_function.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,12 +1067,30 @@ def _extract_compute_kwargs(
10671067
col_idx = settings_schema.get_field_index(setting_key)
10681068
kwargs[name] = bind_call.settings.column(col_idx)[0] if col_idx >= 0 else None
10691069

1070-
# Secret params: extract dict[str, pa.Scalar] from secrets RecordBatch
1070+
# Secret params: extract dict[str, pa.Scalar] from secrets RecordBatch.
1071+
# Secrets are keyed by secret NAME (not type), so match the requested
1072+
# secret_type on each resolved secret's serialized "type" field. The
1073+
# first secret of that type wins (use the scope-aware accessors on a
1074+
# table function's params.secrets to disambiguate several of one type).
10711075
if bind_call.secrets is not None and cls._secret_params:
1072-
secrets_schema = bind_call.secrets.schema
1076+
by_type: dict[str, dict[str, Any]] = {}
1077+
by_col: dict[str, dict[str, Any]] = {}
1078+
names = bind_call.secrets.schema.names
1079+
for i in range(bind_call.secrets.num_columns):
1080+
scalar = bind_call.secrets.column(i)[0]
1081+
if not scalar.is_valid:
1082+
continue
1083+
fields = _struct_scalar_to_dict(scalar)
1084+
t = fields.get("type")
1085+
t = t.as_py() if t is not None and hasattr(t, "as_py") else t
1086+
if t is not None and str(t) not in by_type:
1087+
by_type[str(t)] = fields
1088+
by_col.setdefault(names[i], fields)
10731089
for name, secret in cls._secret_params.items():
1074-
col_idx = secrets_schema.get_field_index(secret.secret_type)
1075-
kwargs[name] = _struct_scalar_to_dict(bind_call.secrets.column(col_idx)[0]) if col_idx >= 0 else None
1090+
chosen = by_type.get(secret.secret_type)
1091+
if chosen is None:
1092+
chosen = by_col.get(secret.secret_type)
1093+
kwargs[name] = chosen
10761094

10771095
# OutputLength param: pass the batch row count
10781096
if cls._output_length_param is not None:

vgi/table_function.py

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -240,24 +240,28 @@ def pending_lookups(self) -> list[SecretLookupEntry]:
240240
"""Return the list of pending secret lookups."""
241241
return list(self._pending_lookups)
242242

243-
def to_dict(self) -> dict[str, dict[str, pa.Scalar[Any]]]:
244-
"""Return all resolved secrets as a flat dict keyed by secret_type.
243+
def to_dict(self) -> "ResolvedSecrets":
244+
"""Return all resolved secrets keyed by secret name.
245245
246-
Combines unscoped entries (column name = secret_type) with scoped
247-
entries (``secret_N`` columns, keyed by ``secret_type`` from Arrow
248-
field metadata). Null/unresolved entries are omitted.
246+
Resolved secrets are keyed by their unique DuckDB secret name, so several
247+
secrets of the same type (e.g. one per S3 bucket) coexist. Each carries a
248+
``type`` field (the DuckDB secret type) and a ``scope`` field
249+
(newline-joined scope prefixes). Scoped ``secret_N`` columns (keyed by
250+
``secret_type`` from Arrow field metadata) are merged in. Null/unresolved
251+
entries are omitted.
249252
250253
Returns:
251-
Mapping of secret_type to its resolved key/scalar dict.
254+
A :class:`ResolvedSecrets` (a dict keyed by secret name) with
255+
type- and scope-aware selection helpers.
252256
253257
"""
254258
result = dict(self._unscoped)
255259
for meta, secret_dict in self._scoped:
256260
if secret_dict is not None:
257-
key = meta.get("secret_type", "")
261+
key = meta.get("secret_name") or meta.get("secret_type", "")
258262
if key:
259263
result[key] = secret_dict
260-
return result
264+
return ResolvedSecrets(result)
261265

262266
def _find_scoped(
263267
self,
@@ -287,6 +291,74 @@ def _find_scoped(
287291
return None
288292

289293

294+
def _secret_scalar_str(v: Any) -> str:
295+
"""Render a resolved-secret field (a pyarrow Scalar or plain value) to str."""
296+
if v is None:
297+
return ""
298+
py = v.as_py() if hasattr(v, "as_py") else v
299+
return "" if py is None else str(py)
300+
301+
302+
class ResolvedSecrets(dict):
303+
"""Resolved secrets keyed by secret name, with type- and scope-aware lookup.
304+
305+
A plain ``dict`` (so ``secrets[name]`` and ``secrets.get(name)`` still work)
306+
plus selectors that read each secret's connector-serialized ``type`` and
307+
``scope`` fields. Mirrors ``vgi::Secrets`` in the Rust SDK.
308+
"""
309+
310+
def secret_type(self, name: str) -> str | None:
311+
"""The DuckDB secret type of the named secret (its ``type`` field)."""
312+
fields = self.get(name)
313+
if not fields or "type" not in fields:
314+
return None
315+
return _secret_scalar_str(fields["type"])
316+
317+
def of_type(self, secret_type: str) -> list[dict[str, Any]]:
318+
"""Every resolved secret whose ``type`` field matches ``secret_type``."""
319+
return [
320+
f for f in self.values() if _secret_scalar_str(f.get("type")) == secret_type
321+
]
322+
323+
def for_scope(self, path: str) -> dict[str, Any] | None:
324+
"""The secret whose ``scope`` is the longest prefix of ``path``.
325+
326+
The connector serializes each secret's scope as a newline-joined list of
327+
prefixes; a secret with no (or empty) scope matches as a last-resort
328+
fallback. Returns ``None`` only when there are no candidate secrets.
329+
"""
330+
return self._select_for_scope(path, None)
331+
332+
def for_scope_of_type(self, path: str, secret_type: str) -> dict[str, Any] | None:
333+
"""Like :meth:`for_scope` but only over secrets of ``secret_type``."""
334+
return self._select_for_scope(path, secret_type)
335+
336+
def field_for(self, path: str, field: str) -> Any | None:
337+
"""A field of the best scope-matching secret for ``path``."""
338+
fields = self.for_scope(path)
339+
return None if fields is None else fields.get(field)
340+
341+
def _select_for_scope(
342+
self, path: str, secret_type: str | None
343+
) -> dict[str, Any] | None:
344+
best: dict[str, Any] | None = None
345+
best_len = -1
346+
fallback: dict[str, Any] | None = None
347+
for fields in self.values():
348+
if secret_type is not None and _secret_scalar_str(fields.get("type")) != secret_type:
349+
continue
350+
scope = _secret_scalar_str(fields.get("scope"))
351+
if not scope:
352+
if fallback is None:
353+
fallback = fields
354+
continue
355+
for prefix in scope.split("\n"):
356+
if prefix and path.startswith(prefix) and len(prefix) > best_len:
357+
best_len = len(prefix)
358+
best = fields
359+
return best if best is not None else fallback
360+
361+
290362
def project_schema(projection_ids: list[int] | None, schema: pa.Schema) -> pa.Schema:
291363
"""Create the projected schema if projection_ids are supplied.
292364

0 commit comments

Comments
 (0)